diff --git a/parse.py b/parse.py
index 21f8682..a268459 100644
--- a/parse.py
+++ b/parse.py
@@ -11,59 +11,63 @@
     def __repr__(self):
         return "Token(type %s, value '%s')" % (self.type, self.value)
 
-def tokenize(code):
-    tokens = []
-    token = ""
-    text = ""
-    mode = "normal" # normal/note/text
-    for symbol in code:
-        if symbol == '\n':
-            symbol_print = "new line"
-        else:
-            symbol_print = "character '%s'" % (symbol)
-        log.log(log.LEXER, log.TRACE, "Read %s" % (symbol_print))
-        if symbol == " " or symbol == "\t" or symbol == "\n":
-            log.log(log.LEXER, log.TRACE, "Read token '%s'" % (token))
-            if token == "":
-                pass
-            elif token == "BeginNote":
-                log.log(log.LEXER, log.TRACE, "Switching to note mode")
-                mode = "note"
-            elif token == "EndNote":
-                log.log(log.LEXER, log.TRACE, "Ending note mode")
-                mode = "normal"
-            elif token == "BeginText":
-                log.log(log.LEXER, log.TRACE, "Switching to text mode")
-                mode = "text"
-            elif token == "EndText":
-                log.log(log.LEXER, log.TRACE, "Ending text mode")
-                content = text[1:-8]
-                log.log(log.LEXER, log.DEBUG, "Appending text '%s'" % (content))
-                tokens.append(Token("text", content))
-                mode = "normal"
-                text = ""
-            elif token != "":
-                if mode == "normal":
-                    keywords = ["NewLang", "Done", "Set", "To", "EndSet",
-                                "If", "Then", "Else", "EndIf"]
-                    if token in keywords:
-                        type = "keyword"
-                        token = token.lower()
+class Tokenizer:
+    def __init__(self, input):
+        self.code = input
+
+    def tokenize(self):
+        tokens = []
+        token = ""
+        text = ""
+        mode = "normal" # normal/note/text
+        for symbol in self.code:
+            if symbol == '\n':
+                symbol_print = "new line"
+            else:
+                symbol_print = "character '%s'" % (symbol)
+            log.log(log.LEXER, log.TRACE, "Read %s" % (symbol_print))
+            if symbol == " " or symbol == "\t" or symbol == "\n":
+                log.log(log.LEXER, log.TRACE, "Read token '%s'" % (token))
+                if token == "":
+                    pass
+                elif token == "BeginNote":
+                    log.log(log.LEXER, log.TRACE, "Switching to note mode")
+                    mode = "note"
+                elif token == "EndNote":
+                    log.log(log.LEXER, log.TRACE, "Ending note mode")
+                    mode = "normal"
+                elif token == "BeginText":
+                    log.log(log.LEXER, log.TRACE, "Switching to text mode")
+                    mode = "text"
+                elif token == "EndText":
+                    log.log(log.LEXER, log.TRACE, "Ending text mode")
+                    content = text[1:-8]
+                    log.log(log.LEXER, log.DEBUG, "Appending text '%s'" % (content))
+                    tokens.append(Token("text", content))
+                    mode = "normal"
+                    text = ""
+                elif token != "":
+                    if mode == "normal":
+                        keywords = ["NewLang", "Done", "Set", "To", "EndSet",
+                                    "If", "Then", "Else", "EndIf"]
+                        if token in keywords:
+                            type = "keyword"
+                            token = token.lower()
+                        else:
+                            type = "symbol"
+                        tok = Token(type, token)
+                        log.log(log.LEXER, log.DEBUG, "Appending %s" % (tok))
+                        tokens.append(tok)
                     else:
-                        type = "symbol"
-                    tok = Token(type, token)
-                    log.log(log.LEXER, log.DEBUG, "Appending %s" % (tok))
-                    tokens.append(tok)
-                else:
-                    log.log(log.LEXER, log.TRACE, "Skipping token '%s'" % (token))
-            token = ""
-        else:
-            token += symbol
-        if mode == "text":
-            text += symbol
-    log.log(log.LEXER, log.TRACE, "Done lexing, adding EOF")
-    tokens.append(Token("EOF", None))
-    return tokens
+                        log.log(log.LEXER, log.TRACE, "Skipping token '%s'" % (token))
+                token = ""
+            else:
+                token += symbol
+            if mode == "text":
+                text += symbol
+        log.log(log.LEXER, log.TRACE, "Done lexing, adding EOF")
+        tokens.append(Token("EOF", None))
+        return tokens
 
 class Reference:
     def __init__(self, value):
@@ -264,7 +268,8 @@
     return ast
 
 def parse_file(code):
-    tokens = tokenize(code)
+    tokenizer = Tokenizer(code)
+    tokens = tokenizer.tokenize()
     try:
         parser = Parser(tokens)
         return parser.parse_file()
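
For reference, a minimal sketch of how the refactored lexer could be driven on its own, assuming parse.py (and the log module it relies on) imports cleanly; the sample NewLang program text is a hypothetical illustration, not taken from this change:

# Hypothetical usage of the new Tokenizer class from parse.py.
# The source string below is an assumed example program.
from parse import Tokenizer

code = "NewLang Set greeting To BeginText Hello world EndText EndSet Done\n"

tokenizer = Tokenizer(code)
tokens = tokenizer.tokenize()

# Keywords come back lowercased and the stream is EOF-terminated, so this
# prints e.g. Token(type keyword, value 'newlang'), ..., Token(type text,
# value 'Hello world'), ..., Token(type EOF, value 'None').
for tok in tokens:
    print(tok)

Behaviorally the refactor is a straight move: wrapping the lexer in a class keeps parse_file unchanged apart from constructing a Tokenizer first, and gives later commits a natural place to hold additional lexer state on self.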