diff --git a/parse.py b/parse.py
index a268459..0e84ec2 100644
--- a/parse.py
+++ b/parse.py
@@ -14,20 +14,30 @@
 class Tokenizer:
     def __init__(self, input):
         self.code = input
+        self.pos = 0
+
+    def next(self):
+        """Return the next input character and advance, or None at end of input."""
+        if self.pos >= len(self.code):
+            log.log(log.LEXER, log.TRACE, "Reached end of file")
+            return None
+        else:
+            symbol = self.code[self.pos]
+            symbol_print = "character '%s'" % (symbol)
+            if symbol == '\n':
+                symbol_print = "new line"
+            log.log(log.LEXER, log.TRACE, "Read %s" % (symbol_print))
+            self.pos += 1
+            return symbol
 
     def tokenize(self):
         tokens = []
         token = ""
         text = ""
         mode = "normal" # normal/note/text
-        for symbol in self.code:
-            if symbol == '\n':
-                symbol_print = "new line"
-            else:
-                symbol_print = "character '%s'" % (symbol)
-            log.log(log.LEXER, log.TRACE, "Read %s" % (symbol_print))
+        symbol = self.next()
+        while symbol is not None:  # next() returns None at end of input
             if symbol == " " or symbol == "\t" or symbol == "\n":
-                log.log(log.LEXER, log.TRACE, "Read token '%s'" % (token))
                 if token == "":
                     pass
                 elif token == "BeginNote":
@@ -65,6 +75,7 @@
                     token += symbol
             if mode == "text":
                 text += symbol
+            symbol = self.next()
         log.log(log.LEXER, log.TRACE, "Done lexing, adding EOF")
         tokens.append(Token("EOF", None))
         return tokens