diff --git a/src/parse.py b/src/parse.py
index 93a8ebb..149e8ce 100644
--- a/src/parse.py
+++ b/src/parse.py
@@ -127,13 +127,15 @@
             None, "reading text", ParseLocation(line, column, self.filename)
         )
         start = self.pos
+        end_start = self.pos
         (token, _, _) = self.read_token()
         while token and token != "EndText":
+            end_start = self.pos
             (token, _, _) = self.read_token()
         if not token:
             raise ParseError(context, "Hit end of file before EndText")
         else:
-            return self.code[start : self.pos - len(" EndText") - 1]
+            return self.code[start : end_start - 1]
 
     def skip_shebang(self):
         log.log(log.LEXER, log.TRACE, "Skipping shebang")
diff --git a/tests/test_parse_regress.py b/tests/test_parse_regress.py
index 1ed111d..65ca9d9 100644
--- a/tests/test_parse_regress.py
+++ b/tests/test_parse_regress.py
@@ -11,3 +11,21 @@
     tokenizer = parse.Tokenizer("Hello", "")
     tokens = tokenizer.tokenize()
     assert tokens[0].value == "Hello"
+
+
+# The parser would read text literals by tracking the positions just after
+# the BeginText and EndText tokens, then reading the literal text between them.
+# It would automatically remove EndText as well as the character after it.
+# However, if EndText was the last token, this would cause the text to be cut off.
+# Make sure the parser can handle reading text at the end of a file.
+def test_regress_text_eof():
+    text = "Hi there!"
+    code = "BeginText " + text + " EndText"
+    tokenizer1 = parse.Tokenizer(code, "")
+    tokens1 = tokenizer1.tokenize()
+    tokenizer2 = parse.Tokenizer(code + " ", "")
+    tokens2 = tokenizer2.tokenize()
+    assert tokens1[0].type == "text"
+    assert tokens1[0].value == text
+    assert tokens2[0].type == "text"
+    assert tokens2[0].value == text