from hypothesis import given
from hypothesis.strategies import binary

from src import parse


def test_regress_eof():
    """Regression: tokens terminated by end-of-file must be read correctly.

    The tokenizer used to 'read token until whitespace' without handling
    the case where the input ends before any whitespace appears.
    """
    tok = parse.Tokenizer("Hello", "")
    result = tok.tokenize()
    assert result[0].value == "Hello"


def test_regress_text_eof():
    """Regression: a text literal whose EndText is the final token.

    Text literals were read by tracking positions just after the BeginText
    and EndText tokens, stripping EndText plus the character following it.
    When EndText was the very last token there was no following character,
    so the literal was truncated. Verify both the EOF-terminated form and
    the trailing-space form yield the same literal.
    """
    literal = "Hi there!"
    source = f"BeginText {literal} EndText"

    no_trailing = parse.Tokenizer(source, "").tokenize()
    with_trailing = parse.Tokenizer(source + " ", "").tokenize()

    for toks in (no_trailing, with_trailing):
        assert toks[0].type == "text"
        assert toks[0].value == literal


def test_regress_text_trailing_whitespace():
    """Regression: whitespace before EndText must be preserved.

    A later fix read the literal from just after BeginText up to the end of
    the token preceding EndText, which dropped any whitespace between that
    last token and EndText. Verify trailing whitespace survives intact.
    """
    literal = "Hi there!\n\n\n"
    source = f"BeginText {literal} EndText"

    toks = parse.Tokenizer(source, "").tokenize()
    assert toks[0].type == "text"
    assert toks[0].value == literal