NewLang/tests/test_parse_regress.py
from hypothesis import given
from hypothesis.strategies import binary

from src import parse


# The parser had some logic along the lines of 'read token until whitespace',
# but this didn't account for hitting the end of file.
# Make sure the parser can handle tokens terminated by end of file correctly.
def test_regress_eof():
    tokenizer = parse.Tokenizer("Hello", "")
    tokens = tokenizer.tokenize()
    assert tokens[0].value == "Hello"
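

# Not part of the test suite proper: an illustrative sketch (not the real
# parse.Tokenizer internals) of 'read token until whitespace' done EOF-safely,
# i.e. the behavior the regression above guards against.
def _scan_token_sketch(source, start):
    end = start
    # Stop at whitespace, but also stop cleanly when the source runs out
    # instead of assuming a whitespace terminator always exists.
    while end < len(source) and not source[end].isspace():
        end += 1
    return source[start:end]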


# The parser would read text literals by tracking the positions just after the
# StartText and EndText tokens, then reading the literal text between them.
# It would automatically remove EndText as well as the character after it.
# However, if EndText was the last token, this would cause the text to cut off.
# Make sure the parser can handle reading text at the end of a file.
def test_regress_text_eof():
    text = "Hi there!"
    code = "StartText " + text + " EndText"
    tokenizer1 = parse.Tokenizer(code, "")
    tokens1 = tokenizer1.tokenize()
    tokenizer2 = parse.Tokenizer(code + " ", "")
    tokens2 = tokenizer2.tokenize()
    assert tokens1[0].type == "text"
    assert tokens1[0].value == text
    assert tokens2[0].type == "text"
    assert tokens2[0].value == text
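

# Illustrative sketch only (not the real parser code): extracting the literal
# by slicing between the markers, dropping exactly one separator character on
# each side and nothing more, so a trailing EndText cannot eat into the text.
def _extract_text_literal_sketch(source):
    start = source.index("StartText") + len("StartText ")
    end = source.index("EndText", start) - len(" ")
    return source[start:end]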


# The parser would then read text literals by taking everything from just after
# StartText up to the end of the last token before EndText.
# This solved the previous bug, but cut off any whitespace between the last
# token and EndText.
# Make sure the parser can handle trailing whitespace properly now.
def test_regress_text_trailing_whitespace():
    text = "Hi there!\n\n\n"
    code = "StartText " + text + " EndText"
    tokenizer = parse.Tokenizer(code, "")
    tokens = tokenizer.tokenize()
    assert tokens[0].type == "text"
    assert tokens[0].value == text
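

# Sketch of a property-style generalization of test_regress_eof, using the
# hypothesis imports at the top of this file. Assumption (not verified against
# the real tokenizer): any plain alphabetic word other than the text markers
# behaves like "Hello" when it is the only, EOF-terminated token in the source.
import string

from hypothesis import assume
from hypothesis.strategies import text


@given(text(alphabet=string.ascii_letters, min_size=1, max_size=20))
def test_sketch_eof_terminated_word(word):
    assume(word not in ("StartText", "EndText"))
    tokens = parse.Tokenizer(word, "").tokenize()
    assert tokens[0].value == word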