from hypothesis import given
from hypothesis.strategies import binary

from src import parse


def test_regress_eof():
    """Regression: tokens terminated by end-of-file must be read correctly.

    The tokenizer used to 'read token until whitespace' without handling
    the case where the input ends before any whitespace appears.
    """
    tok = parse.Tokenizer("Hello", "")
    result = tok.tokenize()
    assert result[0].value == "Hello"


def test_regress_text_eof():
    """Regression: a text literal whose EndText is the final token.

    Text literals were read by tracking positions just after the BeginText
    and EndText tokens, stripping EndText plus the character following it.
    When EndText was the very last token there was no following character,
    so the literal was truncated. Verify both the EOF-terminated form and
    the trailing-space form yield the same literal.
    """
    literal = "Hi there!"
    source = f"BeginText {literal} EndText"

    no_trailing = parse.Tokenizer(source, "").tokenize()
    with_trailing = parse.Tokenizer(source + " ", "").tokenize()

    for toks in (no_trailing, with_trailing):
        assert toks[0].type == "text"
        assert toks[0].value == literal


def test_regress_text_trailing_whitespace():
    """Regression: whitespace before EndText must be preserved.

    A later fix read the literal from just after BeginText up to the end of
    the token preceding EndText, which dropped any whitespace between that
    last token and EndText. Verify trailing whitespace survives intact.
    """
    literal = "Hi there!\n\n\n"
    source = f"BeginText {literal} EndText"

    toks = parse.Tokenizer(source, "").tokenize()
    assert toks[0].type == "text"
    assert toks[0].value == literal