diff --git a/src/parse.py b/src/parse.py
index faaf8f7..85a36b1 100644
--- a/src/parse.py
+++ b/src/parse.py
@@ -8,6 +8,7 @@
 # The type of syntax
 class SyntaxType(enum.Enum):
     TOKEN = enum.auto()  # pragma: no mutate
+    TEXT = enum.auto()  # pragma: no mutate
 
 
 # Represents a syntax node
@@ -67,6 +68,38 @@
     return output
 
+
+# Parses a text syntax node
+def parse_text(stream):
+    buffer = ""
+    # Parse the starting token
+    s = stream.pop()
+    if s is None:
+        return None
+    elif s.type != SyntaxType.TOKEN:
+        return None
+    elif s.value.value != "StartText":
+        return None
+    location = s.location
+    # Parse following tokens
+    while True:
+        s = stream.pop()
+        if s is None:
+            return None
+        elif s.type != SyntaxType.TOKEN:
+            return None
+        # Don't allow StartText in text
+        elif s.value.value in ["StartText"]:
+            return None
+        # EndText found, stop parsing
+        elif s.value.value == "EndText":
+            break
+        else:
+            buffer += s.value.value
+    value = buffer.strip("\n\t ")
+    type = SyntaxType.TEXT
+    return Syntax(value, location, type)
+
+
 # Parses tokens
 def parse(tokens):
     converted = import_tokens(tokens)
diff --git a/tests/test_parse.py b/tests/test_parse.py
index cb85ab3..d008aef 100644
--- a/tests/test_parse.py
+++ b/tests/test_parse.py
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: LGPL-2.1-only
 # Copyright 2022 Jookia
 
-from hypothesis import given
+from hypothesis import assume, given
 from hypothesis.strategies import composite, lists, text, sampled_from
 
 from src import tokenize
@@ -119,6 +119,56 @@
     assert parse.strip_whitespace(input) == syntax
 
+
+# Draws a random token suitable for text building
+@composite
+def draw_text_value_token(draw):
+    token = draw(draw_syntax_token())
+    assume(token.value.value not in ["StartText", "EndText"])
+    return token
+
+
+# Draws tokens to make a valid text string and its value
+@composite
+def draw_syntax_text_valid(draw):
+    tokens = draw(lists(draw_text_value_token()))
+    value = ""
+    for token in tokens:
+        value += token.value.value
+    s_value = draw(test_tokenize.draw_token_keyword())
+    s_value.value = "StartText"
+    s_location = draw(draw_syntax_location())
+    s_type = parse.SyntaxType.TOKEN
+    start = parse.Syntax(s_value, s_location, s_type)
+    e_value = draw(test_tokenize.draw_token_keyword())
+    e_value.value = "EndText"
+    e_location = draw(draw_syntax_location())
+    e_type = parse.SyntaxType.TOKEN
+    end = parse.Syntax(e_value, e_location, e_type)
+    all_tokens = [start] + tokens + [end]
+    text_value = value.strip("\n\t ")
+    result = parse.Syntax(text_value, s_location, parse.SyntaxType.TEXT)
+    return (all_tokens, result)
+
+
+# Tests parse_text works correctly
+# We expect the following behaviour:
+# - Only the text expression is parsed
+# - The resulting text is the value of tokens between StartText and EndText
+# - The resulting text has its surrounding whitespace stripped
+# - The Syntax's value is the resulting text
+# - The Syntax's type is SyntaxType.TEXT
+# - The Syntax's location is the StartText location
+@given(draw_syntax_random(), draw_syntax_text_valid())
+def test_parse_text_valid(canary, test_data):
+    (tokens, result) = test_data
+    stream = parse.SyntaxStream(tokens + [canary])
+    parsed = parse.parse_text(stream)
+    assert parsed is not None
+    assert parsed == result
+    assert stream.pop() == canary
+    assert stream.pop() is None
+
+
 # Tests the parser wrapper works correctly
 # We expect the following behaviour:
 # - Whitespace tokens are stripped
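
For context, a minimal sketch (not part of the patch) of how the new parse_text behaves on a hand-built token stream. Syntax, SyntaxStream, SyntaxType and parse_text come from src/parse.py above; the SimpleToken class and the "line 1" location are stand-ins for whatever the real tokenizer in src/tokenize produces.

# Sketch only: exercises parse_text outside the Hypothesis tests.
from src.parse import Syntax, SyntaxStream, SyntaxType, parse_text


class SimpleToken:
    # Stand-in for a real token: the parser only reads .value from it.
    def __init__(self, value):
        self.value = value


def token(word, location="line 1"):
    # Wrap a bare word the way parse_text expects: a TOKEN syntax node
    # whose value is a token object carrying a .value string.
    return Syntax(SimpleToken(word), location, SyntaxType.TOKEN)


words = ["StartText", "Hello, ", "world!", "EndText"]
stream = SyntaxStream([token(w) for w in words])
node = parse_text(stream)
assert node.type == SyntaxType.TEXT
assert node.value == "Hello, world!"  # tokens joined, whitespace stripped
assert node.location == "line 1"  # taken from the StartText token
assert stream.pop() is None  # everything up to EndText was consumed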