# NewLang/tests/test_parse.py
# SPDX-License-Identifier: LGPL-2.1-only
# Copyright 2022 Jookia <contact@jookia.org>

from hypothesis import assume, given
from hypothesis.strategies import (
    booleans,
    composite,
    integers,
    lists,
    one_of,
)

from src import parse
from src.syntax import Syntax, SyntaxStream, SyntaxType
from tests import test_tokenize


# Tests that a syntax stream reads items correctly
# We expect the following behaviour:
# - All items are popped in order
# - None is returned at the end of the stream
@given(lists(test_tokenize.draw_syntax_random()))
def test_parse_syntax_stream(nodes):
    stream = SyntaxStream(nodes.copy())
    read = []
    node = stream.pop()
    while node is not None:
        read.append(node)
        node = stream.pop()
    assert read == nodes
    assert stream.pop() is None
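

# For reference, a minimal stream consistent with the behaviour tested above
# could look like the sketch below. This is a hypothetical illustration; the
# real SyntaxStream imported from src.syntax may differ.
class _ReferenceSyntaxStream:
    def __init__(self, nodes):
        self.nodes = nodes

    # Removes and returns the next node, or None once the stream is empty
    def pop(self):
        if not self.nodes:
            return None
        return self.nodes.pop(0)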


# Draws a list of syntax nodes, and the same list with whitespace tokens removed
@composite
def draw_syntax_whitespace(draw):
    nodes = draw(lists(test_tokenize.draw_syntax_random()))
    syntax = []
    for s in nodes:
        if s.type != SyntaxType.TOKEN or s.value not in ["\n", " "]:
            syntax.append(s)
    return (nodes, syntax)


# Tests strip_whitespace works correctly
# We expect the following behaviour:
# - No syntax is modified
# - Tokens of value '\n' or ' ' are removed from the output
@given(draw_syntax_whitespace())
def test_parse_strip_whitespace(test_data):
    (nodes, syntax) = test_data
    assert parse.strip_whitespace(nodes) == syntax
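

# For reference, a strip_whitespace consistent with the behaviour tested
# above could be written as the sketch below. This is a hypothetical
# illustration; the real implementation in src.parse may differ.
def _reference_strip_whitespace(syntax):
    # Keep every node that isn't a newline or space token
    return [
        s
        for s in syntax
        if not (s.type == SyntaxType.TOKEN and s.value in ["\n", " "])
    ]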


# Draws a random token suitable for text building
@composite
def draw_text_value_token(draw):
    token = draw(test_tokenize.draw_syntax_token())
    assume(token.value not in ["StartText", "EndText"])
    return token


# Draws a token with a given value at a random location
@composite
def draw_token_by_value(draw, value):
    location = draw(test_tokenize.draw_token_location())
    return Syntax(value, location, SyntaxType.TOKEN)


# Draws tokens forming a valid text expression, and the expected parse result
@composite
def draw_syntax_text_valid(draw):
    tokens = draw(lists(draw_text_value_token()))
    value = ""
    for token in tokens:
        value += token.value
    start = draw(draw_token_by_value("StartText"))
    end = draw(draw_token_by_value("EndText"))
    all_tokens = [start] + tokens + [end]
    text_value = value.strip("\n\t ")
    result = Syntax(text_value, start.location, SyntaxType.TEXT)
    return (all_tokens, result)
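

# For example, drawing content tokens with values "Hello" and " world "
# gives the token list [StartText, "Hello", " world ", EndText] and the
# expected result Syntax("Hello world", start.location, SyntaxType.TEXT).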


# Tests parse_text works correctly
# We expect the following behaviour:
# - Only the text expression is parsed
# - The resulting text is the value of tokens between StartText and EndText
# - The resulting text has its surrounding whitespace stripped
# - The Syntax's value is the resulting text
# - The Syntax's type is SyntaxType.TEXT
# - The Syntax's location is the StartText location
@given(test_tokenize.draw_syntax_random(), draw_syntax_text_valid())
def test_parse_text_valid(canary, test_data):
    (tokens, result) = test_data
    stream = SyntaxStream(tokens + [canary])
    parsed = parse.parse_text(stream)
    assert parsed is not None
    assert parsed == result
    assert stream.pop() == canary
    assert stream.pop() is None
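

# For reference, a parse_text consistent with the behaviour tested in this
# file (including the invalid cases below) could look like this sketch. It
# is a hypothetical illustration; the real parse.parse_text may differ.
def _reference_parse_text(stream):
    # The expression must open with a StartText token
    start = stream.pop()
    if start is None or start.type != SyntaxType.TOKEN:
        return None
    if start.value != "StartText":
        return None
    # Concatenate content token values until EndText
    value = ""
    node = stream.pop()
    while node is not None and not (
        node.type == SyntaxType.TOKEN and node.value == "EndText"
    ):
        # Content must be tokens, and a nested StartText is an error
        if node.type != SyntaxType.TOKEN or node.value == "StartText":
            return None
        value += node.value
        node = stream.pop()
    # Running out of nodes before EndText is an error
    if node is None:
        return None
    # The text is the content with surrounding whitespace stripped,
    # located at the StartText token
    return Syntax(value.strip("\n\t "), start.location, SyntaxType.TEXT)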


# Generate text without StartText
# We expect the following behaviour:
# - Error if there is no StartText node at all
# - Error if StartText is not a SyntaxType.TOKEN
# - Error if StartText's token value is not "StartText"
@composite
def draw_syntax_text_invalid_nostarttext(draw):
    (tokens, _) = draw(draw_syntax_text_valid())
    if draw(booleans()):
        token = draw(test_tokenize.draw_syntax_random())
        assume(not (token.type == SyntaxType.TOKEN and token.value == "StartText"))
        new_tokens = [token] + tokens[1:]
        return new_tokens
    else:
        return []


# Generate text with invalid content tokens
# We expect the following behaviour:
# - Error if a content token is not a SyntaxType.TOKEN
@composite
def draw_syntax_text_invalid_invalidcontent(draw):
    (tokens, _) = draw(draw_syntax_text_valid())
    token = draw(test_tokenize.draw_syntax_random())
    assume(token.type != SyntaxType.TOKEN)
    pos = draw(integers(min_value=1, max_value=(len(tokens) - 1)))
    new_tokens = tokens[0:pos] + [token] + tokens[pos:]
    return new_tokens


# Generate text with a StartText token in it
# We expect the following behaviour:
# - Error if a StartText token is in the text content
@composite
def draw_syntax_text_invalid_extrastarttext(draw):
    (tokens, _) = draw(draw_syntax_text_valid())
    start = draw(draw_token_by_value("StartText"))
    pos = draw(integers(min_value=1, max_value=(len(tokens) - 1)))
    new_tokens = tokens[0:pos] + [start] + tokens[pos:]
    return new_tokens


# Generate text without EndText
# We expect the following behaviour:
# - Error if there is no EndText node at all
@composite
def draw_syntax_text_invalid_noendtext(draw):
    (tokens, _) = draw(draw_syntax_text_valid())
    return tokens[0:-1]


# Generate an invalid text case
@composite
def draw_syntax_text_invalid(draw):
    strategies = [
        draw_syntax_text_invalid_nostarttext(),
        draw_syntax_text_invalid_invalidcontent(),
        draw_syntax_text_invalid_extrastarttext(),
        draw_syntax_text_invalid_noendtext(),
    ]
    return draw(one_of(strategies))


# Test that parse_text errors in invalid cases
@given(draw_syntax_text_invalid())
def test_parse_text_invalid(test_data):
    tokens = test_data
    stream = SyntaxStream(tokens)
    parsed = parse.parse_text(stream)
    assert parsed is None


# Tests the parser wrapper works correctly
# We expect the following behaviour:
# - Whitespace tokens are stripped
@given(lists(test_tokenize.draw_token_classified()))
def test_parse_fuzz(tokens):
    stripped = parse.strip_whitespace(tokens)
    parsed = parse.parse(tokens)
    assert stripped == parsed
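

# For reference, a wrapper consistent with the fuzz test above could be as
# simple as the sketch below. This is a hypothetical illustration; the real
# parse.parse may do more than this.
def _reference_parse(tokens):
    # For now the parser only strips whitespace from the token stream
    return parse.strip_whitespace(tokens)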