# SPDX-License-Identifier: LGPL-2.1-only
# Copyright 2022 Jookia <contact@jookia.org>

from hypothesis import assume, given
from hypothesis.strategies import (
    booleans,
    composite,
    integers,
    lists,
    one_of,
    sampled_from,
    text,
)

from src import tokenize
from src import parse
from tests import test_tokenize


# Draws a random syntax location
@composite
def draw_syntax_location(draw):
    return draw(test_tokenize.draw_token_location())


# Draws a random syntax type
@composite
def draw_syntax_type(draw):
    return draw(sampled_from(list(parse.SyntaxType)))


# Draws a token syntax value
@composite
def draw_syntax_token(draw):
    value = draw(test_tokenize.draw_token_classified())
    location = draw(draw_syntax_location())
    type = parse.SyntaxType.TOKEN
    return parse.Syntax(value, location, type)


# Draws a text syntax value
@composite
def draw_syntax_text(draw):
    value = draw(text())
    location = draw(draw_syntax_location())
    type = parse.SyntaxType.TEXT
    return parse.Syntax(value, location, type)


# Draws a random syntax
@composite
def draw_syntax_random(draw):
    strategies = [
        draw_syntax_token(),
        draw_syntax_text(),
    ]
    return draw(one_of(strategies))


# Tests syntax getters
@given(text(), draw_syntax_location(), draw_syntax_type())
def test_parse_syntax_getters(value, location, type):
    # Use text as a somewhat random value
    test = parse.Syntax(value, location, type)
    assert test.value == value
    assert test.location == location
    assert test.type == type


# Tests syntax equality
@given(draw_syntax_random(), draw_syntax_random())
def test_parse_syntax_equality(syntax1, syntax2):
    equals = (
        syntax1.type == syntax2.type
        and syntax1.value == syntax2.value
        and syntax1.location == syntax2.location
    )
    assert (syntax1 == syntax2) == equals
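

# For reference, the two tests above assume a Syntax node that stores its
# three fields verbatim and compares them memberwise. This is only an
# illustrative sketch of that contract, not the real parse.Syntax class:
class _SketchSyntax:
    def __init__(self, value, location, type):
        self.value = value
        self.location = location
        self.type = type

    def __eq__(self, other):
        # Memberwise comparison, mirroring test_parse_syntax_equality
        return (
            self.type == other.type
            and self.value == other.value
            and self.location == other.location
        )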


# Tests that a syntax stream reads items correctly
# We expect the following behaviour:
# - All items are popped in order
# - None is returned at the end of the stream
@given(lists(draw_syntax_random()))
def test_parse_syntax_stream(nodes):
    stream = parse.SyntaxStream(nodes.copy())
    read = []
    node = stream.pop()
    while node is not None:
        read.append(node)
        node = stream.pop()
    assert read == nodes
    assert stream.pop() is None
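

# For reference, the pop-until-None loop above assumes stream semantics
# like this minimal sketch. It illustrates the tested contract only; the
# real parse.SyntaxStream may be implemented differently:
class _SketchSyntaxStream:
    def __init__(self, nodes):
        self.nodes = nodes

    def pop(self):
        # Remove and return the first node, or None once exhausted
        if self.nodes:
            return self.nodes.pop(0)
        return None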


# Draws a token list and the syntax expected from importing it
@composite
def draw_syntax_imported(draw):
    input = draw(lists(test_tokenize.draw_token_random()))
    tokens = []
    for t in input:
        tokens.append(parse.Syntax(t, t.location, parse.SyntaxType.TOKEN))
    return (input, tokens)


# Tests importing tokens works correctly
# We expect the following behaviour:
# - Each token is converted to a Syntax
# - The Syntax's value is the token
# - The Syntax's location is the token location
# - The Syntax's type is SyntaxType.TOKEN
@given(draw_syntax_imported())
def test_parse_import_tokens(test_data):
    (input, syntax) = test_data
    assert parse.import_tokens(input) == syntax
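

# For reference, the import contract tested above amounts to this sketch
# (illustrative only; parse.import_tokens may be written differently):
def _sketch_import_tokens(tokens):
    # Wrap each token in a TOKEN syntax node at the token's own location
    return [parse.Syntax(t, t.location, parse.SyntaxType.TOKEN) for t in tokens]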


# Draws a syntax list and the same list with whitespace tokens removed
@composite
def draw_syntax_whitespace(draw):
    input = draw(lists(draw_syntax_random()))
    syntax = []
    for s in input:
        if s.type != parse.SyntaxType.TOKEN or s.value.type not in [
            tokenize.TokenType.SPACE,
            tokenize.TokenType.NEWLINE,
        ]:
            syntax.append(s)
    return (input, syntax)


# Tests strip_whitespace works correctly
# We expect the following behaviour:
# - No syntax is modified
# - Tokens of type SPACE or NEWLINE are removed from the output
@given(draw_syntax_whitespace())
def test_parse_strip_whitespace(test_data):
    (input, syntax) = test_data
    assert parse.strip_whitespace(input) == syntax
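

# For reference, the stripping contract tested above amounts to this sketch
# (illustrative only; parse.strip_whitespace may be written differently):
def _sketch_strip_whitespace(syntax):
    # Keep every node except TOKEN nodes wrapping a SPACE or NEWLINE token
    return [
        s
        for s in syntax
        if s.type != parse.SyntaxType.TOKEN
        or s.value.type
        not in [tokenize.TokenType.SPACE, tokenize.TokenType.NEWLINE]
    ]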


# Draws a random token suitable for text building
@composite
def draw_text_value_token(draw):
    token = draw(draw_syntax_token())
    assume(token.value.value not in ["StartText", "EndText"])
    return token


# Draws tokens forming a valid text expression and its expected parsed result
@composite
def draw_syntax_text_valid(draw):
    tokens = draw(lists(draw_text_value_token()))
    value = ""
    for token in tokens:
        value += token.value.value
    s_value = draw(test_tokenize.draw_token_keyword())
    s_value.value = "StartText"
    s_location = draw(draw_syntax_location())
    s_type = parse.SyntaxType.TOKEN
    start = parse.Syntax(s_value, s_location, s_type)
    e_value = draw(test_tokenize.draw_token_keyword())
    e_value.value = "EndText"
    e_location = draw(draw_syntax_location())
    e_type = parse.SyntaxType.TOKEN
    end = parse.Syntax(e_value, e_location, e_type)
    all_tokens = [start] + tokens + [end]
    text_value = value.strip("\n\t ")
    result = parse.Syntax(text_value, s_location, parse.SyntaxType.TEXT)
    return (all_tokens, result)
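

# As a concrete example of the generator above: a drawn stream whose token
# values are StartText, " Hello", "world\n", EndText concatenates to
# " Helloworld\n", which strips to a TEXT syntax node with the value
# "Helloworld" at the StartText token's location.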


# Tests parse_text works correctly
# We expect the following behaviour:
# - Only the text expression is parsed
# - The resulting text is the value of tokens between StartText and EndText
# - The resulting text has its surrounding whitespace stripped
# - The Syntax's value is the resulting text
# - The Syntax's type is SyntaxType.TEXT
# - The Syntax's location is the StartText location
@given(draw_syntax_random(), draw_syntax_text_valid())
def test_parse_text_valid(canary, test_data):
    (tokens, result) = test_data
    stream = parse.SyntaxStream(tokens + [canary])
    parsed = parse.parse_text(stream)
    assert parsed is not None
    assert parsed == result
    assert stream.pop() == canary
    assert stream.pop() is None


# Generate text without StartText
# We expect the following behaviour:
# - Error if there is no StartText node at all
# - Error if StartText is not a SyntaxType.TOKEN
# - Error if StartText's token type is not TokenType.KEYWORD
# - Error if StartText's token value is not "StartText"
@composite
def draw_syntax_text_invalid_nostarttext(draw):
    (tokens, _) = draw(draw_syntax_text_valid())
    if draw(booleans()):
        token = draw(draw_syntax_random())
        assume(
            not (
                token.type == parse.SyntaxType.TOKEN
                and token.value.type == tokenize.TokenType.KEYWORD
                and token.value.value == "StartText"
            )
        )
        new_tokens = [token] + tokens[1:]
        return new_tokens
    else:
        return []


# Generate text with invalid content tokens
# We expect the following behaviour:
# - Error if a content token is not a SyntaxType.TOKEN
@composite
def draw_syntax_text_invalid_invalidcontent(draw):
    (tokens, _) = draw(draw_syntax_text_valid())
    token = draw(draw_syntax_random())
    assume(token.type != parse.SyntaxType.TOKEN)
    pos = draw(integers(min_value=1, max_value=(len(tokens) - 1)))
    new_tokens = tokens[0:pos] + [token] + tokens[pos:]
    return new_tokens


# Generate text without EndText
# We expect the following behaviour:
# - Error if there is no EndText node at all
@composite
def draw_syntax_text_invalid_noendtext(draw):
    (tokens, _) = draw(draw_syntax_text_valid())
    return tokens[0:-1]


# Generate an invalid text case
@composite
def draw_syntax_text_invalid(draw):
    strategies = [
        draw_syntax_text_invalid_nostarttext(),
        draw_syntax_text_invalid_invalidcontent(),
        draw_syntax_text_invalid_noendtext(),
    ]
    return draw(one_of(strategies))


# Tests that parse_text returns None in invalid cases
@given(draw_syntax_text_invalid())
def test_parse_text_invalid(test_data):
    tokens = test_data
    stream = parse.SyntaxStream(tokens)
    parsed = parse.parse_text(stream)
    assert parsed is None


# Tests the parser wrapper works correctly
# We expect the following behaviour:
# - Tokens are imported into syntax nodes
# - Whitespace tokens are stripped
@given(lists(test_tokenize.draw_token_classified()))
def test_parse_fuzz(tokens):
    converted = parse.import_tokens(tokens)
    stripped = parse.strip_whitespace(converted)
    parsed = parse.parse(tokens)
    assert stripped == parsed
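

# For reference, the wrapper contract this fuzz test pins down is the
# following pipeline. This sketches the observable behaviour only, not
# necessarily the actual body of parse.parse:
def _sketch_parse(tokens):
    return parse.strip_whitespace(parse.import_tokens(tokens))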