# NewLang / tests / test_parse.py
# SPDX-License-Identifier: LGPL-2.1-only
# Copyright 2022 Jookia <contact@jookia.org>

from hypothesis import assume, given
from hypothesis.strategies import (
    booleans,
    composite,
    integers,
    lists,
    one_of,
    sampled_from,
    text,
)

from src.parse import (
    NoteSkipper,
    ParseError,
    ParseErrorException,
    Parser,
    parse,
)
from src.syntax import Syntax, SyntaxStream, SyntaxType
from tests.test_syntax import (
    draw_token_bool,
    draw_token_classified,
    draw_syntax_random,
    draw_syntax_location,
    draw_syntax_token,
)


# Inserts an element into a list at a randomly drawn interior position
# The position is drawn between 1 and len(list) - 1, so the new element
# never ends up as the first or the last item of the result
# A new list is returned; the list passed in is left untouched
def insert_random(draw, list, data):
    index = draw(integers(min_value=1, max_value=len(list) - 1))
    before = list[:index]
    after = list[index:]
    return before + [data] + after


# Draws one of the values obtained by iterating over ParseError
@composite
def draw_parse_error(draw):
    candidates = list(ParseError)
    return draw(sampled_from(candidates))


# Draws a ParseErrorException built from a random error, a random syntax
# and a random expected text
@composite
def draw_parse_error_exception(draw):
    # Arguments are evaluated left to right: error, then syntax, then text
    return ParseErrorException(
        draw(draw_parse_error()),
        draw(draw_syntax_random()),
        draw(text()),
    )


# Test that a ParseErrorException exposes its constructor arguments
# We expect the following behaviour:
# - error, syntax and expected read back the values passed in
@given(draw_parse_error(), draw_syntax_random(), text())
def test_syntax_syntax_getters(error, syntax, expected):
    exception = ParseErrorException(error, syntax, expected)
    assert exception.error == error
    assert exception.syntax == syntax
    assert exception.expected == expected


# Test parse error exception equality
# We expect the following behaviour:
# - Two exceptions compare equal exactly when their error, syntax and
#   expected attributes all compare equal
@given(draw_parse_error_exception(), draw_parse_error_exception())
def test_syntax_syntax_equality(except1, except2):
    fields = ("error", "syntax", "expected")
    same_fields = all(
        getattr(except1, name) == getattr(except2, name) for name in fields
    )
    assert (except1 == except2) == same_fields


# Draws a random token usable as text content
# Tokens whose value is StartText or EndText are rejected
@composite
def draw_text_value_token(draw):
    delimiters = ("StartText", "EndText")
    candidate = draw(draw_syntax_token())
    assume(candidate.value not in delimiters)
    return candidate


# Draws a SyntaxType.TOKEN syntax with the given value and a random location
@composite
def draw_token_by_value(draw, value):
    where = draw(draw_syntax_location())
    return Syntax(value, where, SyntaxType.TOKEN)


# Draws tokens to make a valid text string and its value
# Returns a tuple of:
# - The token list: StartText, the content tokens, then EndText
# - The expected Syntax: a SyntaxType.TEXT at the StartText location whose
#   value is the content token values joined by single spaces
@composite
def draw_syntax_text_valid(draw):
    tokens = draw(lists(draw_text_value_token()))
    # An empty content list gives an empty string
    value = " ".join(token.value for token in tokens)
    start = draw(draw_token_by_value("StartText"))
    end = draw(draw_token_by_value("EndText"))
    all_tokens = [start] + tokens + [end]
    result = Syntax(value, start.location, SyntaxType.TEXT)
    return (all_tokens, result)


# Tests parse_text works correctly
# We expect the following behaviour:
# - Only the text expression is parsed
# - The resulting text is the value of tokens between StartText and EndText
# - The value of the tokens is joined by U+0020 SPACE code points
# - The Syntax's value is the resulting text
# - The Syntax's type is SyntaxType.TEXT
# - The Syntax's location is the StartText location
@given(draw_syntax_random(), draw_syntax_text_valid())
def test_parse_text_valid(canary, test_data):
    text_tokens, expected = test_data
    stream = SyntaxStream(text_tokens + [canary])
    actual = Parser().parse_text(stream)
    assert actual is not None
    assert actual == expected
    # The canary must be the only item left behind in the stream
    assert stream.pop() == canary
    assert stream.pop() is None


# Generate text without StartText
# We expect the following behaviour:
# - Error if there is no StartText node at all
# - Error if StartText is not a SyntaxType.TOKEN
# - Error if StartText's token value is not "StartText"
# Returns a tuple of the token list and the expected ParseErrorException
@composite
def draw_syntax_text_invalid_nostarttext(draw):
    (tokens, _) = draw(draw_syntax_text_valid())
    if draw(booleans()):
        token = draw(draw_syntax_random())
        assume(not (token.type == SyntaxType.TOKEN and token.value == "StartText"))
        # Replace the StartText with the bad token but keep the rest of the
        # text, so the stream still has content after the offending item
        new_tokens = [token] + tokens[1:]
        if token.type == SyntaxType.TOKEN:
            error = ParseErrorException(ParseError.WRONG_TOKEN, token, "StartText")
        else:
            error = ParseErrorException(ParseError.NOT_TOKEN, token, None)
        return (new_tokens, error)
    else:
        # Empty stream: there is nothing to read at all
        error = ParseErrorException(ParseError.NO_TOKEN, None, None)
        return ([], error)


# Generate text with invalid content tokens
# We expect the following behaviour:
# - Error if a content token is not a SyntaxType.TOKEN
# Returns a tuple of the token list and the expected ParseErrorException
@composite
def draw_syntax_text_invalid_invalidcontent(draw):
    (valid_tokens, _) = draw(draw_syntax_text_valid())
    intruder = draw(draw_syntax_random())
    assume(intruder.type != SyntaxType.TOKEN)
    # Place the non-token somewhere after StartText and before EndText
    corrupted = insert_random(draw, valid_tokens, intruder)
    return (corrupted, ParseErrorException(ParseError.NOT_TOKEN, intruder, None))


# Generate text with a StartText token in it
# We expect the following behaviour:
# - Error if a StartText token is in the text content
# Returns a tuple of the token list and the expected ParseErrorException
@composite
def draw_syntax_text_invalid_extrastarttext(draw):
    (valid_tokens, _) = draw(draw_syntax_text_valid())
    extra_start = draw(draw_token_by_value("StartText"))
    # Place the second StartText after the real one and before EndText
    corrupted = insert_random(draw, valid_tokens, extra_start)
    expected = ParseErrorException(ParseError.FOUND_STARTTEXT, extra_start, None)
    return (corrupted, expected)


# Generate text without EndText
# We expect the following behaviour:
# - Error if there is no EndText node at all
# Returns a tuple of the token list and the expected ParseErrorException
@composite
def draw_syntax_text_invalid_noendtext(draw):
    (valid_tokens, _) = draw(draw_syntax_text_valid())
    # The final item of a valid text is its EndText, so cut it off
    truncated = valid_tokens[:-1]
    return (truncated, ParseErrorException(ParseError.NO_TOKEN, None, None))


# Generate an invalid text case
# Draws from one of the invalid text generators: missing StartText,
# non-token content, extra StartText or missing EndText
@composite
def draw_syntax_text_invalid(draw):
    cases = (
        draw_syntax_text_invalid_nostarttext(),
        draw_syntax_text_invalid_invalidcontent(),
        draw_syntax_text_invalid_extrastarttext(),
        draw_syntax_text_invalid_noendtext(),
    )
    return draw(one_of(*cases))


# Test that parse_text errors in invalid cases
# We expect the following behaviour:
# - parse_text raises a ParseErrorException equal to the expected one
# - Returning a value instead of raising fails the test
@given(draw_syntax_text_invalid())
def test_parse_text_invalid(test_data):
    tokens, expected_error = test_data
    stream = SyntaxStream(tokens)
    try:
        parsed = Parser().parse_text(stream)
    except ParseErrorException as caught:
        assert caught == expected_error
    else:
        raise AssertionError("Parsed invalid data: %s" % (parsed))


# Draws a random token usable as note content
# Tokens whose value is StartNote or EndNote are rejected
@composite
def draw_note_value_token(draw):
    delimiters = ("StartNote", "EndNote")
    candidate = draw(draw_syntax_token())
    assume(candidate.value not in delimiters)
    return candidate


# Draws tokens to make a valid note
# The result is a list: StartNote, the content tokens, then EndNote
@composite
def draw_syntax_note_valid(draw):
    content = draw(lists(draw_note_value_token()))
    opening = draw(draw_token_by_value("StartNote"))
    closing = draw(draw_token_by_value("EndNote"))
    return [opening] + content + [closing]


# Tests skip_note works correctly
# We expect the following behaviour:
# - Only the note expression is parsed
# - No value is returned
# - All tokens are consumed up to and including EndNote
@given(draw_syntax_random(), draw_syntax_note_valid())
def test_parse_note_valid(canary, test_data):
    note_tokens = test_data
    stream = SyntaxStream(note_tokens + [canary])
    outcome = NoteSkipper().skip_note(stream)
    assert outcome is None
    # The canary must be the only item left behind in the stream
    assert stream.pop() == canary
    assert stream.pop() is None


# Generate note without StartNote
# We expect the following behaviour:
# - Error if there is no StartNote node at all
# - Error if StartNote is not a SyntaxType.TOKEN
# - Error if StartNote's token value is not "StartNote"
# Returns a tuple of the token list and the expected ParseErrorException
@composite
def draw_syntax_note_invalid_nostartnote(draw):
    tokens = draw(draw_syntax_note_valid())
    if draw(booleans()):
        token = draw(draw_syntax_random())
        assume(not (token.type == SyntaxType.TOKEN and token.value == "StartNote"))
        # Replace the StartNote with the bad token but keep the rest of the
        # note, so the stream still has content after the offending item
        new_tokens = [token] + tokens[1:]
        if token.type == SyntaxType.TOKEN:
            error = ParseErrorException(ParseError.WRONG_TOKEN, token, "StartNote")
        else:
            error = ParseErrorException(ParseError.NOT_TOKEN, token, None)
        return (new_tokens, error)
    else:
        # Empty stream: there is nothing to read at all
        error = ParseErrorException(ParseError.NO_TOKEN, None, None)
        return ([], error)


# Generate note with a StartNote token in it
# We expect the following behaviour:
# - Error if a StartNote token is in the note content
# Returns a tuple of the token list and the expected ParseErrorException
@composite
def draw_syntax_note_invalid_extrastartnote(draw):
    valid_tokens = draw(draw_syntax_note_valid())
    extra_start = draw(draw_token_by_value("StartNote"))
    # Place the second StartNote after the real one and before EndNote
    corrupted = insert_random(draw, valid_tokens, extra_start)
    expected = ParseErrorException(ParseError.FOUND_STARTNOTE, extra_start, None)
    return (corrupted, expected)


# Generate note without EndNote
# We expect the following behaviour:
# - Error if there is no EndNote node at all
# Returns a tuple of the token list and the expected ParseErrorException
@composite
def draw_syntax_note_invalid_noendnote(draw):
    valid_tokens = draw(draw_syntax_note_valid())
    # The final item of a valid note is its EndNote, so cut it off
    truncated = valid_tokens[:-1]
    return (truncated, ParseErrorException(ParseError.NO_TOKEN, None, None))


# Generate an invalid note case
# Draws from one of the invalid note generators: missing StartNote,
# extra StartNote or missing EndNote
@composite
def draw_syntax_note_invalid(draw):
    cases = (
        draw_syntax_note_invalid_nostartnote(),
        draw_syntax_note_invalid_extrastartnote(),
        draw_syntax_note_invalid_noendnote(),
    )
    return draw(one_of(*cases))


# Test that skip_note errors in invalid cases
# We expect the following behaviour:
# - skip_note raises a ParseErrorException equal to the expected one
# - Returning instead of raising fails the test
@given(draw_syntax_note_invalid())
def test_parse_note_invalid(test_data):
    tokens, expected_error = test_data
    stream = SyntaxStream(tokens)
    try:
        parsed = NoteSkipper().skip_note(stream)
    except ParseErrorException as caught:
        assert caught == expected_error
    else:
        raise AssertionError("Parsed invalid data: %s" % (parsed))


# Draws a token that makes a valid boolean, plus its expected Syntax
# The expected value is True when the token's value is "True" and False
# otherwise; the expected location is the token's location
@composite
def draw_syntax_bool_valid(draw):
    token = draw(draw_token_bool())
    expected = Syntax(token.value == "True", token.location, SyntaxType.BOOL)
    return (token, expected)


# Tests parse_bool works correctly
# We expect the following behaviour:
# - Only the first token is parsed
# - The resulting boolean is True if the first token is True
# - The resulting boolean is False if the first token is False
# - The Syntax's value is the resulting boolean
# - The Syntax's type is SyntaxType.BOOL
# - The Syntax's location is the first token's location
@given(draw_syntax_random(), draw_syntax_bool_valid())
def test_parse_bool_valid(canary, test_data):
    bool_token, expected = test_data
    stream = SyntaxStream([bool_token, canary])
    actual = Parser().parse_bool(stream)
    assert actual is not None
    assert actual == expected
    # The canary must be the only item left behind in the stream
    assert stream.pop() == canary
    assert stream.pop() is None


# Generate an invalid boolean
# We expect the following behaviour:
# - Error if there isn't a token
# - Error if the token is not a SyntaxType.TOKEN
# - Error if the token is not True or False
# Returns a tuple of the token list and the expected ParseErrorException
@composite
def draw_syntax_bool_invalid(draw):
    if not draw(booleans()):
        # Empty stream: there is nothing to read at all
        return ([], ParseErrorException(ParseError.NO_TOKEN, None, None))
    token = draw(draw_syntax_random())
    is_token = token.type == SyntaxType.TOKEN
    # Reject anything that would be a valid boolean token
    assume(not (is_token and token.value in ["True", "False"]))
    kind = ParseError.NOT_BOOL if is_token else ParseError.NOT_TOKEN
    return ([token], ParseErrorException(kind, token, None))


# Test that parse_bool errors in invalid cases
# We expect the following behaviour:
# - parse_bool raises a ParseErrorException equal to the expected one
# - Returning a value instead of raising fails the test
@given(draw_syntax_bool_invalid())
def test_parse_bool_invalid(test_data):
    tokens, expected_error = test_data
    stream = SyntaxStream(tokens)
    try:
        parsed = Parser().parse_bool(stream)
    except ParseErrorException as caught:
        assert caught == expected_error
    else:
        raise AssertionError("Parsed invalid data: %s" % (parsed))


# Dummy skip_note implementation for testing note clearing
# This stands in for skip_note: it pops a single item (the StartNote) off
# the stream, does nothing else and returns None
def clear_notes_skip_note_valid(stream):
    stream.pop()


# Dummy skip_note implementation for testing error propagation
# This stands in for skip_note: it always raises a TEST_ERROR carrying
# whatever the stream currently peeks at
def clear_notes_skip_note_error(stream):
    current = stream.peek()
    raise ParseErrorException(ParseError.TEST_ERROR, current, None)


# Draws a random token suitable for note clearing testing
# Tokens whose value is EndNote are rejected; StartNote is allowed
@composite
def draw_clear_notes_value_token(draw):
    rejected = ("EndNote",)
    candidate = draw(draw_syntax_token())
    assume(candidate.value not in rejected)
    return candidate


# Draws tokens to make a valid soup to clear notes
# Returns a tuple of:
# - The drawn tokens
# - The same tokens in order with every StartNote token left out
@composite
def draw_syntax_clear_notes_valid(draw):
    soup = draw(lists(draw_clear_notes_value_token()))
    kept = [token for token in soup if token.value != "StartNote"]
    return (soup, kept)


# Tests clear_notes works correctly
# We expect the following behaviour:
# - When StartNote is encountered skip_note is called to skip the note
# - Other tokens are passed through
@given(draw_syntax_clear_notes_valid())
def test_parse_clear_notes_valid(test_data):
    soup, expected = test_data
    stream = SyntaxStream(soup)
    skipper = NoteSkipper()
    # Swap in the dummy so each StartNote is dropped on its own
    skipper.skip_note = clear_notes_skip_note_valid
    assert skipper.clear_notes(stream) == expected


# Draws tokens to test clear_notes error propagation
# Returns a tuple of:
# - The token list, which always ends with a StartNote
# - A TEST_ERROR exception carrying the first StartNote in that list
@composite
def draw_syntax_clear_notes_startnote_propagation(draw):
    prefix = draw(lists(draw_clear_notes_value_token()))
    # Ensure we have a StartNote somewhere
    guaranteed = draw(draw_token_by_value("StartNote"))
    new_tokens = prefix + [guaranteed]
    # The prefix may already hold a StartNote, so look for the earliest one
    first = next((t for t in new_tokens if t.value == "StartNote"), None)
    if first is None:
        raise AssertionError("Unable to find StartNote?")
    return (new_tokens, ParseErrorException(ParseError.TEST_ERROR, first, None))


# Tests clear_notes passes through skip_note errors
# We expect the following behaviour:
# - When StartNote is encountered skip_note is called to skip the note
# - Any error skip_note gives is propagated through clear_notes
@given(draw_syntax_clear_notes_startnote_propagation())
def test_parse_clear_notes(test_data):
    tokens, expected_error = test_data
    stream = SyntaxStream(tokens)
    skipper = NoteSkipper()
    # Swap in the dummy that always raises
    skipper.skip_note = clear_notes_skip_note_error
    try:
        parsed = skipper.clear_notes(stream)
    except ParseErrorException as caught:
        assert caught == expected_error
    else:
        raise AssertionError("Parsed invalid data: %s" % (parsed))


# Draws tokens to test clear_notes EndNote invalid error
# Returns a tuple of:
# - The token list, which always ends with an EndNote
# - A FOUND_ENDNOTE exception carrying the first EndNote in that list
@composite
def draw_syntax_clear_notes_invalid_endnote(draw):
    prefix = draw(lists(draw_clear_notes_value_token()))
    # Ensure we have an EndNote somewhere
    stray_end = draw(draw_token_by_value("EndNote"))
    new_tokens = prefix + [stray_end]
    first = next((t for t in new_tokens if t.value == "EndNote"), None)
    if first is None:
        raise AssertionError("Unable to find EndNote?")
    return (new_tokens, ParseErrorException(ParseError.FOUND_ENDNOTE, first, None))


# Tests clear_notes errors when finding an EndNote
# We expect the following behaviour:
# - When EndNote is encountered a FOUND_ENDNOTE error is raised
@given(draw_syntax_clear_notes_invalid_endnote())
def test_parse_clear_notes_invalid_endnote(test_data):
    tokens, expected_error = test_data
    stream = SyntaxStream(tokens)
    skipper = NoteSkipper()
    # Swap in the dummy so any StartNote is dropped on its own
    skipper.skip_note = clear_notes_skip_note_valid
    try:
        parsed = skipper.clear_notes(stream)
    except ParseErrorException as caught:
        assert caught == expected_error
    else:
        raise AssertionError("Parsed invalid data: %s" % (parsed))


# Tests the parser wrapper works correctly
# We expect the following behaviour:
# - Notes to be removed from the tokens
# - parse gives the same result, or raises the same error, as running
#   clear_notes directly over a copy of the tokens
@given(lists(draw_token_classified()))
def test_parse_fuzz(tokens):
    # Work out the expected outcome first; it is either a value or an error
    try:
        expected = NoteSkipper().clear_notes(SyntaxStream(tokens.copy()))
    except ParseErrorException as error:
        expected = error
    try:
        parsed = parse(tokens)
    except ParseErrorException as error:
        assert error == expected
    else:
        assert parsed == expected