Newer
Older
NewLang / tests / test_syntax.py
# SPDX-License-Identifier: LGPL-2.1-only
# Copyright 2022 Jookia <contact@jookia.org>

from hypothesis import given, assume
from hypothesis.strategies import (
    booleans,
    characters,
    composite,
    integers,
    lists,
    one_of,
    sampled_from,
    text,
)

from src.syntax import Syntax, SyntaxLocation, SyntaxStream, SyntaxType

# Reserved words of the language, grouped one construct per line.
# The keyword token strategy samples from this list and the unknown token
# strategy rejects any value found in it.
keywords = (
    "Done "
    "Set To EndSet "
    "If Then Else EndIf "
    "StartNote EndNote "
    "StartText EndText"
).split()


# Strategy producing a SyntaxLocation built from an arbitrary integer line,
# an arbitrary integer offset and arbitrary text as the file name.
@composite
def draw_syntax_location(draw):
    # Arguments are evaluated left to right: line, offset, then file name
    return SyntaxLocation(draw(integers()), draw(integers()), draw(text()))


# Checks that a SyntaxLocation exposes the line, offset and file it was
# constructed with through its attributes.
@given(integers(), integers(), text())
def test_syntax_location_getters(line, offset, filename):
    location = SyntaxLocation(line, offset, filename)
    assert location.line == line
    assert location.offset == offset
    assert location.file == filename


# Checks that two locations compare equal exactly when their line, offset
# and file all match.
@given(draw_syntax_location(), draw_syntax_location())
def test_syntax_location_equality(location1, location2):
    same_line = location1.line == location2.line
    same_offset = location1.offset == location2.offset
    same_file = location1.file == location2.file
    expected = same_line and same_offset and same_file
    assert (location1 == location2) == expected


# Strategy picking any one member of SyntaxType.
@composite
def draw_syntax_type(draw):
    every_type = list(SyntaxType)
    return draw(sampled_from(every_type))


# Strategy producing a Syntax of type TEXT holding arbitrary text at a
# random location.
@composite
def draw_syntax_text(draw):
    # The value is drawn before the location (left to right evaluation)
    return Syntax(draw(text()), draw(draw_syntax_location()), SyntaxType.TEXT)


# Strategy producing a Syntax of type TOKEN whose value is arbitrary,
# unconstrained text (possibly empty) at a random location.
@composite
def draw_token_random(draw):
    content = draw(text())
    where = draw(draw_syntax_location())
    return Syntax(content, where, SyntaxType.TOKEN)


# Values considered spaces
# Each entry is a single character; the space token strategy samples from
# this list and the unknown token strategy excludes these characters.
valid_spaces = [
    "\t",  # U+0009 HORIZONTAL TAB
    " ",  # U+0020 SPACE
]

# Single values reserved for new line use
# Each entry is exactly one character, so the list can be used directly as a
# set of characters to exclude from generated text.
single_newlines = [
    "\n",  # U+000A LINE FEED
    "\v",  # U+000B VERTICAL TAB
    "\f",  # U+000C FORM FEED
    "\r",  # U+000D CARRIAGE RETURN
    "\u0085",  # U+0085 NEXT LINE
    "\u2028",  # U+2028 LINE SEPARATOR
    "\u2029",  # U+2029 PARAGRAPH SEPARATOR
]

# Multi values reserved for new line use
# These are sequences of more than one character that count as one new line.
multi_newlines = [
    "\r\n",  # U+000D U+000A CARRIAGE RETURN then LINE FEED
]

# All values reserved for new line use
# Single character values come first, followed by the multi character ones.
valid_newlines = single_newlines + multi_newlines


# Strategy producing a TOKEN whose value is none of the classified kinds:
# it is non-empty, contains no space or new line characters, and is neither
# a boolean literal nor a keyword. The location comes from a random token.
@composite
def draw_token_unknown(draw):
    forbidden_chars = valid_spaces + single_newlines
    location = draw(draw_token_random()).location
    alphabet = characters(blacklist_characters=forbidden_chars)
    value = draw(text(alphabet=alphabet, min_size=1))
    # Reject values containing any multi character new line sequence
    assume(all(newline not in value for newline in multi_newlines))
    # Reject values that would be classified as a bool or a keyword
    assume(value not in ["True", "False"])
    assume(value not in keywords)
    return Syntax(value, location, SyntaxType.TOKEN)


# Strategy producing a TOKEN whose value is one of valid_spaces, located
# wherever a randomly drawn token was.
@composite
def draw_token_space(draw):
    location = draw(draw_token_random()).location
    space = draw(sampled_from(valid_spaces))
    return Syntax(space, location, SyntaxType.TOKEN)


# Strategy producing a TOKEN whose value is one of valid_newlines (single or
# multi character), located wherever a randomly drawn token was.
@composite
def draw_token_newline(draw):
    location = draw(draw_token_random()).location
    newline = draw(sampled_from(valid_newlines))
    return Syntax(newline, location, SyntaxType.TOKEN)


# Strategy producing a TOKEN whose value is either "True" or "False",
# located wherever a randomly drawn token was.
@composite
def draw_token_bool(draw):
    location = draw(draw_token_random()).location
    literal = "True" if draw(booleans()) else "False"
    return Syntax(literal, location, SyntaxType.TOKEN)


# Strategy producing a TOKEN whose value is one of the entries in keywords,
# located wherever a randomly drawn token was.
@composite
def draw_token_keyword(draw):
    location = draw(draw_token_random()).location
    keyword = draw(sampled_from(keywords))
    return Syntax(keyword, location, SyntaxType.TOKEN)


# Strategy producing a token from any one of the classified kinds:
# unknown, space, new line, bool or keyword.
@composite
def draw_token_classified(draw):
    any_kind = one_of(
        draw_token_unknown(),
        draw_token_space(),
        draw_token_newline(),
        draw_token_bool(),
        draw_token_keyword(),
    )
    return draw(any_kind)


# Strategy producing a Syntax of type TOKEN that carries the value of a
# classified token but a freshly drawn location.
@composite
def draw_syntax_token(draw):
    classified = draw(draw_token_classified())
    fresh_location = draw(draw_syntax_location())
    # Only the value of the classified token is reused
    return Syntax(classified.value, fresh_location, SyntaxType.TOKEN)


# Strategy producing either a token syntax or a text syntax.
@composite
def draw_syntax_random(draw):
    either = one_of(draw_syntax_token(), draw_syntax_text())
    return draw(either)


# Checks that a Syntax exposes the value, location and type it was
# constructed with through its attributes.
@given(text(), draw_syntax_location(), draw_syntax_type())
def test_syntax_syntax_getters(value, location, type):
    # Arbitrary text stands in for a somewhat random value
    syntax = Syntax(value, location, type)
    assert syntax.value == value
    assert syntax.location == location
    assert syntax.type == type


# Checks that two syntax values compare equal exactly when their type,
# value and location all match.
@given(draw_syntax_random(), draw_syntax_random())
def test_syntax_syntax_equality(syntax1, syntax2):
    same_type = syntax1.type == syntax2.type
    same_value = syntax1.value == syntax2.value
    same_location = syntax1.location == syntax2.location
    expected = same_type and same_value and same_location
    assert (syntax1 == syntax2) == expected


# Tests that a syntax stream pops items correctly
# We expect the following behaviour:
# - Every item comes out, in the order it was supplied
# - Once the stream is exhausted, pop returns None and keeps doing so
@given(lists(draw_syntax_random()))
def test_syntax_syntax_stream_pop(nodes):
    # The stream gets its own copy so the original list stays intact
    stream = SyntaxStream(nodes.copy())
    popped = []
    while True:
        node = stream.pop()
        if node is None:
            break
        popped.append(node)
    assert popped == nodes
    assert stream.pop() is None


# Tests that a syntax stream peeks items correctly
# We expect the following behaviour:
# - Peeking does not pop any values
# - None is returned at the end of the stream
#
# The stream is peeked exactly `times` times and every peek must return the
# same result: the first node, or None when the stream is empty. The peek
# count is deliberately not reduced modulo the stream length: doing so made
# the count zero whenever `times` was a multiple of the length, so a stream
# holding a single node was never peeked at all.
@given(lists(draw_syntax_random()), integers(min_value=0, max_value=100))
def test_syntax_syntax_stream_peek(nodes, times):
    # The stream gets its own copy so the original list stays intact
    stream = SyntaxStream(nodes.copy())
    expected = nodes[0] if nodes else None
    for _ in range(times):
        # Peeking never consumes, so the answer must not change between calls
        assert stream.peek() == expected


# Tests that peeking and popping don't influence each other
# We expect the following behaviour:
# - Peeking does not influence the next pop call
# - Popping does not influence the next peek call
@given(lists(draw_syntax_random()))
def test_syntax_syntax_stream_mixed(nodes):
    # The stream gets its own copy so the original list stays intact
    stream = SyntaxStream(nodes.copy())
    popped_nodes = []
    while True:
        # A peek followed by a pop must always agree, including at the end
        peeked = stream.peek()
        popped = stream.pop()
        assert peeked == popped
        if popped is None:
            break
        popped_nodes.append(popped)
    assert popped_nodes == nodes
    assert stream.pop() is None