# SPDX-License-Identifier: LGPL-2.1-only
# Copyright 2022 Jookia <contact@jookia.org>

from hypothesis import given
from hypothesis.strategies import (
    booleans,
    characters,
    composite,
    integers,
    just,
    lists,
    sampled_from,
    text,
)

from src import tokenize
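
# For reference, the interface these tests assume from src.tokenize
# (a sketch inferred from the tests below, not the actual implementation):
#   SymbolLocation(line, column, file) with .line, .column and .file attributes
#   Symbol(value, location) with .value and .location attributes
#   split_symbols(text) returning a list of Symbol objects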

# Whitespace that separates lexer words
whitespace = " \n\t"


# Draws a random symbol location
@composite
def draw_symbol_location(draw):
    line = draw(integers())
    column = draw(integers())
    filename = draw(text())
    return tokenize.SymbolLocation(line, column, filename)


# Test location getters
@given(integers(), integers(), text())
def test_tokenize_location_getters(line, column, filename):
    test = tokenize.SymbolLocation(line, column, filename)
    assert test.line == line
    assert test.column == column
    assert test.file == filename


# Test location equality
@given(draw_symbol_location(), draw_symbol_location())
def test_tokenize_location_equality(location1, location2):
    equals = (
        location1.line == location2.line
        and location1.column == location2.column
        and location1.file == location2.file
    )
    assert (location1 == location2) == equals


# Draws a random symbol
@composite
def draw_symbol(draw):
    value = draw(text())
    location = draw(draw_symbol_location())
    return tokenize.Symbol(value, location)


# Test symbol getters
@given(text(), draw_symbol_location())
def test_tokenize_symbol_getters(value, location):
    test = tokenize.Symbol(value, location)
    assert test.value == value
    assert test.location == location


# Test symbol equality
@given(draw_symbol(), draw_symbol())
def test_tokenize_symbol_equality(symbol1, symbol2):
    equals = symbol1.value == symbol2.value and symbol1.location == symbol2.location
    assert (symbol1 == symbol2) == equals


# Draws a non-whitespace symbol for the tokenizer
@composite
def draw_symbol_nonwhitespace(draw):
    chars = characters(blacklist_characters=whitespace)
    value = draw(text(alphabet=chars, min_size=1))
    location = tokenize.SymbolLocation(1, 1, "")
    return tokenize.Symbol(value, location)


# Draws a whitespace symbol for the tokenizer
@composite
def draw_symbol_whitespace(draw):
    value = draw(sampled_from(whitespace))
    location = tokenize.SymbolLocation(1, 1, "")
    return tokenize.Symbol(value, location)
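
# Both symbol strategies above use the placeholder location (1, 1, ""), so the
# split test below also requires split_symbols to report that location.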


# Draws an alternating sequence of whitespace and non-whitespace symbols
@composite
def draw_symbols_list(draw):
    output = []
    elements = draw(lists(just(True)))
    drawing_whitespace = draw(booleans())
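    # Alternate between a run of whitespace symbols and a single non-whitespace
    # symbol, e.g. values [" ", "\t", "ab", "\n", "cd"], so that concatenating
    # the values never merges two adjacent non-whitespace symbols into one.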
    for _ in elements:
        if drawing_whitespace:
            strategy = draw_symbol_whitespace()
            output += draw(lists(strategy, min_size=1))
        else:
            strategy = draw_symbol_nonwhitespace()
            output.append(draw(strategy))
        drawing_whitespace = not drawing_whitespace
    return output


# Test that the tokenizer can split symbols properly
@given(draw_symbols_list())
def test_tokenize_split_symbols(symbols):
    source = "".join(s.value for s in symbols)
    assert tokenize.split_symbols(source) == symbols
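

# A concrete instance of the property above, assuming (as the property test
# does) that split_symbols reports the placeholder location (1, 1, "") for
# every symbol.
def test_tokenize_split_symbols_example():
    location = tokenize.SymbolLocation(1, 1, "")
    expected = [
        tokenize.Symbol("hello", location),
        tokenize.Symbol(" ", location),
        tokenize.Symbol("world", location),
    ]
    assert tokenize.split_symbols("hello world") == expected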