# SPDX-License-Identifier: LGPL-2.1-only
# Copyright 2022 Jookia <contact@jookia.org>

from hypothesis import given
from hypothesis.strategies import (
    booleans,
    characters,
    composite,
    just,
    lists,
    sampled_from,
    text,
)

from src import tokenize

# Whitespace that separates lexer words
whitespace = " \n\t"


# Draws a single tokenizer symbol: non-empty text containing no whitespace
@composite
def draw_symbol(draw):
    return draw(text(alphabet=characters(blacklist_characters=whitespace), min_size=1))


# Draws a non-empty run of tokenizer whitespace
@composite
def draw_whitespace(draw):
    return "".join(draw(lists(sampled_from(whitespace), min_size=1)))


# Generates an alternating list of symbols and whitespace
@composite
def draw_split_list(draw):
    output = []
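    # The contents of this list are throwaway; only its length matters, as a
    # way of letting Hypothesis choose how many alternating pieces to draw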
    elements = draw(lists(just(True)))
    drawing_whitespace = draw(booleans())
    for _ in elements:
        if drawing_whitespace:
            strategy = draw_whitespace()
        else:
            strategy = draw_symbol()
        output.append(draw(strategy))
        drawing_whitespace = not drawing_whitespace
    return output
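

# Illustrative example: draw_split_list() might produce ["foo", " \n", "ba2r"]
# or ["\t ", "x"]; adjacent elements are always of different kinds, so joining
# the pieces and re-splitting can recover the original list exactly.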


# Test that the tokenizer can split symbols properly
@given(draw_split_list())
def test_tokenizer_split_symbols(split):
    # Joining the pieces and re-splitting should round-trip exactly
    source = "".join(split)
    assert tokenize.split_symbols(source) == split
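

# For reference, a minimal sketch of the splitting behaviour this test assumes
# from src.tokenize.split_symbols; this helper is illustrative only, and the
# real implementation may differ as long as it satisfies the same property.
def _reference_split_symbols(source):
    import re

    # Match maximal runs of either whitespace (the characters in the
    # `whitespace` constant above) or non-whitespace; joining the matches
    # reproduces the input exactly, mirroring the round-trip assertion.
    return re.findall(r"[ \n\t]+|[^ \n\t]+", source)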