# SPDX-License-Identifier: LGPL-2.1-only
# Copyright 2022 Jookia <contact@jookia.org>

from hypothesis import given
from hypothesis.strategies import (
    booleans,
    characters,
    composite,
    integers,
    just,
    lists,
    sampled_from,
    text,
)

from src import tokenize

# Whitespace that separates lexer words
whitespace = " \n\t"


# Draws a random token location
@composite
def draw_token_location(draw):
    line = draw(integers())
    column = draw(integers())
    filename = draw(text())
    return tokenize.TokenLocation(line, column, filename)


# Test location getters
@given(integers(), integers(), text())
def test_tokenize_location_getters(line, column, filename):
    test = tokenize.TokenLocation(line, column, filename)
    assert test.line == line
    assert test.column == column
    assert test.file == filename


# Test location equality
@given(draw_token_location(), draw_token_location())
def test_tokenize_location_equality(location1, location2):
    equals = (
        location1.line == location2.line
        and location1.column == location2.column
        and location1.file == location2.file
    )
    assert (location1 == location2) == equals


# Draws a random token type
@composite
def draw_token_type(draw):
    return draw(sampled_from(list(tokenize.TokenType)))


# Draws a random token
@composite
def draw_token(draw):
    value = draw(text())
    location = draw(draw_token_location())
    type = draw(draw_token_type())
    return tokenize.Token(value, location, type)


# Test token getters
@given(text(), draw_token_location(), draw_token_type())
def test_tokenize_token_getters(value, location, type):
    test = tokenize.Token(value, location, type)
    assert test.value == value
    assert test.location == location
    assert test.type == type


# Test token equality
@given(draw_token(), draw_token())
def test_tokenize_token_equality(token1, token2):
    equals = (
        token1.value == token2.value
        and token1.location == token2.location
        and token1.type == token2.type
    )
    assert (token1 == token2) == equals


# Draws a tokenizer non-whitespace token
@composite
def draw_token_nonwhitespace(draw):
    chars = characters(blacklist_characters=whitespace)
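    # An empty value could never appear in split_tokens' output, so require
    # at least one character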
    value = draw(text(alphabet=chars, min_size=1))
    location = draw(draw_token_location())
    type = tokenize.TokenType.UNKNOWN
    return tokenize.Token(value, location, type)


# Draws a tokenizer whitespace token
@composite
def draw_token_whitespace(draw):
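    # Whitespace is expected to come back from split_tokens as individual
    # single-character tokens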
    value = draw(sampled_from(whitespace))
    location = draw(draw_token_location())
    type = tokenize.TokenType.WHITESPACE
    return tokenize.Token(value, location, type)


# Draws a token with the placeholder location and type that split_tokens outputs
@composite
def draw_token_splitted(draw, strategy):
    token = draw(strategy())
    location = tokenize.TokenLocation(1, 1, "")
    type = tokenize.TokenType.UNKNOWN
    return tokenize.Token(token.value, location, type)


# Generates an alternating sequence of whitespace and non-whitespace tokens
@composite
def draw_tokens_list(draw):
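    # Non-whitespace tokens are drawn one at a time since two of them in a
    # row would merge into a single token once their values are joined back
    # into the input text; whitespace runs can safely hold several tokens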
    output = []
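    # Only the length of this list matters: each element adds one run of
    # tokens to the output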
    elements = draw(lists(just(True)))
    drawing_whitespace = draw(booleans())
    for _ in elements:
        if drawing_whitespace:
            strategy = draw_token_whitespace
            locationed = draw_token_splitted(strategy)
            output += draw(lists(locationed, min_size=1))
        else:
            strategy = draw_token_nonwhitespace
            locationed = draw_token_splitted(strategy)
            output.append(draw(locationed))
        drawing_whitespace = not drawing_whitespace
    return output


# Test that the tokenizer splits text into the expected tokens
@given(draw_tokens_list())
def test_tokenize_split_tokens(tokens):
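    # The concatenated token values form the source text, so splitting it
    # should give back exactly the drawn tokens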
    input = ""
    for t in tokens:
        input += t.value
    assert tokenize.split_tokens(input) == tokens


# Generates a list of tokens with correct locations
@composite
def draw_tokens_locations(draw):
    tokens = draw(draw_tokens_list())
    filename = draw(text())
    new_tokens = []
    line = 1
    column = 1
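    # Each token is stamped with its starting position; a newline then moves
    # to the next line and resets the column, anything else advances the
    # column by the length of its value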
    for t in tokens:
        location = tokenize.TokenLocation(line, column, filename)
        new = tokenize.Token(t.value, location, t.type)
        new_tokens.append(new)
        if t.value == "\n":
            line += 1
            column = 1
        else:
            column += len(t.value)
    return new_tokens


# Test that the tokenizer can determine token locations
@given(draw_tokens_locations())
def test_tokenize_locations(tokens):
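    # Stamp every input token with the same dummy location; locate_tokens
    # should replace it with the expected line, column and filename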
    input = []
    filename = ""
    location = tokenize.TokenLocation(1, 1, "")
    for t in tokens:
        input.append(tokenize.Token(t.value, location, t.type))
        filename = t.location.file
    assert tokenize.locate_tokens(input, filename) == tokens