# SPDX-License-Identifier: LGPL-2.1-only
# Copyright 2022 Jookia <contact@jookia.org>

from hypothesis import given
from hypothesis.strategies import (
    booleans,
    characters,
    composite,
    integers,
    just,
    lists,
    one_of,
    sampled_from,
    text,
)

from src import tokenize

# Whitespace that separates lexer words
whitespace = " \n\t"


# Draws a random token location
@composite
def draw_token_location(draw):
    line = draw(integers())
    column = draw(integers())
    filename = draw(text())
    return tokenize.TokenLocation(line, column, filename)


# Test location getters
@given(integers(), integers(), text())
def test_tokenize_location_getters(line, column, filename):
    test = tokenize.TokenLocation(line, column, filename)
    assert test.line == line
    assert test.column == column
    assert test.file == filename


# Test location equality
@given(draw_token_location(), draw_token_location())
def test_tokenize_location_equality(location1, location2):
    equals = (
        location1.line == location2.line
        and location1.column == location2.column
        and location1.file == location2.file
    )
    assert (location1 == location2) == equals


# Draws a random token type
@composite
def draw_token_type(draw):
    return draw(sampled_from(list(tokenize.TokenType)))


# Draws a random token
@composite
def draw_token_random(draw):
    value = draw(text())
    location = draw(draw_token_location())
    type = draw(draw_token_type())
    return tokenize.Token(value, location, type)


# Test token getters
@given(text(), draw_token_location(), draw_token_type())
def test_tokenize_token_getters(value, location, type):
    test = tokenize.Token(value, location, type)
    assert test.value == value
    assert test.location == location
    assert test.type == type


# Test token equality
@given(draw_token_random(), draw_token_random())
def test_tokenize_token_equality(token1, token2):
    equals = (
        token1.value == token2.value
        and token1.location == token2.location
        and token1.type == token2.type
    )
    assert (token1 == token2) == equals


# Draws a tokenizer non-whitespace token
@composite
def draw_token_nonwhitespace(draw):
    chars = characters(blacklist_characters=whitespace)
    value = draw(text(alphabet=chars, min_size=1))
    location = draw(draw_token_location())
    type = tokenize.TokenType.UNKNOWN
    return tokenize.Token(value, location, type)


# Draws a tokenizer whitespace token
@composite
def draw_token_whitespace(draw):
    value = draw(sampled_from(whitespace))
    location = draw(draw_token_location())
    type = tokenize.TokenType.WHITESPACE
    return tokenize.Token(value, location, type)


# Draws a token with the values split_tokens outputs: the original
# value, a placeholder location and an UNKNOWN type
@composite
def draw_token_splitted(draw, strategy):
    token = draw(strategy())
    location = tokenize.TokenLocation(1, 1, "")
    type = tokenize.TokenType.UNKNOWN
    return tokenize.Token(token.value, location, type)


# Generates an alternating sequence of whitespace and non-whitespace tokens
@composite
def draw_tokens_list(draw):
    output = []
    # A throwaway list whose length decides how many alternating runs to emit
    elements = draw(lists(just(True)))
    drawing_whitespace = draw(booleans())
    for _ in elements:
        if drawing_whitespace:
            strategy = draw_token_whitespace
            locationed = draw_token_splitted(strategy)
            output += draw(lists(locationed, min_size=1))
        else:
            strategy = draw_token_nonwhitespace
            locationed = draw_token_splitted(strategy)
            output.append(draw(locationed))
        drawing_whitespace = not drawing_whitespace
    return output


# Test that the tokenizer can split tokens properly
@given(draw_tokens_list())
def test_tokenize_split_tokens(tokens):
    input = ""
    for t in tokens:
        input += t.value
    assert tokenize.split_tokens(input) == tokens
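

# A concrete illustration of the property above, with hand-derived
# expectations: split_tokens should cut "hi there" into the two words
# and the space between them, each token carrying the placeholder
# location (1, 1, "") and the UNKNOWN type that draw_token_splitted
# assumes. This is a sketch inferred from the strategies in this file,
# not documented split_tokens behaviour.
def test_tokenize_split_tokens_example():
    location = tokenize.TokenLocation(1, 1, "")
    unknown = tokenize.TokenType.UNKNOWN
    expected = [
        tokenize.Token("hi", location, unknown),
        tokenize.Token(" ", location, unknown),
        tokenize.Token("there", location, unknown),
    ]
    assert tokenize.split_tokens("hi there") == expected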


# Generates a list of tokens with correct locations
@composite
def draw_tokens_locations(draw):
    tokens = draw(draw_tokens_list())
    filename = draw(text())
    new_tokens = []
    line = 1
    column = 1
    for t in tokens:
        location = tokenize.TokenLocation(line, column, filename)
        new = tokenize.Token(t.value, location, t.type)
        new_tokens.append(new)
        if t.value == "\n":
            line = line + 1
            column = 1
        else:
            column += len(t.value)
    return new_tokens


# Test that the tokenizer can determine locations
@given(draw_tokens_locations())
def test_tokenize_locations(tokens):
    input = []
    filename = ""
    location = tokenize.TokenLocation(1, 1, "")
    for t in tokens:
        input.append(tokenize.Token(t.value, location, t.type))
        filename = t.location.file
    assert tokenize.locate_tokens(input, filename) == tokens


# Draws a pair of token lists: an input list with random types and the
# expected list with correct types
@composite
def draw_tokens_classified(draw):
    strategies = [
        draw_token_nonwhitespace(),
        draw_token_whitespace(),
    ]
    tokens = draw(lists(one_of(strategies)))
    input = []
    for t in tokens:
        type = draw(draw_token_type())
        input.append(tokenize.Token(t.value, t.location, type))
    return (input, tokens)


# Test that classification can add types properly
@given(draw_tokens_classified())
def test_tokenize_classification(test_data):
    (input, tokens) = test_data
    assert tokenize.classify_tokens(input) == tokens


# Draws random source text by concatenating random token values
@composite
def draw_source_fuzz(draw):
    strategies = [
        draw_token_nonwhitespace(),
        draw_token_whitespace(),
    ]
    tokens = draw(lists(one_of(strategies)))
    input = ""
    for t in tokens:
        input += t.value
    return input


# Test that tokenize matches splitting, locating and classifying
@given(draw_source_fuzz(), text())
def test_tokenize_fuzz(source, filename):
    split = tokenize.split_tokens(source)
    located = tokenize.locate_tokens(split, filename)
    classified = tokenize.classify_tokens(located)
    tokenized = tokenize.tokenize(source, filename)
    assert classified == tokenized
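

# A concrete end-to-end sketch of the pipeline the fuzz test checks,
# with hand-derived expectations: tokenizing "hi there" should yield
# the words and the space, located by the column arithmetic from
# draw_tokens_locations and typed as in draw_token_whitespace and
# draw_token_nonwhitespace. The filename "example" is arbitrary, and
# the expected tokens are inferred from the properties in this file
# rather than from documented tokenize behaviour.
def test_tokenize_example():
    unknown = tokenize.TokenType.UNKNOWN
    whitespace_type = tokenize.TokenType.WHITESPACE
    expected = [
        tokenize.Token("hi", tokenize.TokenLocation(1, 1, "example"), unknown),
        tokenize.Token(" ", tokenize.TokenLocation(1, 3, "example"), whitespace_type),
        tokenize.Token("there", tokenize.TokenLocation(1, 4, "example"), unknown),
    ]
    assert tokenize.tokenize("hi there", "example") == expected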