# SPDX-License-Identifier: LGPL-2.1-only
# Copyright 2022 Jookia <contact@jookia.org>
from hypothesis import given
from hypothesis.strategies import (
    booleans,
    characters,
    composite,
    just,
    lists,
    sampled_from,
    text,
)

from src import tokenize

# Whitespace that separates lexer words
whitespace = " \n\t"


# Draws a tokenizer non-whitespace symbol: one or more characters,
# none of which are separator whitespace
@composite
def draw_symbol_nonwhitespace(draw):
    chars = characters(blacklist_characters=whitespace)
    value = draw(text(alphabet=chars, min_size=1))
    return tokenize.Symbol(value)


# Draws a tokenizer whitespace symbol: a single separator character
@composite
def draw_symbol_whitespace(draw):
    return tokenize.Symbol(draw(sampled_from(whitespace)))


# Generates a sequence of symbols that alternates between runs of
# whitespace symbols and single non-whitespace symbols, so that
# concatenating their values and re-splitting recovers the same list
@composite
def draw_symbols_list(draw):
    output = []
    elements = draw(lists(just(True)))
    drawing_whitespace = draw(booleans())
    for _ in elements:
        if drawing_whitespace:
            # A run of one or more whitespace symbols
            strategy = draw_symbol_whitespace()
            output += draw(lists(strategy, min_size=1))
        else:
            # Exactly one non-whitespace symbol; two in a row would
            # merge into a single symbol when re-split
            strategy = draw_symbol_nonwhitespace()
            output.append(draw(strategy))
        drawing_whitespace = not drawing_whitespace
    return output


# Test that the tokenizer can split symbols properly: joining the
# symbol values and splitting again must round-trip to the same list
@given(draw_symbols_list())
def test_tokenize_split_symbols(symbols):
    source = "".join(s.value for s in symbols)
    assert tokenize.split_symbols(source) == symbols