diff --git a/tests/test_tokenize.py b/tests/test_tokenize.py
index 594fe99..332c0e3 100644
--- a/tests/test_tokenize.py
+++ b/tests/test_tokenize.py
@@ -177,7 +177,8 @@
     assert (token1 == token2) == equals


-# Draws a token with the values split_tokens outputs
+# Draws a token using an existing strategy but with a blank location
+# and unknown type, just like split_tokens outputs
 @composite
 def draw_token_splitted(draw, strategy):
     token = draw(strategy)
@@ -187,6 +188,7 @@


 # Generates an alternating sequence of unknown or whitespace tokens
+# intended for splitting into separate tokens
 @composite
 def draw_tokens_to_split(draw):
     source = ""
@@ -195,6 +197,7 @@
     drawing_whitespace = draw(booleans())
     for _ in elements:
         if drawing_whitespace:
+            # Multiple whitespace characters get split into multiple tokens
             strategy = one_of([draw_token_space(), draw_token_newline()])
             locationed = draw_token_splitted(strategy)
             tokens += draw(lists(locationed, min_size=1))
@@ -209,6 +212,14 @@


 # Test that the tokenizer can split tokens properly
+# We expect the following behaviour:
+# - Whitespace is a tab, space or new line
+# - Non-whitespace is anything else
+# - Whitespace and non-whitespace are separated into tokens
+# - Whitespace characters are split into multiple tokens
+# - Non-whitespace characters are combined into a single token
+# - Each token type is UNKNOWN
+# - Each token location is line 1 column 1 of file ""
 @given(draw_tokens_to_split())
 def test_tokenize_split_tokens(test_data):
     (source, tokens) = test_data
@@ -236,6 +247,15 @@


 # Test that the tokenizer can determine locations
+# We expect the following behaviour:
+# - Only the token location is modified
+# - Each token's location filename is the generated filename
+# - A token's line is equal to 1 plus the number of tokens with the
+#   type NEWLINE before the token
+# - A token's column is equal to 1 plus the sum of previous tokens'
+#   value lengths 'before' it, where 'before' is defined as any
+#   token between the last NEWLINE (or start of file) before the token
+#   and the token itself
 @given(draw_tokens_locations())
 def test_tokenize_locations(test_data):
     (input, located, filename) = test_data
@@ -243,6 +263,7 @@


 # Draws a token and possibly adds garbage
+# This is to ensure that tokens must completely match a value
 @composite
 def draw_token_classified_garbled(draw):
     token = draw(draw_token_classified())
@@ -271,13 +292,22 @@


 # Test that classification can add types properly
+# We expect the following behaviour:
+# - Only the token type is modified
+# - The token's type is set based on which of the following conditions its value meets:
+#   - KEYWORD if it entirely matches a keyword
+#   - BOOL if it is True or False
+#   - WHITESPACE if it is a space or tab character
+#   - NEWLINE if it is a new line character
+#   - SHEBANG if it starts with a #!
+#   - UNKNOWN if it is none of the above
 @given(draw_tokens_classified())
 def test_tokenize_classification(test_data):
     (input, tokens) = test_data
     assert tokenize.classify_tokens(input) == tokens


-# Draw random source code that might be invalid for fuzzing
+# Draw a random string made of token values
 @composite
 def draw_source_fuzz(draw):
     tokens = draw(lists(draw_token_classified()))
@@ -288,6 +318,10 @@


 # Test that the tokenize function behaves as we expect
+# We expect the following behaviour:
+# - The tokenizer splits the tokens as expected, then
+# - The tokenizer classifies tokens as expected, then
+# - The tokenizer sets the token locations as expected
 @given(draw_source_fuzz(), text())
 def test_tokenize_fuzz(source, filename):
     split = tokenize.split_tokens(source)
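
The splitting behaviour spelled out in the comments above can be illustrated with a small sketch. This is a hypothetical illustration only, assuming simple Token(value, type, location) and TokenLocation(file, line, column) shapes; the real classes and the actual split_tokens implementation live in the project's tokenize module and may differ.

from dataclasses import dataclass

@dataclass
class TokenLocation:
    file: str = ""
    line: int = 1
    column: int = 1

@dataclass
class Token:
    value: str
    type: str
    location: TokenLocation

def split_tokens(source):
    # Every whitespace character (space, tab, new line) becomes its own
    # token, while runs of non-whitespace characters are combined into a
    # single token. All tokens come out as UNKNOWN at line 1, column 1
    # of file "", matching the expectations listed in the test comments.
    tokens = []
    current = ""
    for char in source:
        if char in " \t\n":
            if current:
                tokens.append(Token(current, "UNKNOWN", TokenLocation()))
                current = ""
            tokens.append(Token(char, "UNKNOWN", TokenLocation()))
        else:
            current += char
    if current:
        tokens.append(Token(current, "UNKNOWN", TokenLocation()))
    return tokens

# For example, "if  x" would yield four tokens: "if", " ", " " and "x",
# each UNKNOWN and located at line 1, column 1 of file "".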