diff --git a/src/tokenize.py b/src/tokenize.py
index dff076e..39684c0 100644
--- a/src/tokenize.py
+++ b/src/tokenize.py
@@ -101,3 +101,16 @@
         else:
             column += len(t.value)
     return new_tokens
+
+
+# Classifies tokens into types
+def classify_tokens(tokens):
+    new_tokens = []
+    for t in tokens:
+        if is_whitespace(t.value):
+            token_type = TokenType.WHITESPACE
+        else:
+            token_type = TokenType.UNKNOWN
+        new = Token(t.value, t.location, token_type)
+        new_tokens.append(new)
+    return new_tokens
diff --git a/tests/test_tokenize.py b/tests/test_tokenize.py
index 0de9d43..116fbde 100644
--- a/tests/test_tokenize.py
+++ b/tests/test_tokenize.py
@@ -9,6 +9,7 @@
     integers,
     just,
     lists,
+    one_of,
     sampled_from,
     text,
 )
@@ -169,3 +170,22 @@
         input.append(tokenize.Token(t.value, location, t.type))
     filename = t.location.file
     assert tokenize.locate_tokens(input, filename) == tokens
+
+
+@composite
+def draw_tokens_classified(draw):
+    strategies = [
+        draw_token_nonwhitespace(),
+        draw_token_whitespace(),
+    ]
+    elements = draw(lists(one_of(strategies)))
+    return elements
+
+
+@given(draw_tokens_classified())
+def test_tokenize_classification(tokens):
+    input = []
+    token_type = tokenize.TokenType.UNKNOWN
+    for t in tokens:
+        input.append(tokenize.Token(t.value, t.location, token_type))
+    assert tokenize.classify_tokens(input) == tokens
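For reference outside the diff, here is a self-contained sketch of what the new pass does. The `TokenType`, `Location`, and `Token` definitions below are assumptions standing in for the real ones earlier in src/tokenize.py, and `is_whitespace` is given one plausible implementation; only the names come from the patch.

```python
from dataclasses import dataclass
from enum import Enum, auto


class TokenType(Enum):
    # Assumed members; the real enum lives in src/tokenize.py.
    WHITESPACE = auto()
    UNKNOWN = auto()


@dataclass(frozen=True)
class Location:
    # Assumed fields, inferred from t.location.file in the tests.
    file: str
    line: int
    column: int


@dataclass(frozen=True)
class Token:
    value: str
    location: Location
    type: TokenType


def is_whitespace(value):
    # Plausible stand-in: true for a lexeme made entirely of whitespace.
    return value != "" and value.strip() == ""


def classify_tokens(tokens):
    # Rebuild each token with its type filled in, leaving value and
    # location untouched; anything non-whitespace stays UNKNOWN for now.
    new_tokens = []
    for t in tokens:
        if is_whitespace(t.value):
            token_type = TokenType.WHITESPACE
        else:
            token_type = TokenType.UNKNOWN
        new_tokens.append(Token(t.value, t.location, token_type))
    return new_tokens


loc = Location("demo.src", 1, 1)
out = classify_tokens([Token("  ", loc, TokenType.UNKNOWN),
                       Token("if", loc, TokenType.UNKNOWN)])
assert [t.type for t in out] == [TokenType.WHITESPACE, TokenType.UNKNOWN]
```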
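The new test follows the same round-trip shape as the `locate_tokens` test above it: draw tokens that already carry their expected classification, erase every type back to UNKNOWN, and assert that `classify_tokens` recovers exactly what was drawn. It composes two strategies, `draw_token_whitespace` and `draw_token_nonwhitespace`, defined earlier in tests/test_tokenize.py; the sketch below is a hypothetical shape for them (the value pools and the `Location` construction are guesses, not the repo's code).

```python
from hypothesis.strategies import composite, sampled_from, text

import tokenize  # the project's src/tokenize.py, not the stdlib module


@composite
def draw_token_whitespace(draw):
    # Lexemes that is_whitespace should accept; the drawn token carries
    # the classification the test expects classify_tokens to produce.
    value = draw(sampled_from([" ", "  ", "\t", "\n"]))
    location = tokenize.Location("test", 1, 1)  # assumed constructor
    return tokenize.Token(value, location, tokenize.TokenType.WHITESPACE)


@composite
def draw_token_nonwhitespace(draw):
    # Anything with a visible character should stay UNKNOWN for now.
    value = draw(text(alphabet="abcxyz123", min_size=1))
    location = tokenize.Location("test", 1, 1)  # assumed constructor
    return tokenize.Token(value, location, tokenize.TokenType.UNKNOWN)
```

Because both strategies attach the type the classifier should produce, the property reduces to `classify_tokens(erased) == drawn`, and Hypothesis is free to shrink any counterexample down to the single offending token.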