diff --git a/src/tokenize.py b/src/tokenize.py
index 2680c89..e705b09 100644
--- a/src/tokenize.py
+++ b/src/tokenize.py
@@ -6,15 +6,15 @@
     return symbol == " " or symbol == "\t" or symbol == "\n"


-# Location of a symbol
-class SymbolLocation:
+# Location of a token
+class TokenLocation:
     def __init__(self, line, column, file):
         self.line = line
         self.column = column
         self.file = file

     def __repr__(self):
-        return "SymbolLocation(line %i, column %i, file '%s')" % (  # pragma: no mutate
+        return "TokenLocation(line %i, column %i, file '%s')" % (  # pragma: no mutate
             self.line,
             self.column,
             self.file,
@@ -28,14 +28,14 @@
         )


-# Represents a tokenizer symbol
-class Symbol:
+# Represents a tokenizer token
+class Token:
     def __init__(self, value, location):
         self.value = value
         self.location = location

     def __repr__(self):
-        return "Symbol(value %s, location %s)" % (  # pragma: no mutate
+        return "Token(value %s, location %s)" % (  # pragma: no mutate
             repr(self.value),
             repr(self.location),
         )
@@ -45,43 +45,43 @@


 # Splits text in to a list of characters and whitespace
-def split_symbols(input):
+def split_tokens(input):
     if input == "":
         return []
-    symbols = []
+    tokens = []
     current = input[0]
     curr_whitespace = is_whitespace(input[0])
-    location = SymbolLocation(1, 1, "")
+    location = TokenLocation(1, 1, "")
     for c in input[1:]:
         c_whitespace = is_whitespace(c)
         if c_whitespace != curr_whitespace:
             # Flush current buffer and switch modes
-            symbols.append(Symbol(current, location))
+            tokens.append(Token(current, location))
             current = c
             curr_whitespace = c_whitespace
         elif curr_whitespace:
             # Whitespace mode appends each character
-            symbols.append(Symbol(current, location))
+            tokens.append(Token(current, location))
             current = c
         else:
-            # Symbol mode builds the current buffer
+            # Token mode builds the current buffer
             current += c
-    symbols.append(Symbol(current, location))
-    return symbols
+    tokens.append(Token(current, location))
+    return tokens


-# Generates a list of symbols with locations
-def locate_symbols(symbols, filename):
-    new_symbols = []
+# Generates a list of tokens with locations
+def locate_tokens(tokens, filename):
+    new_tokens = []
     line = 1
     column = 1
-    for s in symbols:
-        location = SymbolLocation(line, column, filename)
-        new = Symbol(s.value, location)
-        new_symbols.append(new)
-        if s.value == "\n":
+    for t in tokens:
+        location = TokenLocation(line, column, filename)
+        new = Token(t.value, location)
+        new_tokens.append(new)
+        if t.value == "\n":
             line = line + 1
             column = 1
         else:
-            column += len(s.value)
-    return new_symbols
+            column += len(t.value)
+    return new_tokens
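A quick usage sketch of the renamed API (not part of the diff; the import name is an assumption, taken from how the tests below refer to the module):

import tokenize  # assumption: src/tokenize.py is importable under this name

# Non-whitespace characters coalesce into runs, while the whitespace branch
# flushes on every character, so "a  b" splits into four tokens, not three.
values = [t.value for t in tokenize.split_tokens("a  b")]
assert values == ["a", " ", " ", "b"]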
diff --git a/tests/test_tokenize.py b/tests/test_tokenize.py
index 9ce7cac..1c0b85d 100644
--- a/tests/test_tokenize.py
+++ b/tests/test_tokenize.py
@@ -19,26 +19,26 @@
 whitespace = " \n\t"


-# Draws a random symbol location
+# Draws a random token location
 @composite
-def draw_symbol_location(draw):
+def draw_token_location(draw):
     line = draw(integers())
     column = draw(integers())
     filename = draw(text())
-    return tokenize.SymbolLocation(line, column, filename)
+    return tokenize.TokenLocation(line, column, filename)


 # Test location getters
 @given(integers(), integers(), text())
 def test_tokenize_location_getters(line, column, filename):
-    test = tokenize.SymbolLocation(line, column, filename)
+    test = tokenize.TokenLocation(line, column, filename)
     assert test.line == line
     assert test.column == column
     assert test.file == filename


 # Test location equals
-@given(draw_symbol_location(), draw_symbol_location())
+@given(draw_token_location(), draw_token_location())
 def test_tokenize_location_equality(location1, location2):
     equals = (
         location1.line == location2.line
@@ -48,109 +48,109 @@
     assert (location1 == location2) == equals


-# Draws a random symbol
+# Draws a random token
 @composite
-def draw_symbol(draw):
+def draw_token(draw):
     value = draw(text())
-    location = draw(draw_symbol_location())
-    return tokenize.Symbol(value, location)
+    location = draw(draw_token_location())
+    return tokenize.Token(value, location)


-# Test symbol getters
-@given(text(), draw_symbol_location())
-def test_tokenize_symbol_getters(value, location):
-    test = tokenize.Symbol(value, location)
+# Test token getters
+@given(text(), draw_token_location())
+def test_tokenize_token_getters(value, location):
+    test = tokenize.Token(value, location)
     assert test.value == value
     assert test.location == location


-# Test symbol equals
-@given(draw_symbol(), draw_symbol())
-def test_tokenize_symbol_equality(symbol1, symbol2):
-    equals = symbol1.value == symbol2.value and symbol1.location == symbol2.location
-    assert (symbol1 == symbol2) == equals
+# Test token equals
+@given(draw_token(), draw_token())
+def test_tokenize_token_equality(token1, token2):
+    equals = token1.value == token2.value and token1.location == token2.location
+    assert (token1 == token2) == equals


-# Draws a tokenizer non-whitespace symbol
+# Draws a tokenizer non-whitespace token
 @composite
-def draw_symbol_nonwhitespace(draw):
+def draw_token_nonwhitespace(draw):
     chars = characters(blacklist_characters=whitespace)
     value = draw(text(alphabet=chars, min_size=1))
-    location = draw(draw_symbol_location())
-    return tokenize.Symbol(value, location)
+    location = draw(draw_token_location())
+    return tokenize.Token(value, location)


-# Draws a tokenizer whitespace symbol
+# Draws a tokenizer whitespace token
 @composite
-def draw_symbol_whitespace(draw):
+def draw_token_whitespace(draw):
     value = draw(sampled_from(whitespace))
-    location = draw(draw_symbol_location())
-    return tokenize.Symbol(value, location)
+    location = draw(draw_token_location())
+    return tokenize.Token(value, location)


-# Draws a symbol with a set location
+# Draws a token with a set location
 @composite
-def draw_symbol_with_location(draw, strategy, location):
-    symbol = draw(strategy())
-    return tokenize.Symbol(symbol.value, location)
+def draw_token_with_location(draw, strategy, location):
+    token = draw(strategy())
+    return tokenize.Token(token.value, location)


-# Generates an alternating sequence of symbols
+# Generates an alternating sequence of tokens
 @composite
-def draw_symbols_list(draw):
+def draw_tokens_list(draw):
     output = []
     elements = draw(lists(just(True)))
     drawing_whitespace = draw(booleans())
-    location = tokenize.SymbolLocation(1, 1, "")
+    location = tokenize.TokenLocation(1, 1, "")
     for _ in elements:
         if drawing_whitespace:
-            strategy = draw_symbol_whitespace
-            locationed = draw_symbol_with_location(strategy, location)
+            strategy = draw_token_whitespace
+            locationed = draw_token_with_location(strategy, location)
             output += draw(lists(locationed, min_size=1))
         else:
-            strategy = draw_symbol_nonwhitespace
-            locationed = draw_symbol_with_location(strategy, location)
+            strategy = draw_token_nonwhitespace
+            locationed = draw_token_with_location(strategy, location)
             output.append(draw(locationed))
         drawing_whitespace = not drawing_whitespace
     return output
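Why draw_tokens_list alternates, sketched with assumed example values: split_tokens can only reproduce token lists of this exact shape, so the strategy must never draw two adjacent non-whitespace tokens or a multi-character whitespace token.

loc = tokenize.TokenLocation(1, 1, "")

# Drawable: a non-whitespace run followed by single-character whitespace tokens.
possible = [tokenize.Token("foo", loc), tokenize.Token(" ", loc), tokenize.Token(" ", loc)]

# Never drawn: split_tokens would read "foo" then "bar" back as the single
# token "foobar", so the round-trip test below could not pass on this input.
impossible = [tokenize.Token("foo", loc), tokenize.Token("bar", loc)]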
-# Test that we the tokenizer can split symbols properly
-@given(draw_symbols_list())
-def test_tokenize_split_symbols(symbols):
+# Test that the tokenizer can split tokens properly
+@given(draw_tokens_list())
+def test_tokenize_split_tokens(tokens):
     input = ""
-    for s in symbols:
-        input += s.value
-    assert tokenize.split_symbols(input) == symbols
+    for t in tokens:
+        input += t.value
+    assert tokenize.split_tokens(input) == tokens


-# Generates a list of symbols with locations
+# Generates a list of tokens with locations
 @composite
-def draw_symbols_locations(draw):
-    symbols = draw(draw_symbols_list())
+def draw_tokens_locations(draw):
+    tokens = draw(draw_tokens_list())
     filename = draw(text())
-    new_symbols = []
+    new_tokens = []
     line = 1
     column = 1
-    for s in symbols:
-        location = tokenize.SymbolLocation(line, column, filename)
-        new = tokenize.Symbol(s.value, location)
-        new_symbols.append(new)
-        if s.value == "\n":
+    for t in tokens:
+        location = tokenize.TokenLocation(line, column, filename)
+        new = tokenize.Token(t.value, location)
+        new_tokens.append(new)
+        if t.value == "\n":
             line = line + 1
             column = 1
         else:
-            column += len(s.value)
-    return new_symbols
+            column += len(t.value)
+    return new_tokens


 # Test that we the tokenizer can determine locations
-@given(draw_symbols_locations())
-def test_tokenize_locations(symbols):
+@given(draw_tokens_locations())
+def test_tokenize_locations(tokens):
     input = []
     filename = ""
-    location = tokenize.SymbolLocation(1, 1, "")
-    for s in symbols:
-        input.append(tokenize.Symbol(s.value, location))
-        filename = s.location.file
-    assert tokenize.locate_symbols(input, filename) == symbols
+    location = tokenize.TokenLocation(1, 1, "")
+    for t in tokens:
+        input.append(tokenize.Token(t.value, location))
+        filename = t.location.file
+    assert tokenize.locate_tokens(input, filename) == tokens
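To make the location arithmetic these tests verify concrete, a hedged round-trip sketch (the import name and filename are assumptions, not part of the change):

import tokenize  # assumption: src/tokenize.py is importable under this name

tokens = tokenize.locate_tokens(tokenize.split_tokens("ab\ncd"), "ex.txt")
assert [t.value for t in tokens] == ["ab", "\n", "cd"]
# "ab" starts at line 1, column 1; the column then advances by len("ab").
assert (tokens[0].location.line, tokens[0].location.column) == (1, 1)
assert (tokens[1].location.line, tokens[1].location.column) == (1, 3)
# The "\n" token bumps the line and resets the column to 1 for "cd".
assert (tokens[2].location.line, tokens[2].location.column) == (2, 1)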