diff --git a/src/tokenize.py b/src/tokenize.py
index e705b09..3891efa 100644
--- a/src/tokenize.py
+++ b/src/tokenize.py
@@ -1,6 +1,9 @@
 # SPDX-License-Identifier: LGPL-2.1-only
 # Copyright 2022 Jookia
 
+import enum
+
+
 # Checks whether a symbol is whitespace
 def is_whitespace(symbol):
     return symbol == " " or symbol == "\t" or symbol == "\n"
@@ -28,20 +31,31 @@
         )
 
 
+# The type of a token
+class TokenType(enum.Enum):
+    UNKNOWN = enum.auto()  # pragma: no mutate
+
+
 # Represents a tokenizer token
 class Token:
-    def __init__(self, value, location):
+    def __init__(self, value, location, type):
         self.value = value
         self.location = location
+        self.type = type
 
     def __repr__(self):
-        return "Token(value %s, location %s)" % (  # pragma: no mutate
+        return "Token(value %s, location %s, type %s)" % (  # pragma: no mutate
             repr(self.value),
             repr(self.location),
+            str(self.type),
         )
 
     def __eq__(self, other):
-        return self.value == other.value and self.location == other.location
+        return (
+            self.value == other.value
+            and self.location == other.location
+            and self.type == other.type
+        )
 
 
 # Splits text in to a list of characters and whitespace
@@ -52,21 +66,22 @@
     current = input[0]
     curr_whitespace = is_whitespace(input[0])
     location = TokenLocation(1, 1, "")
+    type = TokenType.UNKNOWN
     for c in input[1:]:
         c_whitespace = is_whitespace(c)
         if c_whitespace != curr_whitespace:
             # Flush current buffer and switch modes
-            tokens.append(Token(current, location))
+            tokens.append(Token(current, location, type))
             current = c
             curr_whitespace = c_whitespace
         elif curr_whitespace:
             # Whitespace mode appends each character
-            tokens.append(Token(current, location))
+            tokens.append(Token(current, location, type))
             current = c
         else:
             # Token mode builds the current buffer
             current += c
-    tokens.append(Token(current, location))
+    tokens.append(Token(current, location, type))
     return tokens
 
 
@@ -77,7 +92,7 @@
     column = 1
     for t in tokens:
         location = TokenLocation(line, column, filename)
-        new = Token(t.value, location)
+        new = Token(t.value, location, t.type)
         new_tokens.append(new)
         if t.value == "\n":
             line = line + 1
diff --git a/tests/test_tokenize.py b/tests/test_tokenize.py
index 1c0b85d..4543350 100644
--- a/tests/test_tokenize.py
+++ b/tests/test_tokenize.py
@@ -53,21 +53,28 @@
 def draw_token(draw):
     value = draw(text())
     location = draw(draw_token_location())
-    return tokenize.Token(value, location)
+    type = tokenize.TokenType.UNKNOWN
+    return tokenize.Token(value, location, type)
 
 
 # Test token getters
 @given(text(), draw_token_location())
 def test_tokenize_token_getters(value, location):
-    test = tokenize.Token(value, location)
+    type = tokenize.TokenType.UNKNOWN
+    test = tokenize.Token(value, location, type)
     assert test.value == value
     assert test.location == location
+    assert test.type == type
 
 
 # Test token equals
 @given(draw_token(), draw_token())
 def test_tokenize_token_equality(token1, token2):
-    equals = token1.value == token2.value and token1.location == token2.location
+    equals = (
+        token1.value == token2.value
+        and token1.location == token2.location
+        and token1.type == token2.type
+    )
     assert (token1 == token2) == equals
 
 
@@ -77,7 +84,8 @@
     chars = characters(blacklist_characters=whitespace)
     value = draw(text(alphabet=chars, min_size=1))
     location = draw(draw_token_location())
-    return tokenize.Token(value, location)
+    type = tokenize.TokenType.UNKNOWN
+    return tokenize.Token(value, location, type)
 
 
 # Draws a tokenizer whitespace token
@@ -85,14 +93,15 @@
 def draw_token_whitespace(draw):
     value = draw(sampled_from(whitespace))
     location = draw(draw_token_location())
-    return tokenize.Token(value, location)
+    type = tokenize.TokenType.UNKNOWN
+    return tokenize.Token(value, location, type)
 
 
 # Draws a token with a set location
 @composite
 def draw_token_with_location(draw, strategy, location):
     token = draw(strategy())
-    return tokenize.Token(token.value, location)
+    return tokenize.Token(token.value, location, token.type)
 
 
 # Generates an alternating sequence of tokens
@@ -134,7 +143,7 @@
     column = 1
     for t in tokens:
         location = tokenize.TokenLocation(line, column, filename)
-        new = tokenize.Token(t.value, location)
+        new = tokenize.Token(t.value, location, t.type)
         new_tokens.append(new)
         if t.value == "\n":
             line = line + 1
@@ -151,6 +160,6 @@
     filename = ""
     location = tokenize.TokenLocation(1, 1, "")
    for t in tokens:
-        input.append(tokenize.Token(t.value, location))
+        input.append(tokenize.Token(t.value, location, t.type))
         filename = t.location.file
     assert tokenize.locate_tokens(input, filename) == tokens
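
Note (not part of the patch): a minimal usage sketch of the extended Token constructor this diff introduces, assuming the module is imported as tokenize, as in the test suite; the literal value, line, column, and filename below are made-up illustration data.

    import tokenize

    # Token now carries a type alongside its value and location
    location = tokenize.TokenLocation(1, 1, "input.txt")
    token = tokenize.Token("hello", location, tokenize.TokenType.UNKNOWN)
    assert token.type == tokenize.TokenType.UNKNOWN
    # Equality compares value, location and type
    assert token == tokenize.Token("hello", location, tokenize.TokenType.UNKNOWN)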