diff --git a/src/parse.py b/src/parse.py
index a626bda..1866b02 100644
--- a/src/parse.py
+++ b/src/parse.py
@@ -1,29 +1,14 @@
 # SPDX-License-Identifier: LGPL-2.1-only
 # Copyright 2022 Jookia
 
-from src import tokenize
-
-
-# Represents a stream of consumable syntax nodes
-class SyntaxStream:
-    def __init__(self, nodes):
-        self.nodes = nodes
-
-    def __repr__(self):
-        return "SyntaxStream(%s)" % (self.nodes)  # pragma: no mutate
-
-    def pop(self):
-        if self.nodes:
-            return self.nodes.pop(0)
-        else:
-            return None
+from src.syntax import Syntax, SyntaxType
 
 
 # Removes whitespace syntax tokens
 def strip_whitespace(syntax):
     output = []
     for s in syntax:
-        if s.type != tokenize.SyntaxType.TOKEN or s.value not in ["\n", " "]:
+        if s.type != SyntaxType.TOKEN or s.value not in ["\n", " "]:
             output.append(s)
     return output
 
@@ -33,7 +18,7 @@
     s = stream.pop()
     if s is None:
         return None
-    elif s.type != tokenize.SyntaxType.TOKEN:
+    elif s.type != SyntaxType.TOKEN:
         return None
     elif value is not None and s.value != value:
         return None
@@ -61,8 +46,8 @@
         else:
             buffer += s.value
     value = buffer.strip("\n\t ")
-    type = tokenize.SyntaxType.TEXT
-    return tokenize.Syntax(value, location, type)
+    type = SyntaxType.TEXT
+    return Syntax(value, location, type)
 
 
 # Parses tokens
diff --git a/src/syntax.py b/src/syntax.py
new file mode 100644
index 0000000..6e03c6f
--- /dev/null
+++ b/src/syntax.py
@@ -0,0 +1,69 @@
+# SPDX-License-Identifier: LGPL-2.1-only
+# Copyright 2022 Jookia
+
+import enum
+
+
+# The type of syntax
+class SyntaxType(enum.Enum):
+    TOKEN = enum.auto()  # pragma: no mutate
+    TEXT = enum.auto()  # pragma: no mutate
+
+
+# Represents a syntax node
+class Syntax:
+    def __init__(self, value, location, type):
+        self.value = value
+        self.location = location
+        self.type = type
+
+    def __repr__(self):
+        return "Syntax(value %s, location %s, type %s)" % (  # pragma: no mutate
+            repr(self.value),
+            repr(self.location),
+            str(self.type),
+        )
+
+    def __eq__(self, other):
+        return (
+            self.type == other.type
+            and self.value == other.value
+            and self.location == other.location
+        )
+
+
+# Location of a syntax node
+class SyntaxLocation:
+    def __init__(self, line, column, file):
+        self.line = line
+        self.column = column
+        self.file = file
+
+    def __repr__(self):
+        return "SyntaxLocation(line %i, column %i, file '%s')" % (  # pragma: no mutate
+            self.line,
+            self.column,
+            self.file,
+        )
+
+    def __eq__(self, other):
+        return (
+            self.line == other.line
+            and self.column == other.column
+            and self.file == other.file
+        )
+
+
+# Represents a stream of consumable syntax nodes
+class SyntaxStream:
+    def __init__(self, nodes):
+        self.nodes = nodes
+
+    def __repr__(self):
+        return "SyntaxStream(%s)" % (self.nodes)  # pragma: no mutate
+
+    def pop(self):
+        if self.nodes:
+            return self.nodes.pop(0)
+        else:
+            return None
diff --git a/src/tokenize.py b/src/tokenize.py
index 6e512da..2547784 100644
--- a/src/tokenize.py
+++ b/src/tokenize.py
@@ -1,57 +1,7 @@
 # SPDX-License-Identifier: LGPL-2.1-only
 # Copyright 2022 Jookia
 
-import enum
-
-
-# The type of syntax
-class SyntaxType(enum.Enum):
-    TOKEN = enum.auto()  # pragma: no mutate
-    TEXT = enum.auto()  # pragma: no mutate
-
-
-# Represents a syntax node
-class Syntax:
-    def __init__(self, value, location, type):
-        self.value = value
-        self.location = location
-        self.type = type
-
-    def __repr__(self):
-        return "Syntax(value %s, location %s, type %s)" % (  # pragma: no mutate
-            repr(self.value),
-            repr(self.location),
-            str(self.type),
-        )
-
-    def __eq__(self, other):
-        return (
-            self.type == other.type
-            and self.value == other.value
-            and self.location == other.location
-        )
-
-
-# Location of a syntax node
-class SyntaxLocation:
-    def __init__(self, line, column, file):
-        self.line = line
-        self.column = column
-        self.file = file
-
-    def __repr__(self):
-        return "SyntaxLocation(line %i, column %i, file '%s')" % (  # pragma: no mutate
-            self.line,
-            self.column,
-            self.file,
-        )
-
-    def __eq__(self, other):
-        return (
-            self.line == other.line
-            and self.column == other.column
-            and self.file == other.file
-        )
+from src.syntax import Syntax, SyntaxLocation, SyntaxType
 
 
 # Checks whether a symbol is space
diff --git a/tests/test_parse.py b/tests/test_parse.py
index 3fd037e..d7d8b91 100644
--- a/tests/test_parse.py
+++ b/tests/test_parse.py
@@ -10,7 +10,8 @@
     one_of,
 )
 
-from src import parse, tokenize
+from src import parse
+from src.syntax import Syntax, SyntaxStream, SyntaxType
 from tests import test_tokenize
 
 
@@ -20,7 +21,7 @@
 # - None is returned at the end of the stream
 @given(lists(test_tokenize.draw_syntax_random()))
 def test_parse_syntax_stream(nodes):
-    stream = parse.SyntaxStream(nodes.copy())
+    stream = SyntaxStream(nodes.copy())
     read = []
     node = stream.pop()
     while node is not None:
@@ -36,7 +37,7 @@
     input = draw(lists(test_tokenize.draw_syntax_random()))
     syntax = []
     for s in input:
-        if s.type != tokenize.SyntaxType.TOKEN or s.value not in ["\n", " "]:
+        if s.type != SyntaxType.TOKEN or s.value not in ["\n", " "]:
             syntax.append(s)
     return (input, syntax)
 
@@ -62,8 +63,8 @@
 @composite
 def draw_token_by_value(draw, value):
     location = draw(test_tokenize.draw_token_location())
-    type = tokenize.SyntaxType.TOKEN
-    return tokenize.Syntax(value, location, type)
+    type = SyntaxType.TOKEN
+    return Syntax(value, location, type)
 
 
 # Draws tokens to make a valid text string and its value
@@ -77,7 +78,7 @@
     end = draw(draw_token_by_value("EndText"))
     all_tokens = [start] + tokens + [end]
     text_value = value.strip("\n\t ")
-    result = tokenize.Syntax(text_value, start.location, tokenize.SyntaxType.TEXT)
+    result = Syntax(text_value, start.location, SyntaxType.TEXT)
     return (all_tokens, result)
 
 
@@ -92,7 +93,7 @@
 @given(test_tokenize.draw_syntax_random(), draw_syntax_text_valid())
 def test_parse_text_valid(canary, test_data):
     (tokens, result) = test_data
-    stream = parse.SyntaxStream(tokens + [canary])
+    stream = SyntaxStream(tokens + [canary])
     parsed = parse.parse_text(stream)
     assert parsed is not None
     assert parsed == result
@@ -110,9 +111,7 @@
     (tokens, _) = draw(draw_syntax_text_valid())
     if draw(booleans()):
         token = draw(test_tokenize.draw_syntax_random())
-        assume(
-            not (token.type == tokenize.SyntaxType.TOKEN and token.value == "StartText")
-        )
+        assume(not (token.type == SyntaxType.TOKEN and token.value == "StartText"))
         new_tokens = [token] + tokens[1:0]
         return new_tokens
     else:
@@ -126,7 +125,7 @@
 def draw_syntax_text_invalid_invalidcontent(draw):
     (tokens, _) = draw(draw_syntax_text_valid())
     token = draw(test_tokenize.draw_syntax_random())
-    assume(token.type != tokenize.SyntaxType.TOKEN)
+    assume(token.type != SyntaxType.TOKEN)
     pos = draw(integers(min_value=1, max_value=(len(tokens) - 1)))
     new_tokens = tokens[0:pos] + [token] + tokens[pos:]
     return new_tokens
@@ -169,7 +168,7 @@
 @given(draw_syntax_text_invalid())
 def test_parse_text_invalid(test_data):
     tokens = test_data
-    stream = parse.SyntaxStream(tokens)
+    stream = SyntaxStream(tokens)
     parsed = parse.parse_text(stream)
     assert parsed is None
 
diff --git a/tests/test_tokenize.py b/tests/test_tokenize.py
index 85e8f47..d150136 100644
--- a/tests/test_tokenize.py
+++ b/tests/test_tokenize.py
@@ -15,6 +15,7 @@
 )
 
 from src import tokenize
+from src.syntax import Syntax, SyntaxLocation, SyntaxType
 
 
 # Keywords recognized by the language
@@ -41,13 +42,13 @@
     line = draw(integers())
     column = draw(integers())
     filename = draw(text())
-    return tokenize.SyntaxLocation(line, column, filename)
+    return SyntaxLocation(line, column, filename)
 
 
 # Draws a random syntax type
 @composite
 def draw_syntax_type(draw):
-    return draw(sampled_from(list(tokenize.SyntaxType)))
+    return draw(sampled_from(list(SyntaxType)))
 
 
 # Draws a token syntax value
@@ -55,8 +56,8 @@
 def draw_syntax_token(draw):
     value = draw(draw_token_classified())
     location = draw(draw_token_location())
-    type = tokenize.SyntaxType.TOKEN
-    return tokenize.Syntax(value.value, location, type)
+    type = SyntaxType.TOKEN
+    return Syntax(value.value, location, type)
 
 
 # Draws a text syntax value
@@ -64,8 +65,8 @@
 def draw_syntax_text(draw):
     value = draw(text())
     location = draw(draw_token_location())
-    type = tokenize.SyntaxType.TEXT
-    return tokenize.Syntax(value, location, type)
+    type = SyntaxType.TEXT
+    return Syntax(value, location, type)
 
 
 # Draws a random syntax
@@ -82,7 +83,7 @@
 @given(text(), draw_token_location(), draw_syntax_type())
 def test_tokenize_syntax_getters(value, location, type):
     # Use text as a somewhat random value
-    test = tokenize.Syntax(value, location, type)
+    test = Syntax(value, location, type)
     assert test.value == value
     assert test.location == location
     assert test.type == type
@@ -104,7 +105,7 @@
 def draw_token_random(draw):
     value = draw(text())
     location = draw(draw_token_location())
-    return tokenize.Syntax(value, location, tokenize.SyntaxType.TOKEN)
+    return Syntax(value, location, SyntaxType.TOKEN)
 
 
 # Draws an unknown token
@@ -117,7 +118,7 @@
     assume(value not in ["True", "False"])
     assume(value not in keywords)
     assume(value[0:2] != "#!")
-    return tokenize.Syntax(value, token.location, tokenize.SyntaxType.TOKEN)
+    return Syntax(value, token.location, SyntaxType.TOKEN)
 
 
 # Draws a space token
@@ -126,7 +127,7 @@
     space = " \t"
     token = draw(draw_token_random())
     value = draw(sampled_from(space))
-    return tokenize.Syntax(value, token.location, tokenize.SyntaxType.TOKEN)
+    return Syntax(value, token.location, SyntaxType.TOKEN)
 
 
 # Draws a new line token
@@ -134,7 +135,7 @@
 def draw_token_newline(draw):
     token = draw(draw_token_random())
     value = "\n"
-    return tokenize.Syntax(value, token.location, tokenize.SyntaxType.TOKEN)
+    return Syntax(value, token.location, SyntaxType.TOKEN)
 
 
 # Draws a bool token
@@ -145,7 +146,7 @@
         value = "True"
     else:
         value = "False"
-    return tokenize.Syntax(value, token.location, tokenize.SyntaxType.TOKEN)
+    return Syntax(value, token.location, SyntaxType.TOKEN)
 
 
 # Draws a keyword token
@@ -153,7 +154,7 @@
 def draw_token_keyword(draw):
     token = draw(draw_token_random())
     value = draw(sampled_from(keywords))
-    return tokenize.Syntax(value, token.location, tokenize.SyntaxType.TOKEN)
+    return Syntax(value, token.location, SyntaxType.TOKEN)
 
 
 # Draws a shebang token
@@ -161,7 +162,7 @@
 def draw_token_shebang(draw):
     token = draw(draw_token_random())
     value = "#!" + draw(text())
-    return tokenize.Syntax(value, token.location, tokenize.SyntaxType.TOKEN)
+    return Syntax(value, token.location, SyntaxType.TOKEN)
 
 
 # Draws a classified token
@@ -182,7 +183,7 @@
 # Test location getters
 @given(integers(), integers(), text())
 def test_tokenize_location_getters(line, column, filename):
-    test = tokenize.SyntaxLocation(line, column, filename)
+    test = SyntaxLocation(line, column, filename)
     assert test.line == line
     assert test.column == column
     assert test.file == filename
@@ -202,7 +203,7 @@
 # Test token getters
 @given(text(), draw_token_location())
 def test_tokenize_token_getters(value, location):
-    test = tokenize.Syntax(value, location, tokenize.SyntaxType.TOKEN)
+    test = Syntax(value, location, SyntaxType.TOKEN)
     assert test.value == value
     assert test.location == location
 
@@ -218,8 +219,8 @@
 @composite
 def draw_token_splitted(draw, strategy):
     token = draw(strategy)
-    location = tokenize.SyntaxLocation(1, 1, "")
-    return tokenize.Syntax(token.value, location, tokenize.SyntaxType.TOKEN)
+    location = SyntaxLocation(1, 1, "")
+    return Syntax(token.value, location, SyntaxType.TOKEN)
 
 
 # Generates an alternating sequence of unknown or whitespace tokens
@@ -269,8 +270,8 @@
     line = 1
     column = 1
     for t in tokens:
-        location = tokenize.SyntaxLocation(line, column, filename)
-        new = tokenize.Syntax(t.value, location, tokenize.SyntaxType.TOKEN)
+        location = SyntaxLocation(line, column, filename)
+        new = Syntax(t.value, location, SyntaxType.TOKEN)
         located.append(new)
         if t.value == "\n":
             line = line + 1
@@ -306,7 +307,7 @@
         value = draw(text(min_size=1)) + value
     if draw(booleans()):
         value = value + draw(text(min_size=1))
-    return tokenize.Syntax(value, token.location, tokenize.SyntaxType.TOKEN)
+    return Syntax(value, token.location, SyntaxType.TOKEN)
 
 
 # Draw a random string made of token values
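
A quick usage sketch for reviewers, not part of the patch: it shows how the
classes relocated into src/syntax fit together with parse.parse_text after this
change. The file name "demo.txt" and the token values are made up for
illustration, and the expected result assumes parse_text concatenates every
token value between the StartText/EndText delimiters, as the
"buffer += s.value" fragment above suggests.

    # Hypothetical demo script, run from the repository root
    from src.parse import parse_text
    from src.syntax import Syntax, SyntaxLocation, SyntaxStream, SyntaxType

    # All nodes share one location here for brevity; real tokens carry the
    # line/column/file where the tokenizer found them
    loc = SyntaxLocation(1, 1, "demo.txt")

    # A text block is the StartText token, any content tokens, then EndText
    tokens = [
        Syntax("StartText", loc, SyntaxType.TOKEN),
        Syntax("Hello", loc, SyntaxType.TOKEN),
        Syntax(" ", loc, SyntaxType.TOKEN),
        Syntax("World", loc, SyntaxType.TOKEN),
        Syntax("EndText", loc, SyntaxType.TOKEN),
    ]

    # SyntaxStream.pop() consumes nodes front to back; parse_text reads the
    # stream and builds a single TEXT node located at the StartText token
    stream = SyntaxStream(tokens)
    result = parse_text(stream)

    assert result is not None
    assert result.type == SyntaxType.TEXT
    assert result.location == loc
    # Under the concatenate-and-strip logic shown above, this should compare
    # equal to Syntax("Hello World", loc, SyntaxType.TEXT)
    print(result)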