diff --git a/src/parse.py b/src/parse.py
index 79fe65b..bb3d177 100644
--- a/src/parse.py
+++ b/src/parse.py
@@ -3,7 +3,7 @@
 import enum
 from src.ast_types import Bool, Text
-from src.syntax import SyntaxStream
+from src.token import TokenStream
 # Tasks that happen during parsing
@@ -17,17 +17,17 @@
 # Context used for parse error exception
 class ParseContext:
-    def __init__(self, task, syntax, parent):
+    def __init__(self, task, token, parent):
         self.task = task
-        self.syntax = syntax
+        self.token = token
         self.parent = parent
     def __repr__(self):
         return (
-            "ParseContext(task %s, syntax %s, parent %s)"  # pragma: no mutate
+            "ParseContext(task %s, token %s, parent %s)"  # pragma: no mutate
             % (  # pragma: no mutate
                 self.task,
-                self.syntax,
+                self.token,
                 self.parent,
             )
         )
@@ -35,7 +35,7 @@
     def __eq__(self, other):
         return (
             self.task == other.task
-            and self.syntax == other.syntax
+            and self.token == other.token
             and self.parent == other.parent
         )
@@ -53,18 +53,18 @@
 # Exception thrown when a parse error is encountered
 class ParseErrorException(BaseException):
-    def __init__(self, error, syntax, expected, context):
+    def __init__(self, error, token, expected, context):
         self.error = error
-        self.syntax = syntax
+        self.token = token
         self.expected = expected
         self.context = context
     def __repr__(self):
         return (
-            "ParseErrorException(error %s, syntax %s, expected %s, context %s)"  # pragma: no mutate
+            "ParseErrorException(error %s, token %s, expected %s, context %s)"  # pragma: no mutate
             % (  # pragma: no mutate
                 self.error,
-                self.syntax,
+                self.token,
                 self.expected,
                 self.context,
             )
         )
@@ -73,7 +73,7 @@
     def __eq__(self, other):
         return (
             self.error == other.error
-            and self.syntax == other.syntax
+            and self.token == other.token
             and self.expected == other.expected
             and self.context == other.context
         )
@@ -81,12 +81,12 @@
 # Reads a token, possibly of a certain value
 def read_token(stream, value, context):
-    s = stream.pop()
-    if s is None:
+    t = stream.pop()
+    if t is None:
         raise ParseErrorException(ParseError.NO_TOKEN, None, None, context)
-    elif value is not None and s.value != value:
-        raise ParseErrorException(ParseError.WRONG_TOKEN, s, value, context)
-    return s
+    elif value is not None and t.value != value:
+        raise ParseErrorException(ParseError.WRONG_TOKEN, t, value, context)
+    return t
 # The note skipper in a wrapper class for easy testing
@@ -96,12 +96,12 @@
         context = ParseContext(ParseTask.PARSE_NOTE, stream.peek(), parent_context)
         read_token(stream, "StartNote", context)
         while True:
-            s = read_token(stream, None, context)
+            t = read_token(stream, None, context)
             # Don't allow StartNote in notes
-            if s.value in ["StartNote"]:
-                raise ParseErrorException(ParseError.FOUND_STARTNOTE, s, None, context)
+            if t.value in ["StartNote"]:
+                raise ParseErrorException(ParseError.FOUND_STARTNOTE, t, None, context)
             # EndNote found, end things
-            elif s.value == "EndNote":
+            elif t.value == "EndNote":
                 break
         return None
@@ -128,39 +128,39 @@
 # The recursive descent parser in a wrapper class for easy testing
 class Parser:
-    # Parses a text syntax node
+    # Parses a text node
     def parse_text(self, stream, parent_context):
         context = ParseContext(ParseTask.PARSE_TEXT, stream.peek(), parent_context)
         buffer = ""
-        s = read_token(stream, "StartText", context)
+        t = read_token(stream, "StartText", context)
         # Parse following tokens
         while True:
-            s = read_token(stream, None, context)
+            t = read_token(stream, None, context)
             # Don't allow StartText in text
-            if s.value in ["StartText"]:
-                raise ParseErrorException(ParseError.FOUND_STARTTEXT, s, None, context)
+            if t.value in ["StartText"]:
+                raise ParseErrorException(ParseError.FOUND_STARTTEXT, t, None, context)
             # EndText found, end things
-            elif s.value == "EndText":
+            elif t.value == "EndText":
                 break
             else:
-                buffer += s.value + " "
+                buffer += t.value + " "
         value = buffer[:-1]  # Drop trailing space
         return Text(value)
-    # Parses a boolean syntax node
+    # Parses a boolean node
     def parse_bool(self, stream, parent_context):
         context = ParseContext(ParseTask.PARSE_BOOL, stream.peek(), parent_context)
-        s = read_token(stream, None, context)
-        if s.value == "True":
+        t = read_token(stream, None, context)
+        if t.value == "True":
             return Bool(True)
-        elif s.value == "False":
+        elif t.value == "False":
             return Bool(False)
         else:
-            raise ParseErrorException(ParseError.NOT_BOOL, s, None, context)
+            raise ParseErrorException(ParseError.NOT_BOOL, t, None, context)
 # Parses tokens
 def parse(tokens, context):
-    stream = SyntaxStream(tokens)
+    stream = TokenStream(tokens)
     cleared = NoteSkipper().clear_notes(stream, context)
     return cleared
diff --git a/src/syntax.py b/src/syntax.py
deleted file mode 100644
index 4a17f8b..0000000
--- a/src/syntax.py
+++ /dev/null
@@ -1,61 +0,0 @@
-# SPDX-License-Identifier: LGPL-2.1-only
-# Copyright 2022 Jookia
-
-
-# Represents a syntax node
-class Syntax:
-    def __init__(self, value, location):
-        self.value = value
-        self.location = location
-
-    def __repr__(self):
-        return "Syntax(value %s, location %s)" % (  # pragma: no mutate
-            repr(self.value),
-            repr(self.location),
-        )
-
-    def __eq__(self, other):
-        return self.value == other.value and self.location == other.location
-
-
-# Location of a syntax node
-class SyntaxLocation:
-    def __init__(self, line, offset, file):
-        self.line = line
-        self.offset = offset
-        self.file = file
-
-    def __repr__(self):
-        return "SyntaxLocation(line %i, offset %i, file '%s')" % (  # pragma: no mutate
-            self.line,
-            self.offset,
-            self.file,
-        )
-
-    def __eq__(self, other):
-        return (
-            self.line == other.line
-            and self.offset == other.offset
-            and self.file == other.file
-        )
-
-
-# Represents a stream of consumable syntax nodes
-class SyntaxStream:
-    def __init__(self, nodes):
-        self.nodes = nodes
-
-    def __repr__(self):
-        return "SyntaxStream(%s)" % (self.nodes)  # pragma: no mutate
-
-    def pop(self):
-        if self.nodes:
-            return self.nodes.pop(0)
-        else:
-            return None
-
-    def peek(self):
-        if self.nodes:
-            return self.nodes[0]
-        else:
-            return None
diff --git a/src/token.py b/src/token.py
new file mode 100644
index 0000000..ae0a348
--- /dev/null
+++ b/src/token.py
@@ -0,0 +1,61 @@
+# SPDX-License-Identifier: LGPL-2.1-only
+# Copyright 2022 Jookia
+
+
+# Represents a token
+class Token:
+    def __init__(self, value, location):
+        self.value = value
+        self.location = location
+
+    def __repr__(self):
+        return "Token(value %s, location %s)" % (  # pragma: no mutate
+            repr(self.value),
+            repr(self.location),
+        )
+
+    def __eq__(self, other):
+        return self.value == other.value and self.location == other.location
+
+
+# Location of a token
+class TokenLocation:
+    def __init__(self, line, offset, file):
+        self.line = line
+        self.offset = offset
+        self.file = file
+
+    def __repr__(self):
+        return "TokenLocation(line %i, offset %i, file '%s')" % (  # pragma: no mutate
+            self.line,
+            self.offset,
+            self.file,
+        )
+
+    def __eq__(self, other):
+        return (
+            self.line == other.line
+            and self.offset == other.offset
+            and self.file == other.file
+        )
+
+
+# Represents a stream of consumable tokens
+class TokenStream:
+    def __init__(self, tokens):
+        self.tokens = tokens
+
+    def __repr__(self):
+        return "TokenStream(%s)" % (self.tokens)  # pragma: no mutate
+
+    def pop(self):
+        if self.tokens:
+            return self.tokens.pop(0)
+        else:
+            return None
+
+    def peek(self):
+        if self.tokens:
+            return self.tokens[0]
+        else:
+            return None
diff --git a/src/tokenize.py b/src/tokenize.py
index f6c0886..7a11f38 100644
--- a/src/tokenize.py
+++ b/src/tokenize.py
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: LGPL-2.1-only
 # Copyright 2022 Jookia
-from src.syntax import Syntax, SyntaxLocation
+from src.token import Token, TokenLocation
 # Valid space code points
 spaces = [
@@ -34,7 +34,7 @@
     tokens = []
     prev = input[0]
     buffer = prev
-    location = SyntaxLocation(1, 1, "")
+    location = TokenLocation(1, 1, "")
     for curr in input[1:]:
         curr_space = is_whitespace(curr)
         prev_space = is_whitespace(prev)
@@ -45,11 +45,11 @@
         # Don't flush if we're in the middle of a CR LF sequence
         flush = switching or (curr_space and not crlf)
         if flush:
-            tokens.append(Syntax(buffer, location))
+            tokens.append(Token(buffer, location))
             buffer = ""
         buffer += curr
         prev = curr
-    tokens.append(Syntax(buffer, location))
+    tokens.append(Token(buffer, location))
     return tokens
@@ -59,8 +59,8 @@
     line = 1
     offset = 1
     for t in tokens:
-        location = SyntaxLocation(line, offset, filename)
-        new = Syntax(t.value, location)
+        location = TokenLocation(line, offset, filename)
+        new = Token(t.value, location)
         new_tokens.append(new)
         if t.value in newlines:
             line = line + 1
@@ -71,11 +71,11 @@
 # Removes whitespace tokens
-def strip_whitespace(syntax):
+def strip_whitespace(tokens):
     output = []
-    for s in syntax:
-        if not is_whitespace(s.value):
-            output.append(s)
+    for t in tokens:
+        if not is_whitespace(t.value):
+            output.append(t)
     return output
diff --git a/tests/parse/templates.py b/tests/parse/templates.py
index 7ae2f59..b1db977 100644
--- a/tests/parse/templates.py
+++ b/tests/parse/templates.py
@@ -4,9 +4,10 @@
 from hypothesis import given
 from hypothesis.strategies import composite, integers
-from src.parse import ParseErrorException, SyntaxStream
+from src.parse import ParseErrorException
+from src.token import TokenStream
 from tests.parse.test_parse import draw_parse_context
-from tests.test_syntax import draw_syntax_random
+from tests.test_token import draw_token_random
 # Inserts an element randomly between the first and last token of a list
@@ -22,13 +23,13 @@
 # - The decorated function is unused
 # - Only the supplied tokens are parsed
 # - The supplied tokens parse to the expected value
-# - The Syntax's value is the expected value
-# - The Syntax's location is the first token's location
+# - The Token's value is the expected value
+# - The Token's location is the first token's location
 def template_parse_valid(parser, draw):
-    @given(draw_syntax_random(), draw)
+    @given(draw_token_random(), draw)
     def do(canary, test_data):
         (tokens, expected) = test_data
-        stream = SyntaxStream(tokens + [canary])
+        stream = TokenStream(tokens + [canary])
         parsed = parser(stream, None)
         if expected is None:
             assert parsed is None
@@ -59,7 +60,7 @@
     # test_data is the output of wrapper
     def do(test_data):
         (tokens, error, context) = test_data
-        stream = SyntaxStream(tokens)
+        stream = TokenStream(tokens)
         try:
             parsed = parser(stream, context)
             raise AssertionError("Parsed invalid data: %s" % (parsed))
diff --git a/tests/parse/test_bool.py b/tests/parse/test_bool.py
index aa60817..49ba67f 100644
--- a/tests/parse/test_bool.py
+++ b/tests/parse/test_bool.py
@@ -7,12 +7,12 @@
 from src.ast_types import Bool
 from src.parse import ParseContext, ParseError, ParseErrorException, ParseTask, Parser
 from tests.parse.templates import template_parse_valid, template_parse_invalid
-from tests.test_syntax import draw_token_bool, draw_syntax_random
+from tests.test_token import draw_token_bool, draw_token_random
 # Draws tokens to make a valid boolean
 @composite
-def draw_syntax_bool_valid(draw):
+def draw_token_bool_valid(draw):
     token = draw(draw_token_bool())
     value = token.value == "True"
     return ([token], Bool(value))
@@ -20,8 +20,8 @@
 # Draws tokens to not make a valid boolean
 @composite
-def draw_syntax_not_bool(draw):
-    token = draw(draw_syntax_random())
+def draw_token_not_bool(draw):
+    token = draw(draw_token_random())
     assume(token.value not in ["True", "False"])
     return token
@@ -31,7 +31,7 @@
 # - The resulting boolean is True if the first token is True
 # - The resulting boolean is False if the first token is False
 # template_parse_valid provides general parsing properties
-@template_parse_valid(Parser().parse_bool, draw_syntax_bool_valid())
+@template_parse_valid(Parser().parse_bool, draw_token_bool_valid())
 def test_parse_bool_valid():
     pass
@@ -41,7 +41,7 @@
 # - Error if the token is not True or False
 @template_parse_invalid(Parser().parse_bool)
 def test_parse_bool_invalid_incorrect(draw, parent_context):
-    token = draw(draw_syntax_not_bool())
+    token = draw(draw_token_not_bool())
     context = ParseContext(ParseTask.PARSE_BOOL, token, parent_context)
     error = ParseErrorException(ParseError.NOT_BOOL, token, None, context)
     return ([token], error)
diff --git a/tests/parse/test_clear_notes.py b/tests/parse/test_clear_notes.py
index 8f088eb..a1eb13e 100644
--- a/tests/parse/test_clear_notes.py
+++ b/tests/parse/test_clear_notes.py
@@ -11,9 +11,9 @@
     ParseErrorException,
     ParseTask,
 )
-from src.syntax import SyntaxStream
+from src.token import TokenStream
 from tests.parse.templates import template_parse_invalid
-from tests.test_syntax import draw_token_by_value, draw_syntax_random
+from tests.test_token import draw_token_by_value, draw_token_random
 # Dummy parse_note implementation for testing note clearing
@@ -46,17 +46,17 @@
     return skipper
-# Draws a random syntax suitable for note clearing testing
+# Draws a random token suitable for note clearing testing
 @composite
 def draw_clear_notes_value(draw):
-    token = draw(draw_syntax_random())
+    token = draw(draw_token_random())
     assume(token.value != "EndNote")
     return token
-# Draws syntax to make a valid soup to clear notes
+# Draws tokens to make a valid soup to clear notes
 @composite
-def draw_syntax_clear_notes_valid(draw):
+def draw_token_clear_notes_valid(draw):
     tokens = draw(lists(draw_clear_notes_value()))
     output = []
     for token in tokens:
@@ -70,11 +70,11 @@
 # Tests clear_notes works correctly
 # We expect the following behaviour:
 # - When StartNote is encountered skip_note is called to skip the note
-# - Other syntax is passed through
-@given(draw_syntax_clear_notes_valid())
+# - Other tokens are passed through
+@given(draw_token_clear_notes_valid())
 def test_parse_clear_notes_valid(test_data):
     (tokens, result) = test_data
-    stream = SyntaxStream(tokens)
+    stream = TokenStream(tokens)
     cleared = dummy_skipper_valid().clear_notes(stream, None)
     assert cleared == result
diff --git a/tests/parse/test_note.py b/tests/parse/test_note.py
index 886618f..729d0a6 100644
--- a/tests/parse/test_note.py
+++ b/tests/parse/test_note.py
@@ -19,31 +19,31 @@
     template_parse_invalid,
     template_parse_valid,
 )
-from tests.test_syntax import (
+from tests.test_token import (
     draw_token_by_value,
-    draw_syntax_random,
+    draw_token_random,
 )
 # Draws a random token suitable for note building
 @composite
 def draw_note_value_token(draw):
-    token = draw(draw_syntax_random())
+    token = draw(draw_token_random())
     assume(token.value not in ["StartNote", "EndNote"])
     return token
-# Draws a random syntax that isn't a StartNote token
+# Draws a random token that isn't a StartNote token
 @composite
-def draw_syntax_not_startnote(draw):
-    token = draw(draw_syntax_random())
+def draw_token_not_startnote(draw):
+    token = draw(draw_token_random())
     assume(token.value != "StartNote")
     return token
 # Draws tokens to make a valid note
 @composite
-def draw_syntax_note_valid(draw):
+def draw_token_note_valid(draw):
     tokens = draw(lists(draw_note_value_token()))
     start = draw(draw_token_by_value("StartNote"))
     end = draw(draw_token_by_value("EndNote"))
@@ -55,7 +55,7 @@
 # We expect the following behaviour:
 # - No value is returned
 # template_parse_valid provides general parsing properties
-@template_parse_valid(NoteSkipper().skip_note, draw_syntax_note_valid())
+@template_parse_valid(NoteSkipper().skip_note, draw_token_note_valid())
 def test_parse_note_valid():
     pass
@@ -65,8 +65,8 @@
 # - Error if StartNote's token value is not "StartNote"
 @template_parse_invalid(NoteSkipper().skip_note)
 def test_parse_note_invalid_nostartnote(draw, parent_context):
-    (tokens, _) = draw(draw_syntax_note_valid())
-    token = draw(draw_syntax_not_startnote())
+    (tokens, _) = draw(draw_token_note_valid())
+    token = draw(draw_token_not_startnote())
     new_tokens = [token] + tokens[1:0]
     context = ParseContext(ParseTask.PARSE_NOTE, new_tokens[0], parent_context)
     error = ParseErrorException(ParseError.WRONG_TOKEN, token, "StartNote", context)
@@ -88,7 +88,7 @@
 # - Error if a StartNote token is in the note content
 @template_parse_invalid(NoteSkipper().skip_note)
 def test_parse_note_invalid_extrastartnote(draw, parent_context):
-    (tokens, _) = draw(draw_syntax_note_valid())
+    (tokens, _) = draw(draw_token_note_valid())
     start = draw(draw_token_by_value("StartNote"))
     new_tokens = insert_random_within(draw, tokens, start)
     context = ParseContext(ParseTask.PARSE_NOTE, new_tokens[0], parent_context)
@@ -101,7 +101,7 @@
 # - Error if there is no EndNote token at all
 @template_parse_invalid(NoteSkipper().skip_note)
 def test_parse_note_invalid_noendnote(draw, parent_context):
-    (tokens, _) = draw(draw_syntax_note_valid())
+    (tokens, _) = draw(draw_token_note_valid())
     context = ParseContext(ParseTask.PARSE_NOTE, tokens[0], parent_context)
     error = ParseErrorException(ParseError.NO_TOKEN, None, None, context)
     return (tokens[0:-1], error)
diff --git a/tests/parse/test_parse.py b/tests/parse/test_parse.py
index cdee204..3711dc7 100644
--- a/tests/parse/test_parse.py
+++ b/tests/parse/test_parse.py
@@ -17,9 +17,9 @@
     ParseContext,
     parse,
 )
-from src.syntax import SyntaxStream
+from src.token import TokenStream
 from tests.templates import template_test_structure
-from tests.test_syntax import draw_syntax_random
+from tests.test_token import draw_token_random
 # Draws a random parse task
@@ -32,8 +32,8 @@
 @composite
 def draw_parse_context(draw):
     task = draw(draw_parse_task())
-    syntax = draw(draw_syntax_random())
-    return ParseContext(task, syntax, None)
+    token = draw(draw_token_random())
+    return ParseContext(task, token, None)
 # Test parse context structure
@@ -41,7 +41,7 @@
     ParseContext,
     draw_parse_context(),
     task=draw_parse_task(),
-    syntax=draw_syntax_random(),
+    token=draw_token_random(),
     parent=text(),
 )
 def test_parse_context_structure():
@@ -58,18 +58,18 @@
 @composite
 def draw_parse_error_exception(draw):
     error = draw(draw_parse_error())
-    syntax = draw(draw_syntax_random())
+    token = draw(draw_token_random())
     expected = draw(text())
     context = draw(draw_parse_context())
-    return ParseErrorException(error, syntax, expected, context)
+    return ParseErrorException(error, token, expected, context)
 # Test parse error exception structure
 @template_test_structure(
     ParseErrorException,
     draw_parse_error_exception(),
-    error=draw_syntax_random(),
-    syntax=draw_syntax_random(),
+    error=draw_token_random(),
+    token=draw_token_random(),
     expected=text(),
     context=draw_parse_context(),
 )
@@ -80,12 +80,12 @@
 # Tests the parser wrapper works correctly
 # We expect the following behaviour:
 # - Notes to be removed from the tokens
-@given(lists(draw_syntax_random()), draw_parse_context())
+@given(lists(draw_token_random()), draw_parse_context())
 def test_parse_fuzz(tokens, context):
     result = None
     parsed = None
     try:
-        stream = SyntaxStream(tokens.copy())
+        stream = TokenStream(tokens.copy())
         result = NoteSkipper().clear_notes(stream, context)
     except ParseErrorException as e:
         result = e
diff --git a/tests/parse/test_text.py b/tests/parse/test_text.py
index 3d19890..0ce4c36 100644
--- a/tests/parse/test_text.py
+++ b/tests/parse/test_text.py
@@ -20,31 +20,31 @@
     template_parse_invalid,
     template_parse_valid,
 )
-from tests.test_syntax import (
+from tests.test_token import (
     draw_token_by_value,
-    draw_syntax_random,
+    draw_token_random,
 )
 # Draws a random token suitable for text building
 @composite
 def draw_text_value_token(draw):
-    token = draw(draw_syntax_random())
+    token = draw(draw_token_random())
     assume(token.value not in ["StartText", "EndText"])
     return token
-# Draws a random syntax that isn't StartText token
+# Draws a random token that isn't a StartText token
 @composite
-def draw_syntax_not_starttext(draw):
-    token = draw(draw_syntax_random())
+def draw_token_not_starttext(draw):
+    token = draw(draw_token_random())
     assume(token.value != "StartText")
     return token
 # Draws tokens to make a valid text string and its value
 @composite
-def draw_syntax_text_valid(draw):
+def draw_token_text_valid(draw):
     tokens = draw(lists(draw_text_value_token()))
     buffer = ""
     for token in tokens:
@@ -60,9 +60,9 @@
 # We expect the following behaviour:
 # - The resulting text is the value of tokens between StartText and EndText
 # - The value of the tokens is joined by U+0020 SPACE code points
-# - The Syntax's value is the resulting text
+# - The Token's value is the resulting text
 # template_parse_valid provides general parsing properties
-@template_parse_valid(Parser().parse_text, draw_syntax_text_valid())
+@template_parse_valid(Parser().parse_text, draw_token_text_valid())
 def test_parse_text_valid():
     pass
@@ -72,8 +72,8 @@
 # - Error if StartText's token value is not "StartText"
 @template_parse_invalid(Parser().parse_text)
 def test_parse_text_invalid_nostarttext(draw, parent_context):
-    (tokens, _) = draw(draw_syntax_text_valid())
-    token = draw(draw_syntax_not_starttext())
+    (tokens, _) = draw(draw_token_text_valid())
+    token = draw(draw_token_not_starttext())
     new_tokens = [token] + tokens[1:0]
     context = ParseContext(ParseTask.PARSE_TEXT, new_tokens[0], parent_context)
     error = ParseErrorException(ParseError.WRONG_TOKEN, token, "StartText", context)
@@ -95,7 +95,7 @@
 # - Error if a StartText token is in the text content
 @template_parse_invalid(Parser().parse_text)
 def test_parse_text_invalid_extrastarttext(draw, parent_context):
-    (tokens, _) = draw(draw_syntax_text_valid())
+    (tokens, _) = draw(draw_token_text_valid())
     start = draw(draw_token_by_value("StartText"))
     new_tokens = insert_random_within(draw, tokens, start)
     context = ParseContext(ParseTask.PARSE_TEXT, new_tokens[0], parent_context)
@@ -108,7 +108,7 @@
 # - Error if there is no EndText token at all
 @template_parse_invalid(Parser().parse_text)
 def test_parse_text_invalid_noendtext(draw, parent_context):
-    (tokens, _) = draw(draw_syntax_text_valid())
+    (tokens, _) = draw(draw_token_text_valid())
     context = ParseContext(ParseTask.PARSE_TEXT, tokens[0], parent_context)
     error = ParseErrorException(ParseError.NO_TOKEN, None, None, context)
     return (tokens[0:-1], error)
diff --git a/tests/test_syntax.py b/tests/test_syntax.py
deleted file mode 100644
index 2b02b33..0000000
--- a/tests/test_syntax.py
+++ /dev/null
@@ -1,220 +0,0 @@
-# SPDX-License-Identifier: LGPL-2.1-only
-# Copyright 2022 Jookia
-
-from hypothesis import given, assume
-from hypothesis.strategies import (
-    booleans,
-    characters,
-    composite,
-    integers,
-    lists,
-    one_of,
-    sampled_from,
-    text,
-)
-
-from src.syntax import Syntax, SyntaxLocation, SyntaxStream
-from tests.templates import template_test_structure
-
-# Keywords recognized by the language
-keywords = [
-    "Done",
-    "Set",
-    "To",
-    "EndSet",
-    "If",
-    "Then",
-    "Else",
-    "EndIf",
-    "StartNote",
-    "EndNote",
-    "StartText",
-    "EndText",
-]
-
-# Literals recognized by the language
-literals = [
-    "True",
-    "False",
-]
-
-
-# Draws a random syntax location
-@composite
-def draw_syntax_location(draw):
-    line = draw(integers())
-    offset = draw(integers())
-    filename = draw(text())
-    return SyntaxLocation(line, offset, filename)
-
-
-# Test syntax location structure
-@template_test_structure(
-    SyntaxLocation,
-    draw_syntax_location(),
-    line=integers(),
-    offset=integers(),
-    file=text(),
-)
-def test_syntax_location_structure():
-    pass
-
-
-# Draws a token with a specific value but random location
-@composite
-def draw_token_by_value(draw, value):
-    location = draw(draw_syntax_location())
-    return Syntax(value, location)
-
-
-# Values considered spaces
-valid_spaces = [
-    "\t",  # U+0009 HORIZONTAL TAB
-    " ",  # U+0020 SPACE
-]
-
-# Single values reserved for new line use
-single_newlines = [
-    "\n",  # U+000A LINE FEED
-    "\v",  # U+000B VERTICAL TAB
-    "\f",  # U+000C FORM FEED
-    "\r",  # U+000D CARRIAGE RETURN
-    "\u0085",  # U+0085 NEXT LINE
-    "\u2028",  # U+2028 LINE SEPARATOR
-    "\u2029",  # U+2029 PARAGRAPH SEPARATOR
-]
-
-# Multi values reserved for new line use
-multi_newlines = [
-    "\r\n",  # U+000A U+000D CARRIAGE RETURN then LINE FEED
-]
-
-# All values reserved for new line use
-valid_newlines = single_newlines + multi_newlines
-
-
-# Draws an unknown token
-@composite
-def draw_token_unknown(draw):
-    reserved = valid_spaces + single_newlines
-    location = draw(draw_syntax_location())
-    chars = characters(blacklist_characters=reserved)
-    value = draw(text(alphabet=chars, min_size=1))
-    for v in multi_newlines:
-        assume(v not in value)
-    assume(value not in literals)
-    assume(value not in keywords)
-    return Syntax(value, location)
-
-
-# Draws a space token
-@composite
-def draw_token_space(draw):
-    location = draw(draw_syntax_location())
-    value = draw(sampled_from(valid_spaces))
-    return Syntax(value, location)
-
-
-# Draws a new line token
-@composite
-def draw_token_newline(draw):
-    location = draw(draw_syntax_location())
-    value = draw(sampled_from(valid_newlines))
-    return Syntax(value, location)
-
-
-# Draws a bool token
-@composite
-def draw_token_bool(draw):
-    location = draw(draw_syntax_location())
-    if draw(booleans()):
-        value = "True"
-    else:
-        value = "False"
-    return Syntax(value, location)
-
-
-# Draws a keyword token
-@composite
-def draw_token_keyword(draw):
-    location = draw(draw_syntax_location())
-    value = draw(sampled_from(keywords))
-    return Syntax(value, location)
-
-
-# Draws a random syntax token
-@composite
-def draw_syntax_random(draw):
-    strategies = [
-        draw_token_unknown(),
-        draw_token_space(),
-        draw_token_newline(),
-        draw_token_bool(),
-        draw_token_keyword(),
-    ]
-    token = draw(one_of(strategies))
-    return token
-
-
-# Test syntax structure
-@template_test_structure(
-    Syntax,
-    draw_syntax_random(),
-    value=text(),
-    location=draw_syntax_location(),
-)
-def test_syntax_syntax_structure():
-    pass
-
-
-# Tests that a syntax stream pops items correctly
-# We expect the following behaviour:
-# - All items are popped in order
-# - None is returned at the end of the stream
-@given(lists(draw_syntax_random()))
-def test_syntax_syntax_stream_pop(nodes):
-    stream = SyntaxStream(nodes.copy())
-    read = []
-    node = stream.pop()
-    while node is not None:
-        read.append(node)
-        node = stream.pop()
-    assert read == nodes
-    assert stream.pop() is None
-
-
-# Tests that a syntax stream peeks items correctly
-# We expect the following behaviour:
-# - Peeking does not pop any values
-# - None is returned at the end of the stream
-@given(lists(draw_syntax_random()), integers(min_value=0, max_value=100))
-def test_syntax_syntax_stream_peek(nodes, times):
-    stream = SyntaxStream(nodes.copy())
-    node_count = len(stream.nodes)
-    if node_count == 0:
-        real_times = times
-        expected = None
-    else:
-        real_times = times % node_count
-        expected = nodes[0]
-    for _ in range(0, real_times):
-        node = stream.peek()
-        assert node == expected
-
-
-# Tests that peeking and popping don't influence each other
-# We expect the following behaviour:
-# - Peeking does not influence the next pop call
-# - Popping does not influence the next peep call
-@given(lists(draw_syntax_random()))
-def test_syntax_syntax_stream_mixed(nodes):
-    stream = SyntaxStream(nodes.copy())
-    read = []
-    node = True
-    while node is not None:
-        peeked = stream.peek()
-        node = stream.pop()
-        read.append(node)
-        assert peeked == node
-    assert read[:-1] == nodes  # Skip None at end
-    assert stream.pop() is None
diff --git a/tests/test_token.py b/tests/test_token.py
new file mode 100644
index 0000000..f66695c
--- /dev/null
+++ b/tests/test_token.py
@@ -0,0 +1,220 @@
+# SPDX-License-Identifier: LGPL-2.1-only
+# Copyright 2022 Jookia
+
+from hypothesis import given, assume
+from hypothesis.strategies import (
+    booleans,
+    characters,
+    composite,
+    integers,
+    lists,
+    one_of,
+    sampled_from,
+    text,
+)
+
+from src.token import Token, TokenLocation, TokenStream
+from tests.templates import template_test_structure
+
+# Keywords recognized by the language
+keywords = [
+    "Done",
+    "Set",
+    "To",
+    "EndSet",
+    "If",
+    "Then",
+    "Else",
+    "EndIf",
+    "StartNote",
+    "EndNote",
+    "StartText",
+    "EndText",
+]
+
+# Literals recognized by the language
+literals = [
+    "True",
+    "False",
+]
+
+
+# Draws a random token location
+@composite
+def draw_token_location(draw):
+    line = draw(integers())
+    offset = draw(integers())
+    filename = draw(text())
+    return TokenLocation(line, offset, filename)
+
+
+# Test token location structure
+@template_test_structure(
+    TokenLocation,
+    draw_token_location(),
+    line=integers(),
+    offset=integers(),
+    file=text(),
+)
+def test_token_location_structure():
+    pass
+
+
+# Draws a token with a specific value but random location
+@composite
+def draw_token_by_value(draw, value):
+    location = draw(draw_token_location())
+    return Token(value, location)
+
+
+# Values considered spaces
+valid_spaces = [
+    "\t",  # U+0009 HORIZONTAL TAB
+    " ",  # U+0020 SPACE
+]
+
+# Single values reserved for new line use
+single_newlines = [
+    "\n",  # U+000A LINE FEED
+    "\v",  # U+000B VERTICAL TAB
+    "\f",  # U+000C FORM FEED
+    "\r",  # U+000D CARRIAGE RETURN
+    "\u0085",  # U+0085 NEXT LINE
+    "\u2028",  # U+2028 LINE SEPARATOR
+    "\u2029",  # U+2029 PARAGRAPH SEPARATOR
+]
+
+# Multi values reserved for new line use
+multi_newlines = [
+    "\r\n",  # U+000D U+000A CARRIAGE RETURN then LINE FEED
+]
+
+# All values reserved for new line use
+valid_newlines = single_newlines + multi_newlines
+
+
+# Draws an unknown token
+@composite
+def draw_token_unknown(draw):
+    reserved = valid_spaces + single_newlines
+    location = draw(draw_token_location())
+    chars = characters(blacklist_characters=reserved)
+    value = draw(text(alphabet=chars, min_size=1))
+    for v in multi_newlines:
+        assume(v not in value)
+    assume(value not in literals)
+    assume(value not in keywords)
+    return Token(value, location)
+
+
+# Draws a space token
+@composite
+def draw_token_space(draw):
+    location = draw(draw_token_location())
+    value = draw(sampled_from(valid_spaces))
+    return Token(value, location)
+
+
+# Draws a new line token
+@composite
+def draw_token_newline(draw):
+    location = draw(draw_token_location())
+    value = draw(sampled_from(valid_newlines))
+    return Token(value, location)
+
+
+# Draws a bool token
+@composite
+def draw_token_bool(draw):
+    location = draw(draw_token_location())
+    if draw(booleans()):
+        value = "True"
+    else:
+        value = "False"
+    return Token(value, location)
+
+
+# Draws a keyword token
+@composite
+def draw_token_keyword(draw):
+    location = draw(draw_token_location())
+    value = draw(sampled_from(keywords))
+    return Token(value, location)
+
+
+# Draws a random token
+@composite
+def draw_token_random(draw):
+    strategies = [
+        draw_token_unknown(),
+        draw_token_space(),
+        draw_token_newline(),
+        draw_token_bool(),
+        draw_token_keyword(),
+    ]
+    token = draw(one_of(strategies))
+    return token
+
+
+# Test token structure
+@template_test_structure(
+    Token,
+    draw_token_random(),
+    value=text(),
+    location=draw_token_location(),
+)
+def test_token_token_structure():
+    pass
+
+
+# Tests that a token stream pops items correctly
+# We expect the following behaviour:
+# - All items are popped in order
+# - None is returned at the end of the stream
+@given(lists(draw_token_random()))
+def test_token_token_stream_pop(tokens):
+    stream = TokenStream(tokens.copy())
+    read = []
+    token = stream.pop()
+    while token is not None:
+        read.append(token)
+        token = stream.pop()
+    assert read == tokens
+    assert stream.pop() is None
+
+
+# Tests that a token stream peeks items correctly
+# We expect the following behaviour:
+# - Peeking does not pop any values
+# - None is returned at the end of the stream
+@given(lists(draw_token_random()), integers(min_value=0, max_value=100))
+def test_token_token_stream_peek(tokens, times):
+    stream = TokenStream(tokens.copy())
+    token_count = len(stream.tokens)
+    if token_count == 0:
+        real_times = times
+        expected = None
+    else:
+        real_times = times % token_count
+        expected = tokens[0]
+    for _ in range(0, real_times):
+        token = stream.peek()
+        assert token == expected
+
+
+# Tests that peeking and popping don't influence each other
+# We expect the following behaviour:
+# - Peeking does not influence the next pop call
+# - Popping does not influence the next peek call
+@given(lists(draw_token_random()))
+def test_token_token_stream_mixed(tokens):
+    stream = TokenStream(tokens.copy())
+    read = []
+    token = True
+    while token is not None:
+        peeked = stream.peek()
+        token = stream.pop()
+        read.append(token)
+        assert peeked == token
+    assert read[:-1] == tokens  # Skip None at end
+    assert stream.pop() is None
diff --git a/tests/test_tokenize.py b/tests/test_tokenize.py
index 227e2d9..528af7e 100644
--- a/tests/test_tokenize.py
+++ b/tests/test_tokenize.py
@@ -12,9 +12,9 @@
 )
 from src import tokenize
-from src.syntax import Syntax, SyntaxLocation
-from tests.test_syntax import (
-    draw_syntax_random,
+from src.token import Token, TokenLocation
+from tests.test_token import (
+    draw_token_random,
     draw_token_newline,
     draw_token_space,
     draw_token_unknown,
@@ -27,8 +27,8 @@
 @composite
 def draw_token_splitted(draw, strategy):
     token = draw(strategy)
-    location = SyntaxLocation(1, 1, "")
-    return Syntax(token.value, location)
+    location = TokenLocation(1, 1, "")
+    return Token(token.value, location)
 # Merges \r and \n tokens to \r\n tokens
@@ -41,7 +41,7 @@
         if prev.value == "\r" and curr.value == "\n":
             # Previous token is \r, don't append it
             # Instead promote this \n token to \r\n
-            prev = Syntax("\r\n", prev.location)
+            prev = Token("\r\n", prev.location)
         else:
             # Append the previous token
             merged.append(prev)
@@ -102,14 +102,14 @@
 # Generates a list of tokens with correct locations
 @composite
 def draw_tokens_locations(draw):
-    tokens = draw(lists(draw_syntax_random()))
+    tokens = draw(lists(draw_token_random()))
     filename = draw(text())
     located = []
     line = 1
     offset = 1
     for t in tokens:
-        location = SyntaxLocation(line, offset, filename)
-        new = Syntax(t.value, location)
+        location = TokenLocation(line, offset, filename)
+        new = Token(t.value, location)
         located.append(new)
         if t.value in valid_newlines:
             line = line + 1
@@ -147,18 +147,18 @@
 # Generates two list of tokens: One with whitespace and one without
 @composite
 def draw_tokens_whitespace(draw):
-    input = draw(lists(draw_syntax_random()))
+    input = draw(lists(draw_token_random()))
     stripped = []
-    for s in input:
-        is_whitespace = s.value in valid_spaces or s.value in valid_newlines
+    for t in input:
+        is_whitespace = t.value in valid_spaces or t.value in valid_newlines
         if not is_whitespace:
-            stripped.append(s)
+            stripped.append(t)
     return (input, stripped)
 # Test that the tokenizer can strip whitespace correctly
 # We expect the following behaviour:
-# - No syntax is modified
+# - No tokens are modified
 # - Tokens with the following values are removed from the output:
 #   U+0009 HORIZONTAL TAB
 #   U+000A LINE FEED
@@ -172,27 +172,27 @@
 #   U+2029 PARAGRAPH SEPARATOR
 @given(draw_tokens_whitespace())
 def test_tokenize_strip_whitespace(test_data):
-    (input, syntax) = test_data
-    assert tokenize.strip_whitespace(input) == syntax
+    (input, tokens) = test_data
+    assert tokenize.strip_whitespace(input) == tokens
 # Draws a token and possibly add garbage
 # This is to ensure that tokens must completely match a value
 @composite
-def draw_syntax_random_garbled(draw):
-    token = draw(draw_syntax_random())
+def draw_token_random_garbled(draw):
+    token = draw(draw_token_random())
     value = token.value
     if draw(booleans()):
         value = draw(text(min_size=1)) + value
     if draw(booleans()):
         value = value + draw(text(min_size=1))
-    return Syntax(value, token.location)
+    return Token(value, token.location)
 # Draw a random string made of token values
 @composite
 def draw_source_fuzz(draw):
-    tokens = draw(lists(draw_syntax_random()))
+    tokens = draw(lists(draw_token_random()))
     input = ""
     for t in tokens:
         input += t.value