diff --git a/env.sh b/env.sh index 1b4c93e..9c71994 100644 --- a/env.sh +++ b/env.sh @@ -33,7 +33,7 @@ dotest_ci() { pytest -q -n auto --hypothesis-profile=ci; } dotest_long() { pytest -q -n auto --hypothesis-profile=long; } docheck() { dotidy && dolint && dotest; } -_envsh_domut() { mutmut run --paths-to-mutate "src/token.py,src/tokenize.py,src/parse.py,src/i18n.py" $@; } +_envsh_domut() { mutmut run --paths-to-mutate "src/parse2/token.py,src/parse2/tokenize.py,src/parse2/parse.py,src/i18n.py" $@; } domut() { _envsh_domut; } domut_ci() { _envsh_domut --no-progress || (mutmut results; exit 1); } dobuild() { python build.py; } diff --git a/src/main.py b/src/main.py index e80c806..1550d31 100644 --- a/src/main.py +++ b/src/main.py @@ -6,8 +6,8 @@ from src import port from src import log -from src import tokenize -from src import parse +from src.parse2 import tokenize as tokenize2 +from src.parse2 import parse as parse2 from src import oldparse from src import interp @@ -15,11 +15,11 @@ def run_file(file, log_level=log.NORMAL): log.set_default_log_level(log_level) ast = None - if os.getenv("NEWLANG_NEW_PARSE"): + if os.getenv("NEWLANG_PARSE2"): try: code = open(file, encoding="utf-8").read() - tokens = tokenize.tokenize(code, file) - ast = parse.parse(tokens, None) + tokens = tokenize2.tokenize(code, file) + ast = parse2.parse(tokens, None) except UnicodeError: pass else: diff --git a/src/parse.py b/src/parse.py deleted file mode 100644 index 57617e9..0000000 --- a/src/parse.py +++ /dev/null @@ -1,400 +0,0 @@ -# SPDX-License-Identifier: LGPL-2.1-only -# Copyright 2022 Jookia - -from src.i18n import Message -from src.ast_types import Bool, Conditional, Reference, Set, Statement, Text -from src.token import TokenStream - - -# Words that can't be used as references -# This should include keywords and literals -reserved_names = [ - "Done", - "Set", - "To", - "EndSet", - "If", - "Then", - "Else", - "EndIf", - "StartNote", - "EndNote", - "StartText", - "EndText", - "True", 
- "False", -] - - -# Tasks that happen during parsing -class ParseTask: - TEST_TASK = 1 # pragma: no mutate - PARSE_NOTE = 2 # pragma: no mutate - CLEAR_NOTES = 3 # pragma: no mutate - PARSE_TEXT = 4 # pragma: no mutate - PARSE_BOOL = 5 # pragma: no mutate - PARSE_REFERENCE = 6 # pragma: no mutate - PARSE_VALUE = 7 # pragma: no mutate - PARSE_STATEMENT = 8 # pragma: no mutate - PARSE_SUBJECT = 9 # pragma: no mutate - PARSE_VERB = 10 # pragma: no mutate - PARSE_ARGUMENT = 11 # pragma: no mutate - PARSE_SET = 12 # pragma: no mutate - PARSE_CONDITIONAL = 13 # pragma: no mutate - PARSE_TEST = 14 # pragma: no mutate - PARSE_SUCCESS = 15 # pragma: no mutate - PARSE_FAILURE = 16 # pragma: no mutate - PARSE_DIRECTIVE = 17 # pragma: no mutate - PARSE_FILE = 18 # pragma: no mutate - MAX = 19 # pragma: no mutate - - # Returns a list of all tasks - def list(): - return list(range(1, ParseTask.MAX)) # pragma: no mutate - - -# Message identifiers for ParseTasks -ParseTaskMessageIDs = { - ParseTask.TEST_TASK: "ParseTaskTestTask", - ParseTask.PARSE_NOTE: "ParseTaskNote", - ParseTask.CLEAR_NOTES: "ParseTaskClearNotes", - ParseTask.PARSE_TEXT: "ParseTaskText", - ParseTask.PARSE_BOOL: "ParseTaskBool", - ParseTask.PARSE_REFERENCE: "ParseTaskReference", - ParseTask.PARSE_VALUE: "ParseTaskValue", - ParseTask.PARSE_STATEMENT: "ParseTaskStatement", - ParseTask.PARSE_SUBJECT: "ParseTaskSubject", - ParseTask.PARSE_VERB: "ParseTaskVerb", - ParseTask.PARSE_ARGUMENT: "ParseTaskArgument", - ParseTask.PARSE_SET: "ParseTaskSet", - ParseTask.PARSE_CONDITIONAL: "ParseTaskConditional", - ParseTask.PARSE_TEST: "ParseTaskTest", - ParseTask.PARSE_SUCCESS: "ParseTaskSuccess", - ParseTask.PARSE_FAILURE: "ParseTaskFailure", - ParseTask.PARSE_DIRECTIVE: "ParseTaskDirective", - ParseTask.PARSE_FILE: "ParseTaskFile", -} - - -# Context used for parse error exception -class ParseContext: - def __init__(self, task, token, parent): - self.task = task - self.token = token - self.parent = parent - - def 
__repr__(self): - return ( - "ParseContext(task %s, token %s, parent\n %s)" # pragma: no mutate - % ( # pragma: no mutate - self.task, - self.token, - self.parent, - ) - ) - - def __eq__(self, other): - if other is None: - return False - return ( - self.task == other.task - and self.token == other.token - and self.parent == other.parent - ) - - -# Errors that can happen when parsing -class ParseError: - TEST_ERROR = 1 # pragma: no mutate - NO_TOKEN = 2 # pragma: no mutate - WRONG_TOKEN = 3 # pragma: no mutate - FOUND_STARTTEXT = 4 # pragma: no mutate - FOUND_STARTNOTE = 5 # pragma: no mutate - NOT_BOOL = 6 # pragma: no mutate - FOUND_ENDNOTE = 7 # pragma: no mutate - RESERVED_NAME = 8 # pragma: no mutate - FOUND_TERMINATOR = 9 # pragma: no mutate - MAX = 10 # pragma: no mutate - - # Returns a list of all errors - def list(): - return list(range(1, ParseError.MAX)) # pragma: no mutate - - -# Message identifiers for ParseErrors -ParseErrorMessageIDs = { - ParseError.TEST_ERROR: "ParseErrorTestError", - ParseError.NO_TOKEN: "ParseErrorNoToken", - ParseError.WRONG_TOKEN: "ParseErrorWrongToken", - ParseError.FOUND_STARTTEXT: "ParseErrorFoundStartText", - ParseError.FOUND_STARTNOTE: "ParseErrorFoundStartNote", - ParseError.NOT_BOOL: "ParseErrorNotBool", - ParseError.FOUND_ENDNOTE: "ParseErrorFoundEndNote", - ParseError.RESERVED_NAME: "ParseErrorReservedName", - ParseError.FOUND_TERMINATOR: "ParseErrorFoundTerminator", -} - - -# Exception thrown when a parse error is encountered -class ParseErrorException(BaseException): - def __init__(self, error, token, expected, context): - self.error = error - self.token = token - self.expected = expected - self.context = context - - def __repr__(self): - return ( - "ParseErrorException(error %s, token %s, expected %s, context %s)" # pragma: no mutate - % ( # pragma: no mutate - self.error, - self.token, - self.expected, - self.context, - ) - ) - - def __eq__(self, other): - if other is None: - return False - return ( - self.error == 
other.error - and self.token == other.token - and self.expected == other.expected - and self.context == other.context - ) - - -# Reads a token, possibly of a certain value -def read_token(stream, value, context): - t = stream.pop() - if t is None: - raise ParseErrorException(ParseError.NO_TOKEN, None, None, context) - elif value is not None and t.value != value: - raise ParseErrorException(ParseError.WRONG_TOKEN, t, value, context) - return t - - -# Skip a note -def skip_note(stream, parent_context): - context = ParseContext(ParseTask.PARSE_NOTE, stream.peek(), parent_context) - read_token(stream, "StartNote", context) - while True: - t = read_token(stream, None, context) - # Don't allow StartNote in notes - if t.value in ["StartNote"]: - raise ParseErrorException(ParseError.FOUND_STARTNOTE, t, None, context) - # EndNote found, end things - elif t.value == "EndNote": - break - return None - - -# Clear notes -def clear_notes(stream, parent_context): - context = ParseContext(ParseTask.CLEAR_NOTES, stream.peek(), parent_context) - tokens = [] - token = stream.peek() - while token is not None: - # Found a note, skip it - if token.value == "StartNote": - skip_note(stream, context) - # EndNote found outside note - elif token.value == "EndNote": - raise ParseErrorException(ParseError.FOUND_ENDNOTE, token, None, context) - # Add the token if it's not note related - else: - tokens.append(stream.pop()) - token = stream.peek() - return tokens - - -# The recursive descent parser in a wrapper class for easy testing -class Parser: - # Parses a text node - def parse_text(self, stream, parent_context): - context = ParseContext(ParseTask.PARSE_TEXT, stream.peek(), parent_context) - buffer = "" - t = read_token(stream, "StartText", context) - # Parse following tokens - while True: - t = read_token(stream, None, context) - # Don't allow StartText in text - if t.value in ["StartText"]: - raise ParseErrorException(ParseError.FOUND_STARTTEXT, t, None, context) - # EndText found, end 
things - elif t.value == "EndText": - break - else: - buffer += t.value + " " - value = buffer[:-1] # Drop trailing space - return Text(value) - - # Parses a boolean node - def parse_bool(self, stream, parent_context): - context = ParseContext(ParseTask.PARSE_BOOL, stream.peek(), parent_context) - t = read_token(stream, None, context) - if t.value == "True": - return Bool(True) - elif t.value == "False": - return Bool(False) - else: - raise ParseErrorException(ParseError.NOT_BOOL, t, None, context) - - # Parses a reference node - def parse_reference(self, stream, parent_context): - context = ParseContext(ParseTask.PARSE_REFERENCE, stream.peek(), parent_context) - t = read_token(stream, None, context) - if t.value in reserved_names: - raise ParseErrorException(ParseError.RESERVED_NAME, t, None, context) - return Reference(t.value) - - # Parses a value - def parse_value(self, stream, parent_context): - context = ParseContext(ParseTask.PARSE_VALUE, stream.peek(), parent_context) - t = stream.peek() - if t is None: - raise ParseErrorException(ParseError.NO_TOKEN, None, None, context) - elif t.value in ["True", "False"]: - return self.parse_bool(stream, context) - elif t.value == "StartText": - return self.parse_text(stream, context) - elif t.value in reserved_names: - raise ParseErrorException(ParseError.RESERVED_NAME, t, None, context) - else: - return self.parse_reference(stream, context) - - # Parses a statement until a specified terminator - def parse_statement(self, stream, parent_context, terminator): - context = ParseContext(ParseTask.PARSE_STATEMENT, stream.peek(), parent_context) - peeked_subject = stream.peek() - context_subject = ParseContext(ParseTask.PARSE_SUBJECT, peeked_subject, context) - if peeked_subject is not None and peeked_subject.value == terminator: - raise ParseErrorException( - ParseError.FOUND_TERMINATOR, peeked_subject, None, context_subject - ) - subject = self.parse_value(stream, context_subject) - context_verb = 
ParseContext(ParseTask.PARSE_VERB, stream.peek(), context) - verb = read_token(stream, None, context_verb) - if verb.value == terminator: - return Statement(subject, None, []) - elif verb.value in reserved_names: - raise ParseErrorException( - ParseError.RESERVED_NAME, verb, None, context_verb - ) - arguments = [] - # Parse following arguments - while True: - peeked_arg = stream.peek() - context_arg = ParseContext(ParseTask.PARSE_ARGUMENT, peeked_arg, context) - if peeked_arg is not None and peeked_arg.value == terminator: - stream.pop() - return Statement(subject, verb.value, arguments) - arg = self.parse_value(stream, context_arg) # pragma: no mutate - arguments.append(arg) - - # Parses a set node - def parse_set(self, stream, parent_context): - context = ParseContext(ParseTask.PARSE_SET, stream.peek(), parent_context) - read_token(stream, "Set", context) - subcontext = ParseContext(ParseTask.PARSE_SUBJECT, stream.peek(), context) - subject = read_token(stream, None, subcontext) - if subject.value in reserved_names: - raise ParseErrorException( - ParseError.RESERVED_NAME, subject, None, subcontext - ) - read_token(stream, "To", context) - statement = self.parse_statement(stream, context, "EndSet") - return Set(subject.value, statement) - - # Parses a conditional node - def parse_conditional(self, stream, parent_context): - context = ParseContext( - ParseTask.PARSE_CONDITIONAL, stream.peek(), parent_context - ) - read_token(stream, "If", context) - test_context = ParseContext(ParseTask.PARSE_TEST, stream.peek(), context) - test = self.parse_statement(stream, test_context, "Then") - success_context = ParseContext(ParseTask.PARSE_SUCCESS, stream.peek(), context) - success = self.parse_statement(stream, success_context, "Else") - failure_context = ParseContext(ParseTask.PARSE_FAILURE, stream.peek(), context) - failure = self.parse_statement(stream, failure_context, "EndIf") - return Conditional(test, success, failure) - - # Parses a directive - def 
parse_directive(self, stream, parent_context): - context = ParseContext(ParseTask.PARSE_DIRECTIVE, stream.peek(), parent_context) - t = stream.peek() - if t is None: - raise ParseErrorException(ParseError.NO_TOKEN, None, None, context) - elif t.value == "Set": - return self.parse_set(stream, context) - elif t.value == "If": - return self.parse_conditional(stream, context) - else: - return self.parse_statement(stream, context, "Done") - - -# Parses a file -def parse_file(stream, parent_context): - context = ParseContext(ParseTask.PARSE_FILE, stream.peek(), parent_context) - directives = [] - next = stream.peek() - while next is not None: - dir = Parser().parse_directive(stream, context) # pragma: no mutate - directives.append(dir) - next = stream.peek() - return directives - - -# Parses tokens -def parse(tokens, context): - stream = TokenStream(tokens) - cleared = clear_notes(stream, context) - stream2 = TokenStream(cleared) - parsed = parse_file(stream2, context) - return parsed - - -# Formats a ParseContext -def format_context(context): - task = Message(ParseTaskMessageIDs[context.task], []) - if context.token: - file = context.token.location.file - line = context.token.location.line - offset = context.token.location.offset - return Message("ParseContextAt", [task, file, line, offset]) - else: - return Message("ParseContext", [task]) - - -# Formats a ParseErrorException -def format_exception(exception): - has_expected = exception.expected is not None - has_token = exception.token is not None - error = Message(ParseErrorMessageIDs[exception.error], []) - if has_expected: - args = [exception.expected] - else: - args = [error] - if has_token: - file = exception.token.location.file - line = exception.token.location.line - offset = exception.token.location.offset - args = args + [file, line, offset] - ids = [ - ["ParserError", "ParserErrorAt"], - ["ParserErrorExpected", "ParserErrorExpectedAt"], - ] - id = ids[has_expected][has_token] - return Message(id, args) - - -# 
Formats a ParseErrorException and its contexts -def format_full_error(exception): - formatted = [format_exception(exception)] - context = exception.context - while context is not None: - formatted.append(format_context(context)) - context = context.parent - return formatted diff --git a/src/parse2/parse.py b/src/parse2/parse.py new file mode 100644 index 0000000..70f867e --- /dev/null +++ b/src/parse2/parse.py @@ -0,0 +1,400 @@ +# SPDX-License-Identifier: LGPL-2.1-only +# Copyright 2022 Jookia + +from src.i18n import Message +from src.ast_types import Bool, Conditional, Reference, Set, Statement, Text +from src.parse2.token import TokenStream + + +# Words that can't be used as references +# This should include keywords and literals +reserved_names = [ + "Done", + "Set", + "To", + "EndSet", + "If", + "Then", + "Else", + "EndIf", + "StartNote", + "EndNote", + "StartText", + "EndText", + "True", + "False", +] + + +# Tasks that happen during parsing +class ParseTask: + TEST_TASK = 1 # pragma: no mutate + PARSE_NOTE = 2 # pragma: no mutate + CLEAR_NOTES = 3 # pragma: no mutate + PARSE_TEXT = 4 # pragma: no mutate + PARSE_BOOL = 5 # pragma: no mutate + PARSE_REFERENCE = 6 # pragma: no mutate + PARSE_VALUE = 7 # pragma: no mutate + PARSE_STATEMENT = 8 # pragma: no mutate + PARSE_SUBJECT = 9 # pragma: no mutate + PARSE_VERB = 10 # pragma: no mutate + PARSE_ARGUMENT = 11 # pragma: no mutate + PARSE_SET = 12 # pragma: no mutate + PARSE_CONDITIONAL = 13 # pragma: no mutate + PARSE_TEST = 14 # pragma: no mutate + PARSE_SUCCESS = 15 # pragma: no mutate + PARSE_FAILURE = 16 # pragma: no mutate + PARSE_DIRECTIVE = 17 # pragma: no mutate + PARSE_FILE = 18 # pragma: no mutate + MAX = 19 # pragma: no mutate + + # Returns a list of all tasks + def list(): + return list(range(1, ParseTask.MAX)) # pragma: no mutate + + +# Message identifiers for ParseTasks +ParseTaskMessageIDs = { + ParseTask.TEST_TASK: "ParseTaskTestTask", + ParseTask.PARSE_NOTE: "ParseTaskNote", + 
ParseTask.CLEAR_NOTES: "ParseTaskClearNotes", + ParseTask.PARSE_TEXT: "ParseTaskText", + ParseTask.PARSE_BOOL: "ParseTaskBool", + ParseTask.PARSE_REFERENCE: "ParseTaskReference", + ParseTask.PARSE_VALUE: "ParseTaskValue", + ParseTask.PARSE_STATEMENT: "ParseTaskStatement", + ParseTask.PARSE_SUBJECT: "ParseTaskSubject", + ParseTask.PARSE_VERB: "ParseTaskVerb", + ParseTask.PARSE_ARGUMENT: "ParseTaskArgument", + ParseTask.PARSE_SET: "ParseTaskSet", + ParseTask.PARSE_CONDITIONAL: "ParseTaskConditional", + ParseTask.PARSE_TEST: "ParseTaskTest", + ParseTask.PARSE_SUCCESS: "ParseTaskSuccess", + ParseTask.PARSE_FAILURE: "ParseTaskFailure", + ParseTask.PARSE_DIRECTIVE: "ParseTaskDirective", + ParseTask.PARSE_FILE: "ParseTaskFile", +} + + +# Context used for parse error exception +class ParseContext: + def __init__(self, task, token, parent): + self.task = task + self.token = token + self.parent = parent + + def __repr__(self): + return ( + "ParseContext(task %s, token %s, parent\n %s)" # pragma: no mutate + % ( # pragma: no mutate + self.task, + self.token, + self.parent, + ) + ) + + def __eq__(self, other): + if other is None: + return False + return ( + self.task == other.task + and self.token == other.token + and self.parent == other.parent + ) + + +# Errors that can happen when parsing +class ParseError: + TEST_ERROR = 1 # pragma: no mutate + NO_TOKEN = 2 # pragma: no mutate + WRONG_TOKEN = 3 # pragma: no mutate + FOUND_STARTTEXT = 4 # pragma: no mutate + FOUND_STARTNOTE = 5 # pragma: no mutate + NOT_BOOL = 6 # pragma: no mutate + FOUND_ENDNOTE = 7 # pragma: no mutate + RESERVED_NAME = 8 # pragma: no mutate + FOUND_TERMINATOR = 9 # pragma: no mutate + MAX = 10 # pragma: no mutate + + # Returns a list of all errors + def list(): + return list(range(1, ParseError.MAX)) # pragma: no mutate + + +# Message identifiers for ParseErrors +ParseErrorMessageIDs = { + ParseError.TEST_ERROR: "ParseErrorTestError", + ParseError.NO_TOKEN: "ParseErrorNoToken", + ParseError.WRONG_TOKEN: 
"ParseErrorWrongToken", + ParseError.FOUND_STARTTEXT: "ParseErrorFoundStartText", + ParseError.FOUND_STARTNOTE: "ParseErrorFoundStartNote", + ParseError.NOT_BOOL: "ParseErrorNotBool", + ParseError.FOUND_ENDNOTE: "ParseErrorFoundEndNote", + ParseError.RESERVED_NAME: "ParseErrorReservedName", + ParseError.FOUND_TERMINATOR: "ParseErrorFoundTerminator", +} + + +# Exception thrown when a parse error is encountered +class ParseErrorException(BaseException): + def __init__(self, error, token, expected, context): + self.error = error + self.token = token + self.expected = expected + self.context = context + + def __repr__(self): + return ( + "ParseErrorException(error %s, token %s, expected %s, context %s)" # pragma: no mutate + % ( # pragma: no mutate + self.error, + self.token, + self.expected, + self.context, + ) + ) + + def __eq__(self, other): + if other is None: + return False + return ( + self.error == other.error + and self.token == other.token + and self.expected == other.expected + and self.context == other.context + ) + + +# Reads a token, possibly of a certain value +def read_token(stream, value, context): + t = stream.pop() + if t is None: + raise ParseErrorException(ParseError.NO_TOKEN, None, None, context) + elif value is not None and t.value != value: + raise ParseErrorException(ParseError.WRONG_TOKEN, t, value, context) + return t + + +# Skip a note +def skip_note(stream, parent_context): + context = ParseContext(ParseTask.PARSE_NOTE, stream.peek(), parent_context) + read_token(stream, "StartNote", context) + while True: + t = read_token(stream, None, context) + # Don't allow StartNote in notes + if t.value in ["StartNote"]: + raise ParseErrorException(ParseError.FOUND_STARTNOTE, t, None, context) + # EndNote found, end things + elif t.value == "EndNote": + break + return None + + +# Clear notes +def clear_notes(stream, parent_context): + context = ParseContext(ParseTask.CLEAR_NOTES, stream.peek(), parent_context) + tokens = [] + token = stream.peek() + 
while token is not None: + # Found a note, skip it + if token.value == "StartNote": + skip_note(stream, context) + # EndNote found outside note + elif token.value == "EndNote": + raise ParseErrorException(ParseError.FOUND_ENDNOTE, token, None, context) + # Add the token if it's not note related + else: + tokens.append(stream.pop()) + token = stream.peek() + return tokens + + +# The recursive descent parser in a wrapper class for easy testing +class Parser: + # Parses a text node + def parse_text(self, stream, parent_context): + context = ParseContext(ParseTask.PARSE_TEXT, stream.peek(), parent_context) + buffer = "" + t = read_token(stream, "StartText", context) + # Parse following tokens + while True: + t = read_token(stream, None, context) + # Don't allow StartText in text + if t.value in ["StartText"]: + raise ParseErrorException(ParseError.FOUND_STARTTEXT, t, None, context) + # EndText found, end things + elif t.value == "EndText": + break + else: + buffer += t.value + " " + value = buffer[:-1] # Drop trailing space + return Text(value) + + # Parses a boolean node + def parse_bool(self, stream, parent_context): + context = ParseContext(ParseTask.PARSE_BOOL, stream.peek(), parent_context) + t = read_token(stream, None, context) + if t.value == "True": + return Bool(True) + elif t.value == "False": + return Bool(False) + else: + raise ParseErrorException(ParseError.NOT_BOOL, t, None, context) + + # Parses a reference node + def parse_reference(self, stream, parent_context): + context = ParseContext(ParseTask.PARSE_REFERENCE, stream.peek(), parent_context) + t = read_token(stream, None, context) + if t.value in reserved_names: + raise ParseErrorException(ParseError.RESERVED_NAME, t, None, context) + return Reference(t.value) + + # Parses a value + def parse_value(self, stream, parent_context): + context = ParseContext(ParseTask.PARSE_VALUE, stream.peek(), parent_context) + t = stream.peek() + if t is None: + raise ParseErrorException(ParseError.NO_TOKEN, None, 
None, context) + elif t.value in ["True", "False"]: + return self.parse_bool(stream, context) + elif t.value == "StartText": + return self.parse_text(stream, context) + elif t.value in reserved_names: + raise ParseErrorException(ParseError.RESERVED_NAME, t, None, context) + else: + return self.parse_reference(stream, context) + + # Parses a statement until a specified terminator + def parse_statement(self, stream, parent_context, terminator): + context = ParseContext(ParseTask.PARSE_STATEMENT, stream.peek(), parent_context) + peeked_subject = stream.peek() + context_subject = ParseContext(ParseTask.PARSE_SUBJECT, peeked_subject, context) + if peeked_subject is not None and peeked_subject.value == terminator: + raise ParseErrorException( + ParseError.FOUND_TERMINATOR, peeked_subject, None, context_subject + ) + subject = self.parse_value(stream, context_subject) + context_verb = ParseContext(ParseTask.PARSE_VERB, stream.peek(), context) + verb = read_token(stream, None, context_verb) + if verb.value == terminator: + return Statement(subject, None, []) + elif verb.value in reserved_names: + raise ParseErrorException( + ParseError.RESERVED_NAME, verb, None, context_verb + ) + arguments = [] + # Parse following arguments + while True: + peeked_arg = stream.peek() + context_arg = ParseContext(ParseTask.PARSE_ARGUMENT, peeked_arg, context) + if peeked_arg is not None and peeked_arg.value == terminator: + stream.pop() + return Statement(subject, verb.value, arguments) + arg = self.parse_value(stream, context_arg) # pragma: no mutate + arguments.append(arg) + + # Parses a set node + def parse_set(self, stream, parent_context): + context = ParseContext(ParseTask.PARSE_SET, stream.peek(), parent_context) + read_token(stream, "Set", context) + subcontext = ParseContext(ParseTask.PARSE_SUBJECT, stream.peek(), context) + subject = read_token(stream, None, subcontext) + if subject.value in reserved_names: + raise ParseErrorException( + ParseError.RESERVED_NAME, subject, None, 
subcontext + ) + read_token(stream, "To", context) + statement = self.parse_statement(stream, context, "EndSet") + return Set(subject.value, statement) + + # Parses a conditional node + def parse_conditional(self, stream, parent_context): + context = ParseContext( + ParseTask.PARSE_CONDITIONAL, stream.peek(), parent_context + ) + read_token(stream, "If", context) + test_context = ParseContext(ParseTask.PARSE_TEST, stream.peek(), context) + test = self.parse_statement(stream, test_context, "Then") + success_context = ParseContext(ParseTask.PARSE_SUCCESS, stream.peek(), context) + success = self.parse_statement(stream, success_context, "Else") + failure_context = ParseContext(ParseTask.PARSE_FAILURE, stream.peek(), context) + failure = self.parse_statement(stream, failure_context, "EndIf") + return Conditional(test, success, failure) + + # Parses a directive + def parse_directive(self, stream, parent_context): + context = ParseContext(ParseTask.PARSE_DIRECTIVE, stream.peek(), parent_context) + t = stream.peek() + if t is None: + raise ParseErrorException(ParseError.NO_TOKEN, None, None, context) + elif t.value == "Set": + return self.parse_set(stream, context) + elif t.value == "If": + return self.parse_conditional(stream, context) + else: + return self.parse_statement(stream, context, "Done") + + +# Parses a file +def parse_file(stream, parent_context): + context = ParseContext(ParseTask.PARSE_FILE, stream.peek(), parent_context) + directives = [] + next = stream.peek() + while next is not None: + dir = Parser().parse_directive(stream, context) # pragma: no mutate + directives.append(dir) + next = stream.peek() + return directives + + +# Parses tokens +def parse(tokens, context): + stream = TokenStream(tokens) + cleared = clear_notes(stream, context) + stream2 = TokenStream(cleared) + parsed = parse_file(stream2, context) + return parsed + + +# Formats a ParseContext +def format_context(context): + task = Message(ParseTaskMessageIDs[context.task], []) + if 
context.token: + file = context.token.location.file + line = context.token.location.line + offset = context.token.location.offset + return Message("ParseContextAt", [task, file, line, offset]) + else: + return Message("ParseContext", [task]) + + +# Formats a ParseErrorException +def format_exception(exception): + has_expected = exception.expected is not None + has_token = exception.token is not None + error = Message(ParseErrorMessageIDs[exception.error], []) + if has_expected: + args = [exception.expected] + else: + args = [error] + if has_token: + file = exception.token.location.file + line = exception.token.location.line + offset = exception.token.location.offset + args = args + [file, line, offset] + ids = [ + ["ParserError", "ParserErrorAt"], + ["ParserErrorExpected", "ParserErrorExpectedAt"], + ] + id = ids[has_expected][has_token] + return Message(id, args) + + +# Formats a ParseErrorException and its contexts +def format_full_error(exception): + formatted = [format_exception(exception)] + context = exception.context + while context is not None: + formatted.append(format_context(context)) + context = context.parent + return formatted diff --git a/src/parse2/token.py b/src/parse2/token.py new file mode 100644 index 0000000..cdf9ec9 --- /dev/null +++ b/src/parse2/token.py @@ -0,0 +1,65 @@ +# SPDX-License-Identifier: LGPL-2.1-only +# Copyright 2022 Jookia + + +# Represents a token +class Token: + def __init__(self, value, location): + self.value = value + self.location = location + + def __repr__(self): + return "Token(value %s, location %s)" % ( # pragma: no mutate + repr(self.value), + repr(self.location), + ) + + def __eq__(self, other): + if other is None: + return False + return self.value == other.value and self.location == other.location + + +# Location of a token +class TokenLocation: + def __init__(self, line, offset, file): + self.line = line + self.offset = offset + self.file = file + + def __repr__(self): + return "TokenLocation(line %i, offset %i, 
file '%s')" % ( # pragma: no mutate + self.line, + self.offset, + self.file, + ) + + def __eq__(self, other): + if other is None: + return False + return ( + self.line == other.line + and self.offset == other.offset + and self.file == other.file + ) + + +# Represents a stream of consumable tokens +class TokenStream: + def __init__(self, tokens): + self.tokens = tokens + + def __repr__(self): + return "TokenStream(%s)" % (self.tokens) # pragma: no mutate + + def pop(self): + if self.tokens: + return self.tokens.pop(0) + else: + return None + + def peek(self): + if self.tokens: + return self.tokens[0] + else: + return None diff --git a/src/parse2/tokenize.py b/src/parse2/tokenize.py new file mode 100644 index 0000000..1549701 --- /dev/null +++ b/src/parse2/tokenize.py @@ -0,0 +1,87 @@ +# SPDX-License-Identifier: LGPL-2.1-only +# Copyright 2022 Jookia + +from src.parse2.token import Token, TokenLocation + +# Valid space code points +spaces = [ + "\t", # U+0009 HORIZONTAL TAB + " ", # U+0020 SPACE +] + +# Valid new line tokens +newlines = [ + "\n", # U+000A LINE FEED + "\v", # U+000B VERTICAL TAB + "\f", # U+000C FORM FEED + "\r", # U+000D CARRIAGE RETURN + "\r\n", # U+000A U+000D CARRIAGE RETURN then LINE FEED + "\u0085", # U+0085 NEXT LINE + "\u2028", # U+2028 LINE SEPARATOR + "\u2029", # U+2029 PARAGRAPH SEPARATOR +] + + +# Checks whether a symbol is general whitespace +def is_whitespace(symbol): + return symbol in spaces or symbol in newlines + + +# Splits text in to a list of tokens and whitespace +def split_tokens(input): + if input == "": + return [] + tokens = [] + prev = input[0] + buffer = prev + location = TokenLocation(1, 1, "") + for curr in input[1:]: + curr_space = is_whitespace(curr) + prev_space = is_whitespace(prev) + switching = curr_space != prev_space + crlf = prev == "\r" and curr == "\n" + # Flush if we switch between whitespace and non-whitespace code points + # Flush if we're working with a stream of whitespace + # Don't flush if we're in the 
middle of a CR LF sequence + flush = switching or (curr_space and not crlf) + if flush: + tokens.append(Token(buffer, location)) + buffer = "" + buffer += curr + prev = curr + tokens.append(Token(buffer, location)) + return tokens + + +# Generates a list of tokens with locations +def locate_tokens(tokens, filename): + new_tokens = [] + line = 1 + offset = 1 + for t in tokens: + location = TokenLocation(line, offset, filename) + new = Token(t.value, location) + new_tokens.append(new) + if t.value in newlines: + line = line + 1 + offset = 1 + else: + offset += len(t.value) + return new_tokens + + +# Removes whitespace tokens +def strip_whitespace(tokens): + output = [] + for t in tokens: + if not is_whitespace(t.value): + output.append(t) + return output + + +# Tokenizes source code +def tokenize(source, filename): + split = split_tokens(source) + located = locate_tokens(split, filename) + stripped = strip_whitespace(located) + return stripped diff --git a/src/token.py b/src/token.py deleted file mode 100644 index cdf9ec9..0000000 --- a/src/token.py +++ /dev/null @@ -1,65 +0,0 @@ -# SPDX-License-Identifier: LGPL-2.1-only -# Copyright 2022 Jookia - - -# Represents a token -class Token: - def __init__(self, value, location): - self.value = value - self.location = location - - def __repr__(self): - return "Token(value %s, location %s)" % ( # pragma: no mutate - repr(self.value), - repr(self.location), - ) - - def __eq__(self, other): - if other is None: - return False - return self.value == other.value and self.location == other.location - - -# Location of a token -class TokenLocation: - def __init__(self, line, offset, file): - self.line = line - self.offset = offset - self.file = file - - def __repr__(self): - return "TokenLocation(line %i, offset %i, file '%s')" % ( # pragma: no mutate - self.line, - self.offset, - self.file, - ) - - def __eq__(self, other): - if other is None: - return False - return ( - self.line == other.line - and self.offset == other.offset - 
and self.file == other.file - ) - - -# Represents a stream of consumable tokens -class TokenStream: - def __init__(self, tokens): - self.tokens = tokens - - def __repr__(self): - return "TokenStream(%s)" % (self.tokens) # pragma: no mutate - - def pop(self): - if self.tokens: - return self.tokens.pop(0) - else: - return None - - def peek(self): - if self.tokens: - return self.tokens[0] - else: - return None diff --git a/src/tokenize.py b/src/tokenize.py deleted file mode 100644 index 7a11f38..0000000 --- a/src/tokenize.py +++ /dev/null @@ -1,87 +0,0 @@ -# SPDX-License-Identifier: LGPL-2.1-only -# Copyright 2022 Jookia - -from src.token import Token, TokenLocation - -# Valid space code points -spaces = [ - "\t", # U+0009 HORIZONTAL TAB - " ", # U+0020 SPACE -] - -# Valid new line tokens -newlines = [ - "\n", # U+000A LINE FEED - "\v", # U+000B VERTICAL TAB - "\f", # U+000C FORM FEED - "\r", # U+000D CARRIAGE RETURN - "\r\n", # U+000A U+000D CARRIAGE RETURN then LINE FEED - "\u0085", # U+0085 NEXT LINE - "\u2028", # U+2028 LINE SEPARATOR - "\u2029", # U+2029 PARAGRAPH SEPARATOR -] - - -# Checks whether a symbol is general whitespace -def is_whitespace(symbol): - return symbol in spaces or symbol in newlines - - -# Splits text in to a list of tokens and whitespace -def split_tokens(input): - if input == "": - return [] - tokens = [] - prev = input[0] - buffer = prev - location = TokenLocation(1, 1, "") - for curr in input[1:]: - curr_space = is_whitespace(curr) - prev_space = is_whitespace(prev) - switching = curr_space != prev_space - crlf = prev == "\r" and curr == "\n" - # Flush if we switch between whitespace and non-whitespace code points - # Flush if we're working with a stream of whitespace - # Don't flush if we're in the middle of a CR LF sequence - flush = switching or (curr_space and not crlf) - if flush: - tokens.append(Token(buffer, location)) - buffer = "" - buffer += curr - prev = curr - tokens.append(Token(buffer, location)) - return tokens - - -# 
Generates a list of tokens with locations -def locate_tokens(tokens, filename): - new_tokens = [] - line = 1 - offset = 1 - for t in tokens: - location = TokenLocation(line, offset, filename) - new = Token(t.value, location) - new_tokens.append(new) - if t.value in newlines: - line = line + 1 - offset = 1 - else: - offset += len(t.value) - return new_tokens - - -# Removes whitespace tokens -def strip_whitespace(tokens): - output = [] - for t in tokens: - if not is_whitespace(t.value): - output.append(t) - return output - - -# Tokenizes source code -def tokenize(source, filename): - split = split_tokens(source) - located = locate_tokens(split, filename) - stripped = strip_whitespace(located) - return stripped diff --git a/tests/parse/templates.py b/tests/parse/templates.py deleted file mode 100644 index a06d294..0000000 --- a/tests/parse/templates.py +++ /dev/null @@ -1,51 +0,0 @@ -# SPDX-License-Identifier: LGPL-2.1-only -# Copyright 2022 Jookia - -from hypothesis.strategies import composite, integers - -from src.parse import ParseErrorException -from src.token import TokenStream -from tests.test_token import static_token_by_value - - -# Draws tokens with an element randomly between the first and last token -# Returns the new list and new token -@composite -def draw_random_within(draw, source, new): - list = draw(source) - data = static_token_by_value(new) - pos = draw(integers(min_value=1, max_value=(len(list) - 1))) - new_data = list[0:pos] + [data] + list[pos:] - return (new_data, data) - - -# Tests that something parses correctly -# We expect the following behaviour: -# - The parse function generates the expected output -# - The parse function doesn't consume extra tokens -def template_test_valid(parser, tokens, expected): - canary = static_token_by_value("CANARY") - stream = TokenStream(tokens + [canary]) - parsed = parser(stream, None) - if expected is None: - assert parsed is None - else: - assert parsed is not None - assert parsed == expected - assert 
stream.pop() == canary - assert stream.pop() is None - - -# Test that something parses incorrectly -# We expect the following behaviour: -# - The parse function generates the expected error -# - The parse function uses the parse context as a parent -def template_test_invalid(parser, context, tokens, expected): - stream = TokenStream(tokens.copy()) - error = None - try: - parsed = parser(stream, context) - raise AssertionError("Parsed invalid data: %s" % (parsed)) - except ParseErrorException as e: - error = e - assert error == expected diff --git a/tests/parse/test_bool.py b/tests/parse/test_bool.py deleted file mode 100644 index 1471eac..0000000 --- a/tests/parse/test_bool.py +++ /dev/null @@ -1,58 +0,0 @@ -# SPDX-License-Identifier: LGPL-2.1-only -# Copyright 2022 Jookia - -from hypothesis import assume, given -from hypothesis.strategies import composite - -from src.ast_types import Bool -from src.parse import ParseContext, ParseError, ParseErrorException, ParseTask, Parser -from tests.parse.templates import template_test_valid, template_test_invalid -from tests.parse.test_error import static_parse_context -from tests.test_token import draw_token_bool, draw_token_random - - -# Draws tokens to not make a valid boolean -@composite -def draw_token_not_bool(draw): - token = draw(draw_token_random()) - assume(token.value not in ["True", "False"]) - return token - - -# Tests parse_bool works correctly -# We expect the following behaviour: -# - The resulting boolean is True if the first token is True -# - The resulting boolean is False if the first token is False -# template_test_valid provides general parsing properties -@given(draw_token_bool()) -def test_parse_bool_valid(token): - value = token.value == "True" - expected = Bool(value) - template_test_valid(Parser().parse_bool, [token], expected) - - -# Tests parsing of invalid booleans -# We expect the following behaviour: -# - Error if the token is not True or False -# - Have ParseError.NOT_BOOL as the exception 
code -# - Have ParseTask.PARSE_BOOL as the context's parse task -@given(draw_token_not_bool()) -def test_parse_bool_invalid_incorrect(token): - parent_context = static_parse_context() - context = ParseContext(ParseTask.PARSE_BOOL, token, parent_context) - error = ParseErrorException(ParseError.NOT_BOOL, token, None, context) - parser = Parser().parse_bool - template_test_invalid(parser, parent_context, [token], error) - - -# Tests parsing of empty tokens -# We expect the following behaviour: -# - Error if there isn't a token -# - Have ParseError.NO_TOKEN as the exception code -# - Have ParseTask.PARSE_BOOL as the context's parse task -def test_parse_bool_invalid_empty(): - parent_context = static_parse_context() - context = ParseContext(ParseTask.PARSE_BOOL, None, parent_context) - error = ParseErrorException(ParseError.NO_TOKEN, None, None, context) - parser = Parser().parse_bool - template_test_invalid(parser, parent_context, [], error) diff --git a/tests/parse/test_clear_notes.py b/tests/parse/test_clear_notes.py deleted file mode 100644 index a85f1ba..0000000 --- a/tests/parse/test_clear_notes.py +++ /dev/null @@ -1,88 +0,0 @@ -# SPDX-License-Identifier: LGPL-2.1-only -# Copyright 2022 Jookia - -# Clear notes syntax consists of the following: -# - One or more tokens or notes -# -# Parsing gives the following: -# All tokens that aren't notes -# -# The following error contexts are used: -# CLEAR_NOTES - Used when parsing the file -# -# The following parse errors are generated: -# FOUND_ENDNOTE - When a stray EndNote token is found - -from hypothesis import given -from hypothesis.strategies import composite, just, lists, one_of - -from src.parse import ( - clear_notes, - ParseContext, - ParseError, - ParseErrorException, - ParseTask, -) -from src.token import TokenStream -from tests.parse.templates import template_test_invalid -from tests.parse.test_error import static_parse_context -from tests.parse.test_note import ( - draw_note_value_token, - 
static_note_tokens, - static_note_invalid, - static_note_invalid_error, -) -from tests.test_token import static_token_by_value - - -# Draws a tokens of notes and non-notes and output without notes -@composite -def draw_notes_to_clear(draw): - token_sets = draw( - lists(one_of([lists(draw_note_value_token()), just(static_note_tokens())])) - ) - output = [] - tokens = [] - for set in token_sets: - tokens += set - if set != static_note_tokens(): - output += set - return (tokens, output) - - -# Tests clear_notes filters out notes -# We expect the following behaviour: -# - Tokens that are part of note structures are removed -# template_test provides general parsing properties -@given(draw_notes_to_clear()) -def test_parse_clear_notes_valid(test_data): - (tokens, result) = test_data - stream = TokenStream(tokens) - cleared = clear_notes(stream, None) - assert cleared == result - - -# Tests clear_notes passes through note errors -# We expect the following behaviour: -# - When an invalid note is parsed the error is propagated -# - Have ParseTask.CLEAR_NOTES as the context's parse task -def test_parse_clear_notes_startnote_propagation(): - tokens = static_note_invalid() - parent_context = static_parse_context() - context = ParseContext(ParseTask.CLEAR_NOTES, tokens[0], parent_context) - error = static_note_invalid_error(context) - template_test_invalid(clear_notes, parent_context, tokens, error) - - -# Tests clear_notes errors when finding an EndNote -# We expect the following behaviour: -# - When EndNote is found a ParseError.FOUND_ENDNOTE error is raised -# - Have ParseTask.CLEAR_NOTES as the context's parse task -@given(lists(draw_note_value_token())) -def test_parse_clear_notes_invalid_endnote(tokens): - token = static_token_by_value("EndNote") - new_tokens = tokens + [token] - parent_context = static_parse_context() - context = ParseContext(ParseTask.CLEAR_NOTES, new_tokens[0], parent_context) - error = ParseErrorException(ParseError.FOUND_ENDNOTE, token, None, 
context) - template_test_invalid(clear_notes, parent_context, new_tokens, error) diff --git a/tests/parse/test_conditional.py b/tests/parse/test_conditional.py deleted file mode 100644 index 070d88d..0000000 --- a/tests/parse/test_conditional.py +++ /dev/null @@ -1,232 +0,0 @@ -# SPDX-License-Identifier: LGPL-2.1-only -# Copyright 2022 Jookia - -# Conditional syntax consists of the following tokens: -# - "If" -# - A test statement, terminated by "Then" -# - A success statement, terminated by "Else" -# - A failure statement, terminated by "EndIf" -# -# Parsing gives a Conditional data structure containing: -# test - The parsed test statement -# success - The parsed success statement -# failure - The parsed failure statement -# -# The following cases are errors: -# - If not being the literal "To" -# - The test not parsing correctly -# - The success not parsing correctly -# - The failure not parsing correctly -# -# The following error contexts are used: -# PARSE_CONDITIONAL - Used when parsing the general syntax -# PARSE_TEST - Used when parsing the test statement -# PARSE_SUCCESS - Used when parsing the success statement -# PARSE_FAILURE - Used when parsing the failure statement -# -# The following parse errors are generated: -# NO_TOKEN - When there's not enough tokens -# WRONG_TOKEN - When If isn't the correct values -# -# The following parsers are used and have their errors -# and data structures propagated: -# parse_statement - Used with "Then" terminator for the test statement -# parse_statement - Used with "Else" terminator for the success statement -# parse_statement - Used with "EndIf" terminator for the failure statement - -import enum - -from hypothesis import assume, given -from hypothesis.strategies import composite, data, integers, just, one_of - -from src.ast_types import Conditional -from src.parse import ( - ParseContext, - ParseError, - ParseErrorException, - ParseTask, - Parser, - read_token, -) -from tests.parse.templates import ( - 
template_test_valid, - template_test_invalid, -) -from tests.test_token import ( - draw_token_random, - static_token_by_value, -) -from tests.parse.test_error import static_parse_context - -# -# Helper functions -# - - -# Values used by the mocked parser -class MockStatement(enum.Enum): - MockTest = enum.auto() - MockSuccess = enum.auto() - MockFailure = enum.auto() - - -# Mocks and tests the parse_statement parser -# Instead of parsing a complex statement it just parses -# a the following tokens: MockTest, MockSuccess, MockFailure -# The terminator is required to be: -# - "Then" for MockTest -# - "Else" for MockSuccess -# - "EndIf" for MockFailure -class MockParser(Parser): - def parse_statement(self, stream, parent_context, terminator): - token = read_token(stream, None, parent_context) - if token.value == "MockTest" and terminator == "Then": - return MockStatement.MockTest - elif token.value == "MockSuccess" and terminator == "Else": - return MockStatement.MockSuccess - elif token.value == "MockFailure" and terminator == "EndIf": - return MockStatement.MockFailure - else: - raise ParseErrorException( - ParseError.WRONG_TOKEN, token, None, parent_context - ) - - -# A valid conditional expression and tokens -def static_conditional_valid_tokens(): - tokens = [ - static_token_by_value("If"), - static_token_by_value("MockTest"), - static_token_by_value("MockSuccess"), - static_token_by_value("MockFailure"), - ] - expected = Conditional( - MockStatement.MockTest, MockStatement.MockSuccess, MockStatement.MockFailure - ) - return (tokens, expected) - - -# Calculates the parse context for a specific token in a conditional expression -def context_at(parent_context, tokens, index): - max = len(tokens) - 1 - if max == -1: - start = None - token = None - elif max < index: - start = tokens[0] - token = None - else: - start = tokens[0] - token = tokens[index] - context = ParseContext(ParseTask.PARSE_CONDITIONAL, start, parent_context) - if index == 0: - return context - elif 
index == 1: - subcontext = ParseContext(ParseTask.PARSE_TEST, token, context) - return subcontext - elif index == 2: - subcontext = ParseContext(ParseTask.PARSE_SUCCESS, token, context) - return subcontext - elif index == 3: - subcontext = ParseContext(ParseTask.PARSE_FAILURE, token, context) - return subcontext - else: - assert "Should never be called" - - -# Draws something that isn't a mock statement -@composite -def draw_not_statement(draw): - token = draw(draw_token_random()) - assume(token.value not in ["MockTest", "MockSuccess", "MockFailure"]) - return token - - -# Draws the wrong statement for a given conditional position -def draw_wrong_statement_at(index): - random = draw_not_statement() - test = just(static_token_by_value("MockTest")) - success = just(static_token_by_value("MockSuccess")) - failure = just(static_token_by_value("MockFailure")) - if index == 1: - return one_of([success, failure, random]) - elif index == 2: - return one_of([test, failure, random]) - elif index == 3: - return one_of([test, success, random]) - else: - assert "Should never be called" - - -# -# Test functions -# - -# Tests parsing a valid statement -# We expect the following behaviour: -# - The test statement is read and assigned -# - The success statement is read and assigned -# - The failure statement is read and assigned -def test_parse_conditional_valid(): - (tokens, expected) = static_conditional_valid_tokens() - parser = MockParser().parse_conditional - return template_test_valid(parser, tokens, expected) - - -# Tests parsing a truncated statement -# We expect the following behaviour: -# - A NO_TOKEN parse error is raised -# - The error context is PARSE_CONDITIONAL -# - The test statement has its own subcontext, PARSE_TEST -# - The success statement has its own subcontext, PARSE_SUCCESS -# - The failure statement has its own subcontext, PARSE_FAILURE -@given(data()) -def test_parse_conditional_short(data): - (tokens, _) = static_conditional_valid_tokens() - new_len = 
data.draw( - integers(min_value=0, max_value=(len(tokens) - 1)), label="shorten point" - ) - short_tokens = tokens[0:new_len] - parent_context = static_parse_context() - context = context_at(parent_context, short_tokens, new_len) - error = ParseErrorException(ParseError.NO_TOKEN, None, None, context) - parser = MockParser().parse_conditional - template_test_invalid(parser, parent_context, short_tokens, error) - - -# Tests parsing an invalid "If" -# We expect the following behaviour: -# - A WRONG_TOKEN parse error is raised -# - The error context is PARSE_CONDITIONAL -# - The token "If" is expected -@given(data()) -def test_parse_conditional_wrong_if(data): - (tokens, _) = static_conditional_valid_tokens() - new_if = data.draw(draw_token_random(), label="new if") - assume(new_if.value != "If") - new_tokens = [new_if] + tokens[1:] - parent_context = static_parse_context() - context = context_at(parent_context, new_tokens, 0) - error = ParseErrorException(ParseError.WRONG_TOKEN, new_if, "If", context) - parser = MockParser().parse_conditional - template_test_invalid(parser, parent_context, new_tokens, error) - - -# Tests parsing an invalid statement -# We expect the following behaviour: -# - A WRONG_TOKEN parse error is raised by the mock parser -# - The error context is PARSE_CONDITIONAL -# - Our error context is retained by parse_statement -@given(data()) -def test_parse_conditional_wrong_statement(data): - (tokens, _) = static_conditional_valid_tokens() - statement_pos = data.draw( - integers(min_value=1, max_value=(len(tokens) - 1)), label="statement position" - ) - new_statement = data.draw(draw_wrong_statement_at(statement_pos)) - new_tokens = tokens[0:statement_pos] + [new_statement] + tokens[statement_pos + 1 :] - parent_context = static_parse_context() - context = context_at(parent_context, new_tokens, statement_pos) - error = ParseErrorException(ParseError.WRONG_TOKEN, new_statement, None, context) - parser = MockParser().parse_conditional - 
template_test_invalid(parser, parent_context, new_tokens, error) diff --git a/tests/parse/test_directive.py b/tests/parse/test_directive.py deleted file mode 100644 index e3452eb..0000000 --- a/tests/parse/test_directive.py +++ /dev/null @@ -1,189 +0,0 @@ -# SPDX-License-Identifier: LGPL-2.1-only -# Copyright 2022 Jookia - -# Directive syntax consists of one of the following: -# - A set -# - A conditional -# - A statement, terminated by "Done" -# -# Parsing gives one of the following: -# Set - The parsed set node -# Conditional - The parsed conditional node -# Statement - The parsed statement node -# -# The following error contexts are used: -# PARSE_DIRECTIVE - Used when parsing the directive -# -# The following parse errors are generated: -# NO_TOKEN - When there's not enough tokens -# -# The following parsers are used and have their errors -# and data structures propagated: -# parse_statement - Used with "Done" terminator - -import enum - -from hypothesis import given -from hypothesis.strategies import composite, just, one_of - -from src.parse import ( - ParseContext, - ParseError, - ParseErrorException, - ParseTask, - Parser, -) -from tests.parse.templates import ( - template_test_valid, - template_test_invalid, -) -from tests.test_token import ( - draw_token_unknown, - static_token_by_value, -) -from tests.parse.test_error import static_parse_context -from src.ast_types import Bool, Statement - -# -# Helper functions -# - - -# Values used by the mocked parser -class MockDirective(enum.Enum): - MockSet = enum.auto() - MockConditional = enum.auto() - MockStatement = enum.auto() - - -# Mocks and tests the parse_directive parser -# Instead of parsing sets, conditionals and statements -# it instead returns a mock value -class MockParserValid(Parser): - def parse_set(self, stream, parent_context): - stream.pop() - return MockDirective.MockSet - - def parse_conditional(self, stream, parent_context): - stream.pop() - return MockDirective.MockConditional - - def 
parse_statement(self, stream, parent_context, terminator): - assert terminator == "Done" - stream.pop() - return MockDirective.MockStatement - - -# Mocks and tests the parse_directive parser error handling -# Instead of parsing, just return an error -# Re-use the enum elements to give a unique error for each node -class MockParserInvalid(Parser): - def _raise_error(self, error, parent_context): - raise ParseErrorException(error, None, None, parent_context) - - def parse_set(self, stream, parent_context): - self._raise_error(MockDirective.MockSet, parent_context) - - def parse_conditional(self, stream, parent_context): - self._raise_error(MockDirective.MockConditional, parent_context) - - def parse_statement(self, stream, parent_context, terminator): - assert terminator == "Done" - self._raise_error(MockDirective.MockStatement, parent_context) - - -# A valid directive containing a set -def static_directive_set(): - return ([static_token_by_value("Set")], MockDirective.MockSet) - - -# A valid directive containing a conditional -def static_directive_conditional(): - return ([static_token_by_value("If")], MockDirective.MockConditional) - - -# Draws a valid directive containing a statement -@composite -def draw_directive_statement(draw): - return ([draw(draw_token_unknown())], MockDirective.MockStatement) - - -# Draws a valid directive -@composite -def draw_directive_valid(draw): - return draw( - one_of( - [ - just(static_directive_set()), - just(static_directive_conditional()), - draw_directive_statement(), - ] - ) - ) - - -# A simple directive tokens and result -def static_directive_valid(): - return ( - [static_token_by_value("True"), static_token_by_value("Done")], - Statement(Bool(True), None, []), - ) - - -# An invalid directive token -def static_directive_invalid(): - return [static_token_by_value("Done")] - - -# An invalid directive token error -def static_directive_invalid_error(parent_context): - token = static_directive_invalid()[0] - directive_context = 
ParseContext(ParseTask.PARSE_DIRECTIVE, token, parent_context) - statement_context = ParseContext( - ParseTask.PARSE_STATEMENT, token, directive_context - ) - context = ParseContext(ParseTask.PARSE_SUBJECT, token, statement_context) - return ParseErrorException(ParseError.FOUND_TERMINATOR, token, None, context) - - -# -# Test functions -# - -# Tests parsing a valid directive -# We expect the following behaviour: -# - Sets are detected and parsed -# - Conditionals are detected and parsed -# - Statements are detected and parsed -@given(draw_directive_valid()) -def test_parse_directive_valid(test_data): - (tokens, expected) = test_data - parser = MockParserValid().parse_directive - return template_test_valid(parser, tokens, expected) - - -# Tests parsing an empty directive -# We expect the following behaviour: -# - A NO_TOKEN parse error is raised -# - The error context is PARSE_DIRECTIVE -def test_parse_directive_empty(): - tokens = [] - parent_context = static_parse_context() - context = ParseContext(ParseTask.PARSE_DIRECTIVE, None, parent_context) - error = ParseErrorException(ParseError.NO_TOKEN, None, None, context) - parser = MockParserValid().parse_directive - template_test_invalid(parser, parent_context, tokens, error) - - -# Tests error propagation -# We expect the following behaviour: -# - A mock error is raised for each case -# - Our error context is used for the error -@given(draw_directive_valid()) -def test_parse_directive_error(test_data): - (tokens, expected) = test_data - parent_context = static_parse_context() - context = ParseContext(ParseTask.PARSE_DIRECTIVE, tokens[0], parent_context) - error = ParseErrorException(expected, None, None, context) - parser = MockParserInvalid().parse_directive - template_test_invalid(parser, parent_context, tokens, error) diff --git a/tests/parse/test_error.py b/tests/parse/test_error.py deleted file mode 100644 index 94a325f..0000000 --- a/tests/parse/test_error.py +++ /dev/null @@ -1,285 +0,0 @@ -# 
SPDX-License-Identifier: LGPL-2.1-only -# Copyright 2022 Jookia - -# Parse error reporting consists of the following data structures: -# -# ParseTasks represent a task such as parsing a file or directive. -# -# ParseContexts represent a step during the parsing process for the purposes of -# error reporting. -# A context contains: -# - A ParseTask specifying the step -# - The Token the task started at (possibly None) -# - A parent ParseContext (possibly None) if this task is a component -# of another task -# -# ParseErrors represent an error encountered during a parsing task, such as an -# unexpected token or invalid token. -# -# ParseErrorExceptions represent an error encounted during parsing. -# An error exception contains: -# - A ParseError detailing the error -# - The Token the error is at (possibly None) -# - An expected string (possibly None), used if the parser is expecting -# a single, specific token such as a keyword -# - A ParseContext detailing the current parsing task - -from hypothesis import given -from hypothesis.strategies import composite, integers, sampled_from, text - -from src.i18n import Message -from src.parse import ( - ParseContext, - ParseError, - ParseErrorException, - ParseTask, - format_context, - format_exception, - format_full_error, -) -from tests.templates import template_test_structure -from tests.test_token import draw_token_random, static_token - -# -# Helper functions -# - - -# Mapping of error to message identifiers -error_message_ids = { - ParseError.TEST_ERROR: "ParseErrorTestError", - ParseError.NO_TOKEN: "ParseErrorNoToken", - ParseError.WRONG_TOKEN: "ParseErrorWrongToken", - ParseError.FOUND_STARTTEXT: "ParseErrorFoundStartText", - ParseError.FOUND_STARTNOTE: "ParseErrorFoundStartNote", - ParseError.NOT_BOOL: "ParseErrorNotBool", - ParseError.FOUND_ENDNOTE: "ParseErrorFoundEndNote", - ParseError.RESERVED_NAME: "ParseErrorReservedName", - ParseError.FOUND_TERMINATOR: "ParseErrorFoundTerminator", -} - -# Mapping of task to 
message identifiers -task_message_ids = { - ParseTask.TEST_TASK: "ParseTaskTestTask", - ParseTask.PARSE_NOTE: "ParseTaskNote", - ParseTask.CLEAR_NOTES: "ParseTaskClearNotes", - ParseTask.PARSE_TEXT: "ParseTaskText", - ParseTask.PARSE_BOOL: "ParseTaskBool", - ParseTask.PARSE_REFERENCE: "ParseTaskReference", - ParseTask.PARSE_VALUE: "ParseTaskValue", - ParseTask.PARSE_STATEMENT: "ParseTaskStatement", - ParseTask.PARSE_SUBJECT: "ParseTaskSubject", - ParseTask.PARSE_VERB: "ParseTaskVerb", - ParseTask.PARSE_ARGUMENT: "ParseTaskArgument", - ParseTask.PARSE_SET: "ParseTaskSet", - ParseTask.PARSE_CONDITIONAL: "ParseTaskConditional", - ParseTask.PARSE_TEST: "ParseTaskTest", - ParseTask.PARSE_SUCCESS: "ParseTaskSuccess", - ParseTask.PARSE_FAILURE: "ParseTaskFailure", - ParseTask.PARSE_DIRECTIVE: "ParseTaskDirective", - ParseTask.PARSE_FILE: "ParseTaskFile", -} - - -# Draws a strategy, with 25% of draws being None -@composite -def draw_maybe(draw, strategy): - chance = draw(integers(min_value=1, max_value=4)) - if chance == 1: - return None - else: - return draw(strategy) - - -# Draws a random parse task -@composite -def draw_parse_task(draw): - return draw(sampled_from(ParseTask.list())) - - -# Draws a random parse context without a parent -@composite -def draw_parse_context(draw): - task = draw(draw_parse_task()) - token = draw(draw_maybe(draw_token_random())) - context = draw(draw_maybe(draw_parse_context())) - return ParseContext(task, token, context) - - -# Static parse context -def static_parse_context(): - task = ParseTask.TEST_TASK - token = static_token() - return ParseContext(task, token, None) - - -# Draws a random parse error -@composite -def draw_parse_error(draw): - return draw(sampled_from(ParseError.list())) - - -# Draws a random parse error exception -@composite -def draw_parse_error_exception(draw): - error = draw(draw_parse_error()) - token = draw(draw_maybe(draw_token_random())) - expected = draw(draw_maybe(text())) - context = draw(draw_parse_context()) - 
return ParseErrorException(error, token, expected, context) - - -# -# Test functions -# - - -# Test parse context structure -@template_test_structure( - ParseContext, - draw_parse_context(), - task=draw_parse_task(), - token=draw_maybe(draw_token_random()), - parent=draw_maybe(draw_parse_context()), -) -def test_parse_context_structure(): - pass - - -# Test parse error exception structure -@template_test_structure( - ParseErrorException, - draw_parse_error_exception(), - error=draw_parse_error(), - token=draw_maybe(draw_token_random()), - expected=draw_maybe(text()), - context=draw_maybe(draw_parse_context()), -) -def test_parse_error_exception_structure(): - pass - - -# Tests formatting a ParseContext -# We expect the following behaviour: -# - A Message is returned -# - The message ID begins with ParseContext -# - The first parameter is task -# - If the token field is set, the ID is appended with -# "At" and the second and third parameters are the token's -# location's line and offset -# Two combinations are possible: -# - Message("ParseContext", [task]) -# - Message("ParseContextAt", [task, file, line, offset]) -# task is a message representing the ParseTask, equivalent to: -# - Message(task_message_ids[context.task], []) -# file is a source file's name -# line is a source file's line number -# offset is a source file's line offset -def _test_parse_error_format_context(context): - task = Message(task_message_ids[context.task], []) - has_location = context.token is not None - if has_location: - file = context.token.location.file - line = context.token.location.line - offset = context.token.location.offset - expected = Message("ParseContextAt", [task, file, line, offset]) - else: - expected = Message("ParseContext", [task]) - value = format_context(context) - assert expected == value - - -# Tests formatting with a random ParseContext -@given(draw_parse_context()) -def test_parse_error_format_context(context): - _test_parse_error_format_context(context) - - -# Tests 
formatting with each ParseTask -@given(draw_parse_task()) -def test_parse_error_format_parse_task(task): - context = ParseContext(task, None, None) - _test_parse_error_format_context(context) - - -# Tests formatting a ParseErrorException -# We expect the following behaviour: -# - A Message is returned -# - The message ID begins with ParseError -# - If the expected field is set, the ID is appended with -# "Expected" and the first parameter is the expected value -# - Otherwise the first parameter is the error -# - If the token field is set, the ID is appended with -# "At" and the second and third parameters are the token's -# location's line and offset -# Four combinations are possible: -# - Message("ParserError", [error]) -# - Message("ParserErrorAt", [error, file, line, offset]) -# - Message("ParserErrorExpected", [expected]) -# - Message("ParserErrorExpectedAt", [expected, file, line, offset]) -# error is a message representing the ParseError, equivalent to: -# - Message(error_message_ids[exception.error], []) -# file is a source file's name -# line is a source file's line number -# offset is a source file's line offset -def _test_parse_error_format_exception(exception): - has_expected = exception.expected is not None - has_location = exception.token is not None - # Variables used for message parameters - err = Message(error_message_ids[exception.error], []) - expect = exception.expected - if has_location: - file = exception.token.location.file - line = exception.token.location.line - offset = exception.token.location.offset - else: - file = None - line = None - offset = None - # Truth table used for message lookup - # Indexes are has_expected and has_location - messages = [ - # Cases without an expected token: - [ - Message("ParserError", [err]), - Message("ParserErrorAt", [err, file, line, offset]), - ], - # Cases with an expected token: - [ - Message("ParserErrorExpected", [expect]), - Message("ParserErrorExpectedAt", [expect, file, line, offset]), - ], - ] - 
expected = messages[has_expected][has_location] - value = format_exception(exception) - assert expected == value - - -# Tests formatting with a random ParseErrorException -@given(draw_parse_error_exception()) -def test_parse_error_format_exception(exception): - _test_parse_error_format_exception(exception) - - -# Tests formatting with each ParseError -@given(draw_parse_error()) -def test_parse_error_format_parse_error(error): - exception = ParseErrorException(error, None, None, static_parse_context()) - _test_parse_error_format_exception(exception) - - -# Tests formatting a full error -# We expect the following behaviour: -# - An array of Messages are returned -# - The first message is the exception formatted -# - The second messages is the exception's context formatted -# - Subsequent messages are of any context decedents formatted -@given(draw_parse_error_exception()) -def test_parse_error_format_full_error(exception): - expected = [format_exception(exception)] - context = exception.context - while context is not None: - expected.append(format_context(context)) - context = context.parent - value = format_full_error(exception) - assert expected == value diff --git a/tests/parse/test_file.py b/tests/parse/test_file.py deleted file mode 100644 index db27b21..0000000 --- a/tests/parse/test_file.py +++ /dev/null @@ -1,74 +0,0 @@ -# SPDX-License-Identifier: LGPL-2.1-only -# Copyright 2022 Jookia - -# File syntax consists of the following: -# - One or more directives -# -# Parsing gives the following: -# A list of directives - All directives in the file -# -# The following error contexts are used: -# PARSE_FILE - Used when parsing the file -# -# No parse errors are generated. 
- -from hypothesis import given -from hypothesis.strategies import composite, just, lists - -from src.token import TokenStream -from src.parse import ( - ParseContext, - ParseTask, - parse_file, -) -from tests.parse.templates import template_test_invalid -from tests.parse.test_error import static_parse_context -from tests.parse.test_directive import ( - static_directive_valid, - static_directive_invalid, - static_directive_invalid_error, -) - - -# A valid file -@composite -def draw_file_valid(draw): - directives = draw(lists(just(static_directive_valid()))) - all_tokens = [] - all_expected = [] - for (tokens, expected) in directives: - all_tokens += tokens - all_expected.append(expected) - return (all_tokens, all_expected) - - -# -# Test functions -# - -# Tests parsing a valid file -# We expect the following behaviour: -# - All directives are parsed -# - No tokens are left after parsing -@given(draw_file_valid()) -def test_parse_file_valid(test_data): - (tokens, expected) = test_data - stream = TokenStream(tokens.copy()) - parsed = parse_file(stream, None) - assert parsed == expected - assert stream.pop() is None - - -# Tests parsing a invalid file -# We expect the following behaviour: -# - The error context is PARSE_FILE -# - A wrong directive error is propagated -@given(draw_file_valid()) -def test_parse_file_invalid(test_data): - (tokens, expected) = test_data - invalid_directive = static_directive_invalid() - new_tokens = tokens + invalid_directive - parent_context = static_parse_context() - context = ParseContext(ParseTask.PARSE_FILE, new_tokens[0], parent_context) - error = static_directive_invalid_error(context) - template_test_invalid(parse_file, parent_context, new_tokens, error) diff --git a/tests/parse/test_note.py b/tests/parse/test_note.py deleted file mode 100644 index 12e5e92..0000000 --- a/tests/parse/test_note.py +++ /dev/null @@ -1,154 +0,0 @@ -# SPDX-License-Identifier: LGPL-2.1-only -# Copyright 2022 Jookia - -# Note syntax consists of the 
following: -# - StartNote -# - Optionally any token that isn't EndNote -# - EndNote -# -# Parsing gives the following: -# No value -# -# The following error contexts are used: -# PARSE_NOTE - Used when parsing the file -# -# The following parse errors are generated: -# FOUND_ENDNOTE - When a stray EndNote token is found - -from hypothesis import assume, given -from hypothesis.strategies import ( - composite, - lists, -) - -from src.parse import ( - skip_note, - ParseContext, - ParseError, - ParseErrorException, - ParseTask, -) -from tests.parse.templates import ( - draw_random_within, - template_test_invalid, - template_test_valid, -) -from tests.parse.test_error import static_parse_context -from tests.test_token import ( - static_token_by_value, - draw_token_random, -) - -# -# Helper functions -# - - -# Static tokens representing a note -def static_note_tokens(): - return [static_token_by_value("StartNote"), static_token_by_value("EndNote")] - - -# An invalid note token -def static_note_invalid(): - return [static_token_by_value("StartNote")] - - -# An invalid note token error -def static_note_invalid_error(parent_context): - token = static_note_invalid()[0] - note_context = ParseContext(ParseTask.PARSE_NOTE, token, parent_context) - return ParseErrorException(ParseError.NO_TOKEN, None, None, note_context) - - -# Draws a random token suitable for note building -@composite -def draw_note_value_token(draw): - token = draw(draw_token_random()) - assume(token.value not in ["StartNote", "EndNote"]) - return token - - -# Draws a random token that isn't a StartNote token -@composite -def draw_token_not_startnote(draw): - token = draw(draw_token_random()) - assume(token.value != "StartNote") - return token - - -# Draws tokens to make a valid note -@composite -def draw_token_note_valid(draw): - tokens = draw(lists(draw_note_value_token())) - start = static_token_by_value("StartNote") - end = static_token_by_value("EndNote") - all_tokens = [start] + tokens + [end] - return 
all_tokens - - -# -# Test functions -# - - -# Tests skip_note works correctly -# We expect the following behaviour: -# - No value is returned -# template_test provides general parsing properties -@given(draw_token_note_valid()) -def test_parse_note_valid(tokens): - template_test_valid(skip_note, tokens, None) - - -# Tests parsing notes without StartNote -# We expect the following behaviour: -# - Error if StartNote's token value is not "StartNote" -# - Have ParseError.WRONG_TOKEN as the exception code -# - Have ParseTask.PARSE_NOTE as the context's parse task -@given(draw_token_note_valid(), draw_token_not_startnote()) -def test_parse_note_invalid_nostartnote(tokens, token): - new_tokens = [token] + tokens[1:0] - parent_context = static_parse_context() - context = ParseContext(ParseTask.PARSE_NOTE, new_tokens[0], parent_context) - error = ParseErrorException(ParseError.WRONG_TOKEN, token, "StartNote", context) - template_test_invalid(skip_note, parent_context, new_tokens, error) - - -# Tests parsing empty notes -# We expect the following behaviour: -# - Error if there is no StartNote token at all -# - Have ParseError.NO_TOKEN as the exception code -# - Have ParseTask.PARSE_NOTE as the context's parse task -def test_parse_note_invalid_empty(): - parent_context = static_parse_context() - context = ParseContext(ParseTask.PARSE_NOTE, None, parent_context) - error = ParseErrorException(ParseError.NO_TOKEN, None, None, context) - template_test_invalid(skip_note, parent_context, [], error) - - -# Tests parsing a note with a StartNote token in it -# We expect the following behaviour: -# - Error if a StartNote token is in the note content -# - Have ParseTask.PARSE_NOTE as the context's parse task -@given(draw_random_within(draw_token_note_valid(), "StartNote")) -def test_parse_note_invalid_extrastartnote(within): - (tokens, start) = within - parent_context = static_parse_context() - context = ParseContext(ParseTask.PARSE_NOTE, tokens[0], parent_context) - error = 
ParseErrorException(ParseError.FOUND_STARTNOTE, start, None, context) - template_test_invalid(skip_note, parent_context, tokens, error) - - -# Tests parsing a note without an EndNote token -# We expect the following behaviour: -# - Error if there is no EndNote token at all -# - Have ParseError.NO_TOKEN as the exception code -# - Have ParseTask.PARSE_NOTE as the context's parse task -@given(draw_token_note_valid()) -def test_parse_note_invalid_noendnote(tokens): - new_tokens = tokens[0:-1] - parent_context = static_parse_context() - context = ParseContext(ParseTask.PARSE_NOTE, tokens[0], parent_context) - error = ParseErrorException(ParseError.NO_TOKEN, None, None, context) - template_test_invalid(skip_note, parent_context, new_tokens, error) diff --git a/tests/parse/test_parse.py b/tests/parse/test_parse.py deleted file mode 100644 index edd55db..0000000 --- a/tests/parse/test_parse.py +++ /dev/null @@ -1,37 +0,0 @@ -# SPDX-License-Identifier: LGPL-2.1-only -# Copyright 2022 Jookia - -from hypothesis import given -from hypothesis.strategies import lists - -from src.parse import ( - clear_notes, - ParseErrorException, - parse, - parse_file, -) -from src.token import TokenStream -from tests.test_token import draw_token_random -from tests.parse.test_error import draw_parse_context - - -# Tests the parser wrapper works correctly -# We expect the following behaviour: -# - Notes to be removed from the tokens -# - The remaining tokens to be parsed as a file -@given(lists(draw_token_random()), draw_parse_context()) -def test_parse_fuzz(tokens, context): - result = None - parsed = None - try: - stream = TokenStream(tokens.copy()) - cleared = clear_notes(stream, context) - stream2 = TokenStream(cleared) - result = parse_file(stream2, context) - except ParseErrorException as e: - result = e - try: - parsed = parse(tokens.copy(), context) - except ParseErrorException as e: - parsed = e - assert parsed == result diff --git a/tests/parse/test_reference.py 
b/tests/parse/test_reference.py deleted file mode 100644 index ec3d1eb..0000000 --- a/tests/parse/test_reference.py +++ /dev/null @@ -1,53 +0,0 @@ -# SPDX-License-Identifier: LGPL-2.1-only -# Copyright 2022 Jookia - -from hypothesis import given -from hypothesis.strategies import composite - -from src.ast_types import Reference -from src.parse import ParseContext, ParseError, ParseErrorException, ParseTask, Parser -from tests.parse.templates import template_test_valid, template_test_invalid -from tests.parse.test_error import static_parse_context -from tests.test_token import draw_token_known, draw_token_unknown - - -# Draws tokens to make a reference -@composite -def draw_token_reference_valid(draw): - token = draw(draw_token_unknown()) - return ([token], Reference(token.value)) - - -# Tests parse_reference works correctly -# We expect the following behaviour: -# - The resulting reference has the token's value -# template_test_valid provides general parsing properties -@given(draw_token_reference_valid()) -def test_parse_reference_valid(valid_data): - (tokens, expected) = valid_data - template_test_valid(Parser().parse_reference, tokens, expected) - - -# Tests parsing a reference with a reserved name errors -# We expect the following behaviour: -# - Error if a keyword or literal is encountered -# - Have ParseError.RESERVED_NAME as the exception code -# - Have ParseTask.PARSE_REFERENCE as the context's parse task -@given(draw_token_known()) -def test_parse_reference_invalid_name(token): - parent_context = static_parse_context() - context = ParseContext(ParseTask.PARSE_REFERENCE, token, parent_context) - error = ParseErrorException(ParseError.RESERVED_NAME, token, None, context) - template_test_invalid(Parser().parse_reference, parent_context, [token], error) - - -# Tests parsing of empty references -# We expect the following behaviour: -# - Error if there isn't a token -# - Have ParseError.NO_TOKEN as the exception code -# - Have ParseTask.PARSE_REFERENCE as the 
context's parse task -def test_parse_reference_invalid_empty(): - parent_context = static_parse_context() - context = ParseContext(ParseTask.PARSE_REFERENCE, None, parent_context) - error = ParseErrorException(ParseError.NO_TOKEN, None, None, context) - template_test_invalid(Parser().parse_reference, parent_context, [], error) diff --git a/tests/parse/test_set.py b/tests/parse/test_set.py deleted file mode 100644 index 809c4c2..0000000 --- a/tests/parse/test_set.py +++ /dev/null @@ -1,218 +0,0 @@ -# SPDX-License-Identifier: LGPL-2.1-only -# Copyright 2022 Jookia - -# Set syntax consists of the following tokens: -# - "Set" -# - Name, a value that isn't a keyword -# - "To" -# - A statement, terminated by "EndSet" -# -# Parsing gives a Set data structure containing: -# subject - The value of Name -# statement - The parsed statement -# -# The following cases are errors: -# - Not having enough tokens to parse -# - Set not being the literal "Set" -# - Name being a keyword -# - To not being the literal "To" -# - The statement not parsing correctly -# -# The following error contexts are used: -# PARSE_SET - Used when parsing the general syntax -# PARSE_SUBJECT - Used when parsing the subject -# -# The following parse errors are generated: -# NO_TOKEN - When there's not enough tokens -# WRONG_TOKEN - When Set or To aren't the correct values -# RESERVED_NAME - When Name is not a keyword -# -# The following parsers are used and have their errors -# and data structures propagated: -# parse_statement - Used with "EndSet" terminator for the statement - -import enum - -from hypothesis import assume, given -from hypothesis.strategies import composite, data, integers - -from src.ast_types import Set -from src.parse import ( - ParseContext, - ParseError, - ParseErrorException, - ParseTask, - Parser, - read_token, -) -from tests.parse.templates import ( - template_test_valid, - template_test_invalid, -) -from tests.test_token import ( - draw_token_known, - draw_token_random, - 
draw_token_unknown, - static_token_by_value, -) -from tests.parse.test_error import static_parse_context - -# -# Helper functions -# - - -# Values used by the mocked parser -class MockStatement(enum.Enum): - MockValue = enum.auto() - - -# Mocks and tests the parse_statement parser -# Instead of parsing a complex statement it just parses -# a single token: MockStatement -# The terminator is required to be "EndSet" -class MockParser(Parser): - def parse_statement(self, stream, parent_context, terminator): - assert terminator == "EndSet" - read_token(stream, "MockStatement", parent_context) - return MockStatement.MockValue - - -# Draws a valid set expression and tokens -@composite -def draw_set_valid_tokens(draw): - subject = draw(draw_token_unknown()) - tokens = [ - static_token_by_value("Set"), - subject, - static_token_by_value("To"), - static_token_by_value("MockStatement"), - ] - expected = Set(subject.value, MockStatement.MockValue) - return (tokens, expected) - - -# Calculates the parse context for a specific token in a set expression -def context_at(parent_context, tokens, index): - max = len(tokens) - 1 - if max == -1: - start = None - token = None - elif max < index: - start = tokens[0] - token = None - else: - start = tokens[0] - token = tokens[index] - context = ParseContext(ParseTask.PARSE_SET, start, parent_context) - if index == 1: - subcontext = ParseContext(ParseTask.PARSE_SUBJECT, token, context) - return subcontext - else: - return context - - -# -# Test functions -# - -# Tests parsing a valid statement -# We expect the following behaviour: -# - The name is read as the subject -# - The statement is read as the statement -@given(draw_set_valid_tokens()) -def test_parse_set_valid(test_data): - (tokens, expected) = test_data - parser = MockParser().parse_set - return template_test_valid(parser, tokens, expected) - - -# Tests parsing a truncated statement -# We expect the following behaviour: -# - A NO_TOKEN parse error is raised -# - The error context 
is PARSE_SET -# - The subject has its own subcontext, PARSE_SUBJECT -@given(data()) -def test_parse_set_short(data): - (tokens, _) = data.draw(draw_set_valid_tokens(), label="valid data") - new_len = data.draw( - integers(min_value=0, max_value=(len(tokens) - 1)), label="shorten point" - ) - short_tokens = tokens[0:new_len] - parent_context = static_parse_context() - context = context_at(parent_context, short_tokens, new_len) - error = ParseErrorException(ParseError.NO_TOKEN, None, None, context) - parser = MockParser().parse_set - template_test_invalid(parser, parent_context, short_tokens, error) - - -# Tests parsing an invalid "Set" -# We expect the following behaviour: -# - A WRONG_TOKEN parse error is raised -# - The error context is PARSE_SET -# - The token "Set" is expected -@given(data()) -def test_parse_set_wrong_set(data): - (tokens, _) = data.draw(draw_set_valid_tokens(), label="valid data") - new_set = data.draw(draw_token_random(), label="new set") - assume(new_set.value != "Set") - new_tokens = [new_set] + tokens[1:] - parent_context = static_parse_context() - context = context_at(parent_context, new_tokens, 0) - error = ParseErrorException(ParseError.WRONG_TOKEN, new_set, "Set", context) - parser = MockParser().parse_set - template_test_invalid(parser, parent_context, new_tokens, error) - - -# Tests parsing an invalid "To" -# We expect the following behaviour: -# - A WRONG_TOKEN parse error is raised -# - The error context is PARSE_SET -# - The token "To" is expected -@given(data()) -def test_parse_set_wrong_to(data): - (tokens, _) = data.draw(draw_set_valid_tokens(), label="valid data") - new_to = data.draw(draw_token_random(), label="new to") - assume(new_to.value != "To") - new_tokens = tokens[0:2] + [new_to] + tokens[3:] - parent_context = static_parse_context() - context = context_at(parent_context, new_tokens, 2) - error = ParseErrorException(ParseError.WRONG_TOKEN, new_to, "To", context) - parser = MockParser().parse_set - 
template_test_invalid(parser, parent_context, new_tokens, error) - - -# Tests parsing an invalid name -# We expect the following behaviour: -# - A WRONG_TOKEN parse error is raised -# - The error context is PARSE_SET -# - The token "To" is expected -@given(data()) -def test_parse_set_wrong_name(data): - (tokens, _) = data.draw(draw_set_valid_tokens(), label="valid data") - new_name = data.draw(draw_token_known(), label="new name") - new_tokens = tokens[0:1] + [new_name] + tokens[2:] - parent_context = static_parse_context() - context = context_at(parent_context, new_tokens, 1) - error = ParseErrorException(ParseError.RESERVED_NAME, new_name, None, context) - parser = MockParser().parse_set - template_test_invalid(parser, parent_context, new_tokens, error) - - -# Tests parsing an invalid statement -# We expect the following behaviour: -# - A WRONG_TOKEN parse error is raised by the mock parser -# - The error context is PARSE_SET -# - Our error context is retained by parse_statement -@given(data()) -def test_parse_set_wrong_statement(data): - (tokens, _) = data.draw(draw_set_valid_tokens(), label="valid data") - new_statement = static_token_by_value("NotStatement") - new_tokens = tokens[0:3] + [new_statement] - parent_context = static_parse_context() - context = context_at(parent_context, new_tokens, 3) - error = ParseErrorException( - ParseError.WRONG_TOKEN, new_statement, "MockStatement", context - ) - parser = MockParser().parse_set - template_test_invalid(parser, parent_context, new_tokens, error) diff --git a/tests/parse/test_statement.py b/tests/parse/test_statement.py deleted file mode 100644 index 94ea8f3..0000000 --- a/tests/parse/test_statement.py +++ /dev/null @@ -1,227 +0,0 @@ -# SPDX-License-Identifier: LGPL-2.1-only -# Copyright 2022 Jookia - -import enum - -from hypothesis import assume, given -from hypothesis.strategies import composite, integers, lists - -from src.ast_types import Statement -from src.parse import ( - ParseContext, - ParseError, - 
ParseErrorException, - ParseTask, - Parser, - read_token, -) -from tests.parse.templates import ( - template_test_valid, - template_test_invalid, -) -from tests.parse.test_error import static_parse_context -from tests.test_token import ( - draw_token_known, - draw_token_random, - draw_token_unknown, - static_token_by_value, -) - - -# Values indicating what a parser did -class ParserMockAction(enum.Enum): - PARSE_VALUE = enum.auto() - WRONG_VALUE = enum.auto() - - -# Dummy Parser for testing statement parsing -# Return a static value of ParserMockACTION.PARSE_VALUE if a token starts with "TestValue" -# Otherwise throw an error of ParseMockAction.WRONG_VALUE -class ParserStatementMock(Parser): - def parse_value(self, stream, context): - token = read_token(stream, None, context) - if token.value.startswith("TestValue"): - return ParserMockAction.PARSE_VALUE - else: - raise ParseErrorException( - ParserMockAction.WRONG_VALUE, token, None, context - ) - - -# Creates a dummy parse function with a terminator specified by the last token -def make_test_parser(tokens): - def parser(stream, context): - if tokens == []: - terminator = "" - else: - terminator = tokens[-1].value - return ParserStatementMock().parse_statement(stream, context, terminator) - - return parser - - -# Draws a statement value with a somewhat random name -@composite -def draw_token_statement_value(draw): - number = draw(integers()) - return static_token_by_value("TestValue" + str(number)) - - -# Draws a statement name -@composite -def draw_token_statement_name(draw): - return draw(draw_token_unknown()) - - -# Draws a statement terminator -@composite -def draw_token_statement_terminator(draw): - return draw(draw_token_random()) - - -# Creates a context for a token in a statement -def make_test_context(parent_context, index, statement_token, token): - statement_context = ParseContext( - ParseTask.PARSE_STATEMENT, statement_token, parent_context - ) - if index == 0: - context = ParseTask.PARSE_SUBJECT - 
elif index == 1: - context = ParseTask.PARSE_VERB - else: - context = ParseTask.PARSE_ARGUMENT - context = ParseContext(context, token, statement_context) - return context - - -# Creates a context using existing tokens -def make_test_context_tokens(parent_context, index, tokens): - return make_test_context(parent_context, index, tokens[0], tokens[index]) - - -# Draws a valid statement's tokens -@composite -def draw_token_statement(draw): - values = draw(lists(draw_token_statement_value(), min_size=1)) - subject = values[0] - verb = [] - if len(values) > 1: - verb = [draw(draw_token_statement_name())] - arguments = values[2:] - terminator = draw(draw_token_statement_terminator()) - assume(terminator not in values) - assume(terminator not in verb) - tokens = [subject] + verb + arguments + [terminator] - return tokens - - -# Draws a valid statement -@composite -def draw_token_statement_valid(draw): - tokens = draw(draw_token_statement()) - subject = ParserMockAction.PARSE_VALUE - verb = None - # Account for the terminator - if len(tokens) > 2: - verb = tokens[1].value - argument_count = len(tokens) - 3 - arguments = [ParserMockAction.PARSE_VALUE] * argument_count - statement = Statement(subject, verb, arguments) - return (tokens, statement) - - -# Draws a statement with an invalid subject or argument -@composite -def draw_token_statement_invalid_value(draw): - tokens = draw(draw_token_statement()) - new_token = draw(draw_token_random()) - assume(not new_token.value.startswith("TestValue")) # Not a value - assume(new_token.value != tokens[-1].value) # Not the terminator - max_position = len(tokens) - 2 # Ignore Terminator - position = draw(integers(min_value=0, max_value=max_position)) - assume(position != 1) # Skip Verb - new_tokens = tokens[:position] + [new_token] + tokens[position + 1 :] - return (new_tokens, new_token, position) - - -# Tests parsing a valid statement -# We expect the following behaviour: -# - A value is read as the subject -# - Optionally, a name 
is read as the verb -# - Optionally, any number of arguments are read as values -# - A terminator is found afterwards -# template_test_valid provides general parsing properties -@given(draw_token_statement_valid()) -def test_parse_statement_valid(valid_data): - (tokens, expected) = valid_data - parser = make_test_parser(tokens) - template_test_valid(parser, tokens, expected) - - -# Tests parsing a statement without a terminator -# This also covers cases of premature truncation for verbs and arguments -# We expect the following behaviour: -# - Error reading a verb or argument -# - Have ParseTask.PARSE_VERB or ParseTask.PARSE_ARGUMENT as the context's parse task -# - Have ParseTask.PARSE_STATEMENT as the context's parse task's parent -# - Have ParseError.NO_TOKEN as the exception code -# template_test_invalid provides general parsing properties -@given(draw_token_statement()) -def test_parse_statement_invalid_no_terminator(tokens): - truncated = tokens[:-1] - parent_context = static_parse_context() - context = make_test_context(parent_context, len(truncated), tokens[0], None) - error = ParseErrorException(ParseError.NO_TOKEN, None, None, context) - parser = make_test_parser(tokens) - template_test_invalid(parser, parent_context, truncated, error) - - -# Tests parsing a statement with an invalid value -# We expect the following behaviour: -# - Error reading a invalid value on subject or argument -# - Have ParseTask.PARSE_SUBJECT or ParseTask.PARSE_ARGUMENT as the context's parse task -# - Have ParseTask.PARSE_STATEMENT as the context's parse task's parent -# - Have ParserMockAction.WRONG_VALUE as the exception code -# template_test_invalid provides general parsing properties -@given(draw_token_statement_invalid_value()) -def test_parse_statement_invalid_value(invalid): - (new_tokens, new_value, position) = invalid - parent_context = static_parse_context() - context = make_test_context_tokens(parent_context, position, new_tokens) - error = 
ParseErrorException(ParserMockAction.WRONG_VALUE, new_value, None, context) - parser = make_test_parser(new_tokens) - template_test_invalid(parser, parent_context, new_tokens, error) - - -# Tests parsing a statement with an invalid verb -# We expect the following behaviour: -# - Error reading a known token as a verb -# - Have ParseTask.PARSE_VERB as the context's parse task -# - Have ParseTask.PARSE_STATEMENT as the context's parse task's parent -# - Have ParseError.RESERVED_NAME as the exception code -# template_test_invalid provides general parsing properties -@given(draw_token_statement(), draw_token_known()) -def test_parse_statement_invalid_verb(tokens, new_token): - assume(new_token.value != tokens[-1].value) - new_tokens = tokens[:1] + [new_token] + tokens[1:] - parent_context = static_parse_context() - context = make_test_context_tokens(parent_context, 1, new_tokens) - error = ParseErrorException(ParseError.RESERVED_NAME, new_token, None, context) - parser = make_test_parser(new_tokens) - template_test_invalid(parser, parent_context, new_tokens, error) - - -# Tests parsing an empty statement -# We expect the following behaviour: -# - Error reading an empty statement -# - Have ParseTask.PARSE_SUBJECT as the context's parse task -# - Have ParseTask.PARSE_STATEMENT as the context's parse task's parent -# - Have ParserError.NO_TOKEN as the exception code -# template_test_invalid provides general parsing properties -def test_parse_statement_invalid_empty(): - tokens = [] - parent_context = static_parse_context() - context = make_test_context(parent_context, 0, None, None) - error = ParseErrorException(ParseError.NO_TOKEN, None, None, context) - parser = make_test_parser(tokens) - template_test_invalid(parser, parent_context, tokens, error) diff --git a/tests/parse/test_text.py b/tests/parse/test_text.py deleted file mode 100644 index 9bbf587..0000000 --- a/tests/parse/test_text.py +++ /dev/null @@ -1,132 +0,0 @@ -# SPDX-License-Identifier: LGPL-2.1-only -# 
Copyright 2022 Jookia - -from hypothesis import assume, given -from hypothesis.strategies import ( - composite, - lists, -) - -from src.ast_types import Text -from src.parse import ( - ParseContext, - ParseError, - ParseErrorException, - ParseTask, - Parser, -) -from tests.parse.templates import ( - draw_random_within, - template_test_invalid, - template_test_valid, -) -from tests.parse.test_error import static_parse_context -from tests.test_token import ( - static_token_by_value, - draw_token_random, -) - - -# Draws a random token suitable for text building -@composite -def draw_text_value_token(draw): - token = draw(draw_token_random()) - assume(token.value not in ["StartText", "EndText"]) - return token - - -# Draws a random token that isn't StartText token -@composite -def draw_token_not_starttext(draw): - token = draw(draw_token_random()) - assume(token.value != "StartText") - return token - - -# Draws tokens to make a valid text string and its value -@composite -def draw_token_text_valid(draw): - tokens = draw(lists(draw_text_value_token())) - buffer = "" - for token in tokens: - buffer += token.value + " " - value = buffer[:-1] # Drop trailing space - start = static_token_by_value("StartText") - end = static_token_by_value("EndText") - all_tokens = [start] + tokens + [end] - return (all_tokens, Text(value)) - - -# Draws just the tokens of a valid text string -@composite -def draw_token_text_valid_tokens(draw): - (tokens, _) = draw(draw_token_text_valid()) - return tokens - - -# Tests parse_text works correctly -# We expect the following behaviour: -# - The resulting text is the value of tokens between StartText and EndText -# - The value of the tokens is joined by U+0020 SPACE code points -# - The Token's value is the resulting text -# template_test_valid provides general parsing properties -@given(draw_token_text_valid()) -def test_parse_text_valid(valid_data): - (tokens, expected) = valid_data - template_test_valid(Parser().parse_text, tokens, expected) - 
- -# Test parsing text without StartText -# We expect the following behaviour: -# - Error if StartText's token value is not "StartText" -# - Have ParseError.PARSE_TEXT as the exception code -# - Have ParseTask.PARSE_TEXT as the context's parse task -@given(draw_token_text_valid_tokens(), draw_token_not_starttext()) -def test_parse_text_invalid_nostarttext(tokens, not_starttext): - new_tokens = [not_starttext] + tokens[1:0] - parent_context = static_parse_context() - context = ParseContext(ParseTask.PARSE_TEXT, new_tokens[0], parent_context) - error = ParseErrorException( - ParseError.WRONG_TOKEN, not_starttext, "StartText", context - ) - template_test_invalid(Parser().parse_text, parent_context, new_tokens, error) - - -# Tests parsing empty text -# We expect the following behaviour: -# - Error if there is no StartText token at all -# - Have ParseError.NO_TOKEN as the exception code -# - Have ParseTask.PARSE_TEXT as the context's parse task -def test_parse_text_invalid_empty(): - parent_context = static_parse_context() - context = ParseContext(ParseTask.PARSE_TEXT, None, parent_context) - error = ParseErrorException(ParseError.NO_TOKEN, None, None, context) - template_test_invalid(Parser().parse_text, parent_context, [], error) - - -# Tests parsing text with a StartText token in it -# We expect the following behaviour: -# - Error if a StartText token is in the text content -# - Have ParseError.FOUND_STARTTEXT as the exception code -# - Have ParseTask.PARSE_TEXT as the context's parse task -@given(draw_random_within(draw_token_text_valid_tokens(), "StartText")) -def test_parse_text_invalid_extrastarttext(within): - (tokens, start) = within - parent_context = static_parse_context() - context = ParseContext(ParseTask.PARSE_TEXT, tokens[0], parent_context) - error = ParseErrorException(ParseError.FOUND_STARTTEXT, start, None, context) - template_test_invalid(Parser().parse_text, parent_context, tokens, error) - - -# Tests parsing text without an EndText token -# We 
expect the following behaviour: -# - Error if there is no EndText token at all -# - Have ParseError.NO_TOKEN as the exception code -# - Have ParseTask.PARSE_TEXT as the context's parse task -@given(draw_token_text_valid_tokens()) -def test_parse_text_invalid_noendtext(tokens): - new_tokens = tokens[0:-1] - parent_context = static_parse_context() - context = ParseContext(ParseTask.PARSE_TEXT, tokens[0], parent_context) - error = ParseErrorException(ParseError.NO_TOKEN, None, None, context) - template_test_invalid(Parser().parse_text, parent_context, new_tokens, error) diff --git a/tests/parse/test_value.py b/tests/parse/test_value.py deleted file mode 100644 index f05b936..0000000 --- a/tests/parse/test_value.py +++ /dev/null @@ -1,147 +0,0 @@ -# SPDX-License-Identifier: LGPL-2.1-only -# Copyright 2022 Jookia - -import enum - -from hypothesis import assume, given -from hypothesis.strategies import composite, just, one_of - -from src.parse import ParseContext, ParseError, ParseErrorException, ParseTask, Parser -from tests.parse.templates import template_test_valid, template_test_invalid -from tests.parse.test_error import static_parse_context -from tests.test_token import ( - draw_token_keyword, - draw_token_unknown, - static_token_by_value, -) - - -# Values indicating what a parser did -class ParserMockAction(enum.Enum): - PARSE_BOOL = enum.auto() - PARSE_TEXT = enum.auto() - PARSE_REFERENCE = enum.auto() - - -# Dummy Parser for testing value parsing -# Instead of actually parsing values just return a static value to show what -# the parser would normally do -class ParserValueMockValid(Parser): - def parse_bool(self, stream, context): - stream.pop() - return ParserMockAction.PARSE_BOOL - - def parse_text(self, stream, context): - stream.pop() - return ParserMockAction.PARSE_TEXT - - def parse_reference(self, stream, context): - stream.pop() - return ParserMockAction.PARSE_REFERENCE - - -# Dummy Parser for testing error propagation -# Uses parser mock values as 
errors -class ParserValueMockError(Parser): - def parse_bool(self, stream, context): - raise ParseErrorException(ParserMockAction.PARSE_BOOL, None, None, context) - - def parse_text(self, stream, context): - raise ParseErrorException(ParserMockAction.PARSE_TEXT, None, None, context) - - def parse_reference(self, stream, context): - raise ParseErrorException(ParserMockAction.PARSE_REFERENCE, None, None, context) - - -# Generates a strategy for a valid word and parser action -def token_and_action(word, action): - return just(([static_token_by_value(word)], action)) - - -# Draws tokens for values based on literals -@composite -def draw_token_value_literal(draw): - strategies = [ - token_and_action("True", ParserMockAction.PARSE_BOOL), - token_and_action("False", ParserMockAction.PARSE_BOOL), - token_and_action("StartText", ParserMockAction.PARSE_TEXT), - ] - return draw(one_of(strategies)) - - -# Draws tokens to make a value based on a reference -@composite -def draw_token_value_reference(draw): - token = draw(draw_token_unknown()) - return ([token], ParserMockAction.PARSE_REFERENCE) - - -# Draws tokens and valid value for a valid value -@composite -def draw_token_value_valid(draw): - strategies = [ - draw_token_value_literal(), - draw_token_value_reference(), - ] - return draw(one_of(strategies)) - - -# Tests parsing a literal value -# We expect the following behaviour: -# - parse_value parses a Bool if it sees True or False -# - parse_value parses a Text if it sees StartText -# template_test_valid provides general parsing properties -@given(draw_token_value_literal()) -def test_parse_value_literal(literal): - (tokens, expected) = literal - template_test_valid(ParserValueMockValid().parse_value, tokens, expected) - - -# Tests parsing a reference value -# We expect the following behaviour: -# - parse_value parses a Reference if it sees an unknown value -# template_test_valid provides general parsing properties -@given(draw_token_value_reference()) -def 
test_parse_value_reference(reference): - (tokens, expected) = reference - template_test_valid(ParserValueMockValid().parse_value, tokens, expected) - - -# Tests parsing a keyword as a value fails -# We expect the following behaviour: -# - Error if a keyword is encountered -# - Have ParseError.RESERVED_NAME as the exception code -# - Have ParseTask.PARSE_VALUE as the context's parse task -@given(draw_token_keyword()) -def test_parse_value_invalid_name(token): - assume(token.value != "StartText") - parent_context = static_parse_context() - context = ParseContext(ParseTask.PARSE_VALUE, token, parent_context) - error = ParseErrorException(ParseError.RESERVED_NAME, token, None, context) - template_test_invalid(Parser().parse_value, parent_context, [token], error) - - -# Tests parsing empty value -# We expect the following behaviour: -# - Have ParseError.NO_TOKEN as the exception code -# - Have ParseTask.PARSE_VALUE as the context's parse task -def test_parse_value_invalid_empty(): - parent_context = static_parse_context() - context = ParseContext(ParseTask.PARSE_VALUE, None, parent_context) - error = ParseErrorException(ParseError.NO_TOKEN, None, None, context) - template_test_invalid(Parser().parse_value, parent_context, [], error) - - -# Tests parse_value error propagation -# We expect the following behaviour: -# - Errors from parsing are propagated and have the correct context -# - Have ParseTask.PARSE_VALUE as the context's parse task -@given(draw_token_value_valid()) -def test_parse_value_error_propagation(valid_data): - (tokens, action) = valid_data - parent_context = static_parse_context() - context = ParseContext(ParseTask.PARSE_VALUE, tokens[0], parent_context) - error = ParseErrorException(action, None, None, context) - template_test_invalid( - ParserValueMockError().parse_value, parent_context, tokens, error - ) diff --git a/tests/parse2/templates.py b/tests/parse2/templates.py new file mode 100644 index 0000000..89dde15 --- /dev/null +++ 
b/tests/parse2/templates.py @@ -0,0 +1,51 @@ +# SPDX-License-Identifier: LGPL-2.1-only +# Copyright 2022 Jookia + +from hypothesis.strategies import composite, integers + +from src.parse2.parse import ParseErrorException +from src.parse2.token import TokenStream +from tests.parse2.test_token import static_token_by_value + + +# Draws tokens with an element randomly between the first and last token +# Returns the new list and new token +@composite +def draw_random_within(draw, source, new): + list = draw(source) + data = static_token_by_value(new) + pos = draw(integers(min_value=1, max_value=(len(list) - 1))) + new_data = list[0:pos] + [data] + list[pos:] + return (new_data, data) + + +# Tests that something parses correctly +# We expect the following behaviour: +# - The parse function generates the expected output +# - The parse function doesn't consume extra tokens +def template_test_valid(parser, tokens, expected): + canary = static_token_by_value("CANARY") + stream = TokenStream(tokens + [canary]) + parsed = parser(stream, None) + if expected is None: + assert parsed is None + else: + assert parsed is not None + assert parsed == expected + assert stream.pop() == canary + assert stream.pop() is None + + +# Test that something parses incorrectly +# We expect the following behaviour: +# - The parse function generates the expected error +# - The parse function uses the parse context as a parent +def template_test_invalid(parser, context, tokens, expected): + stream = TokenStream(tokens.copy()) + error = None + try: + parsed = parser(stream, context) + raise AssertionError("Parsed invalid data: %s" % (parsed)) + except ParseErrorException as e: + error = e + assert error == expected diff --git a/tests/parse2/test_bool.py b/tests/parse2/test_bool.py new file mode 100644 index 0000000..22e32c6 --- /dev/null +++ b/tests/parse2/test_bool.py @@ -0,0 +1,64 @@ +# SPDX-License-Identifier: LGPL-2.1-only +# Copyright 2022 Jookia + +from hypothesis import assume, given +from 
hypothesis.strategies import composite + +from src.ast_types import Bool +from src.parse2.parse import ( + ParseContext, + ParseError, + ParseErrorException, + ParseTask, + Parser, +) +from tests.parse2.templates import template_test_valid, template_test_invalid +from tests.parse2.test_error import static_parse_context +from tests.parse2.test_token import draw_token_bool, draw_token_random + + +# Draws tokens to not make a valid boolean +@composite +def draw_token_not_bool(draw): + token = draw(draw_token_random()) + assume(token.value not in ["True", "False"]) + return token + + +# Tests parse_bool works correctly +# We expect the following behaviour: +# - The resulting boolean is True if the first token is True +# - The resulting boolean is False if the first token is False +# template_test_valid provides general parsing properties +@given(draw_token_bool()) +def test_parse2_bool_valid(token): + value = token.value == "True" + expected = Bool(value) + template_test_valid(Parser().parse_bool, [token], expected) + + +# Tests parsing of invalid booleans +# We expect the following behaviour: +# - Error if the token is not True or False +# - Have ParseError.NOT_BOOL as the exception code +# - Have ParseTask.PARSE_BOOL as the context's parse task +@given(draw_token_not_bool()) +def test_parse2_bool_invalid_incorrect(token): + parent_context = static_parse_context() + context = ParseContext(ParseTask.PARSE_BOOL, token, parent_context) + error = ParseErrorException(ParseError.NOT_BOOL, token, None, context) + parser = Parser().parse_bool + template_test_invalid(parser, parent_context, [token], error) + + +# Tests parsing of empty tokens +# We expect the following behaviour: +# - Error if there isn't a token +# - Have ParseError.NO_TOKEN as the exception code +# - Have ParseTask.PARSE_BOOL as the context's parse task +def test_parse2_bool_invalid_empty(): + parent_context = static_parse_context() + context = ParseContext(ParseTask.PARSE_BOOL, None, parent_context) + error 
= ParseErrorException(ParseError.NO_TOKEN, None, None, context) + parser = Parser().parse_bool + template_test_invalid(parser, parent_context, [], error) diff --git a/tests/parse2/test_clear_notes.py b/tests/parse2/test_clear_notes.py new file mode 100644 index 0000000..b3c7a4e --- /dev/null +++ b/tests/parse2/test_clear_notes.py @@ -0,0 +1,88 @@ +# SPDX-License-Identifier: LGPL-2.1-only +# Copyright 2022 Jookia + +# Clear notes syntax consists of the following: +# - One or more tokens or notes +# +# Parsing gives the following: +# All tokens that aren't notes +# +# The following error contexts are used: +# CLEAR_NOTES - Used when parsing the file +# +# The following parse errors are generated: +# FOUND_ENDNOTE - When a stray EndNote token is found + +from hypothesis import given +from hypothesis.strategies import composite, just, lists, one_of + +from src.parse2.parse import ( + clear_notes, + ParseContext, + ParseError, + ParseErrorException, + ParseTask, +) +from src.parse2.token import TokenStream +from tests.parse2.templates import template_test_invalid +from tests.parse2.test_error import static_parse_context +from tests.parse2.test_note import ( + draw_note_value_token, + static_note_tokens, + static_note_invalid, + static_note_invalid_error, +) +from tests.parse2.test_token import static_token_by_value + + +# Draws a tokens of notes and non-notes and output without notes +@composite +def draw_notes_to_clear(draw): + token_sets = draw( + lists(one_of([lists(draw_note_value_token()), just(static_note_tokens())])) + ) + output = [] + tokens = [] + for set in token_sets: + tokens += set + if set != static_note_tokens(): + output += set + return (tokens, output) + + +# Tests clear_notes filters out notes +# We expect the following behaviour: +# - Tokens that are part of note structures are removed +# template_test provides general parsing properties +@given(draw_notes_to_clear()) +def test_parse2_clear_notes_valid(test_data): + (tokens, result) = test_data + 
stream = TokenStream(tokens) + cleared = clear_notes(stream, None) + assert cleared == result + + +# Tests clear_notes passes through note errors +# We expect the following behaviour: +# - When an invalid note is parsed the error is propagated +# - Have ParseTask.CLEAR_NOTES as the context's parse task +def test_parse2_clear_notes_startnote_propagation(): + tokens = static_note_invalid() + parent_context = static_parse_context() + context = ParseContext(ParseTask.CLEAR_NOTES, tokens[0], parent_context) + error = static_note_invalid_error(context) + template_test_invalid(clear_notes, parent_context, tokens, error) + + +# Tests clear_notes errors when finding an EndNote +# We expect the following behaviour: +# - When EndNote is found a ParseError.FOUND_ENDNOTE error is raised +# - Have ParseTask.CLEAR_NOTES as the context's parse task +@given(lists(draw_note_value_token())) +def test_parse2_clear_notes_invalid_endnote(tokens): + token = static_token_by_value("EndNote") + new_tokens = tokens + [token] + parent_context = static_parse_context() + context = ParseContext(ParseTask.CLEAR_NOTES, new_tokens[0], parent_context) + error = ParseErrorException(ParseError.FOUND_ENDNOTE, token, None, context) + template_test_invalid(clear_notes, parent_context, new_tokens, error) diff --git a/tests/parse2/test_conditional.py b/tests/parse2/test_conditional.py new file mode 100644 index 0000000..dd43e93 --- /dev/null +++ b/tests/parse2/test_conditional.py @@ -0,0 +1,233 @@ +# SPDX-License-Identifier: LGPL-2.1-only +# Copyright 2022 Jookia + +# Conditional syntax consists of the following tokens: +# - "If" +# - A test statement, terminated by "Then" +# - A success statement, terminated by "Else" +# - A failure statement, terminated by "EndIf" +# +# Parsing gives a Conditional data structure containing: +# test - The parsed test statement +# success - The parsed success statement +# failure - The parsed failure statement +# +# The following cases are errors: +# - If not being the 
literal "If" +# - The test not parsing correctly +# - The success not parsing correctly +# - The failure not parsing correctly +# +# The following error contexts are used: +# PARSE_CONDITIONAL - Used when parsing the general syntax +# PARSE_TEST - Used when parsing the test statement +# PARSE_SUCCESS - Used when parsing the success statement +# PARSE_FAILURE - Used when parsing the failure statement +# +# The following parse errors are generated: +# NO_TOKEN - When there's not enough tokens +# WRONG_TOKEN - When If isn't the correct value +# +# The following parsers are used and have their errors +# and data structures propagated: +# parse_statement - Used with "Then" terminator for the test statement +# parse_statement - Used with "Else" terminator for the success statement +# parse_statement - Used with "EndIf" terminator for the failure statement + +import enum + +from hypothesis import assume, given +from hypothesis.strategies import composite, data, integers, just, one_of + +from src.ast_types import Conditional +from src.parse2.parse import ( + ParseContext, + ParseError, + ParseErrorException, + ParseTask, + Parser, + read_token, +) +from tests.parse2.templates import ( + template_test_valid, + template_test_invalid, +) +from tests.parse2.test_token import ( + draw_token_random, + static_token_by_value, +) +from tests.parse2.test_error import static_parse_context + +# +# Helper functions +# + + +# Values used by the mocked parser +class MockStatement(enum.Enum): + MockTest = enum.auto() + MockSuccess = enum.auto() + MockFailure = enum.auto() + + +# Mocks and tests the parse_statement parser +# Instead of parsing a complex statement it just parses +# the following tokens: MockTest, MockSuccess, MockFailure +# The terminator is required to be: +# - "Then" for MockTest +# - "Else" for MockSuccess +# - "EndIf" for MockFailure +class MockParser(Parser): + def parse_statement(self, stream, parent_context, terminator): + token = read_token(stream, None, 
parent_context) + if token.value == "MockTest" and terminator == "Then": + return MockStatement.MockTest + elif token.value == "MockSuccess" and terminator == "Else": + return MockStatement.MockSuccess + elif token.value == "MockFailure" and terminator == "EndIf": + return MockStatement.MockFailure + else: + raise ParseErrorException( + ParseError.WRONG_TOKEN, token, None, parent_context + ) + + +# A valid conditional expression and tokens +def static_conditional_valid_tokens(): + tokens = [ + static_token_by_value("If"), + static_token_by_value("MockTest"), + static_token_by_value("MockSuccess"), + static_token_by_value("MockFailure"), + ] + expected = Conditional( + MockStatement.MockTest, MockStatement.MockSuccess, MockStatement.MockFailure + ) + return (tokens, expected) + + +# Calculates the parse context for a specific token in a conditional expression +def context_at(parent_context, tokens, index): + max = len(tokens) - 1 + if max == -1: + start = None + token = None + elif max < index: + start = tokens[0] + token = None + else: + start = tokens[0] + token = tokens[index] + context = ParseContext(ParseTask.PARSE_CONDITIONAL, start, parent_context) + if index == 0: + return context + elif index == 1: + subcontext = ParseContext(ParseTask.PARSE_TEST, token, context) + return subcontext + elif index == 2: + subcontext = ParseContext(ParseTask.PARSE_SUCCESS, token, context) + return subcontext + elif index == 3: + subcontext = ParseContext(ParseTask.PARSE_FAILURE, token, context) + return subcontext + else: + assert "Should never be called" + + +# Draws something that isn't a mock statement +@composite +def draw_not_statement(draw): + token = draw(draw_token_random()) + assume(token.value not in ["MockTest", "MockSuccess", "MockFailure"]) + return token + + +# Draws the wrong statement for a given conditional position +def draw_wrong_statement_at(index): + random = draw_not_statement() + test = just(static_token_by_value("MockTest")) + success = 
just(static_token_by_value("MockSuccess")) + failure = just(static_token_by_value("MockFailure")) + if index == 1: + return one_of([success, failure, random]) + elif index == 2: + return one_of([test, failure, random]) + elif index == 3: + return one_of([test, success, random]) + else: + assert "Should never be called" + + +# +# Test functions +# + + +# Tests parsing a valid statement +# We expect the following behaviour: +# - The test statement is read and assigned +# - The success statement is read and assigned +# - The failure statement is read and assigned +def test_parse2_conditional_valid(): + (tokens, expected) = static_conditional_valid_tokens() + parser = MockParser().parse_conditional + return template_test_valid(parser, tokens, expected) + + +# Tests parsing a truncated statement +# We expect the following behaviour: +# - A NO_TOKEN parse error is raised +# - The error context is PARSE_CONDITIONAL +# - The test statement has its own subcontext, PARSE_TEST +# - The success statement has its own subcontext, PARSE_SUCCESS +# - The failure statement has its own subcontext, PARSE_FAILURE +@given(data()) +def test_parse2_conditional_short(data): + (tokens, _) = static_conditional_valid_tokens() + new_len = data.draw( + integers(min_value=0, max_value=(len(tokens) - 1)), label="shorten point" + ) + short_tokens = tokens[0:new_len] + parent_context = static_parse_context() + context = context_at(parent_context, short_tokens, new_len) + error = ParseErrorException(ParseError.NO_TOKEN, None, None, context) + parser = MockParser().parse_conditional + template_test_invalid(parser, parent_context, short_tokens, error) + + +# Tests parsing an invalid "If" +# We expect the following behaviour: +# - A WRONG_TOKEN parse error is raised +# - The error context is PARSE_CONDITIONAL +# - The token "If" is expected +@given(data()) +def test_parse2_conditional_wrong_if(data): + (tokens, _) = static_conditional_valid_tokens() + new_if = data.draw(draw_token_random(), label="new 
if") + assume(new_if.value != "If") + new_tokens = [new_if] + tokens[1:] + parent_context = static_parse_context() + context = context_at(parent_context, new_tokens, 0) + error = ParseErrorException(ParseError.WRONG_TOKEN, new_if, "If", context) + parser = MockParser().parse_conditional + template_test_invalid(parser, parent_context, new_tokens, error) + + +# Tests parsing an invalid statement +# We expect the following behaviour: +# - A WRONG_TOKEN parse error is raised by the mock parser +# - The error context is PARSE_CONDITIONAL +# - Our error context is retained by parse_statement +@given(data()) +def test_parse2_conditional_wrong_statement(data): + (tokens, _) = static_conditional_valid_tokens() + statement_pos = data.draw( + integers(min_value=1, max_value=(len(tokens) - 1)), label="statement position" + ) + new_statement = data.draw(draw_wrong_statement_at(statement_pos)) + new_tokens = tokens[0:statement_pos] + [new_statement] + tokens[statement_pos + 1 :] + parent_context = static_parse_context() + context = context_at(parent_context, new_tokens, statement_pos) + error = ParseErrorException(ParseError.WRONG_TOKEN, new_statement, None, context) + parser = MockParser().parse_conditional + template_test_invalid(parser, parent_context, new_tokens, error) diff --git a/tests/parse2/test_directive.py b/tests/parse2/test_directive.py new file mode 100644 index 0000000..aaec738 --- /dev/null +++ b/tests/parse2/test_directive.py @@ -0,0 +1,190 @@ +# SPDX-License-Identifier: LGPL-2.1-only +# Copyright 2022 Jookia + +# Directive syntax consists of one of the following: +# - A set +# - A conditional +# - A statement, terminated by "Done" +# +# Parsing gives one of the following: +# Set - The parsed set node +# Conditional - The parsed conditional node +# Statement - The parsed statement node +# +# The following error contexts are used: +# PARSE_DIRECTIVE - Used when parsing the directive +# +# The following parse errors are generated: +# NO_TOKEN - When there's not 
enough tokens +# +# The following parsers are used and have their errors +# and data structures propagated: +# parse_statement - Used with "Done" terminator + +import enum + +from hypothesis import given +from hypothesis.strategies import composite, just, one_of + +from src.parse2.parse import ( + ParseContext, + ParseError, + ParseErrorException, + ParseTask, + Parser, +) +from tests.parse2.templates import ( + template_test_valid, + template_test_invalid, +) +from tests.parse2.test_token import ( + draw_token_unknown, + static_token_by_value, +) +from tests.parse2.test_error import static_parse_context +from src.ast_types import Bool, Statement + +# +# Helper functions +# + + +# Values used by the mocked parser +class MockDirective(enum.Enum): + MockSet = enum.auto() + MockConditional = enum.auto() + MockStatement = enum.auto() + + +# Mocks and tests the parse_directive parser +# Instead of parsing sets, conditionals and statements +# it instead returns a mock value +class MockParserValid(Parser): + def parse_set(self, stream, parent_context): + stream.pop() + return MockDirective.MockSet + + def parse_conditional(self, stream, parent_context): + stream.pop() + return MockDirective.MockConditional + + def parse_statement(self, stream, parent_context, terminator): + assert terminator == "Done" + stream.pop() + return MockDirective.MockStatement + + +# Mocks and tests the parse_directive parser error handling +# Instead of parsing, just return an error +# Re-use the enum elements to give a unique error for each node +class MockParserInvalid(Parser): + def _raise_error(self, error, parent_context): + raise ParseErrorException(error, None, None, parent_context) + + def parse_set(self, stream, parent_context): + self._raise_error(MockDirective.MockSet, parent_context) + + def parse_conditional(self, stream, parent_context): + self._raise_error(MockDirective.MockConditional, parent_context) + + def parse_statement(self, stream, parent_context, terminator): + assert 
terminator == "Done" + self._raise_error(MockDirective.MockStatement, parent_context) + + +# A valid directive containing a set +def static_directive_set(): + return ([static_token_by_value("Set")], MockDirective.MockSet) + + +# A valid directive containing a conditional +def static_directive_conditional(): + return ([static_token_by_value("If")], MockDirective.MockConditional) + + +# Draws a valid directive containing a statement +@composite +def draw_directive_statement(draw): + return ([draw(draw_token_unknown())], MockDirective.MockStatement) + + +# Draws a valid directive +@composite +def draw_directive_valid(draw): + return draw( + one_of( + [ + just(static_directive_set()), + just(static_directive_conditional()), + draw_directive_statement(), + ] + ) + ) + + +# A simple directive tokens and result +def static_directive_valid(): + return ( + [static_token_by_value("True"), static_token_by_value("Done")], + Statement(Bool(True), None, []), + ) + + +# An invalid directive token +def static_directive_invalid(): + return [static_token_by_value("Done")] + + +# An invalid directive token error +def static_directive_invalid_error(parent_context): + token = static_directive_invalid()[0] + directive_context = ParseContext(ParseTask.PARSE_DIRECTIVE, token, parent_context) + statement_context = ParseContext( + ParseTask.PARSE_STATEMENT, token, directive_context + ) + context = ParseContext(ParseTask.PARSE_SUBJECT, token, statement_context) + return ParseErrorException(ParseError.FOUND_TERMINATOR, token, None, context) + + +# +# Test functions +# + + +# Tests parsing a valid directive +# We expect the following behaviour: +# - Sets are detected and parsed +# - Conditionals are detected and parsed +# - Statements are detected and parsed +@given(draw_directive_valid()) +def test_parse2_directive_valid(test_data): + (tokens, expected) = test_data + parser = MockParserValid().parse_directive + return template_test_valid(parser, tokens, expected) + + +# Tests parsing an empty 
directive +# We expect the following behaviour: +# - A NO_TOKEN parse error is raised +# - The error context is PARSE_DIRECTIVE +def test_parse2_directive_empty(): + tokens = [] + parent_context = static_parse_context() + context = ParseContext(ParseTask.PARSE_DIRECTIVE, None, parent_context) + error = ParseErrorException(ParseError.NO_TOKEN, None, None, context) + parser = MockParserValid().parse_directive + template_test_invalid(parser, parent_context, tokens, error) + + +# Tests error propagation +# We expect the following behaviour: +# - A mock error is raised for each case +# - Our error context is used for the error +@given(draw_directive_valid()) +def test_parse2_directive_error(test_data): + (tokens, expected) = test_data + parent_context = static_parse_context() + context = ParseContext(ParseTask.PARSE_DIRECTIVE, tokens[0], parent_context) + error = ParseErrorException(expected, None, None, context) + parser = MockParserInvalid().parse_directive + template_test_invalid(parser, parent_context, tokens, error) diff --git a/tests/parse2/test_error.py b/tests/parse2/test_error.py new file mode 100644 index 0000000..5266948 --- /dev/null +++ b/tests/parse2/test_error.py @@ -0,0 +1,285 @@ +# SPDX-License-Identifier: LGPL-2.1-only +# Copyright 2022 Jookia + +# Parse error reporting consists of the following data structures: +# +# ParseTasks represent a task such as parsing a file or directive. +# +# ParseContexts represent a step during the parsing process for the purposes of +# error reporting. +# A context contains: +# - A ParseTask specifying the step +# - The Token the task started at (possibly None) +# - A parent ParseContext (possibly None) if this task is a component +# of another task +# +# ParseErrors represent an error encountered during a parsing task, such as an +# unexpected token or invalid token. +# +# ParseErrorExceptions represent an error encounted during parsing. 
+# An error exception contains: +# - A ParseError detailing the error +# - The Token the error is at (possibly None) +# - An expected string (possibly None), used if the parser is expecting +# a single, specific token such as a keyword +# - A ParseContext detailing the current parsing task + +from hypothesis import given +from hypothesis.strategies import composite, integers, sampled_from, text + +from src.i18n import Message +from src.parse2.parse import ( + ParseContext, + ParseError, + ParseErrorException, + ParseTask, + format_context, + format_exception, + format_full_error, +) +from tests.templates import template_test_structure +from tests.parse2.test_token import draw_token_random, static_token + +# +# Helper functions +# + + +# Mapping of error to message identifiers +error_message_ids = { + ParseError.TEST_ERROR: "ParseErrorTestError", + ParseError.NO_TOKEN: "ParseErrorNoToken", + ParseError.WRONG_TOKEN: "ParseErrorWrongToken", + ParseError.FOUND_STARTTEXT: "ParseErrorFoundStartText", + ParseError.FOUND_STARTNOTE: "ParseErrorFoundStartNote", + ParseError.NOT_BOOL: "ParseErrorNotBool", + ParseError.FOUND_ENDNOTE: "ParseErrorFoundEndNote", + ParseError.RESERVED_NAME: "ParseErrorReservedName", + ParseError.FOUND_TERMINATOR: "ParseErrorFoundTerminator", +} + +# Mapping of task to message identifiers +task_message_ids = { + ParseTask.TEST_TASK: "ParseTaskTestTask", + ParseTask.PARSE_NOTE: "ParseTaskNote", + ParseTask.CLEAR_NOTES: "ParseTaskClearNotes", + ParseTask.PARSE_TEXT: "ParseTaskText", + ParseTask.PARSE_BOOL: "ParseTaskBool", + ParseTask.PARSE_REFERENCE: "ParseTaskReference", + ParseTask.PARSE_VALUE: "ParseTaskValue", + ParseTask.PARSE_STATEMENT: "ParseTaskStatement", + ParseTask.PARSE_SUBJECT: "ParseTaskSubject", + ParseTask.PARSE_VERB: "ParseTaskVerb", + ParseTask.PARSE_ARGUMENT: "ParseTaskArgument", + ParseTask.PARSE_SET: "ParseTaskSet", + ParseTask.PARSE_CONDITIONAL: "ParseTaskConditional", + ParseTask.PARSE_TEST: "ParseTaskTest", + 
ParseTask.PARSE_SUCCESS: "ParseTaskSuccess", + ParseTask.PARSE_FAILURE: "ParseTaskFailure", + ParseTask.PARSE_DIRECTIVE: "ParseTaskDirective", + ParseTask.PARSE_FILE: "ParseTaskFile", +} + + +# Draws a strategy, with 25% of draws being None +@composite +def draw_maybe(draw, strategy): + chance = draw(integers(min_value=1, max_value=4)) + if chance == 1: + return None + else: + return draw(strategy) + + +# Draws a random parse task +@composite +def draw_parse_task(draw): + return draw(sampled_from(ParseTask.list())) + + +# Draws a random parse context without a parent +@composite +def draw_parse_context(draw): + task = draw(draw_parse_task()) + token = draw(draw_maybe(draw_token_random())) + context = draw(draw_maybe(draw_parse_context())) + return ParseContext(task, token, context) + + +# Static parse context +def static_parse_context(): + task = ParseTask.TEST_TASK + token = static_token() + return ParseContext(task, token, None) + + +# Draws a random parse error +@composite +def draw_parse_error(draw): + return draw(sampled_from(ParseError.list())) + + +# Draws a random parse error exception +@composite +def draw_parse_error_exception(draw): + error = draw(draw_parse_error()) + token = draw(draw_maybe(draw_token_random())) + expected = draw(draw_maybe(text())) + context = draw(draw_parse_context()) + return ParseErrorException(error, token, expected, context) + + +# +# Test functions +# + + +# Test parse context structure +@template_test_structure( + ParseContext, + draw_parse_context(), + task=draw_parse_task(), + token=draw_maybe(draw_token_random()), + parent=draw_maybe(draw_parse_context()), +) +def test_parse2_context_structure(): + pass + + +# Test parse error exception structure +@template_test_structure( + ParseErrorException, + draw_parse_error_exception(), + error=draw_parse_error(), + token=draw_maybe(draw_token_random()), + expected=draw_maybe(text()), + context=draw_maybe(draw_parse_context()), +) +def test_parse2_error_exception_structure(): + pass 
+ + +# Tests formatting a ParseContext +# We expect the following behaviour: +# - A Message is returned +# - The message ID begins with ParseContext +# - The first parameter is task +# - If the token field is set, the ID is appended with +# "At" and the second and third parameters are the token's +# location's line and offset +# Two combinations are possible: +# - Message("ParseContext", [task]) +# - Message("ParseContextAt", [task, file, line, offset]) +# task is a message representing the ParseTask, equivalent to: +# - Message(task_message_ids[context.task], []) +# file is a source file's name +# line is a source file's line number +# offset is a source file's line offset +def _test_parse2_error_format_context(context): + task = Message(task_message_ids[context.task], []) + has_location = context.token is not None + if has_location: + file = context.token.location.file + line = context.token.location.line + offset = context.token.location.offset + expected = Message("ParseContextAt", [task, file, line, offset]) + else: + expected = Message("ParseContext", [task]) + value = format_context(context) + assert expected == value + + +# Tests formatting with a random ParseContext +@given(draw_parse_context()) +def test_parse2_error_format_context(context): + _test_parse2_error_format_context(context) + + +# Tests formatting with each ParseTask +@given(draw_parse_task()) +def test_parse2_error_format_parse_task(task): + context = ParseContext(task, None, None) + _test_parse2_error_format_context(context) + + +# Tests formatting a ParseErrorException +# We expect the following behaviour: +# - A Message is returned +# - The message ID begins with ParseError +# - If the expected field is set, the ID is appended with +# "Expected" and the first parameter is the expected value +# - Otherwise the first parameter is the error +# - If the token field is set, the ID is appended with +# "At" and the second and third parameters are the token's +# location's line and offset +# Four 
combinations are possible: +# - Message("ParserError", [error]) +# - Message("ParserErrorAt", [error, file, line, offset]) +# - Message("ParserErrorExpected", [expected]) +# - Message("ParserErrorExpectedAt", [expected, file, line, offset]) +# error is a message representing the ParseError, equivalent to: +# - Message(error_message_ids[exception.error], []) +# file is a source file's name +# line is a source file's line number +# offset is a source file's line offset +def _test_parse2_error_format_exception(exception): + has_expected = exception.expected is not None + has_location = exception.token is not None + # Variables used for message parameters + err = Message(error_message_ids[exception.error], []) + expect = exception.expected + if has_location: + file = exception.token.location.file + line = exception.token.location.line + offset = exception.token.location.offset + else: + file = None + line = None + offset = None + # Truth table used for message lookup + # Indexes are has_expected and has_location + messages = [ + # Cases without an expected token: + [ + Message("ParserError", [err]), + Message("ParserErrorAt", [err, file, line, offset]), + ], + # Cases with an expected token: + [ + Message("ParserErrorExpected", [expect]), + Message("ParserErrorExpectedAt", [expect, file, line, offset]), + ], + ] + expected = messages[has_expected][has_location] + value = format_exception(exception) + assert expected == value + + +# Tests formatting with a random ParseErrorException +@given(draw_parse_error_exception()) +def test_parse2_error_format_exception(exception): + _test_parse2_error_format_exception(exception) + + +# Tests formatting with each ParseError +@given(draw_parse_error()) +def test_parse2_error_format_parse_error(error): + exception = ParseErrorException(error, None, None, static_parse_context()) + _test_parse2_error_format_exception(exception) + + +# Tests formatting a full error +# We expect the following behaviour: +# - An array of Messages are 
returned +# - The first message is the exception formatted +# - The second messages is the exception's context formatted +# - Subsequent messages are of any context decedents formatted +@given(draw_parse_error_exception()) +def test_parse2_error_format_full_error(exception): + expected = [format_exception(exception)] + context = exception.context + while context is not None: + expected.append(format_context(context)) + context = context.parent + value = format_full_error(exception) + assert expected == value diff --git a/tests/parse2/test_file.py b/tests/parse2/test_file.py new file mode 100644 index 0000000..ccc3e7c --- /dev/null +++ b/tests/parse2/test_file.py @@ -0,0 +1,75 @@ +# SPDX-License-Identifier: LGPL-2.1-only +# Copyright 2022 Jookia + +# File syntax consists of the following: +# - One or more directives +# +# Parsing gives the following: +# A list of directives - All directives in the file +# +# The following error contexts are used: +# PARSE_FILE - Used when parsing the file +# +# No parse errors are generated. 
+ +from hypothesis import given +from hypothesis.strategies import composite, just, lists + +from src.parse2.token import TokenStream +from src.parse2.parse import ( + ParseContext, + ParseTask, + parse_file, +) +from tests.parse2.templates import template_test_invalid +from tests.parse2.test_error import static_parse_context +from tests.parse2.test_directive import ( + static_directive_valid, + static_directive_invalid, + static_directive_invalid_error, +) + + +# A valid file +@composite +def draw_file_valid(draw): + directives = draw(lists(just(static_directive_valid()))) + all_tokens = [] + all_expected = [] + for tokens, expected in directives: + all_tokens += tokens + all_expected.append(expected) + return (all_tokens, all_expected) + + +# +# Test functions +# + + +# Tests parsing a valid file +# We expect the following behaviour: +# - All directives are parsed +# - No tokens are left after parsing +@given(draw_file_valid()) +def test_parse2_file_valid(test_data): + (tokens, expected) = test_data + stream = TokenStream(tokens.copy()) + parsed = parse_file(stream, None) + assert parsed == expected + assert stream.pop() is None + + +# Tests parsing a invalid file +# We expect the following behaviour: +# - The error context is PARSE_FILE +# - A wrong directive error is propagated +@given(draw_file_valid()) +def test_parse2_file_invalid(test_data): + (tokens, expected) = test_data + invalid_directive = static_directive_invalid() + new_tokens = tokens + invalid_directive + parent_context = static_parse_context() + context = ParseContext(ParseTask.PARSE_FILE, new_tokens[0], parent_context) + error = static_directive_invalid_error(context) + template_test_invalid(parse_file, parent_context, new_tokens, error) diff --git a/tests/parse2/test_note.py b/tests/parse2/test_note.py new file mode 100644 index 0000000..c971249 --- /dev/null +++ b/tests/parse2/test_note.py @@ -0,0 +1,154 @@ +# SPDX-License-Identifier: LGPL-2.1-only +# Copyright 2022 Jookia + +# Note syntax 
consists of the following: +# - StartNote +# - Optionally any token that isn't EndNote +# - EndNote +# +# Parsing gives the following: +# No value +# +# The following error contexts are used: +# PARSE_NOTE - Used when parsing the file +# +# The following parse errors are generated: +# FOUND_ENDNOTE - When a stray EndNote token is found + +from hypothesis import assume, given +from hypothesis.strategies import ( + composite, + lists, +) + +from src.parse2.parse import ( + skip_note, + ParseContext, + ParseError, + ParseErrorException, + ParseTask, +) +from tests.parse2.templates import ( + draw_random_within, + template_test_invalid, + template_test_valid, +) +from tests.parse2.test_error import static_parse_context +from tests.parse2.test_token import ( + static_token_by_value, + draw_token_random, +) + +# +# Helper functions +# + + +# Static tokens representing a note +def static_note_tokens(): + return [static_token_by_value("StartNote"), static_token_by_value("EndNote")] + + +# An invalid note token +def static_note_invalid(): + return [static_token_by_value("StartNote")] + + +# An invalid note token error +def static_note_invalid_error(parent_context): + token = static_note_invalid()[0] + note_context = ParseContext(ParseTask.PARSE_NOTE, token, parent_context) + return ParseErrorException(ParseError.NO_TOKEN, None, None, note_context) + + +# Draws a random token suitable for note building +@composite +def draw_note_value_token(draw): + token = draw(draw_token_random()) + assume(token.value not in ["StartNote", "EndNote"]) + return token + + +# Draws a random token that isn't a StartNote token +@composite +def draw_token_not_startnote(draw): + token = draw(draw_token_random()) + assume(token.value != "StartNote") + return token + + +# Draws tokens to make a valid note +@composite +def draw_token_note_valid(draw): + tokens = draw(lists(draw_note_value_token())) + start = static_token_by_value("StartNote") + end = static_token_by_value("EndNote") + all_tokens = 
[start] + tokens + [end] + return all_tokens + + +# +# Test functions +# + + +# Tests skip_note works correctly +# We expect the following behaviour: +# - No value is returned +# template_test provides general parsing properties +@given(draw_token_note_valid()) +def test_parse2_note_valid(tokens): + template_test_valid(skip_note, tokens, None) + + +# Tests parsing notes without StartNote +# We expect the following behaviour: +# - Error if StartNote's token value is not "StartNote" +# - Have ParseError.WRONG_TOKEN as the exception code +# - Have ParseTask.PARSE_NOTE as the context's parse task +@given(draw_token_note_valid(), draw_token_not_startnote()) +def test_parse2_note_invalid_nostartnote(tokens, token): + new_tokens = [token] + tokens[1:0] + parent_context = static_parse_context() + context = ParseContext(ParseTask.PARSE_NOTE, new_tokens[0], parent_context) + error = ParseErrorException(ParseError.WRONG_TOKEN, token, "StartNote", context) + template_test_invalid(skip_note, parent_context, new_tokens, error) + + +# Tests parsing empty notes +# We expect the following behaviour: +# - Error if there is no StartNote token at all +# - Have ParseError.NO_TOKEN as the exception code +# - Have ParseTask.PARSE_NOTE as the context's parse task +def test_parse2_note_invalid_empty(): + parent_context = static_parse_context() + context = ParseContext(ParseTask.PARSE_NOTE, None, parent_context) + error = ParseErrorException(ParseError.NO_TOKEN, None, None, context) + template_test_invalid(skip_note, parent_context, [], error) + + +# Tests parsing a note with a StartNote token in it +# We expect the following behaviour: +# - Error if a StartNote token is in the note content +# - Have ParseTask.PARSE_NOTE as the context's parse task +@given(draw_random_within(draw_token_note_valid(), "StartNote")) +def test_parse2_note_invalid_extrastartnote(within): + (tokens, start) = within + parent_context = static_parse_context() + context = ParseContext(ParseTask.PARSE_NOTE, tokens[0], 
parent_context) + error = ParseErrorException(ParseError.FOUND_STARTNOTE, start, None, context) + template_test_invalid(skip_note, parent_context, tokens, error) + + +# Tests parsing a note without an EndNote token +# We expect the following behaviour: +# - Error if there is no EndNote token at all +# - Have ParseError.NO_TOKEN as the exception code +# - Have ParseTask.PARSE_NOTE as the context's parse task +@given(draw_token_note_valid()) +def test_parse2_note_invalid_noendnote(tokens): + new_tokens = tokens[0:-1] + parent_context = static_parse_context() + context = ParseContext(ParseTask.PARSE_NOTE, tokens[0], parent_context) + error = ParseErrorException(ParseError.NO_TOKEN, None, None, context) + template_test_invalid(skip_note, parent_context, new_tokens, error) diff --git a/tests/parse2/test_parse.py b/tests/parse2/test_parse.py new file mode 100644 index 0000000..c2d3a59 --- /dev/null +++ b/tests/parse2/test_parse.py @@ -0,0 +1,37 @@ +# SPDX-License-Identifier: LGPL-2.1-only +# Copyright 2022 Jookia + +from hypothesis import given +from hypothesis.strategies import lists + +from src.parse2.parse import ( + clear_notes, + ParseErrorException, + parse, + parse_file, +) +from src.parse2.token import TokenStream +from tests.parse2.test_token import draw_token_random +from tests.parse2.test_error import draw_parse_context + + +# Tests the parser wrapper works correctly +# We expect the following behaviour: +# - Notes to be removed from the tokens +# - The remaining tokens to be parsed as a file +@given(lists(draw_token_random()), draw_parse_context()) +def test_parse2_fuzz(tokens, context): + result = None + parsed = None + try: + stream = TokenStream(tokens.copy()) + cleared = clear_notes(stream, context) + stream2 = TokenStream(cleared) + result = parse_file(stream2, context) + except ParseErrorException as e: + result = e + try: + parsed = parse(tokens.copy(), context) + except ParseErrorException as e: + parsed = e + assert parsed == result diff --git 
a/tests/parse2/test_reference.py b/tests/parse2/test_reference.py new file mode 100644 index 0000000..d388da8 --- /dev/null +++ b/tests/parse2/test_reference.py @@ -0,0 +1,59 @@ +# SPDX-License-Identifier: LGPL-2.1-only +# Copyright 2022 Jookia + +from hypothesis import given +from hypothesis.strategies import composite + +from src.ast_types import Reference +from src.parse2.parse import ( + ParseContext, + ParseError, + ParseErrorException, + ParseTask, + Parser, +) +from tests.parse2.templates import template_test_valid, template_test_invalid +from tests.parse2.test_error import static_parse_context +from tests.parse2.test_token import draw_token_known, draw_token_unknown + + +# Draws tokens to make a reference +@composite +def draw_token_reference_valid(draw): + token = draw(draw_token_unknown()) + return ([token], Reference(token.value)) + + +# Tests parse_reference works correctly +# We expect the following behaviour: +# - The resulting reference has the token's value +# template_test_valid provides general parsing properties +@given(draw_token_reference_valid()) +def test_parse2_reference_valid(valid_data): + (tokens, expected) = valid_data + template_test_valid(Parser().parse_reference, tokens, expected) + + +# Tests parsing a reference with a reserved name errors +# We expect the following behaviour: +# - Error if a keyword or literal is encountered +# - Have ParseError.RESERVED_NAME as the exception code +# - Have ParseTask.PARSE_REFERENCE as the context's parse task +@given(draw_token_known()) +def test_parse2_reference_invalid_name(token): + parent_context = static_parse_context() + context = ParseContext(ParseTask.PARSE_REFERENCE, token, parent_context) + error = ParseErrorException(ParseError.RESERVED_NAME, token, None, context) + template_test_invalid(Parser().parse_reference, parent_context, [token], error) + + +# Tests parsing of empty references +# We expect the following behaviour: +# - Error if there isn't a token +# - Have ParseError.NO_TOKEN as 
the exception code +# - Have ParseTask.PARSE_REFERENCE as the context's parse task +def test_parse2_reference_invalid_empty(): + parent_context = static_parse_context() + context = ParseContext(ParseTask.PARSE_REFERENCE, None, parent_context) + error = ParseErrorException(ParseError.NO_TOKEN, None, None, context) + template_test_invalid(Parser().parse_reference, parent_context, [], error) diff --git a/tests/parse2/test_set.py b/tests/parse2/test_set.py new file mode 100644 index 0000000..3a74d4b --- /dev/null +++ b/tests/parse2/test_set.py @@ -0,0 +1,219 @@ +# SPDX-License-Identifier: LGPL-2.1-only +# Copyright 2022 Jookia + +# Set syntax consists of the following tokens: +# - "Set" +# - Name, a value that isn't a keyword +# - "To" +# - A statement, terminated by "EndSet" +# +# Parsing gives a Set data structure containing: +# subject - The value of Name +# statement - The parsed statement +# +# The following cases are errors: +# - Not having enough tokens to parse +# - Set not being the literal "Set" +# - Name being a keyword +# - To not being the literal "To" +# - The statement not parsing correctly +# +# The following error contexts are used: +# PARSE_SET - Used when parsing the general syntax +# PARSE_SUBJECT - Used when parsing the subject +# +# The following parse errors are generated: +# NO_TOKEN - When there's not enough tokens +# WRONG_TOKEN - When Set or To aren't the correct values +# RESERVED_NAME - When Name is not a keyword +# +# The following parsers are used and have their errors +# and data structures propagated: +# parse_statement - Used with "EndSet" terminator for the statement + +import enum + +from hypothesis import assume, given +from hypothesis.strategies import composite, data, integers + +from src.ast_types import Set +from src.parse2.parse import ( + ParseContext, + ParseError, + ParseErrorException, + ParseTask, + Parser, + read_token, +) +from tests.parse2.templates import ( + template_test_valid, + template_test_invalid, +) +from 
tests.parse2.test_token import ( + draw_token_known, + draw_token_random, + draw_token_unknown, + static_token_by_value, +) +from tests.parse2.test_error import static_parse_context + +# +# Helper functions +# + + +# Values used by the mocked parser +class MockStatement(enum.Enum): + MockValue = enum.auto() + + +# Mocks and tests the parse_statement parser +# Instead of parsing a complex statement it just parses +# a single token: MockStatement +# The terminator is required to be "EndSet" +class MockParser(Parser): + def parse_statement(self, stream, parent_context, terminator): + assert terminator == "EndSet" + read_token(stream, "MockStatement", parent_context) + return MockStatement.MockValue + + +# Draws a valid set expression and tokens +@composite +def draw_set_valid_tokens(draw): + subject = draw(draw_token_unknown()) + tokens = [ + static_token_by_value("Set"), + subject, + static_token_by_value("To"), + static_token_by_value("MockStatement"), + ] + expected = Set(subject.value, MockStatement.MockValue) + return (tokens, expected) + + +# Calculates the parse context for a specific token in a set expression +def context_at(parent_context, tokens, index): + max = len(tokens) - 1 + if max == -1: + start = None + token = None + elif max < index: + start = tokens[0] + token = None + else: + start = tokens[0] + token = tokens[index] + context = ParseContext(ParseTask.PARSE_SET, start, parent_context) + if index == 1: + subcontext = ParseContext(ParseTask.PARSE_SUBJECT, token, context) + return subcontext + else: + return context + + +# +# Test functions +# + + +# Tests parsing a valid statement +# We expect the following behaviour: +# - The name is read as the subject +# - The statement is read as the statement +@given(draw_set_valid_tokens()) +def test_parse2_set_valid(test_data): + (tokens, expected) = test_data + parser = MockParser().parse_set + return template_test_valid(parser, tokens, expected) + + +# Tests parsing a truncated statement +# We expect the 
following behaviour: +# - A NO_TOKEN parse error is raised +# - The error context is PARSE_SET +# - The subject has its own subcontext, PARSE_SUBJECT +@given(data()) +def test_parse2_set_short(data): + (tokens, _) = data.draw(draw_set_valid_tokens(), label="valid data") + new_len = data.draw( + integers(min_value=0, max_value=(len(tokens) - 1)), label="shorten point" + ) + short_tokens = tokens[0:new_len] + parent_context = static_parse_context() + context = context_at(parent_context, short_tokens, new_len) + error = ParseErrorException(ParseError.NO_TOKEN, None, None, context) + parser = MockParser().parse_set + template_test_invalid(parser, parent_context, short_tokens, error) + + +# Tests parsing an invalid "Set" +# We expect the following behaviour: +# - A WRONG_TOKEN parse error is raised +# - The error context is PARSE_SET +# - The token "Set" is expected +@given(data()) +def test_parse2_set_wrong_set(data): + (tokens, _) = data.draw(draw_set_valid_tokens(), label="valid data") + new_set = data.draw(draw_token_random(), label="new set") + assume(new_set.value != "Set") + new_tokens = [new_set] + tokens[1:] + parent_context = static_parse_context() + context = context_at(parent_context, new_tokens, 0) + error = ParseErrorException(ParseError.WRONG_TOKEN, new_set, "Set", context) + parser = MockParser().parse_set + template_test_invalid(parser, parent_context, new_tokens, error) + + +# Tests parsing an invalid "To" +# We expect the following behaviour: +# - A WRONG_TOKEN parse error is raised +# - The error context is PARSE_SET +# - The token "To" is expected +@given(data()) +def test_parse2_set_wrong_to(data): + (tokens, _) = data.draw(draw_set_valid_tokens(), label="valid data") + new_to = data.draw(draw_token_random(), label="new to") + assume(new_to.value != "To") + new_tokens = tokens[0:2] + [new_to] + tokens[3:] + parent_context = static_parse_context() + context = context_at(parent_context, new_tokens, 2) + error = 
ParseErrorException(ParseError.WRONG_TOKEN, new_to, "To", context) + parser = MockParser().parse_set + template_test_invalid(parser, parent_context, new_tokens, error) + + +# Tests parsing an invalid name +# We expect the following behaviour: +# - A WRONG_TOKEN parse error is raised +# - The error context is PARSE_SET +# - The token "To" is expected +@given(data()) +def test_parse2_set_wrong_name(data): + (tokens, _) = data.draw(draw_set_valid_tokens(), label="valid data") + new_name = data.draw(draw_token_known(), label="new name") + new_tokens = tokens[0:1] + [new_name] + tokens[2:] + parent_context = static_parse_context() + context = context_at(parent_context, new_tokens, 1) + error = ParseErrorException(ParseError.RESERVED_NAME, new_name, None, context) + parser = MockParser().parse_set + template_test_invalid(parser, parent_context, new_tokens, error) + + +# Tests parsing an invalid statement +# We expect the following behaviour: +# - A WRONG_TOKEN parse error is raised by the mock parser +# - The error context is PARSE_SET +# - Our error context is retained by parse_statement +@given(data()) +def test_parse2_set_wrong_statement(data): + (tokens, _) = data.draw(draw_set_valid_tokens(), label="valid data") + new_statement = static_token_by_value("NotStatement") + new_tokens = tokens[0:3] + [new_statement] + parent_context = static_parse_context() + context = context_at(parent_context, new_tokens, 3) + error = ParseErrorException( + ParseError.WRONG_TOKEN, new_statement, "MockStatement", context + ) + parser = MockParser().parse_set + template_test_invalid(parser, parent_context, new_tokens, error) diff --git a/tests/parse2/test_statement.py b/tests/parse2/test_statement.py new file mode 100644 index 0000000..e89621b --- /dev/null +++ b/tests/parse2/test_statement.py @@ -0,0 +1,227 @@ +# SPDX-License-Identifier: LGPL-2.1-only +# Copyright 2022 Jookia + +import enum + +from hypothesis import assume, given +from hypothesis.strategies import composite, integers, 
lists + +from src.ast_types import Statement +from src.parse2.parse import ( + ParseContext, + ParseError, + ParseErrorException, + ParseTask, + Parser, + read_token, +) +from tests.parse2.templates import ( + template_test_valid, + template_test_invalid, +) +from tests.parse2.test_error import static_parse_context +from tests.parse2.test_token import ( + draw_token_known, + draw_token_random, + draw_token_unknown, + static_token_by_value, +) + + +# Values indicating what a parser did +class ParserMockAction(enum.Enum): + PARSE_VALUE = enum.auto() + WRONG_VALUE = enum.auto() + + +# Dummy Parser for testing statement parsing +# Return a static value of ParserMockACTION.PARSE_VALUE if a token starts with "TestValue" +# Otherwise throw an error of ParseMockAction.WRONG_VALUE +class ParserStatementMock(Parser): + def parse_value(self, stream, context): + token = read_token(stream, None, context) + if token.value.startswith("TestValue"): + return ParserMockAction.PARSE_VALUE + else: + raise ParseErrorException( + ParserMockAction.WRONG_VALUE, token, None, context + ) + + +# Creates a dummy parse function with a terminator specified by the last token +def make_test_parser(tokens): + def parser(stream, context): + if tokens == []: + terminator = "" + else: + terminator = tokens[-1].value + return ParserStatementMock().parse_statement(stream, context, terminator) + + return parser + + +# Draws a statement value with a somewhat random name +@composite +def draw_token_statement_value(draw): + number = draw(integers()) + return static_token_by_value("TestValue" + str(number)) + + +# Draws a statement name +@composite +def draw_token_statement_name(draw): + return draw(draw_token_unknown()) + + +# Draws a statement terminator +@composite +def draw_token_statement_terminator(draw): + return draw(draw_token_random()) + + +# Creates a context for a token in a statement +def make_test_context(parent_context, index, statement_token, token): + statement_context = ParseContext( + 
ParseTask.PARSE_STATEMENT, statement_token, parent_context + ) + if index == 0: + context = ParseTask.PARSE_SUBJECT + elif index == 1: + context = ParseTask.PARSE_VERB + else: + context = ParseTask.PARSE_ARGUMENT + context = ParseContext(context, token, statement_context) + return context + + +# Creates a context using existing tokens +def make_test_context_tokens(parent_context, index, tokens): + return make_test_context(parent_context, index, tokens[0], tokens[index]) + + +# Draws a valid statement's tokens +@composite +def draw_token_statement(draw): + values = draw(lists(draw_token_statement_value(), min_size=1)) + subject = values[0] + verb = [] + if len(values) > 1: + verb = [draw(draw_token_statement_name())] + arguments = values[2:] + terminator = draw(draw_token_statement_terminator()) + assume(terminator not in values) + assume(terminator not in verb) + tokens = [subject] + verb + arguments + [terminator] + return tokens + + +# Draws a valid statement +@composite +def draw_token_statement_valid(draw): + tokens = draw(draw_token_statement()) + subject = ParserMockAction.PARSE_VALUE + verb = None + # Account for the terminator + if len(tokens) > 2: + verb = tokens[1].value + argument_count = len(tokens) - 3 + arguments = [ParserMockAction.PARSE_VALUE] * argument_count + statement = Statement(subject, verb, arguments) + return (tokens, statement) + + +# Draws a statement with an invalid subject or argument +@composite +def draw_token_statement_invalid_value(draw): + tokens = draw(draw_token_statement()) + new_token = draw(draw_token_random()) + assume(not new_token.value.startswith("TestValue")) # Not a value + assume(new_token.value != tokens[-1].value) # Not the terminator + max_position = len(tokens) - 2 # Ignore Terminator + position = draw(integers(min_value=0, max_value=max_position)) + assume(position != 1) # Skip Verb + new_tokens = tokens[:position] + [new_token] + tokens[position + 1 :] + return (new_tokens, new_token, position) + + +# Tests 
parsing a valid statement +# We expect the following behaviour: +# - A value is read as the subject +# - Optionally, a name is read as the verb +# - Optionally, any number of arguments are read as values +# - A terminator is found afterwards +# template_test_valid provides general parsing properties +@given(draw_token_statement_valid()) +def test_parse2_statement_valid(valid_data): + (tokens, expected) = valid_data + parser = make_test_parser(tokens) + template_test_valid(parser, tokens, expected) + + +# Tests parsing a statement without a terminator +# This also covers cases of premature truncation for verbs and arguments +# We expect the following behaviour: +# - Error reading a verb or argument +# - Have ParseTask.PARSE_VERB or ParseTask.PARSE_ARGUMENT as the context's parse task +# - Have ParseTask.PARSE_STATEMENT as the context's parse task's parent +# - Have ParseError.NO_TOKEN as the exception code +# template_test_invalid provides general parsing properties +@given(draw_token_statement()) +def test_parse2_statement_invalid_no_terminator(tokens): + truncated = tokens[:-1] + parent_context = static_parse_context() + context = make_test_context(parent_context, len(truncated), tokens[0], None) + error = ParseErrorException(ParseError.NO_TOKEN, None, None, context) + parser = make_test_parser(tokens) + template_test_invalid(parser, parent_context, truncated, error) + + +# Tests parsing a statement with an invalid value +# We expect the following behaviour: +# - Error reading a invalid value on subject or argument +# - Have ParseTask.PARSE_SUBJECT or ParseTask.PARSE_ARGUMENT as the context's parse task +# - Have ParseTask.PARSE_STATEMENT as the context's parse task's parent +# - Have ParserMockAction.WRONG_VALUE as the exception code +# template_test_invalid provides general parsing properties +@given(draw_token_statement_invalid_value()) +def test_parse2_statement_invalid_value(invalid): + (new_tokens, new_value, position) = invalid + parent_context = 
static_parse_context() + context = make_test_context_tokens(parent_context, position, new_tokens) + error = ParseErrorException(ParserMockAction.WRONG_VALUE, new_value, None, context) + parser = make_test_parser(new_tokens) + template_test_invalid(parser, parent_context, new_tokens, error) + + +# Tests parsing a statement with an invalid verb +# We expect the following behaviour: +# - Error reading a known token as a verb +# - Have ParseTask.PARSE_VERB as the context's parse task +# - Have ParseTask.PARSE_STATEMENT as the context's parse task's parent +# - Have ParseError.RESERVED_NAME as the exception code +# template_test_invalid provides general parsing properties +@given(draw_token_statement(), draw_token_known()) +def test_parse2_statement_invalid_verb(tokens, new_token): + assume(new_token.value != tokens[-1].value) + new_tokens = tokens[:1] + [new_token] + tokens[1:] + parent_context = static_parse_context() + context = make_test_context_tokens(parent_context, 1, new_tokens) + error = ParseErrorException(ParseError.RESERVED_NAME, new_token, None, context) + parser = make_test_parser(new_tokens) + template_test_invalid(parser, parent_context, new_tokens, error) + + +# Tests parsing an empty statement +# We expect the following behaviour: +# - Error reading an empty statement +# - Have ParseTask.PARSE_SUBJECT as the context's parse task +# - Have ParseTask.PARSE_STATEMENT as the context's parse task's parent +# - Have ParserError.NO_TOKEN as the exception code +# template_test_invalid provides general parsing properties +def test_parse2_statement_invalid_empty(): + tokens = [] + parent_context = static_parse_context() + context = make_test_context(parent_context, 0, None, None) + error = ParseErrorException(ParseError.NO_TOKEN, None, None, context) + parser = make_test_parser(tokens) + template_test_invalid(parser, parent_context, tokens, error) diff --git a/tests/parse2/test_text.py b/tests/parse2/test_text.py new file mode 100644 index 0000000..17ef21a --- 
/dev/null +++ b/tests/parse2/test_text.py @@ -0,0 +1,132 @@ +# SPDX-License-Identifier: LGPL-2.1-only +# Copyright 2022 Jookia + +from hypothesis import assume, given +from hypothesis.strategies import ( + composite, + lists, +) + +from src.ast_types import Text +from src.parse2.parse import ( + ParseContext, + ParseError, + ParseErrorException, + ParseTask, + Parser, +) +from tests.parse2.templates import ( + draw_random_within, + template_test_invalid, + template_test_valid, +) +from tests.parse2.test_error import static_parse_context +from tests.parse2.test_token import ( + static_token_by_value, + draw_token_random, +) + + +# Draws a random token suitable for text building +@composite +def draw_text_value_token(draw): + token = draw(draw_token_random()) + assume(token.value not in ["StartText", "EndText"]) + return token + + +# Draws a random token that isn't StartText token +@composite +def draw_token_not_starttext(draw): + token = draw(draw_token_random()) + assume(token.value != "StartText") + return token + + +# Draws tokens to make a valid text string and its value +@composite +def draw_token_text_valid(draw): + tokens = draw(lists(draw_text_value_token())) + buffer = "" + for token in tokens: + buffer += token.value + " " + value = buffer[:-1] # Drop trailing space + start = static_token_by_value("StartText") + end = static_token_by_value("EndText") + all_tokens = [start] + tokens + [end] + return (all_tokens, Text(value)) + + +# Draws just the tokens of a valid text string +@composite +def draw_token_text_valid_tokens(draw): + (tokens, _) = draw(draw_token_text_valid()) + return tokens + + +# Tests parse_text works correctly +# We expect the following behaviour: +# - The resulting text is the value of tokens between StartText and EndText +# - The value of the tokens is joined by U+0020 SPACE code points +# - The Token's value is the resulting text +# template_test_valid provides general parsing properties +@given(draw_token_text_valid()) +def 
test_parse2_text_valid(valid_data): + (tokens, expected) = valid_data + template_test_valid(Parser().parse_text, tokens, expected) + + +# Test parsing text without StartText +# We expect the following behaviour: +# - Error if StartText's token value is not "StartText" +# - Have ParseError.PARSE_TEXT as the exception code +# - Have ParseTask.PARSE_TEXT as the context's parse task +@given(draw_token_text_valid_tokens(), draw_token_not_starttext()) +def test_parse2_text_invalid_nostarttext(tokens, not_starttext): + new_tokens = [not_starttext] + tokens[1:0] + parent_context = static_parse_context() + context = ParseContext(ParseTask.PARSE_TEXT, new_tokens[0], parent_context) + error = ParseErrorException( + ParseError.WRONG_TOKEN, not_starttext, "StartText", context + ) + template_test_invalid(Parser().parse_text, parent_context, new_tokens, error) + + +# Tests parsing empty text +# We expect the following behaviour: +# - Error if there is no StartText token at all +# - Have ParseError.NO_TOKEN as the exception code +# - Have ParseTask.PARSE_TEXT as the context's parse task +def test_parse2_text_invalid_empty(): + parent_context = static_parse_context() + context = ParseContext(ParseTask.PARSE_TEXT, None, parent_context) + error = ParseErrorException(ParseError.NO_TOKEN, None, None, context) + template_test_invalid(Parser().parse_text, parent_context, [], error) + + +# Tests parsing text with a StartText token in it +# We expect the following behaviour: +# - Error if a StartText token is in the text content +# - Have ParseError.FOUND_STARTTEXT as the exception code +# - Have ParseTask.PARSE_TEXT as the context's parse task +@given(draw_random_within(draw_token_text_valid_tokens(), "StartText")) +def test_parse2_text_invalid_extrastarttext(within): + (tokens, start) = within + parent_context = static_parse_context() + context = ParseContext(ParseTask.PARSE_TEXT, tokens[0], parent_context) + error = ParseErrorException(ParseError.FOUND_STARTTEXT, start, None, context) + 
template_test_invalid(Parser().parse_text, parent_context, tokens, error) + + +# Tests parsing text without an EndText token +# We expect the following behaviour: +# - Error if there is no EndText token at all +# - Have ParseError.NO_TOKEN as the exception code +# - Have ParseTask.PARSE_TEXT as the context's parse task +@given(draw_token_text_valid_tokens()) +def test_parse2_text_invalid_noendtext(tokens): + new_tokens = tokens[0:-1] + parent_context = static_parse_context() + context = ParseContext(ParseTask.PARSE_TEXT, tokens[0], parent_context) + error = ParseErrorException(ParseError.NO_TOKEN, None, None, context) + template_test_invalid(Parser().parse_text, parent_context, new_tokens, error) diff --git a/tests/parse2/test_token.py b/tests/parse2/test_token.py new file mode 100644 index 0000000..3e2de0e --- /dev/null +++ b/tests/parse2/test_token.py @@ -0,0 +1,208 @@ +# SPDX-License-Identifier: LGPL-2.1-only +# Copyright 2022 Jookia + +from hypothesis import given, assume +from hypothesis.strategies import ( + booleans, + composite, + integers, + lists, + one_of, + sampled_from, + text, +) + +from src.parse2.token import Token, TokenLocation, TokenStream +from tests.templates import template_test_structure + +# Keywords recognized by the language +keywords = [ + "Done", + "Set", + "To", + "EndSet", + "If", + "Then", + "Else", + "EndIf", + "StartNote", + "EndNote", + "StartText", + "EndText", +] + +# Literals recognized by the language +literals = [ + "True", + "False", +] + + +# Draws a random token location +@composite +def draw_token_location(draw): + line = draw(integers()) + offset = draw(integers()) + filename = draw(text()) + return TokenLocation(line, offset, filename) + + +# Static token location +def static_token_location(): + return TokenLocation(1234, 4321, "Hello world") + + +# Test token location structure +@template_test_structure( + TokenLocation, + draw_token_location(), + line=integers(), + offset=integers(), + file=text(), +) +def 
test_parse2_token_location_structure(): + pass + + +# Creates a token with a specific value and static location +def static_token_by_value(value): + return Token(value, static_token_location()) + + +# Draws a bool token +@composite +def draw_token_bool(draw): + location = static_token_location() + if draw(booleans()): + value = "True" + else: + value = "False" + return Token(value, location) + + +# Draws a keyword token +@composite +def draw_token_keyword(draw): + location = static_token_location() + value = draw(sampled_from(keywords)) + return Token(value, location) + + +# All strategies used to generate known tokens +known_strategies = [ + draw_token_bool(), + draw_token_keyword(), +] + + +# Draws a random token +@composite +def draw_token_known(draw): + token = draw(one_of(known_strategies)) + return token + + +# Draws an unknown token +@composite +def draw_token_unknown(draw): + location = static_token_location() + value = draw(text(min_size=1)) + assume(value not in literals) + assume(value not in keywords) + return Token(value, location) + + +# Draws a known token and possibly add garbage +# This is to ensure that tokens must completely match a value +@composite +def draw_token_garbled(draw): + token = draw(draw_token_unknown()) + value = token.value + if draw(booleans()): + value = draw(text(min_size=1)) + value + if draw(booleans()): + value = value + draw(text(min_size=1)) + return Token(value, token.location) + + +# All strategies used to generate random tokens +random_strategies = known_strategies + [ + draw_token_unknown(), + draw_token_garbled(), +] + + +# Draws a random token +@composite +def draw_token_random(draw): + token = draw(one_of(random_strategies)) + return token + + +# Static token +def static_token(): + return Token("Hello world!", static_token_location()) + + +# Test token structure +@template_test_structure( + Token, + draw_token_random(), + value=text(), + location=draw_token_location(), +) +def test_parse2_token_token_structure(): + 
pass + + +# Tests that a token stream pops items correctly +# We expect the following behaviour: +# - All items are popped in order +# - None is returned at the end of the stream +@given(lists(draw_token_random())) +def test_parse2_token_token_stream_pop(tokens): + stream = TokenStream(tokens.copy()) + read = [] + token = stream.pop() + while token is not None: + read.append(token) + token = stream.pop() + assert read == tokens + assert stream.pop() is None + + +# Tests that a token stream peeks items correctly +# We expect the following behaviour: +# - Peeking does not pop any values +# - None is returned at the end of the stream +@given(lists(draw_token_random()), integers(min_value=0, max_value=100)) +def test_parse2_token_token_stream_peek(tokens, times): + stream = TokenStream(tokens.copy()) + token_count = len(stream.tokens) + if token_count == 0: + real_times = times + expected = None + else: + real_times = times % token_count + expected = tokens[0] + for _ in range(0, real_times): + token = stream.peek() + assert token == expected + + +# Tests that peeking and popping don't influence each other +# We expect the following behaviour: +# - Peeking does not influence the next pop call +# - Popping does not influence the next peep call +@given(lists(draw_token_random())) +def test_parse2_token_token_stream_mixed(tokens): + stream = TokenStream(tokens.copy()) + read = [] + token = True + while token is not None: + peeked = stream.peek() + token = stream.pop() + read.append(token) + assert peeked == token + assert read[:-1] == tokens # Skip None at end + assert stream.pop() is None diff --git a/tests/parse2/test_tokenize.py b/tests/parse2/test_tokenize.py new file mode 100644 index 0000000..43b5541 --- /dev/null +++ b/tests/parse2/test_tokenize.py @@ -0,0 +1,258 @@ +# SPDX-License-Identifier: LGPL-2.1-only +# Copyright 2022 Jookia + +from hypothesis import assume, given +from hypothesis.strategies import ( + booleans, + characters, + composite, + just, + lists, + 
one_of, + sampled_from, + text, +) + +from src.parse2 import tokenize +from src.parse2.token import Token, TokenLocation +from tests.parse2.test_token import static_token_location + + +# Values considered spaces +valid_spaces = [ + "\t", # U+0009 HORIZONTAL TAB + " ", # U+0020 SPACE +] + +# Single values reserved for new line use +single_newlines = [ + "\n", # U+000A LINE FEED + "\v", # U+000B VERTICAL TAB + "\f", # U+000C FORM FEED + "\r", # U+000D CARRIAGE RETURN + "\u0085", # U+0085 NEXT LINE + "\u2028", # U+2028 LINE SEPARATOR + "\u2029", # U+2029 PARAGRAPH SEPARATOR +] + +# Multi values reserved for new line use +multi_newlines = [ + "\r\n", # U+000A U+000D CARRIAGE RETURN then LINE FEED +] + +# All values reserved for new line use +valid_newlines = single_newlines + multi_newlines + + +# Draws a space token +@composite +def draw_token_space(draw): + location = static_token_location() + value = draw(sampled_from(valid_spaces)) + return Token(value, location) + + +# Draws a new line token +@composite +def draw_token_newline(draw): + location = static_token_location() + value = draw(sampled_from(valid_newlines)) + return Token(value, location) + + +# Draws a random token without whitespace +@composite +def draw_token_nospace(draw): + reserved = valid_spaces + single_newlines + location = static_token_location() + chars = characters(blacklist_characters=reserved) + value = draw(text(alphabet=chars, min_size=1)) + for v in multi_newlines: + assume(v not in value) + return Token(value, location) + + +# Draws a random token perhaps with whitespaces +@composite +def draw_token_maybespace(draw): + strategies = [ + draw_token_space(), + draw_token_newline(), + draw_token_nospace(), + ] + return draw(one_of(strategies)) + + +# Draws a token using an existing strategy but with a blank location just like split_tokens outputs +@composite +def draw_token_splitted(draw, strategy): + token = draw(strategy) + location = TokenLocation(1, 1, "") + return Token(token.value, 
location) + + +# Merges \r and \n tokens to \r\n tokens +def merge_crlf(tokens): + if len(tokens) < 2: + return tokens + prev = tokens[0] + merged = [] + for curr in tokens[1:]: + if prev.value == "\r" and curr.value == "\n": + # Previous token is \r, don't append it + # Instead promote this \n token to \r\n + prev = Token("\r\n", prev.location) + else: + # Append the previous token + merged.append(prev) + prev = curr + merged.append(prev) + return merged + + +# Generates an alternating sequence of unknown or whitespace tokens +# intended for splitting in to separate tokens +@composite +def draw_tokens_to_split(draw): + source = "" + tokens = [] + elements = draw(lists(just(True))) # Dummy list for sizing + drawing_whitespace = draw(booleans()) + for _ in elements: + if drawing_whitespace: + # Multiple whitespaces get split in to multiple tokens + strategy = one_of([draw_token_space(), draw_token_newline()]) + locationed = draw_token_splitted(strategy) + spaces = draw(lists(locationed, min_size=1)) + tokens += merge_crlf(spaces) + else: + locationed = draw_token_splitted(draw_token_nospace()) + tokens.append(draw(locationed)) + drawing_whitespace = not drawing_whitespace + for t in tokens: + source += t.value + return (source, tokens) + + +# Test that the tokenizer can split tokens properly +# We expect the following behaviour: +# - Whitespace is any of the following Unicode sequences: +# U+0009 HORIZONTAL TAB +# U+000A LINE FEED +# U+000B VERTICAL TAB +# U+000C FORM FEED +# U+000D CARRIAGE RETURN +# U+000D U+000A CARRIAGE RETURN then LINE FEED +# U+0020 SPACE +# U+0085 NEXT LINE +# U+2028 LINE SEPARATOR +# U+2029 PARAGRAPH SEPARATOR +# - Non-whitespace is anything else +# - Whitespace and non-whitespace are separated in to separate tokens +# - Whitespace sequences are split in to multiple adjacent tokens +# - Non-whitespace code points are combined in to a single token +# - Each token location is line 1 offset 1 of file "" +@given(draw_tokens_to_split()) +def 
test_parse2_tokenize_split_tokens(test_data): + (source, tokens) = test_data + assert tokenize.split_tokens(source) == tokens + + +# Generates a list of tokens with correct locations +@composite +def draw_tokens_locations(draw): + tokens = draw(lists(draw_token_maybespace())) + filename = draw(text()) + located = [] + line = 1 + offset = 1 + for t in tokens: + location = TokenLocation(line, offset, filename) + new = Token(t.value, location) + located.append(new) + if t.value in valid_newlines: + line = line + 1 + offset = 1 + else: + offset += len(t.value) + return (tokens, located, filename) + + +# Test that the tokenizer can determine locations +# We expect the following behaviour: +# - New line tokens are tokens with one of the following Unicode sequences: +# U+000A LINE FEED +# U+000B VERTICAL TAB +# U+000C FORM FEED +# U+000D CARRIAGE RETURN +# U+000D U+000A CARRIAGE RETURN then LINE FEED +# U+0085 NEXT LINE +# U+2028 LINE SEPARATOR +# U+2029 PARAGRAPH SEPARATOR +# - Only the token location is modified +# - Each token's location filename is the generated filename +# - A token's line is equal to 1 plus the number of new line tokens +# before the token +# - A token's offset is equal to 1 plus the sum of previous token's +# value lengths 'before' it, where 'before' is defined as any +# token between the last new line token (or start of file) before the token +# and the token itself +@given(draw_tokens_locations()) +def test_parse2_tokenize_locations(test_data): + (input, located, filename) = test_data + assert tokenize.locate_tokens(input, filename) == located + + +# Generates two list of tokens: One with whitespace and one without +@composite +def draw_tokens_whitespace(draw): + input = draw(lists(draw_token_maybespace())) + stripped = [] + for t in input: + is_whitespace = t.value in valid_spaces or t.value in valid_newlines + if not is_whitespace: + stripped.append(t) + return (input, stripped) + + +# Test that the tokenizer can strip whitespace correctly +# 
We expect the following behaviour: +# - No tokens are modified +# - Tokens with the following values are removed from the output: +# U+0009 HORIZONTAL TAB +# U+000A LINE FEED +# U+000B VERTICAL TAB +# U+000C FORM FEED +# U+000D CARRIAGE RETURN +# U+000D U+000A CARRIAGE RETURN then LINE FEED +# U+0020 SPACE +# U+0085 NEXT LINE +# U+2028 LINE SEPARATOR +# U+2029 PARAGRAPH SEPARATOR +@given(draw_tokens_whitespace()) +def test_parse2_tokenize_strip_whitespace(test_data): + (input, tokens) = test_data + assert tokenize.strip_whitespace(input) == tokens + + +# Draw a random string made of token values +@composite +def draw_source_fuzz(draw): + tokens = draw(lists(draw_token_maybespace())) + input = "" + for t in tokens: + input += t.value + return input + + +# Test that the tokenize function behaves as we expect +# We expect the following behaviour: +# - The tokenizer splits the tokens as expected, then +# - The tokenizer sets the token locations as expected +@given(draw_source_fuzz(), text()) +def test_parse2_tokenize_fuzz(source, filename): + split = tokenize.split_tokens(source) + located = tokenize.locate_tokens(split, filename) + stripped = tokenize.strip_whitespace(located) + tokenized = tokenize.tokenize(source, filename) + assert stripped == tokenized diff --git a/tests/parse2/test_value.py b/tests/parse2/test_value.py new file mode 100644 index 0000000..2b38651 --- /dev/null +++ b/tests/parse2/test_value.py @@ -0,0 +1,153 @@ +# SPDX-License-Identifier: LGPL-2.1-only +# Copyright 2022 Jookia + +import enum + +from hypothesis import assume, given +from hypothesis.strategies import composite, just, one_of + +from src.parse2.parse import ( + ParseContext, + ParseError, + ParseErrorException, + ParseTask, + Parser, +) +from tests.parse2.templates import template_test_valid, template_test_invalid +from tests.parse2.test_error import static_parse_context +from tests.parse2.test_token import ( + draw_token_keyword, + draw_token_unknown, + static_token_by_value, +) + + 
+# Values indicating what a parser did +class ParserMockAction(enum.Enum): + PARSE_BOOL = enum.auto() + PARSE_TEXT = enum.auto() + PARSE_REFERENCE = enum.auto() + + +# Dummy Parser for testing value parsing +# Instead of actually parsing values just return a static value to show what +# the parser would normally do +class ParserValueMockValid(Parser): + def parse_bool(self, stream, context): + stream.pop() + return ParserMockAction.PARSE_BOOL + + def parse_text(self, stream, context): + stream.pop() + return ParserMockAction.PARSE_TEXT + + def parse_reference(self, stream, context): + stream.pop() + return ParserMockAction.PARSE_REFERENCE + + +# Dummy Parser for testing error propagation +# Uses parser mock values as errors +class ParserValueMockError(Parser): + def parse_bool(self, stream, context): + raise ParseErrorException(ParserMockAction.PARSE_BOOL, None, None, context) + + def parse_text(self, stream, context): + raise ParseErrorException(ParserMockAction.PARSE_TEXT, None, None, context) + + def parse_reference(self, stream, context): + raise ParseErrorException(ParserMockAction.PARSE_REFERENCE, None, None, context) + + +# Generates a strategy for a valid word and parser action +def token_and_action(word, action): + return just(([static_token_by_value(word)], action)) + + +# Draws tokens for values based on literals +@composite +def draw_token_value_literal(draw): + strategies = [ + token_and_action("True", ParserMockAction.PARSE_BOOL), + token_and_action("False", ParserMockAction.PARSE_BOOL), + token_and_action("StartText", ParserMockAction.PARSE_TEXT), + ] + return draw(one_of(strategies)) + + +# Draws tokens to make a value based on a reference +@composite +def draw_token_value_reference(draw): + token = draw(draw_token_unknown()) + return ([token], ParserMockAction.PARSE_REFERENCE) + + +# Draws tokens and valid value for a valid value +@composite +def draw_token_value_valid(draw): + strategies = [ + draw_token_value_literal(), + 
draw_token_value_reference(), + ] + return draw(one_of(strategies)) + + +# Tests parsing a literal value +# We expect the following behaviour: +# - parse_value parses a Bool if it sees True or False +# - parse_value parses a Text if it sees StartText +# template_test_valid provides general parsing properties +@given(draw_token_value_literal()) +def test_parse2_value_literal(literal): + (tokens, expected) = literal + template_test_valid(ParserValueMockValid().parse_value, tokens, expected) + + +# Tests parsing a reference value +# We expect the following behaviour: +# - parse_value parses a Reference if it sees an unknown value +# template_test_valid provides general parsing properties +@given(draw_token_value_reference()) +def test_parse2_value_reference(reference): + (tokens, expected) = reference + template_test_valid(ParserValueMockValid().parse_value, tokens, expected) + + +# Tests parsing a keyword as a value fails +# We expect the following behaviour: +# - Error if a keyword is encountered +# - Have ParseError.RESERVED_NAME as the exception code +# - Have ParseTask.PARSE_VALUE as the context's parse task +@given(draw_token_keyword()) +def test_parse2_value_invalid_name(token): + assume(token.value != "StartText") + parent_context = static_parse_context() + context = ParseContext(ParseTask.PARSE_VALUE, token, parent_context) + error = ParseErrorException(ParseError.RESERVED_NAME, token, None, context) + template_test_invalid(Parser().parse_value, parent_context, [token], error) + + +# Tests parsing empty value +# We expect the following behaviour: +# - Have ParseError.NO_TOKEN as the exception code +# - Have ParseTask.PARSE_VALUE as the context's parse task +def test_parse2_value_invalid_empty(): + parent_context = static_parse_context() + context = ParseContext(ParseTask.PARSE_VALUE, None, parent_context) + error = ParseErrorException(ParseError.NO_TOKEN, None, None, context) + template_test_invalid(Parser().parse_value, parent_context, [], error) + + +# 
Tests parse_value error propagation +# We expect the following behaviour: +# - Errors from parsing are propagated and have the correct context +# - Have ParseTask.PARSE_VALUE as the context's parse task +@given(draw_token_value_valid()) +def test_parse2_value_error_propagation(valid_data): + (tokens, action) = valid_data + parent_context = static_parse_context() + context = ParseContext(ParseTask.PARSE_VALUE, tokens[0], parent_context) + error = ParseErrorException(action, None, None, context) + template_test_invalid( + ParserValueMockError().parse_value, parent_context, tokens, error + ) diff --git a/tests/test_token.py b/tests/test_token.py deleted file mode 100644 index 140b057..0000000 --- a/tests/test_token.py +++ /dev/null @@ -1,208 +0,0 @@ -# SPDX-License-Identifier: LGPL-2.1-only -# Copyright 2022 Jookia - -from hypothesis import given, assume -from hypothesis.strategies import ( - booleans, - composite, - integers, - lists, - one_of, - sampled_from, - text, -) - -from src.token import Token, TokenLocation, TokenStream -from tests.templates import template_test_structure - -# Keywords recognized by the language -keywords = [ - "Done", - "Set", - "To", - "EndSet", - "If", - "Then", - "Else", - "EndIf", - "StartNote", - "EndNote", - "StartText", - "EndText", -] - -# Literals recognized by the language -literals = [ - "True", - "False", -] - - -# Draws a random token location -@composite -def draw_token_location(draw): - line = draw(integers()) - offset = draw(integers()) - filename = draw(text()) - return TokenLocation(line, offset, filename) - - -# Static token location -def static_token_location(): - return TokenLocation(1234, 4321, "Hello world") - - -# Test token location structure -@template_test_structure( - TokenLocation, - draw_token_location(), - line=integers(), - offset=integers(), - file=text(), -) -def test_token_location_structure(): - pass - - -# Creates a token with a specific value and static location -def static_token_by_value(value): - 
return Token(value, static_token_location()) - - -# Draws a bool token -@composite -def draw_token_bool(draw): - location = static_token_location() - if draw(booleans()): - value = "True" - else: - value = "False" - return Token(value, location) - - -# Draws a keyword token -@composite -def draw_token_keyword(draw): - location = static_token_location() - value = draw(sampled_from(keywords)) - return Token(value, location) - - -# All strategies used to generate known tokens -known_strategies = [ - draw_token_bool(), - draw_token_keyword(), -] - - -# Draws a random token -@composite -def draw_token_known(draw): - token = draw(one_of(known_strategies)) - return token - - -# Draws an unknown token -@composite -def draw_token_unknown(draw): - location = static_token_location() - value = draw(text(min_size=1)) - assume(value not in literals) - assume(value not in keywords) - return Token(value, location) - - -# Draws a known token and possibly add garbage -# This is to ensure that tokens must completely match a value -@composite -def draw_token_garbled(draw): - token = draw(draw_token_unknown()) - value = token.value - if draw(booleans()): - value = draw(text(min_size=1)) + value - if draw(booleans()): - value = value + draw(text(min_size=1)) - return Token(value, token.location) - - -# All strategies used to generate random tokens -random_strategies = known_strategies + [ - draw_token_unknown(), - draw_token_garbled(), -] - - -# Draws a random token -@composite -def draw_token_random(draw): - token = draw(one_of(random_strategies)) - return token - - -# Static token -def static_token(): - return Token("Hello world!", static_token_location()) - - -# Test token structure -@template_test_structure( - Token, - draw_token_random(), - value=text(), - location=draw_token_location(), -) -def test_token_token_structure(): - pass - - -# Tests that a token stream pops items correctly -# We expect the following behaviour: -# - All items are popped in order -# - None is returned at 
the end of the stream -@given(lists(draw_token_random())) -def test_token_token_stream_pop(tokens): - stream = TokenStream(tokens.copy()) - read = [] - token = stream.pop() - while token is not None: - read.append(token) - token = stream.pop() - assert read == tokens - assert stream.pop() is None - - -# Tests that a token stream peeks items correctly -# We expect the following behaviour: -# - Peeking does not pop any values -# - None is returned at the end of the stream -@given(lists(draw_token_random()), integers(min_value=0, max_value=100)) -def test_token_token_stream_peek(tokens, times): - stream = TokenStream(tokens.copy()) - token_count = len(stream.tokens) - if token_count == 0: - real_times = times - expected = None - else: - real_times = times % token_count - expected = tokens[0] - for _ in range(0, real_times): - token = stream.peek() - assert token == expected - - -# Tests that peeking and popping don't influence each other -# We expect the following behaviour: -# - Peeking does not influence the next pop call -# - Popping does not influence the next peep call -@given(lists(draw_token_random())) -def test_token_token_stream_mixed(tokens): - stream = TokenStream(tokens.copy()) - read = [] - token = True - while token is not None: - peeked = stream.peek() - token = stream.pop() - read.append(token) - assert peeked == token - assert read[:-1] == tokens # Skip None at end - assert stream.pop() is None diff --git a/tests/test_tokenize.py b/tests/test_tokenize.py deleted file mode 100644 index 20cb4c7..0000000 --- a/tests/test_tokenize.py +++ /dev/null @@ -1,258 +0,0 @@ -# SPDX-License-Identifier: LGPL-2.1-only -# Copyright 2022 Jookia - -from hypothesis import assume, given -from hypothesis.strategies import ( - booleans, - characters, - composite, - just, - lists, - one_of, - sampled_from, - text, -) - -from src import tokenize -from src.token import Token, TokenLocation -from tests.test_token import static_token_location - - -# Values considered spaces 
-valid_spaces = [ - "\t", # U+0009 HORIZONTAL TAB - " ", # U+0020 SPACE -] - -# Single values reserved for new line use -single_newlines = [ - "\n", # U+000A LINE FEED - "\v", # U+000B VERTICAL TAB - "\f", # U+000C FORM FEED - "\r", # U+000D CARRIAGE RETURN - "\u0085", # U+0085 NEXT LINE - "\u2028", # U+2028 LINE SEPARATOR - "\u2029", # U+2029 PARAGRAPH SEPARATOR -] - -# Multi values reserved for new line use -multi_newlines = [ - "\r\n", # U+000A U+000D CARRIAGE RETURN then LINE FEED -] - -# All values reserved for new line use -valid_newlines = single_newlines + multi_newlines - - -# Draws a space token -@composite -def draw_token_space(draw): - location = static_token_location() - value = draw(sampled_from(valid_spaces)) - return Token(value, location) - - -# Draws a new line token -@composite -def draw_token_newline(draw): - location = static_token_location() - value = draw(sampled_from(valid_newlines)) - return Token(value, location) - - -# Draws a random token without whitespace -@composite -def draw_token_nospace(draw): - reserved = valid_spaces + single_newlines - location = static_token_location() - chars = characters(blacklist_characters=reserved) - value = draw(text(alphabet=chars, min_size=1)) - for v in multi_newlines: - assume(v not in value) - return Token(value, location) - - -# Draws a random token perhaps with whitespaces -@composite -def draw_token_maybespace(draw): - strategies = [ - draw_token_space(), - draw_token_newline(), - draw_token_nospace(), - ] - return draw(one_of(strategies)) - - -# Draws a token using an existing strategy but with a blank location just like split_tokens outputs -@composite -def draw_token_splitted(draw, strategy): - token = draw(strategy) - location = TokenLocation(1, 1, "") - return Token(token.value, location) - - -# Merges \r and \n tokens to \r\n tokens -def merge_crlf(tokens): - if len(tokens) < 2: - return tokens - prev = tokens[0] - merged = [] - for curr in tokens[1:]: - if prev.value == "\r" and curr.value 
== "\n": - # Previous token is \r, don't append it - # Instead promote this \n token to \r\n - prev = Token("\r\n", prev.location) - else: - # Append the previous token - merged.append(prev) - prev = curr - merged.append(prev) - return merged - - -# Generates an alternating sequence of unknown or whitespace tokens -# intended for splitting in to separate tokens -@composite -def draw_tokens_to_split(draw): - source = "" - tokens = [] - elements = draw(lists(just(True))) # Dummy list for sizing - drawing_whitespace = draw(booleans()) - for _ in elements: - if drawing_whitespace: - # Multiple whitespaces get split in to multiple tokens - strategy = one_of([draw_token_space(), draw_token_newline()]) - locationed = draw_token_splitted(strategy) - spaces = draw(lists(locationed, min_size=1)) - tokens += merge_crlf(spaces) - else: - locationed = draw_token_splitted(draw_token_nospace()) - tokens.append(draw(locationed)) - drawing_whitespace = not drawing_whitespace - for t in tokens: - source += t.value - return (source, tokens) - - -# Test that the tokenizer can split tokens properly -# We expect the following behaviour: -# - Whitespace is any of the following Unicode sequences: -# U+0009 HORIZONTAL TAB -# U+000A LINE FEED -# U+000B VERTICAL TAB -# U+000C FORM FEED -# U+000D CARRIAGE RETURN -# U+000D U+000A CARRIAGE RETURN then LINE FEED -# U+0020 SPACE -# U+0085 NEXT LINE -# U+2028 LINE SEPARATOR -# U+2029 PARAGRAPH SEPARATOR -# - Non-whitespace is anything else -# - Whitespace and non-whitespace are separated in to separate tokens -# - Whitespace sequences are split in to multiple adjacent tokens -# - Non-whitespace code points are combined in to a single token -# - Each token location is line 1 offset 1 of file "" -@given(draw_tokens_to_split()) -def test_tokenize_split_tokens(test_data): - (source, tokens) = test_data - assert tokenize.split_tokens(source) == tokens - - -# Generates a list of tokens with correct locations -@composite -def draw_tokens_locations(draw): 
- tokens = draw(lists(draw_token_maybespace())) - filename = draw(text()) - located = [] - line = 1 - offset = 1 - for t in tokens: - location = TokenLocation(line, offset, filename) - new = Token(t.value, location) - located.append(new) - if t.value in valid_newlines: - line = line + 1 - offset = 1 - else: - offset += len(t.value) - return (tokens, located, filename) - - -# Test that the tokenizer can determine locations -# We expect the following behaviour: -# - New line tokens are tokens with one of the following Unicode sequences: -# U+000A LINE FEED -# U+000B VERTICAL TAB -# U+000C FORM FEED -# U+000D CARRIAGE RETURN -# U+000D U+000A CARRIAGE RETURN then LINE FEED -# U+0085 NEXT LINE -# U+2028 LINE SEPARATOR -# U+2029 PARAGRAPH SEPARATOR -# - Only the token location is modified -# - Each token's location filename is the generated filename -# - A token's line is equal to 1 plus the number of new line tokens -# before the token -# - A token's offset is equal to 1 plus the sum of previous token's -# value lengths 'before' it, where 'before' is defined as any -# token between the last new line token (or start of file) before the token -# and the token itself -@given(draw_tokens_locations()) -def test_tokenize_locations(test_data): - (input, located, filename) = test_data - assert tokenize.locate_tokens(input, filename) == located - - -# Generates two list of tokens: One with whitespace and one without -@composite -def draw_tokens_whitespace(draw): - input = draw(lists(draw_token_maybespace())) - stripped = [] - for t in input: - is_whitespace = t.value in valid_spaces or t.value in valid_newlines - if not is_whitespace: - stripped.append(t) - return (input, stripped) - - -# Test that the tokenizer can strip whitespace correctly -# We expect the following behaviour: -# - No tokens are modified -# - Tokens with the following values are removed from the output: -# U+0009 HORIZONTAL TAB -# U+000A LINE FEED -# U+000B VERTICAL TAB -# U+000C FORM FEED -# U+000D CARRIAGE 
RETURN -# U+000D U+000A CARRIAGE RETURN then LINE FEED -# U+0020 SPACE -# U+0085 NEXT LINE -# U+2028 LINE SEPARATOR -# U+2029 PARAGRAPH SEPARATOR -@given(draw_tokens_whitespace()) -def test_tokenize_strip_whitespace(test_data): - (input, tokens) = test_data - assert tokenize.strip_whitespace(input) == tokens - - -# Draw a random string made of token values -@composite -def draw_source_fuzz(draw): - tokens = draw(lists(draw_token_maybespace())) - input = "" - for t in tokens: - input += t.value - return input - - -# Test that the tokenize function behaves as we expect -# We expect the following behaviour: -# - The tokenizer splits the tokens as expected, then -# - The tokenizer sets the token locations as expected -@given(draw_source_fuzz(), text()) -def test_tokenize_fuzz(source, filename): - split = tokenize.split_tokens(source) - located = tokenize.locate_tokens(split, filename) - stripped = tokenize.strip_whitespace(located) - tokenized = tokenize.tokenize(source, filename) - assert stripped == tokenized