# SPDX-License-Identifier: LGPL-2.1-only # Copyright 2022 Jookia <contact@jookia.org> from src.i18n import Message from src.ast_types import Bool, Conditional, Reference, Set, Statement, Text from src.token import TokenStream # Words that can't be used as references # This should include keywords and literals reserved_names = [ "Done", "Set", "To", "EndSet", "If", "Then", "Else", "EndIf", "StartNote", "EndNote", "StartText", "EndText", "True", "False", ] # Tasks that happen during parsing class ParseTask: TEST_TASK = 1 # pragma: no mutate PARSE_NOTE = 2 # pragma: no mutate CLEAR_NOTES = 3 # pragma: no mutate PARSE_TEXT = 4 # pragma: no mutate PARSE_BOOL = 5 # pragma: no mutate PARSE_REFERENCE = 6 # pragma: no mutate PARSE_VALUE = 7 # pragma: no mutate PARSE_STATEMENT = 8 # pragma: no mutate PARSE_SUBJECT = 9 # pragma: no mutate PARSE_VERB = 10 # pragma: no mutate PARSE_ARGUMENT = 11 # pragma: no mutate PARSE_SET = 12 # pragma: no mutate PARSE_CONDITIONAL = 13 # pragma: no mutate PARSE_TEST = 14 # pragma: no mutate PARSE_SUCCESS = 15 # pragma: no mutate PARSE_FAILURE = 16 # pragma: no mutate PARSE_DIRECTIVE = 17 # pragma: no mutate PARSE_FILE = 18 # pragma: no mutate MAX = 19 # pragma: no mutate # Returns a list of all tasks def list(): return list(range(1, ParseTask.MAX)) # pragma: no mutate # Message identifiers for ParseTasks ParseTaskMessageIDs = { ParseTask.TEST_TASK: "ParseTaskTestTask", ParseTask.PARSE_NOTE: "ParseTaskNote", ParseTask.CLEAR_NOTES: "ParseTaskClearNotes", ParseTask.PARSE_TEXT: "ParseTaskText", ParseTask.PARSE_BOOL: "ParseTaskBool", ParseTask.PARSE_REFERENCE: "ParseTaskReference", ParseTask.PARSE_VALUE: "ParseTaskValue", ParseTask.PARSE_STATEMENT: "ParseTaskStatement", ParseTask.PARSE_SUBJECT: "ParseTaskSubject", ParseTask.PARSE_VERB: "ParseTaskVerb", ParseTask.PARSE_ARGUMENT: "ParseTaskArgument", ParseTask.PARSE_SET: "ParseTaskSet", ParseTask.PARSE_CONDITIONAL: "ParseTaskConditional", ParseTask.PARSE_TEST: "ParseTaskTest", ParseTask.PARSE_SUCCESS: "ParseTaskSuccess", ParseTask.PARSE_FAILURE: "ParseTaskFailure", ParseTask.PARSE_DIRECTIVE: "ParseTaskDirective", ParseTask.PARSE_FILE: "ParseTaskFile", } # Context used for parse error exception class ParseContext: def __init__(self, task, token, parent): self.task = task self.token = token self.parent = parent def __repr__(self): return ( "ParseContext(task %s, token %s, parent\n %s)" # pragma: no mutate % ( # pragma: no mutate self.task, self.token, self.parent, ) ) def __eq__(self, other): if other is None: return False return ( self.task == other.task and self.token == other.token and self.parent == other.parent ) # Errors that can happen when parsing class ParseError: TEST_ERROR = 1 # pragma: no mutate NO_TOKEN = 2 # pragma: no mutate WRONG_TOKEN = 3 # pragma: no mutate FOUND_STARTTEXT = 4 # pragma: no mutate FOUND_STARTNOTE = 5 # pragma: no mutate NOT_BOOL = 6 # pragma: no mutate FOUND_ENDNOTE = 7 # pragma: no mutate RESERVED_NAME = 8 # pragma: no mutate FOUND_TERMINATOR = 9 # pragma: no mutate MAX = 10 # pragma: no mutate # Returns a list of all errors def list(): return list(range(1, ParseError.MAX)) # pragma: no mutate # Message identifiers for ParseErrors ParseErrorMessageIDs = { ParseError.TEST_ERROR: "ParseErrorTestError", ParseError.NO_TOKEN: "ParseErrorNoToken", ParseError.WRONG_TOKEN: "ParseErrorWrongToken", ParseError.FOUND_STARTTEXT: "ParseErrorFoundStartText", ParseError.FOUND_STARTNOTE: "ParseErrorFoundStartNote", ParseError.NOT_BOOL: "ParseErrorNotBool", ParseError.FOUND_ENDNOTE: "ParseErrorFoundEndNote", ParseError.RESERVED_NAME: "ParseErrorReservedName", ParseError.FOUND_TERMINATOR: "ParseErrorFoundTerminator", } # Exception thrown when a parse error is encountered class ParseErrorException(BaseException): def __init__(self, error, token, expected, context): self.error = error self.token = token self.expected = expected self.context = context def __repr__(self): return ( "ParseErrorException(error %s, token %s, expected %s, context %s)" # pragma: no mutate % ( # pragma: no mutate self.error, self.token, self.expected, self.context, ) ) def __eq__(self, other): if other is None: return False return ( self.error == other.error and self.token == other.token and self.expected == other.expected and self.context == other.context ) # Reads a token, possibly of a certain value def read_token(stream, value, context): t = stream.pop() if t is None: raise ParseErrorException(ParseError.NO_TOKEN, None, None, context) elif value is not None and t.value != value: raise ParseErrorException(ParseError.WRONG_TOKEN, t, value, context) return t # The note skipper in a wrapper class for easy testing class NoteSkipper: # Skip a note def skip_note(self, stream, parent_context): context = ParseContext(ParseTask.PARSE_NOTE, stream.peek(), parent_context) read_token(stream, "StartNote", context) while True: t = read_token(stream, None, context) # Don't allow StartNote in notes if t.value in ["StartNote"]: raise ParseErrorException(ParseError.FOUND_STARTNOTE, t, None, context) # EndNote found, end things elif t.value == "EndNote": break return None # Clear notes def clear_notes(self, stream, parent_context): context = ParseContext(ParseTask.CLEAR_NOTES, stream.peek(), parent_context) tokens = [] token = stream.peek() while token is not None: # Found a note, skip it if token.value == "StartNote": self.skip_note(stream, context) # EndNote found outside note elif token.value == "EndNote": raise ParseErrorException( ParseError.FOUND_ENDNOTE, token, None, context ) # Add the token if it's not note related else: tokens.append(stream.pop()) token = stream.peek() return tokens # The recursive descent parser in a wrapper class for easy testing class Parser: # Parses a text node def parse_text(self, stream, parent_context): context = ParseContext(ParseTask.PARSE_TEXT, stream.peek(), parent_context) buffer = "" t = read_token(stream, "StartText", context) # Parse following tokens while True: t = read_token(stream, None, context) # Don't allow StartText in text if t.value in ["StartText"]: raise ParseErrorException(ParseError.FOUND_STARTTEXT, t, None, context) # EndText found, end things elif t.value == "EndText": break else: buffer += t.value + " " value = buffer[:-1] # Drop trailing space return Text(value) # Parses a boolean node def parse_bool(self, stream, parent_context): context = ParseContext(ParseTask.PARSE_BOOL, stream.peek(), parent_context) t = read_token(stream, None, context) if t.value == "True": return Bool(True) elif t.value == "False": return Bool(False) else: raise ParseErrorException(ParseError.NOT_BOOL, t, None, context) # Parses a reference node def parse_reference(self, stream, parent_context): context = ParseContext(ParseTask.PARSE_REFERENCE, stream.peek(), parent_context) t = read_token(stream, None, context) if t.value in reserved_names: raise ParseErrorException(ParseError.RESERVED_NAME, t, None, context) return Reference(t.value) # Parses a value def parse_value(self, stream, parent_context): context = ParseContext(ParseTask.PARSE_VALUE, stream.peek(), parent_context) t = stream.peek() if t is None: raise ParseErrorException(ParseError.NO_TOKEN, None, None, context) elif t.value in ["True", "False"]: return self.parse_bool(stream, context) elif t.value == "StartText": return self.parse_text(stream, context) elif t.value in reserved_names: raise ParseErrorException(ParseError.RESERVED_NAME, t, None, context) else: return self.parse_reference(stream, context) # Parses a statement until a specified terminator def parse_statement(self, stream, parent_context, terminator): context = ParseContext(ParseTask.PARSE_STATEMENT, stream.peek(), parent_context) peeked_subject = stream.peek() context_subject = ParseContext(ParseTask.PARSE_SUBJECT, peeked_subject, context) if peeked_subject is not None and peeked_subject.value == terminator: raise ParseErrorException( ParseError.FOUND_TERMINATOR, peeked_subject, None, context_subject ) subject = self.parse_value(stream, context_subject) context_verb = ParseContext(ParseTask.PARSE_VERB, stream.peek(), context) verb = read_token(stream, None, context_verb) if verb.value == terminator: return Statement(subject, None, []) elif verb.value in reserved_names: raise ParseErrorException( ParseError.RESERVED_NAME, verb, None, context_verb ) arguments = [] # Parse following arguments while True: peeked_arg = stream.peek() context_arg = ParseContext(ParseTask.PARSE_ARGUMENT, peeked_arg, context) if peeked_arg is not None and peeked_arg.value == terminator: stream.pop() return Statement(subject, verb.value, arguments) arg = self.parse_value(stream, context_arg) # pragma: no mutate arguments.append(arg) # Parses a set node def parse_set(self, stream, parent_context): context = ParseContext(ParseTask.PARSE_SET, stream.peek(), parent_context) read_token(stream, "Set", context) subcontext = ParseContext(ParseTask.PARSE_SUBJECT, stream.peek(), context) subject = read_token(stream, None, subcontext) if subject.value in reserved_names: raise ParseErrorException( ParseError.RESERVED_NAME, subject, None, subcontext ) read_token(stream, "To", context) statement = self.parse_statement(stream, context, "EndSet") return Set(subject.value, statement) # Parses a conditional node def parse_conditional(self, stream, parent_context): context = ParseContext( ParseTask.PARSE_CONDITIONAL, stream.peek(), parent_context ) read_token(stream, "If", context) test_context = ParseContext(ParseTask.PARSE_TEST, stream.peek(), context) test = self.parse_statement(stream, test_context, "Then") success_context = ParseContext(ParseTask.PARSE_SUCCESS, stream.peek(), context) success = self.parse_statement(stream, success_context, "Else") failure_context = ParseContext(ParseTask.PARSE_FAILURE, stream.peek(), context) failure = self.parse_statement(stream, failure_context, "EndIf") return Conditional(test, success, failure) # Parses a directive def parse_directive(self, stream, parent_context): context = ParseContext(ParseTask.PARSE_DIRECTIVE, stream.peek(), parent_context) t = stream.peek() if t is None: raise ParseErrorException(ParseError.NO_TOKEN, None, None, context) elif t.value == "Set": return self.parse_set(stream, context) elif t.value == "If": return self.parse_conditional(stream, context) else: return self.parse_statement(stream, context, "Done") # Parses a file def parse_file(self, stream, parent_context): context = ParseContext(ParseTask.PARSE_FILE, stream.peek(), parent_context) directives = [] next = stream.peek() while next is not None: dir = self.parse_directive(stream, context) # pragma: no mutate directives.append(dir) next = stream.peek() return directives # Parses tokens def parse(tokens, context): stream = TokenStream(tokens) cleared = NoteSkipper().clear_notes(stream, context) stream2 = TokenStream(cleared) parsed = Parser().parse_file(stream2, context) return parsed # Formats a ParseContext def format_context(context): task = Message(ParseTaskMessageIDs[context.task], []) if context.token: file = context.token.location.file line = context.token.location.line offset = context.token.location.offset return Message("ParseContextAt", [task, file, line, offset]) else: return Message("ParseContext", [task]) # Formats a ParseErrorException def format_exception(exception): has_expected = exception.expected is not None has_token = exception.token is not None error = Message(ParseErrorMessageIDs[exception.error], []) if has_expected: args = [exception.expected] else: args = [error] if has_token: file = exception.token.location.file line = exception.token.location.line offset = exception.token.location.offset args = args + [file, line, offset] ids = [ ["ParserError", "ParserErrorAt"], ["ParserErrorExpected", "ParserErrorExpectedAt"], ] id = ids[has_expected][has_token] return Message(id, args) # Formats a ParseErrorException and its contexts def format_full_error(exception): formatted = [format_exception(exception)] context = exception.context while context is not None: formatted.append(format_context(context)) context = context.parent return formatted