# NewLang / src / parse.py
# SPDX-License-Identifier: LGPL-2.1-only
# Copyright 2022 Jookia <contact@jookia.org>

from src.i18n import Message
from src.ast_types import Bool, Conditional, Reference, Set, Statement, Text
from src.token import TokenStream


# Words that can't be used as references
# This should include keywords and literals
# The parser checks tokens against this list before accepting them as a
# reference, a statement verb or the subject of a Set
reserved_names = [
    "Done",  # terminator of a plain statement directive
    "Set",  # Set ... To ... EndSet
    "To",
    "EndSet",
    "If",  # If ... Then ... Else ... EndIf
    "Then",
    "Else",
    "EndIf",
    "StartNote",  # note delimiters
    "EndNote",
    "StartText",  # text literal delimiters
    "EndText",
    "True",  # boolean literals
    "False",
]


# Tasks that happen during parsing
# Each task is a plain integer identifier; a ParseContext stores one to record
# what the parser was doing at that point
class ParseTask:
    TEST_TASK = 1  # pragma: no mutate
    PARSE_NOTE = 2  # pragma: no mutate
    CLEAR_NOTES = 3  # pragma: no mutate
    PARSE_TEXT = 4  # pragma: no mutate
    PARSE_BOOL = 5  # pragma: no mutate
    PARSE_REFERENCE = 6  # pragma: no mutate
    PARSE_VALUE = 7  # pragma: no mutate
    PARSE_STATEMENT = 8  # pragma: no mutate
    PARSE_SUBJECT = 9  # pragma: no mutate
    PARSE_VERB = 10  # pragma: no mutate
    PARSE_ARGUMENT = 11  # pragma: no mutate
    PARSE_SET = 12  # pragma: no mutate
    PARSE_CONDITIONAL = 13  # pragma: no mutate
    PARSE_TEST = 14  # pragma: no mutate
    PARSE_SUCCESS = 15  # pragma: no mutate
    PARSE_FAILURE = 16  # pragma: no mutate
    PARSE_DIRECTIVE = 17  # pragma: no mutate
    PARSE_FILE = 18  # pragma: no mutate
    # One past the highest task identifier; used as the exclusive bound below
    MAX = 19  # pragma: no mutate

    # Returns a list of all tasks
    # Declared static because it takes no instance; this keeps the existing
    # ParseTask.list() call working and also allows calling it on an instance
    # Inside the body "list" is the builtin, as class attributes are not in
    # scope within method bodies
    @staticmethod  # pragma: no mutate
    def list():
        return list(range(1, ParseTask.MAX))  # pragma: no mutate


# Message identifiers for ParseTasks
# Maps each ParseTask value to the identifier string that format_context
# passes to Message when describing a context
ParseTaskMessageIDs = {
    ParseTask.TEST_TASK: "ParseTaskTestTask",
    ParseTask.PARSE_NOTE: "ParseTaskNote",
    ParseTask.CLEAR_NOTES: "ParseTaskClearNotes",
    ParseTask.PARSE_TEXT: "ParseTaskText",
    ParseTask.PARSE_BOOL: "ParseTaskBool",
    ParseTask.PARSE_REFERENCE: "ParseTaskReference",
    ParseTask.PARSE_VALUE: "ParseTaskValue",
    ParseTask.PARSE_STATEMENT: "ParseTaskStatement",
    ParseTask.PARSE_SUBJECT: "ParseTaskSubject",
    ParseTask.PARSE_VERB: "ParseTaskVerb",
    ParseTask.PARSE_ARGUMENT: "ParseTaskArgument",
    ParseTask.PARSE_SET: "ParseTaskSet",
    ParseTask.PARSE_CONDITIONAL: "ParseTaskConditional",
    ParseTask.PARSE_TEST: "ParseTaskTest",
    ParseTask.PARSE_SUCCESS: "ParseTaskSuccess",
    ParseTask.PARSE_FAILURE: "ParseTaskFailure",
    ParseTask.PARSE_DIRECTIVE: "ParseTaskDirective",
    ParseTask.PARSE_FILE: "ParseTaskFile",
}


# Context used for parse error exception
# Holds three fields:
#   task   - the ParseTask being performed
#   token  - the token the context was created at, or None
#   parent - the enclosing ParseContext, or None
# Following parent links walks from the innermost task outwards
class ParseContext:
    def __init__(self, task, token, parent):
        self.task = task
        self.token = token
        self.parent = parent

    # Multi-line representation: the parent is printed on its own indented line
    def __repr__(self):
        return (
            "ParseContext(task %s, token %s, parent\n  %s)"  # pragma: no mutate
            % (  # pragma: no mutate
                self.task,
                self.token,
                self.parent,
            )
        )

    # Two contexts are equal when task, token and parent all compare equal;
    # comparing parents recurses up the whole chain
    # Anything that isn't a ParseContext (including None) yields NotImplemented
    # so Python falls back to its default comparison and the result is False,
    # rather than raising AttributeError on the missing fields
    # Defining __eq__ without __hash__ leaves instances unhashable
    def __eq__(self, other):
        if not isinstance(other, ParseContext):
            return NotImplemented
        return (
            self.task == other.task
            and self.token == other.token
            and self.parent == other.parent
        )


# Errors that can happen when parsing
# Each error is a plain integer identifier carried by a ParseErrorException
class ParseError:
    TEST_ERROR = 1  # pragma: no mutate
    NO_TOKEN = 2  # pragma: no mutate
    WRONG_TOKEN = 3  # pragma: no mutate
    FOUND_STARTTEXT = 4  # pragma: no mutate
    FOUND_STARTNOTE = 5  # pragma: no mutate
    NOT_BOOL = 6  # pragma: no mutate
    FOUND_ENDNOTE = 7  # pragma: no mutate
    RESERVED_NAME = 8  # pragma: no mutate
    FOUND_TERMINATOR = 9  # pragma: no mutate
    # One past the highest error identifier; used as the exclusive bound below
    MAX = 10  # pragma: no mutate

    # Returns a list of all errors
    # Declared static because it takes no instance; this keeps the existing
    # ParseError.list() call working and also allows calling it on an instance
    # Inside the body "list" is the builtin, as class attributes are not in
    # scope within method bodies
    @staticmethod  # pragma: no mutate
    def list():
        return list(range(1, ParseError.MAX))  # pragma: no mutate


# Message identifiers for ParseErrors
# Maps each ParseError value to the identifier string that format_exception
# passes to Message when describing an error
ParseErrorMessageIDs = {
    ParseError.TEST_ERROR: "ParseErrorTestError",
    ParseError.NO_TOKEN: "ParseErrorNoToken",
    ParseError.WRONG_TOKEN: "ParseErrorWrongToken",
    ParseError.FOUND_STARTTEXT: "ParseErrorFoundStartText",
    ParseError.FOUND_STARTNOTE: "ParseErrorFoundStartNote",
    ParseError.NOT_BOOL: "ParseErrorNotBool",
    ParseError.FOUND_ENDNOTE: "ParseErrorFoundEndNote",
    ParseError.RESERVED_NAME: "ParseErrorReservedName",
    ParseError.FOUND_TERMINATOR: "ParseErrorFoundTerminator",
}


# Exception thrown when a parse error is encountered
# Holds four fields:
#   error    - the ParseError identifier
#   token    - the offending token, or None when there was no token
#   expected - the token value that was wanted, or None
#   context  - the ParseContext the error happened in
# NOTE(review): this derives from BaseException, so "except Exception" handlers
# do not catch it. The base is left unchanged here so existing handlers keep
# behaving the same - confirm whether that is intentional
class ParseErrorException(BaseException):
    def __init__(self, error, token, expected, context):
        self.error = error
        self.token = token
        self.expected = expected
        self.context = context

    def __repr__(self):
        return (
            "ParseErrorException(error %s, token %s, expected %s, context %s)"  # pragma: no mutate
            % (  # pragma: no mutate
                self.error,
                self.token,
                self.expected,
                self.context,
            )
        )

    # Two exceptions are equal when all four fields compare equal
    # Anything that isn't a ParseErrorException (including None) yields
    # NotImplemented so Python falls back to its default comparison and the
    # result is False, rather than raising AttributeError on the missing fields
    def __eq__(self, other):
        if not isinstance(other, ParseErrorException):
            return NotImplemented
        return (
            self.error == other.error
            and self.token == other.token
            and self.expected == other.expected
            and self.context == other.context
        )


# Reads a token, possibly of a certain value
# Pops the next token from the stream and returns it
# Raises ParseErrorException with NO_TOKEN when the stream has nothing left,
# or with WRONG_TOKEN when a value was requested (not None) and the popped
# token's value differs from it
def read_token(stream, value, context):
    token = stream.pop()
    if token is None:
        raise ParseErrorException(ParseError.NO_TOKEN, None, None, context)
    mismatched = value is not None and token.value != value
    if mismatched:
        raise ParseErrorException(ParseError.WRONG_TOKEN, token, value, context)
    return token


# Skip a note
# Consumes StartNote and every following token up to and including EndNote
# Raises ParseErrorException with FOUND_STARTNOTE if another StartNote shows
# up before the note ends; running out of tokens is reported by read_token
def skip_note(stream, parent_context):
    context = ParseContext(ParseTask.PARSE_NOTE, stream.peek(), parent_context)
    read_token(stream, "StartNote", context)
    current = read_token(stream, None, context)
    # Keep discarding tokens until the closing EndNote
    while current.value != "EndNote":
        # Don't allow StartNote in notes
        if current.value == "StartNote":
            raise ParseErrorException(
                ParseError.FOUND_STARTNOTE, current, None, context
            )
        current = read_token(stream, None, context)


# Clear notes
# Drains the stream and returns a list of its tokens with every note removed
# Raises ParseErrorException with FOUND_ENDNOTE when an EndNote appears
# outside of a note
def clear_notes(stream, parent_context):
    context = ParseContext(ParseTask.CLEAR_NOTES, stream.peek(), parent_context)
    kept = []
    while True:
        upcoming = stream.peek()
        # Nothing left in the stream, we're done
        if upcoming is None:
            return kept
        # EndNote found outside note
        if upcoming.value == "EndNote":
            raise ParseErrorException(
                ParseError.FOUND_ENDNOTE, upcoming, None, context
            )
        if upcoming.value == "StartNote":
            # Found a note, skip it
            skip_note(stream, context)
        else:
            # Keep the token if it's not note related
            kept.append(stream.pop())


# The recursive descent parser in a wrapper class for easy testing
# Every method takes the token stream and the caller's context, creates its
# own ParseContext from the next token in the stream, and raises
# ParseErrorException when the tokens don't match what it expects
class Parser:
    # Parses a text node
    # Expects StartText, any number of tokens, then EndText
    # Returns a Text whose value is the inner token values joined by single
    # spaces (an empty string when there are none)
    # Raises FOUND_STARTTEXT if StartText appears inside the text
    def parse_text(self, stream, parent_context):
        context = ParseContext(ParseTask.PARSE_TEXT, stream.peek(), parent_context)
        read_token(stream, "StartText", context)
        words = []
        # Parse following tokens
        while True:
            t = read_token(stream, None, context)
            # Don't allow StartText in text
            if t.value == "StartText":
                raise ParseErrorException(ParseError.FOUND_STARTTEXT, t, None, context)
            # EndText found, end things
            elif t.value == "EndText":
                break
            words.append(t.value)
        # Joining once avoids repeated concatenation and a trailing space
        return Text(" ".join(words))

    # Parses a boolean node
    # Returns Bool(True) or Bool(False) for the tokens True and False
    # Raises NOT_BOOL for any other token
    def parse_bool(self, stream, parent_context):
        context = ParseContext(ParseTask.PARSE_BOOL, stream.peek(), parent_context)
        t = read_token(stream, None, context)
        if t.value == "True":
            return Bool(True)
        elif t.value == "False":
            return Bool(False)
        else:
            raise ParseErrorException(ParseError.NOT_BOOL, t, None, context)

    # Parses a reference node
    # Returns a Reference named after the next token
    # Raises RESERVED_NAME if that token is one of reserved_names
    def parse_reference(self, stream, parent_context):
        context = ParseContext(ParseTask.PARSE_REFERENCE, stream.peek(), parent_context)
        t = read_token(stream, None, context)
        if t.value in reserved_names:
            raise ParseErrorException(ParseError.RESERVED_NAME, t, None, context)
        return Reference(t.value)

    # Parses a value
    # Peeks at the next token to choose between a bool, a text or a reference
    # and hands the stream to the matching parser
    # Raises NO_TOKEN on an empty stream and RESERVED_NAME for any reserved
    # word that doesn't start a literal
    def parse_value(self, stream, parent_context):
        context = ParseContext(ParseTask.PARSE_VALUE, stream.peek(), parent_context)
        t = stream.peek()
        if t is None:
            raise ParseErrorException(ParseError.NO_TOKEN, None, None, context)
        elif t.value in ["True", "False"]:
            return self.parse_bool(stream, context)
        elif t.value == "StartText":
            return self.parse_text(stream, context)
        elif t.value in reserved_names:
            raise ParseErrorException(ParseError.RESERVED_NAME, t, None, context)
        else:
            return self.parse_reference(stream, context)

    # Parses a statement until a specified terminator
    # A statement is a subject value, optionally followed by a verb and any
    # number of argument values, ended by the terminator token
    # Returns Statement(subject, None, []) when the terminator directly follows
    # the subject, otherwise Statement(subject, verb value, arguments)
    # The terminator is consumed in both cases
    # Raises FOUND_TERMINATOR if the terminator comes before any subject and
    # RESERVED_NAME if the verb is a reserved word
    def parse_statement(self, stream, parent_context, terminator):
        context = ParseContext(ParseTask.PARSE_STATEMENT, stream.peek(), parent_context)
        peeked_subject = stream.peek()
        context_subject = ParseContext(ParseTask.PARSE_SUBJECT, peeked_subject, context)
        if peeked_subject is not None and peeked_subject.value == terminator:
            raise ParseErrorException(
                ParseError.FOUND_TERMINATOR, peeked_subject, None, context_subject
            )
        subject = self.parse_value(stream, context_subject)
        context_verb = ParseContext(ParseTask.PARSE_VERB, stream.peek(), context)
        verb = read_token(stream, None, context_verb)
        # The terminator check comes first so a reserved terminator isn't
        # reported as a reserved verb
        if verb.value == terminator:
            return Statement(subject, None, [])
        elif verb.value in reserved_names:
            raise ParseErrorException(
                ParseError.RESERVED_NAME, verb, None, context_verb
            )
        arguments = []
        # Parse following arguments
        while True:
            peeked_arg = stream.peek()
            context_arg = ParseContext(ParseTask.PARSE_ARGUMENT, peeked_arg, context)
            if peeked_arg is not None and peeked_arg.value == terminator:
                stream.pop()
                return Statement(subject, verb.value, arguments)
            # An exhausted stream is reported by parse_value as NO_TOKEN
            arg = self.parse_value(stream, context_arg)  # pragma: no mutate
            arguments.append(arg)

    # Parses a set node
    # Expects Set, a subject name, To, then a statement terminated by EndSet
    # Returns Set(subject name, statement)
    # Raises RESERVED_NAME if the subject is a reserved word
    def parse_set(self, stream, parent_context):
        context = ParseContext(ParseTask.PARSE_SET, stream.peek(), parent_context)
        read_token(stream, "Set", context)
        subcontext = ParseContext(ParseTask.PARSE_SUBJECT, stream.peek(), context)
        subject = read_token(stream, None, subcontext)
        if subject.value in reserved_names:
            raise ParseErrorException(
                ParseError.RESERVED_NAME, subject, None, subcontext
            )
        read_token(stream, "To", context)
        statement = self.parse_statement(stream, context, "EndSet")
        return Set(subject.value, statement)

    # Parses a conditional node
    # Expects If followed by three statements terminated by Then, Else and
    # EndIf respectively: the test, the success branch and the failure branch
    # Returns Conditional(test, success, failure)
    def parse_conditional(self, stream, parent_context):
        context = ParseContext(
            ParseTask.PARSE_CONDITIONAL, stream.peek(), parent_context
        )
        read_token(stream, "If", context)
        test_context = ParseContext(ParseTask.PARSE_TEST, stream.peek(), context)
        test = self.parse_statement(stream, test_context, "Then")
        success_context = ParseContext(ParseTask.PARSE_SUCCESS, stream.peek(), context)
        success = self.parse_statement(stream, success_context, "Else")
        failure_context = ParseContext(ParseTask.PARSE_FAILURE, stream.peek(), context)
        failure = self.parse_statement(stream, failure_context, "EndIf")
        return Conditional(test, success, failure)

    # Parses a directive
    # Peeks at the next token: Set starts a set node, If starts a conditional,
    # anything else is parsed as a statement terminated by Done
    # Raises NO_TOKEN on an empty stream
    def parse_directive(self, stream, parent_context):
        context = ParseContext(ParseTask.PARSE_DIRECTIVE, stream.peek(), parent_context)
        t = stream.peek()
        if t is None:
            raise ParseErrorException(ParseError.NO_TOKEN, None, None, context)
        elif t.value == "Set":
            return self.parse_set(stream, context)
        elif t.value == "If":
            return self.parse_conditional(stream, context)
        else:
            return self.parse_statement(stream, context, "Done")


# Parses a file
# Keeps parsing directives until the stream has no tokens left and returns
# them as a list in the order they were read
def parse_file(stream, parent_context):
    context = ParseContext(ParseTask.PARSE_FILE, stream.peek(), parent_context)
    parsed = []
    while stream.peek() is not None:
        directive = Parser().parse_directive(stream, context)  # pragma: no mutate
        parsed.append(directive)
    return parsed


# Parses tokens
# Runs two passes: the first strips notes from the tokens, the second parses
# what remains into a list of directives
def parse(tokens, context):
    without_notes = clear_notes(TokenStream(tokens), context)
    return parse_file(TokenStream(without_notes), context)


# Formats a ParseContext
# Returns a "ParseContextAt" Message with the task and the token's file, line
# and offset when the context has a token, otherwise a "ParseContext" Message
# with the task alone
def format_context(context):
    task = Message(ParseTaskMessageIDs[context.task], [])
    token = context.token
    if not token:
        return Message("ParseContext", [task])
    where = token.location
    return Message("ParseContextAt", [task, where.file, where.line, where.offset])


# Formats a ParseErrorException
# The first message argument is the expected value when there is one,
# otherwise the Message for the error itself; the token's file, line and
# offset follow when the exception has a token
# The message identifier is chosen by which of the two are present
def format_exception(exception):
    has_expected = exception.expected is not None
    has_token = exception.token is not None
    error = Message(ParseErrorMessageIDs[exception.error], [])
    args = [exception.expected] if has_expected else [error]
    if has_token:
        where = exception.token.location
        args = args + [where.file, where.line, where.offset]
    # Keyed by (has_expected, has_token)
    message_ids = {
        (False, False): "ParserError",
        (False, True): "ParserErrorAt",
        (True, False): "ParserErrorExpected",
        (True, True): "ParserErrorExpectedAt",
    }
    return Message(message_ids[(has_expected, has_token)], args)


# Formats a ParseErrorException and its contexts
# Returns a list holding the formatted exception followed by one formatted
# entry per context, from the exception's own context out through each parent
def format_full_error(exception):
    # Yields a context and then each of its parents in turn
    def chain(context):
        while context is not None:
            yield context
            context = context.parent

    formatted = [format_exception(exception)]
    formatted.extend(format_context(link) for link in chain(exception.context))
    return formatted