Newer
Older
NewLang / src / parse.py
# SPDX-License-Identifier: LGPL-2.1-only
# Copyright 2022 Jookia <contact@jookia.org>

import enum
from src.i18n import Message
from src.ast_types import Bool, Conditional, Reference, Set, Statement, Text
from src.token import TokenStream


# Words that can't be used as references
# This should include keywords and literals
# The parser raises ParseError.RESERVED_NAME when one of these shows up
# where a reference, a statement verb or the subject of a Set is expected
reserved_names = [
    # Terminator of a plain statement directive
    "Done",
    # Set directive keywords
    "Set",
    "To",
    "EndSet",
    # Conditional directive keywords
    "If",
    "Then",
    "Else",
    "EndIf",
    # Note delimiters
    "StartNote",
    "EndNote",
    # Text literal delimiters
    "StartText",
    "EndText",
    # Boolean literals
    "True",
    "False",
]


# Tasks that happen during parsing
# Every ParseContext is tagged with one of these, so a formatted error can
# list what was being parsed at each level of the context chain
class ParseTask(enum.Enum):
    # Not used by the parser in this file; presumably reserved for tests
    TEST_TASK = enum.auto()  # pragma: no mutate
    # Note handling (NoteSkipper)
    PARSE_NOTE = enum.auto()  # pragma: no mutate
    CLEAR_NOTES = enum.auto()  # pragma: no mutate
    # Values: text literals, booleans, references and the dispatcher over them
    PARSE_TEXT = enum.auto()  # pragma: no mutate
    PARSE_BOOL = enum.auto()  # pragma: no mutate
    PARSE_REFERENCE = enum.auto()  # pragma: no mutate
    PARSE_VALUE = enum.auto()  # pragma: no mutate
    # A statement and its parts
    # PARSE_SUBJECT is also used for the name being assigned in a Set
    PARSE_STATEMENT = enum.auto()  # pragma: no mutate
    PARSE_SUBJECT = enum.auto()  # pragma: no mutate
    PARSE_VERB = enum.auto()  # pragma: no mutate
    PARSE_ARGUMENT = enum.auto()  # pragma: no mutate
    # Set directive
    PARSE_SET = enum.auto()  # pragma: no mutate
    # Conditional directive and its test, success and failure statements
    PARSE_CONDITIONAL = enum.auto()  # pragma: no mutate
    PARSE_TEST = enum.auto()  # pragma: no mutate
    PARSE_SUCCESS = enum.auto()  # pragma: no mutate
    PARSE_FAILURE = enum.auto()  # pragma: no mutate
    # Top level: a single directive and the whole file
    PARSE_DIRECTIVE = enum.auto()  # pragma: no mutate
    PARSE_FILE = enum.auto()  # pragma: no mutate


# Context used for parse error exception
#
# task:   the ParseTask that was running
# token:  the token associated with the task; the parser passes the next
#         token in the stream when the task starts, which may be None
# parent: the ParseContext of the enclosing task, or None to end the chain
class ParseContext:
    def __init__(self, task, token, parent):
        self.task = task
        self.token = token
        self.parent = parent

    # Debug representation, the parent chain is printed recursively
    def __repr__(self):
        return (
            "ParseContext(task %s, token %s, parent\n  %s)"  # pragma: no mutate
            % (  # pragma: no mutate
                self.task,
                self.token,
                self.parent,
            )
        )

    # Two contexts are equal when task, token and the whole parent chain match
    def __eq__(self, other):
        # Returning NotImplemented for None and unrelated types lets Python
        # fall back to its default comparison (unequal) instead of this
        # method raising AttributeError on a missing attribute
        if not isinstance(other, ParseContext):
            return NotImplemented
        return (
            self.task == other.task
            and self.token == other.token
            and self.parent == other.parent
        )


# Errors that can happen when parsing
# Each member needs a matching entry in ParseErrorMessageIDs
class ParseError(enum.Enum):
    # Not raised by the parser in this file; presumably reserved for tests
    TEST_ERROR = enum.auto()  # pragma: no mutate
    # The stream ran out where a token was required
    NO_TOKEN = enum.auto()  # pragma: no mutate
    # A token was read but its value wasn't the expected one
    WRONG_TOKEN = enum.auto()  # pragma: no mutate
    # StartText found inside a text
    FOUND_STARTTEXT = enum.auto()  # pragma: no mutate
    # StartNote found inside a note
    FOUND_STARTNOTE = enum.auto()  # pragma: no mutate
    # A boolean was being parsed but the token was neither True nor False
    NOT_BOOL = enum.auto()  # pragma: no mutate
    # EndNote found outside a note
    FOUND_ENDNOTE = enum.auto()  # pragma: no mutate
    # A word from reserved_names was used as a reference, verb or Set subject
    RESERVED_NAME = enum.auto()  # pragma: no mutate
    # The statement terminator was found where the subject should be
    FOUND_TERMINATOR = enum.auto()  # pragma: no mutate


# Message identifiers for ParseErrors
# format_exception looks the error up here with [] and wraps the ID in a
# Message, so a ParseError without an entry raises KeyError when formatted
ParseErrorMessageIDs = {
    ParseError.TEST_ERROR: "ParseErrorTestError",
    ParseError.NO_TOKEN: "ParseErrorNoToken",
    ParseError.WRONG_TOKEN: "ParseErrorWrongToken",
    ParseError.FOUND_STARTTEXT: "ParseErrorFoundStartText",
    ParseError.FOUND_STARTNOTE: "ParseErrorFoundStartNote",
    ParseError.NOT_BOOL: "ParseErrorNotBool",
    ParseError.FOUND_ENDNOTE: "ParseErrorFoundEndNote",
    ParseError.RESERVED_NAME: "ParseErrorReservedName",
    ParseError.FOUND_TERMINATOR: "ParseErrorFoundTerminator",
}


# Exception thrown when a parse error is encountered
#
# error:    the ParseError kind
# token:    the offending token, or None when there is none to point at
# expected: the token value that was expected, or None when not applicable
# context:  the ParseContext that was active when the error was raised
#
# NOTE(review): this derives from BaseException, so "except Exception"
# handlers do not catch it. The base class is kept as-is because callers
# may rely on that; confirm whether Exception was intended.
class ParseErrorException(BaseException):
    def __init__(self, error, token, expected, context):
        self.error = error
        self.token = token
        self.expected = expected
        self.context = context

    # Debug representation listing all four fields
    def __repr__(self):
        return (
            "ParseErrorException(error %s, token %s, expected %s, context %s)"  # pragma: no mutate
            % (  # pragma: no mutate
                self.error,
                self.token,
                self.expected,
                self.context,
            )
        )

    # Two exceptions are equal when all four fields match
    def __eq__(self, other):
        # Returning NotImplemented for None and unrelated types lets Python
        # fall back to its default comparison (unequal) instead of this
        # method raising AttributeError on a missing attribute
        if not isinstance(other, ParseErrorException):
            return NotImplemented
        return (
            self.error == other.error
            and self.token == other.token
            and self.expected == other.expected
            and self.context == other.context
        )


# Pops the next token off the stream and returns it
# When value is not None the token's value has to match it
# Raises ParseErrorException with NO_TOKEN if the stream is empty, or with
# WRONG_TOKEN (carrying the expected value) if the values differ
def read_token(stream, value, context):
    token = stream.pop()
    if token is None:
        raise ParseErrorException(ParseError.NO_TOKEN, None, None, context)
    mismatch = value is not None and token.value != value
    if mismatch:
        raise ParseErrorException(ParseError.WRONG_TOKEN, token, value, context)
    return token


# The note skipper in a wrapper class for easy testing
class NoteSkipper:
    # Consumes one note from the stream: StartNote, any tokens, EndNote
    # Raises ParseErrorException if the stream doesn't begin with StartNote,
    # runs out before EndNote, or contains a nested StartNote
    def skip_note(self, stream, parent_context):
        context = ParseContext(ParseTask.PARSE_NOTE, stream.peek(), parent_context)
        read_token(stream, "StartNote", context)
        current = read_token(stream, None, context)
        # Keep discarding tokens until the note is closed
        while current.value != "EndNote":
            # Notes can't be nested
            if current.value == "StartNote":
                raise ParseErrorException(
                    ParseError.FOUND_STARTNOTE, current, None, context
                )
            current = read_token(stream, None, context)
        return None

    # Drains the stream and returns its tokens as a list with notes removed
    # Raises ParseErrorException if an EndNote appears outside of a note
    def clear_notes(self, stream, parent_context):
        context = ParseContext(ParseTask.CLEAR_NOTES, stream.peek(), parent_context)
        kept = []
        while True:
            upcoming = stream.peek()
            if upcoming is None:
                return kept
            if upcoming.value == "StartNote":
                # skip_note consumes the whole note, delimiters included
                self.skip_note(stream, context)
            elif upcoming.value == "EndNote":
                raise ParseErrorException(
                    ParseError.FOUND_ENDNOTE, upcoming, None, context
                )
            else:
                kept.append(stream.pop())


# The recursive descent parser in a wrapper class for easy testing
#
# Every method takes the token stream and the ParseContext of its caller,
# builds its own ParseContext from the next token in the stream, and raises
# ParseErrorException carrying that context when parsing fails
class Parser:
    # Parses a text node: StartText, any number of words, EndText
    # The words are joined with single spaces
    def parse_text(self, stream, parent_context):
        context = ParseContext(ParseTask.PARSE_TEXT, stream.peek(), parent_context)
        read_token(stream, "StartText", context)
        words = []
        # Parse following tokens
        while True:
            t = read_token(stream, None, context)
            # Don't allow StartText in text
            if t.value == "StartText":
                raise ParseErrorException(ParseError.FOUND_STARTTEXT, t, None, context)
            # EndText found, end things
            if t.value == "EndText":
                break
            words.append(t.value)
        return Text(" ".join(words))

    # Parses a boolean node, the token has to be True or False
    def parse_bool(self, stream, parent_context):
        context = ParseContext(ParseTask.PARSE_BOOL, stream.peek(), parent_context)
        t = read_token(stream, None, context)
        if t.value == "True":
            return Bool(True)
        if t.value == "False":
            return Bool(False)
        raise ParseErrorException(ParseError.NOT_BOOL, t, None, context)

    # Parses a reference node, any token that isn't a reserved name
    def parse_reference(self, stream, parent_context):
        context = ParseContext(ParseTask.PARSE_REFERENCE, stream.peek(), parent_context)
        t = read_token(stream, None, context)
        if t.value in reserved_names:
            raise ParseErrorException(ParseError.RESERVED_NAME, t, None, context)
        return Reference(t.value)

    # Parses a value: a bool, a text or a reference
    # Only peeks at the token to decide, the chosen parser consumes it
    def parse_value(self, stream, parent_context):
        context = ParseContext(ParseTask.PARSE_VALUE, stream.peek(), parent_context)
        t = stream.peek()
        if t is None:
            raise ParseErrorException(ParseError.NO_TOKEN, None, None, context)
        if t.value in ["True", "False"]:
            return self.parse_bool(stream, context)
        if t.value == "StartText":
            return self.parse_text(stream, context)
        # Any other reserved name can't start a value
        if t.value in reserved_names:
            raise ParseErrorException(ParseError.RESERVED_NAME, t, None, context)
        return self.parse_reference(stream, context)

    # Parses a statement until a specified terminator
    # A statement is a subject value, optionally followed by a verb and any
    # number of argument values; the terminator token is consumed
    def parse_statement(self, stream, parent_context, terminator):
        context = ParseContext(ParseTask.PARSE_STATEMENT, stream.peek(), parent_context)
        peeked_subject = stream.peek()
        context_subject = ParseContext(ParseTask.PARSE_SUBJECT, peeked_subject, context)
        # A statement needs at least a subject
        if peeked_subject is not None and peeked_subject.value == terminator:
            raise ParseErrorException(
                ParseError.FOUND_TERMINATOR, peeked_subject, None, context_subject
            )
        subject = self.parse_value(stream, context_subject)
        context_verb = ParseContext(ParseTask.PARSE_VERB, stream.peek(), context)
        verb = read_token(stream, None, context_verb)
        # Terminator right after the subject: no verb, no arguments
        if verb.value == terminator:
            return Statement(subject, None, [])
        if verb.value in reserved_names:
            raise ParseErrorException(
                ParseError.RESERVED_NAME, verb, None, context_verb
            )
        arguments = []
        # Parse following arguments
        while True:
            peeked_arg = stream.peek()
            context_arg = ParseContext(ParseTask.PARSE_ARGUMENT, peeked_arg, context)
            if peeked_arg is not None and peeked_arg.value == terminator:
                stream.pop()
                return Statement(subject, verb.value, arguments)
            # An exhausted stream is reported by parse_value as NO_TOKEN
            arg = self.parse_value(stream, context_arg)  # pragma: no mutate
            arguments.append(arg)

    # Parses a set node: Set <subject> To <statement> EndSet
    def parse_set(self, stream, parent_context):
        context = ParseContext(ParseTask.PARSE_SET, stream.peek(), parent_context)
        read_token(stream, "Set", context)
        subcontext = ParseContext(ParseTask.PARSE_SUBJECT, stream.peek(), context)
        subject = read_token(stream, None, subcontext)
        if subject.value in reserved_names:
            raise ParseErrorException(
                ParseError.RESERVED_NAME, subject, None, subcontext
            )
        read_token(stream, "To", context)
        statement = self.parse_statement(stream, context, "EndSet")
        return Set(subject.value, statement)

    # Parses a conditional node:
    # If <statement> Then <statement> Else <statement> EndIf
    def parse_conditional(self, stream, parent_context):
        context = ParseContext(
            ParseTask.PARSE_CONDITIONAL, stream.peek(), parent_context
        )
        read_token(stream, "If", context)
        test_context = ParseContext(ParseTask.PARSE_TEST, stream.peek(), context)
        test = self.parse_statement(stream, test_context, "Then")
        success_context = ParseContext(ParseTask.PARSE_SUCCESS, stream.peek(), context)
        success = self.parse_statement(stream, success_context, "Else")
        failure_context = ParseContext(ParseTask.PARSE_FAILURE, stream.peek(), context)
        failure = self.parse_statement(stream, failure_context, "EndIf")
        return Conditional(test, success, failure)

    # Parses a directive: a set, a conditional, or a statement ended by Done
    def parse_directive(self, stream, parent_context):
        context = ParseContext(ParseTask.PARSE_DIRECTIVE, stream.peek(), parent_context)
        t = stream.peek()
        if t is None:
            raise ParseErrorException(ParseError.NO_TOKEN, None, None, context)
        if t.value == "Set":
            return self.parse_set(stream, context)
        if t.value == "If":
            return self.parse_conditional(stream, context)
        return self.parse_statement(stream, context, "Done")

    # Parses a file: directives until the stream is empty, returned as a list
    def parse_file(self, stream, parent_context):
        context = ParseContext(ParseTask.PARSE_FILE, stream.peek(), parent_context)
        directives = []
        while stream.peek() is not None:
            directive = self.parse_directive(stream, context)  # pragma: no mutate
            directives.append(directive)
        return directives


# Parses tokens
# Runs two passes: notes are stripped from the tokens first, then the
# remaining tokens are parsed as a file. Returns what parse_file returns,
# a list of directives. context is passed as the parent context of both passes
def parse(tokens, context):
    raw_stream = TokenStream(tokens)
    note_free = NoteSkipper().clear_notes(raw_stream, context)
    clean_stream = TokenStream(note_free)
    return Parser().parse_file(clean_stream, context)


# Formats a ParseContext
# Returns a "ParseContextAt" Message with the task and the token's file,
# line and offset, or a "ParseContext" Message with only the task when the
# context's token is falsy (e.g. None)
def format_context(context):
    task = context.task
    token = context.token
    if not token:
        return Message("ParseContext", [task])
    where = token.location
    return Message("ParseContextAt", [task, where.file, where.line, where.offset])


# Formats a ParseErrorException
# The Message's first argument is the expected value if the exception has
# one, otherwise the Message describing the error kind. If the exception
# has a token, its file, line and offset are appended and the "...At"
# message ID is used
def format_exception(exception):
    expected = exception.expected
    token = exception.token
    # Looked up even when unused, so an unknown error kind always fails here
    error = Message(ParseErrorMessageIDs[exception.error], [])
    args = [error] if expected is None else [expected]
    if token is not None:
        where = token.location
        args = args + [where.file, where.line, where.offset]
    # Message ID keyed by (has expected value, has token)
    message_ids = {
        (False, False): "ParserError",
        (False, True): "ParserErrorAt",
        (True, False): "ParserErrorExpected",
        (True, True): "ParserErrorExpectedAt",
    }
    message_id = message_ids[(expected is not None, token is not None)]
    return Message(message_id, args)


# Formats a ParseErrorException and its contexts
# Returns a list: the formatted exception first, then one formatted entry
# per context, walking from the exception's context up through each parent
def format_full_error(exception):
    # Yields a context followed by each of its ancestors
    def chain(context):
        while context is not None:
            yield context
            context = context.parent

    report = [format_exception(exception)]
    report.extend(format_context(link) for link in chain(exception.context))
    return report