NewLang/src/parse.py at 7b22b1fcdacd7081ac86e0d6b7abb85275993cc2

Fork: 0
LuminaSensum / NewLang
Find file
Newer
Older
NewLang / src / parse.py
Jookia on 30 Jun 2022 8 KB parse: Add newline to make printing ParseContexts easier
Raw Blame History
# SPDX-License-Identifier: LGPL-2.1-only
# Copyright 2022 Jookia <contact@jookia.org>

import enum
from src.ast_types import Bool, Reference, Statement, Text
from src.token import TokenStream


# Names that are off-limits as references
# Every keyword and literal of the language belongs in here
reserved_names = (
    ["Done"]
    + ["Set", "To", "EndSet"]
    + ["If", "Then", "Else", "EndIf"]
    # Note delimiters, handled by NoteSkipper
    + ["StartNote", "EndNote"]
    # Text delimiters, handled by Parser.parse_text
    + ["StartText", "EndText"]
    # Boolean literals, handled by Parser.parse_bool
    + ["True", "False"]
)


# Tasks that happen during parsing
# Each ParseContext stores one of these as its task, so the context chain
# attached to a ParseErrorException shows which steps were in progress.
class ParseTask(enum.Enum):
    # Not used in this file; presumably reserved for tests (confirm)
    TEST_TASK = enum.auto()  # pragma: no mutate
    # Skipping a single note (NoteSkipper.skip_note)
    PARSE_NOTE = enum.auto()  # pragma: no mutate
    # Removing all notes from a stream (NoteSkipper.clear_notes)
    CLEAR_NOTES = enum.auto()  # pragma: no mutate
    # Parsing a text node (Parser.parse_text)
    PARSE_TEXT = enum.auto()  # pragma: no mutate
    # Parsing a boolean node (Parser.parse_bool)
    PARSE_BOOL = enum.auto()  # pragma: no mutate
    # Parsing a reference node (Parser.parse_reference)
    PARSE_REFERENCE = enum.auto()  # pragma: no mutate
    # Parsing any value: bool, text or reference (Parser.parse_value)
    PARSE_VALUE = enum.auto()  # pragma: no mutate
    # Parsing a whole statement (Parser.parse_statement)
    PARSE_STATEMENT = enum.auto()  # pragma: no mutate
    # Parsing the subject of a statement
    PARSE_SUBJECT = enum.auto()  # pragma: no mutate
    # Parsing the verb of a statement
    PARSE_VERB = enum.auto()  # pragma: no mutate
    # Parsing one argument of a statement
    PARSE_ARGUMENT = enum.auto()  # pragma: no mutate


# Context used for parse error exception
# Records which task was running, the token that task started at and the
# context of the enclosing task, so contexts form a chain via parent.
class ParseContext:
    # task: what was being parsed (a ParseTask in this file)
    # token: the token peeked when the task began, None if there was none
    # parent: the context of the calling task, as supplied by the caller
    def __init__(self, task, token, parent):
        self.task = task
        self.token = token
        self.parent = parent

    # The parent is printed on its own line so nested contexts stay readable
    def __repr__(self):
        return (
            "ParseContext(task %s, token %s, parent\n  %s)"  # pragma: no mutate
            % (  # pragma: no mutate
                self.task,
                self.token,
                self.parent,
            )
        )

    # Contexts are equal when task, token and the whole parent chain match
    def __eq__(self, other):
        # Let Python fall back to its default for foreign types.
        # Without this, comparing chains of different depth ends up comparing
        # a context with None and raising AttributeError instead of
        # reporting inequality.
        if not isinstance(other, ParseContext):
            return NotImplemented
        return (
            self.task == other.task
            and self.token == other.token
            and self.parent == other.parent
        )


# Errors that can happen when parsing
# One of these is stored as the error of every ParseErrorException.
class ParseError(enum.Enum):
    # Not raised in this file; presumably reserved for tests (confirm)
    TEST_ERROR = enum.auto()  # pragma: no mutate
    # The stream ran out of tokens when one was required
    NO_TOKEN = enum.auto()  # pragma: no mutate
    # read_token found a token with a different value than expected
    WRONG_TOKEN = enum.auto()  # pragma: no mutate
    # StartText appeared inside a text node
    FOUND_STARTTEXT = enum.auto()  # pragma: no mutate
    # StartNote appeared inside a note
    FOUND_STARTNOTE = enum.auto()  # pragma: no mutate
    # parse_bool read something other than True or False
    NOT_BOOL = enum.auto()  # pragma: no mutate
    # EndNote appeared outside of any note
    FOUND_ENDNOTE = enum.auto()  # pragma: no mutate
    # A name from reserved_names was used as a reference or verb
    RESERVED_NAME = enum.auto()  # pragma: no mutate
    # A statement's terminator appeared where its subject should be
    FOUND_TERMINATOR = enum.auto()  # pragma: no mutate


# Exception thrown when a parse error is encountered
# NOTE(review): this derives from BaseException, so handlers written as
# "except Exception" will not catch it. Kept as-is for compatibility;
# confirm whether deriving from Exception was intended.
class ParseErrorException(BaseException):
    # error: what went wrong (a ParseError in this file)
    # token: the offending token, None when the stream ran out
    # expected: the token value that was required, None if not applicable
    # context: the ParseContext of the task that failed
    def __init__(self, error, token, expected, context):
        self.error = error
        self.token = token
        self.expected = expected
        self.context = context

    def __repr__(self):
        return (
            "ParseErrorException(error %s, token %s, expected %s, context %s)"  # pragma: no mutate
            % (  # pragma: no mutate
                self.error,
                self.token,
                self.expected,
                self.context,
            )
        )

    # Exceptions are equal when all four fields match
    def __eq__(self, other):
        # Comparing against None or any unrelated object must report
        # inequality rather than fail with AttributeError on a missing field
        if not isinstance(other, ParseErrorException):
            return NotImplemented
        return (
            self.error == other.error
            and self.token == other.token
            and self.expected == other.expected
            and self.context == other.context
        )


# Pops the next token off the stream and returns it
# Raises NO_TOKEN if the stream is empty
# Raises WRONG_TOKEN if a value is given and the token's value differs
# Passing None as the value accepts any token
def read_token(stream, value, context):
    token = stream.pop()
    if token is None:
        raise ParseErrorException(ParseError.NO_TOKEN, None, None, context)
    mismatch = value is not None and token.value != value
    if mismatch:
        raise ParseErrorException(ParseError.WRONG_TOKEN, token, value, context)
    return token


# The note skipper in a wrapper class for easy testing
# Notes run from StartNote to EndNote and are dropped before parsing
class NoteSkipper:
    # Consumes one note from the stream, including both delimiters
    # Raises FOUND_STARTNOTE if another StartNote appears inside the note
    # Errors from read_token (NO_TOKEN, WRONG_TOKEN) pass through unchanged
    def skip_note(self, stream, parent_context):
        note_context = ParseContext(
            ParseTask.PARSE_NOTE, stream.peek(), parent_context
        )
        read_token(stream, "StartNote", note_context)
        while True:
            current = read_token(stream, None, note_context)
            # The closing delimiter finishes the note
            if current.value == "EndNote":
                return None
            # Notes can't be nested
            if current.value == "StartNote":
                raise ParseErrorException(
                    ParseError.FOUND_STARTNOTE, current, None, note_context
                )

    # Drains the stream and returns its tokens with all notes removed
    # Raises FOUND_ENDNOTE if an EndNote appears outside of a note
    def clear_notes(self, stream, parent_context):
        clear_context = ParseContext(
            ParseTask.CLEAR_NOTES, stream.peek(), parent_context
        )
        kept = []
        while True:
            upcoming = stream.peek()
            # Nothing left to look at, we're done
            if upcoming is None:
                return kept
            # A stray EndNote has no note to close
            if upcoming.value == "EndNote":
                raise ParseErrorException(
                    ParseError.FOUND_ENDNOTE, upcoming, None, clear_context
                )
            if upcoming.value == "StartNote":
                # Let skip_note consume the whole note
                self.skip_note(stream, clear_context)
            else:
                # Ordinary token, keep it
                kept.append(stream.pop())


# The recursive descent parser in a wrapper class for easy testing
# Each method takes the token stream and the context of the calling task,
# consumes the tokens of one construct and returns the matching AST node
# Failures are reported by raising ParseErrorException
class Parser:
    # Parses a text node: StartText, any number of tokens, then EndText
    # The token values in between are joined by single spaces
    def parse_text(self, stream, parent_context):
        text_context = ParseContext(
            ParseTask.PARSE_TEXT, stream.peek(), parent_context
        )
        read_token(stream, "StartText", text_context)
        words = []
        while True:
            token = read_token(stream, None, text_context)
            # The closing delimiter finishes the text
            if token.value == "EndText":
                return Text(" ".join(words))
            # Text can't be nested
            if token.value == "StartText":
                raise ParseErrorException(
                    ParseError.FOUND_STARTTEXT, token, None, text_context
                )
            words.append(token.value)

    # Parses a boolean node from a single True or False token
    # Raises NOT_BOOL for any other token
    def parse_bool(self, stream, parent_context):
        bool_context = ParseContext(
            ParseTask.PARSE_BOOL, stream.peek(), parent_context
        )
        token = read_token(stream, None, bool_context)
        if token.value == "True":
            return Bool(True)
        if token.value == "False":
            return Bool(False)
        raise ParseErrorException(ParseError.NOT_BOOL, token, None, bool_context)

    # Parses a reference node from a single token
    # Raises RESERVED_NAME if the token is listed in reserved_names
    def parse_reference(self, stream, parent_context):
        reference_context = ParseContext(
            ParseTask.PARSE_REFERENCE, stream.peek(), parent_context
        )
        token = read_token(stream, None, reference_context)
        if token.value not in reserved_names:
            return Reference(token.value)
        raise ParseErrorException(
            ParseError.RESERVED_NAME, token, None, reference_context
        )

    # Parses a value by looking at the next token and picking a parser
    # The token is only peeked here, the chosen parser consumes it
    def parse_value(self, stream, parent_context):
        value_context = ParseContext(
            ParseTask.PARSE_VALUE, stream.peek(), parent_context
        )
        upcoming = stream.peek()
        if upcoming is None:
            raise ParseErrorException(ParseError.NO_TOKEN, None, None, value_context)
        if upcoming.value in ("True", "False"):
            return self.parse_bool(stream, value_context)
        if upcoming.value == "StartText":
            return self.parse_text(stream, value_context)
        # Any other reserved word can't start a value
        if upcoming.value in reserved_names:
            raise ParseErrorException(
                ParseError.RESERVED_NAME, upcoming, None, value_context
            )
        return self.parse_reference(stream, value_context)

    # Parses a statement until a specified terminator
    # A statement is a subject, optionally followed by a verb and arguments
    # The terminator is consumed but isn't part of the returned Statement
    def parse_statement(self, stream, parent_context, terminator):
        # True if the token exists and is the terminator
        def is_terminator(token):
            return token is not None and token.value == terminator

        statement_context = ParseContext(
            ParseTask.PARSE_STATEMENT, stream.peek(), parent_context
        )

        # The subject is required, so a terminator here is an error
        first = stream.peek()
        subject_context = ParseContext(
            ParseTask.PARSE_SUBJECT, first, statement_context
        )
        if is_terminator(first):
            raise ParseErrorException(
                ParseError.FOUND_TERMINATOR, first, None, subject_context
            )
        subject = self.parse_value(stream, subject_context)

        # A terminator in place of the verb ends a subject-only statement
        verb_context = ParseContext(
            ParseTask.PARSE_VERB, stream.peek(), statement_context
        )
        verb = read_token(stream, None, verb_context)
        if verb.value == terminator:
            return Statement(subject, None, [])
        if verb.value in reserved_names:
            raise ParseErrorException(
                ParseError.RESERVED_NAME, verb, None, verb_context
            )

        # Everything up to the terminator is an argument
        arguments = []
        while True:
            upcoming = stream.peek()
            argument_context = ParseContext(
                ParseTask.PARSE_ARGUMENT, upcoming, statement_context
            )
            if is_terminator(upcoming):
                # Consume the terminator before finishing
                stream.pop()
                break
            arguments.append(self.parse_value(stream, argument_context))
        return Statement(subject, verb.value, arguments)


# Parses tokens
# Wraps the tokens in a TokenStream and returns them with notes removed
# The Parser class is not invoked here, only NoteSkipper.clear_notes
def parse(tokens, context):
    return NoteSkipper().clear_notes(TokenStream(tokens), context)