# SPDX-License-Identifier: LGPL-2.1-only
# Copyright 2022 Jookia <contact@jookia.org>

from src import tokenize
import enum


# The type of syntax
class SyntaxType(enum.Enum):
    TOKEN = enum.auto()  # pragma: no mutate
    TEXT = enum.auto()  # pragma: no mutate


# Represents a syntax node
class Syntax:
    def __init__(self, value, location, type):
        self.value = value
        self.location = location
        self.type = type

    def __repr__(self):
        return "Syntax(value %s, location %s, type %s)" % (  # pragma: no mutate
            repr(self.value),
            repr(self.location),
            str(self.type),
        )

    def __eq__(self, other):
        # Comparing against a non-Syntax value falls back to Python's
        # default handling instead of raising AttributeError
        if not isinstance(other, Syntax):
            return NotImplemented
        return (
            self.type == other.type
            and self.value == other.value
            and self.location == other.location
        )


# Represents a stream of consumable syntax nodes
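# Usage sketch: SyntaxStream([a, b]) returns a, then b, then None from
# every pop() once the nodes are exhausted.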
class SyntaxStream:
    def __init__(self, nodes):
        self.nodes = nodes

    def __repr__(self):
        return "SyntaxStream(%s)" % (self.nodes)  # pragma: no mutate

    def pop(self):
        if self.nodes:
            return self.nodes.pop(0)
        else:
            return None


# Converts tokens to syntax
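# Each token is wrapped in a TOKEN syntax node that reuses the token's
# own location.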
def import_tokens(tokens):
    output = []
    for t in tokens:
        output.append(Syntax(t, t.location, SyntaxType.TOKEN))
    return output


# Removes whitespace syntax tokens
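# For example, token nodes wrapping ["Hello", " ", "World"] come back
# as just the "Hello" and "World" nodes; the SPACE node is dropped.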
def strip_whitespace(syntax):
    output = []
    for s in syntax:
        if s.type != SyntaxType.TOKEN or s.value.type not in [
            tokenize.TokenType.SPACE,
            tokenize.TokenType.NEWLINE,
        ]:
            output.append(s)
    return output


# Parses a text syntax node
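# Returns a TEXT syntax node on success, or None if the stream does not
# open with a StartText keyword or the text is malformed or unterminated.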
def parse_text(stream):
    buffer = ""
    # Parse the starting token
    s = stream.pop()
    # Error if there's not a valid StartText token
    if s is None:
        return None
    elif s.type != SyntaxType.TOKEN:
        return None
    elif s.value.type != tokenize.TokenType.KEYWORD:
        return None
    elif s.value.value != "StartText":
        return None
    location = s.location
    # Parse following tokens
    while True:
        s = stream.pop()
        # Error if there's no EndText
        if s is None:
            return None
        # Error if any of the text isn't a token
        elif s.type != SyntaxType.TOKEN:
            return None
        # Don't allow StartText in text
        elif s.value.value == "StartText":
            return None
        # EndText found, end things
        elif s.value.type == tokenize.TokenType.KEYWORD and s.value.value == "EndText":
            break
        else:
            buffer += s.value.value
    value = buffer.strip("\n\t ")
    type = SyntaxType.TEXT
    return Syntax(value, location, type)


# Parses a list of tokens into whitespace-stripped syntax nodes
def parse(tokens):
    converted = import_tokens(tokens)
    stripped = strip_whitespace(converted)
    return stripped
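

# A minimal sketch of driving this module end to end. The real token
# type lives in the tokenize module rather than this file, so FakeToken
# below is a hypothetical stand-in exposing only the attributes used
# above (value, location, type); the "word" type tag is likewise
# illustration-only, since parse_text only compares token types against
# tokenize.TokenType.KEYWORD.
if __name__ == "__main__":
    from collections import namedtuple

    FakeToken = namedtuple("FakeToken", ["value", "location", "type"])
    tokens = [
        FakeToken("StartText", 0, tokenize.TokenType.KEYWORD),
        FakeToken("Hello", 1, "word"),
        FakeToken(" ", 2, tokenize.TokenType.SPACE),
        FakeToken("World", 3, "word"),
        FakeToken("EndText", 4, tokenize.TokenType.KEYWORD),
    ]
    stream = SyntaxStream(parse(tokens))
    # parse() strips the SPACE token, so the words join as "HelloWorld":
    # Syntax(value 'HelloWorld', location 0, type SyntaxType.TEXT)
    print(parse_text(stream))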