# NewLang / src / parse.py
# SPDX-License-Identifier: LGPL-2.1-only
# Copyright 2022 Jookia <contact@jookia.org>

from src import tokenize


# Represents a stream of consumable syntax nodes
# Represents a stream of consumable syntax nodes
class SyntaxStream:
    def __init__(self, nodes):
        # Nodes are consumed front-to-back by pop()
        self.nodes = nodes

    def __repr__(self):
        return "SyntaxStream(%s)" % (self.nodes)  # pragma: no mutate

    def pop(self):
        # Consume and return the next node, or None when the stream is empty
        if not self.nodes:
            return None
        return self.nodes.pop(0)


# Removes whitespace syntax tokens
# Removes whitespace syntax tokens
def strip_whitespace(syntax):
    # A node is whitespace when it is a TOKEN whose value is a newline or space
    def is_whitespace(node):
        return node.type == tokenize.SyntaxType.TOKEN and node.value in ["\n", " "]

    return [node for node in syntax if not is_whitespace(node)]


# Parses a text syntax node
# Parses a text syntax node
def parse_text(stream):
    # Consume the opening token; it must be a StartText TOKEN
    start = stream.pop()
    if start is None:
        return None
    if start.type != tokenize.SyntaxType.TOKEN:
        return None
    if start.value != "StartText":
        return None
    # The text node inherits the location of its StartText token
    location = start.location
    # Accumulate token values until an EndText token closes the text
    pieces = []
    while True:
        token = stream.pop()
        # Stream ended before EndText was found
        if token is None:
            return None
        # Only tokens may appear inside text
        if token.type != tokenize.SyntaxType.TOKEN:
            return None
        # Nested StartText is not permitted
        if token.value in ["StartText"]:
            return None
        # EndText terminates the text
        if token.value == "EndText":
            break
        pieces.append(token.value)
    # Trim surrounding whitespace from the collected text
    text = "".join(pieces).strip("\n\t ")
    return tokenize.Syntax(text, location, tokenize.SyntaxType.TEXT)


# Parses tokens
# Parses tokens
def parse(tokens):
    # For now, parsing only discards whitespace tokens
    return strip_whitespace(tokens)