# SPDX-License-Identifier: LGPL-2.1-only
# Copyright 2022 Jookia <contact@jookia.org>

from src import tokenize
import enum


# The type of a syntax node
class SyntaxType(enum.Enum):
    TOKEN = enum.auto()  # pragma: no mutate
    TEXT = enum.auto()  # pragma: no mutate


# Represents a syntax node
class Syntax:
    def __init__(self, value, location, type):
        # value: the node payload — a token for TOKEN nodes, a string for TEXT
        self.value = value
        # location: source location carried over from the originating token
        self.location = location
        # type: a SyntaxType discriminant
        self.type = type

    def __repr__(self):
        return "Syntax(value %s, location %s, type %s)" % (  # pragma: no mutate
            repr(self.value),
            repr(self.location),
            str(self.type),
        )

    def __eq__(self, other):
        # Fix: comparing against a non-Syntax object (e.g. None) used to
        # raise AttributeError; returning NotImplemented lets Python fall
        # back to identity comparison, so `syntax == None` is simply False.
        if not isinstance(other, Syntax):
            return NotImplemented
        return (
            self.value == other.value
            and self.location == other.location
            and self.type == other.type
        )


# Represents a stream of consumable syntax nodes
class SyntaxStream:
    def __init__(self, nodes):
        self.nodes = nodes

    def __repr__(self):
        return "SyntaxStream(%s)" % (self.nodes)  # pragma: no mutate

    def pop(self):
        # Consume and return the next node, or None when exhausted.
        # NOTE(review): list.pop(0) is O(n) per call; acceptable for the
        # small node lists this parser handles.
        if self.nodes:
            return self.nodes.pop(0)
        else:
            return None


# Converts tokens to syntax nodes
def import_tokens(tokens):
    # Each token is wrapped in a TOKEN syntax node that keeps the
    # token's own source location.
    output = []
    for t in tokens:
        output.append(Syntax(t, t.location, SyntaxType.TOKEN))
    return output


# Removes whitespace syntax tokens
def strip_whitespace(syntax):
    # Keeps every node that is not a SPACE or NEWLINE token; non-TOKEN
    # nodes always pass through untouched.
    output = []
    for s in syntax:
        if s.type != SyntaxType.TOKEN or s.value.type not in [
            tokenize.TokenType.SPACE,
            tokenize.TokenType.NEWLINE,
        ]:
            output.append(s)
    return output


# Parses a text syntax node
def parse_text(stream):
    """Parse a StartText ... EndText run from the stream into one TEXT node.

    Returns a Syntax of type TEXT whose value is the concatenation of the
    enclosed token values, stripped of leading/trailing newlines, tabs and
    spaces, and whose location is that of the StartText keyword.
    Returns None on any malformed input: missing or wrong opening keyword,
    a nested StartText, a non-token node, or an unterminated text (stream
    ends before EndText).
    """
    buffer = ""
    # Parse the starting tokens
    s = stream.pop()
    if s is None:
        return None
    elif s.type != SyntaxType.TOKEN:
        return None
    elif s.value.type != tokenize.TokenType.KEYWORD:
        return None
    elif s.value.value != "StartText":
        return None
    location = s.location
    # Parse following tokens
    while True:
        s = stream.pop()
        if s is None:
            return None
        elif s.type != SyntaxType.TOKEN:
            return None
        # Don't allow StartText in text
        elif s.value.value in ["StartText"]:
            return None
        # EndText found, end things
        elif s.value.type == tokenize.TokenType.KEYWORD and s.value.value == "EndText":
            break
        else:
            buffer += s.value.value
    value = buffer.strip("\n\t ")
    type = SyntaxType.TEXT
    return Syntax(value, location, type)


# Parses tokens
def parse(tokens):
    # NOTE(review): parse_text is not invoked here; parsing currently
    # stops after whitespace stripping — presumably wired up by callers
    # or a later stage.
    converted = import_tokens(tokens)
    stripped = strip_whitespace(converted)
    return stripped