# SPDX-License-Identifier: MIT # Copyright 2021 Jookia <contact@jookia.org> import log class Token: def __init__(self, type, value): self.type = type self.value = value def __repr__(self): return "Token(type %s, value '%s')" % (self.type, self.value) def tokenize(code): tokens = [] token = "" text = "" mode = "normal" # normal/note/text for symbol in code: if symbol == '\n': symbol_print = "new line" else: symbol_print = "symbol '%s'" % (symbol) log.log(log.LEXER, log.TRACE, "Read %s" % (symbol_print)) if symbol == " " or symbol == "\t" or symbol == "\n": log.log(log.LEXER, log.TRACE, "Read token '%s'" % (token)) if token == "": pass elif token == "BeginNote": log.log(log.LEXER, log.TRACE, "Switching to note mode") mode = "note" elif token == "EndNote": log.log(log.LEXER, log.TRACE, "Ending note mode") mode = "normal" elif token == "BeginText": log.log(log.LEXER, log.TRACE, "Switching to text mode") mode = "text" elif token == "EndText": log.log(log.LEXER, log.TRACE, "Ending text mode") content = text[1:-8] log.log(log.LEXER, log.DEBUG, "Appending text '%s'" % (content)) tokens.append(Token("text", content)) mode = "normal" text = "" elif token != "": if mode == "normal": keywords = ["NewLang", "Done", "Set", "To", "EndSet", "If", "Then", "Else", "EndIf"] if token in keywords: type = "keyword" token = token.lower() else: type = "symbol" tok = Token(type, token) log.log(log.LEXER, log.DEBUG, "Appending token %s" % (tok)) tokens.append(tok) else: log.log(log.LEXER, log.TRACE, "Skipping token '%s'" % (token)) token = "" else: token += symbol if mode == "text": text += symbol log.log(log.LEXER, log.TRACE, "Done lexing, added EOF") tokens.append(Token("EOF", None)) return tokens class Reference: def __init__(self, value): self.value = value def __repr__(self): return "Reference('%s')" % (self.value) class Text: def __init__(self, value): self.value = value def __repr__(self): return "Text('%s')" % (self.value) class Parser: def __init__(self, tokens): self.tokens = tokens self.pos = 0 def next(self): token = self.tokens[self.pos] if self.pos < (len(self.tokens) - 1): self.pos += 1 log.log(log.PARSER, log.TRACE, "Read %s" % (token)) return token def peek(self): token = self.tokens[self.pos] log.log(log.PARSER, log.TRACE, "Peeked %s" % (token)) return token def parse_version(self): token = self.next() if token.type != "keyword" or token.value != "newlang": log.log(log.PARSER, log.NORMAL, "Expected NewLang keyword") return None token = self.next() log.log(log.PARSER, log.DEBUG, "Parsed language version %s" % (token.value)) return token.value def parse_value(self, type, value): if type == "symbol": return Reference(value) elif type == "text": return Text(value) else: log.log(log.PARSER, log.NORMAL, "Unexpected type %s" % (type)) return None def parse_arguments(self, terminator): args = [] while True: token = self.next() if token.type == "keyword": if token.value == terminator: return args else: log.log(log.PARSER, log.NORMAL, "Unexpected keyword %s" % (token.value)) return None else: arg = self.parse_value(token.type, token.value) if not arg: log.log(log.PARSER, log.NORMAL, "While parsing argument") return None args.append(arg) def parse_statement(self, terminator): token = self.next() subject = self.parse_value(token.type, token.value) if not subject: log.log(log.PARSER, log.NORMAL, "While parsing subject") return None token = self.next() if token.type == "keyword": if token.value == terminator: verb = None else: log.log(log.PARSER, log.NORMAL, "Unexpected keyword %s" % (token.value)) return None elif token.type == "symbol": verb = token.value else: verb = token.value if verb: arguments = self.parse_arguments(terminator) if arguments is None: log.log(log.PARSER, log.NORMAL, "While parsing arguments") return None else: arguments = [] log.log(log.PARSER, log.DEBUG, "Parsed statement: subject %s verb %s args %s" % (subject, verb, arguments)) return ('statement', subject, verb, arguments) def parse_set(self): token = self.next() if token.type != "symbol": log.log(log.PARSER, log.NORMAL, "Expect symbol, got %s" % (token.type)) return None subject = token.value token = self.next() if token.type != "keyword" or token.value != "to": log.log(log.PARSER, log.NORMAL, "Expected To, got %s %s" % (token.type, token.value)) return None log.log(log.PARSER, log.TRACE, "Parsing set value...") ast = self.parse_statement("endset") if not ast: log.log(log.PARSER, log.NORMAL, "While parsing statement") return None log.log(log.PARSER, log.DEBUG, "Parsed set for %s" % (subject)) return ('set', subject, ast) def parse_if(self): log.log(log.PARSER, log.TRACE, "Parsing if test condition...") test = self.parse_statement("then") if not test: log.log(log.PARSER, log.NORMAL, "While parsing test condition") return None log.log(log.PARSER, log.TRACE, "Parsing if success statement...") success = self.parse_statement("else") if not success: log.log(log.PARSER, log.NORMAL, "While parsing success statement") return None log.log(log.PARSER, log.TRACE, "Parsing if failure statement...") failure = self.parse_statement("endif") if not failure: log.log(log.PARSER, log.NORMAL, "While parsing failure statement") return None log.log(log.PARSER, log.DEBUG, "Parsed conditional") return ('if', test, success, failure) def parse_directive(self): token = self.peek() if token.type != "keyword" and token.type != "symbol": log.log(log.PARSER, log.NORMAL, "Expected keyword or symbol here, got %s" % (token.type)) return None if token.type == "keyword": self.next() if token.value == "set": ast = self.parse_set() if not ast: log.log(log.PARSER, log.NORMAL, "While parsing set directive") return None return ast elif token.value == "if": ast = self.parse_if() if not ast: log.log(log.PARSER, log.NORMAL, "While parsing set directive") return None return ast else: log.log(log.PARSER, log.NORMAL, "Unexpected keyword %s" % (token.value)) return None else: ast = self.parse_statement("done") if not ast: log.log(log.PARSER, log.NORMAL, "While parsing statement") return None return ast def parse_file(self): log.log(log.PARSER, log.TRACE, "Parsing file...") ast = [] version = self.parse_version() if not version: log.log(log.PARSER, log.NORMAL, "While parsing version identifier at start of file") return None if version != "0": log.log(log.PARSER, log.NORMAL, "Invalid version identifier %s" % (version)) return None while self.peek().type != "EOF": directive = self.parse_directive() if directive == None: log.log(log.PARSER, log.NORMAL, "While parsing directive in file") return None else: ast.append(directive) log.log(log.PARSER, log.DEBUG, "Parsed file") return ast def parse_file(code): tokens = tokenize(code) parser = Parser(tokens) return parser.parse_file()