# SPDX-License-Identifier: MIT # Copyright 2021 Jookia <contact@jookia.org> import log class Token: def __init__(self, type, value): self.type = type self.value = value def __repr__(self): return "Token(type %s, value '%s')" % (self.type, self.value) def is_whitespace(symbol): return symbol == " " or symbol == "\t" or symbol == "\n" class Tokenizer: def __init__(self, input): self.code = input self.pos = 0 def next(self): if self.pos >= len(self.code): log.log(log.LEXER, log.LEXER, "Reached end of file") return None else: symbol = self.code[self.pos] symbol_print = "character '%s'" % (symbol) if symbol == '\n': symbol_print = "new line" log.log(log.LEXER, log.LEXER, "Read %s" % (symbol_print)) self.pos += 1 return symbol def read_token(self): token = "" symbol = self.next() if not symbol: log.log(log.LEXER, log.LEXER, "No token to read") return None while not is_whitespace(symbol): token += symbol symbol = self.next() log.log(log.LEXER, log.LEXER, "Read token '%s'" % (token)) return token def tokenize(self): tokens = [] text = "" mode = "normal" # normal/note/text token = self.read_token() while token != None: if token == "": pass elif token == "BeginNote": log.log(log.LEXER, log.TRACE, "Switching to note mode") mode = "note" elif token == "EndNote": log.log(log.LEXER, log.TRACE, "Ending note mode") mode = "normal" elif token == "BeginText": log.log(log.LEXER, log.TRACE, "Switching to text mode") mode = "text" elif token == "EndText": log.log(log.LEXER, log.TRACE, "Ending text mode") content = text[10:-1] log.log(log.LEXER, log.DEBUG, "Appending text '%s'" % (content)) tokens.append(Token("text", content)) mode = "normal" text = "" elif token != "": if mode == "normal": keywords = ["NewLang", "Done", "Set", "To", "EndSet", "If", "Then", "Else", "EndIf"] if token in keywords: type = "keyword" token = token.lower() else: type = "symbol" tok = Token(type, token) log.log(log.LEXER, log.DEBUG, "Appending %s" % (tok)) tokens.append(tok) else: log.log(log.LEXER, log.TRACE, "Skipping token '%s'" % (token)) if mode == "text": text += token + " " token = self.read_token() log.log(log.LEXER, log.TRACE, "Done lexing, adding EOF") tokens.append(Token("EOF", None)) return tokens class Reference: def __init__(self, value): self.value = value def __repr__(self): return "Reference('%s')" % (self.value) class Text: def __init__(self, value): self.value = value def __repr__(self): return "Text('%s')" % (self.value) class Statement: def __init__(self, subject, verb, arguments): self.subject = subject self.verb = verb self.arguments = arguments def __repr__(self): return "Statement(subject %s, verb '%s', arguments %s)" % (self.subject, self.verb, self.arguments) class Set: def __init__(self, subject, statement): self.subject = subject self.statement = statement def __repr__(self): return "Set(subject %s, statement %s)" % (self.subject, self.statement) class Conditional: def __init__(self, test, success, failure): self.test = test self.success = success self.failure = failure def __repr__(self): return "Conditional(test %s, success %s, failure %s)" % (self.test, self.success, self.failure) class ParseContext: def __init__(self, parent, context): self.parent = parent self.context = context def __repr__(self): return "ParseContext(parent %s, context '%s')" % (self.parent, self.context) class ParseError(BaseException): def __init__(self, context, error): self.context = context self.error = error def __repr__(self): return "ParseError(context %s, error '%s')" % (self.context, self.error) class Parser: def __init__(self, tokens): self.tokens = tokens self.pos = 0 def next(self): token = self.tokens[self.pos] if self.pos < (len(self.tokens) - 1): self.pos += 1 log.log(log.PARSER, log.TRACE, "Read %s" % (token)) return token def peek(self): token = self.tokens[self.pos] log.log(log.PARSER, log.TRACE, "Peeked %s" % (token)) return token def parse_version(self, context): log.log(log.PARSER, log.TRACE, "Parsing version identifier...") context = ParseContext(context, "parsing version identifier") token = self.next() if token.type != "keyword" or token.value != "newlang": raise ParseError(context, "Expected NewLang keyword") token = self.next() version = token.value if version != "0": raise ParseError(context, "Invalid version %s" % (version)) log.log(log.PARSER, log.DEBUG, "Parsed version %s" % (version)) return version def parse_value(self, context, subject, type, value): log.log(log.PARSER, log.TRACE, "Parsing value...") context = ParseContext(context, "parsing %s value" % (subject)) if type == "symbol": ret = Reference(value) elif type == "text": ret = Text(value) else: raise ParseError(context, "Unexpected value type '%s'" % (type)) log.log(log.PARSER, log.TRACE, "Parsed value, AST is %s" % (ret)) return ret def parse_arguments(self, context, terminator): log.log(log.PARSER, log.TRACE, "Parsing arguments until '%s'..." % (terminator)) context = ParseContext(context, "parsing arguments until '%s'" % (terminator)) args = [] while True: log.log(log.PARSER, log.TRACE, "Parsing next argument...") token = self.next() if token.type == "keyword": if token.value == terminator: log.log(log.PARSER, log.TRACE, "Parsed arguments, AST is %s" % (args)) return args else: raise ParseError(context, "Unexpected keyword '%s' in arguments" % (token.value)) else: arg = self.parse_value(context, "argument", token.type, token.value) log.log(log.PARSER, log.TRACE, "Parsed argument %s" % (arg)) args.append(arg) def parse_statement(self, context, terminator, type): log.log(log.PARSER, log.TRACE, "Parsing %s statement until '%s'..." % (type, terminator)) context = ParseContext(context, "parsing %s statement" % (type)) log.log(log.PARSER, log.TRACE, "Parsing statement subject...") token = self.next() subject = self.parse_value(context, "subject", token.type, token.value) log.log(log.PARSER, log.TRACE, "Parsing statement verb...") token = self.next() if token.type == "keyword": if token.value == terminator: verb = None else: raise ParseError(context, "Unexpected keyword '%s' in statement" % (token.value)) elif token.type == "symbol": verb = token.value else: verb = token.value log.log(log.PARSER, log.TRACE, "Parsing statement arguments...") if verb: arguments = self.parse_arguments(context, terminator) else: arguments = [] statement = Statement(subject, verb, arguments) log.log(log.PARSER, log.DEBUG, "Parsed statement, AST is %s" % (statement)) return statement def parse_set(self, context): log.log(log.PARSER, log.TRACE, "Parsing set subject...") meta_context = ParseContext(context, "parsing set directive") context = ParseContext(meta_context, "parsing subject") token = self.next() if token.type != "symbol": raise ParseError(context, "Expected symbol, got %s" % (token.type)) subject = token.value token = self.next() if token.type != "keyword" or token.value != "to": raise ParseError(meta_context, "Expected To, got %s" % (token)) log.log(log.PARSER, log.TRACE, "Parsing set value...") ast = self.parse_statement(meta_context, "endset", "set value") set = Set(subject, ast) log.log(log.PARSER, log.DEBUG, "Parsed set, AST is %s" % (set)) return set def parse_if(self, context): log.log(log.PARSER, log.TRACE, "Parsing if test condition...") context = ParseContext(context, "parsing if directive") test = self.parse_statement(context, "then", "test condition") log.log(log.PARSER, log.TRACE, "Parsing if success statement...") success = self.parse_statement(context, "else", "success") log.log(log.PARSER, log.TRACE, "Parsing if failure statement...") failure = self.parse_statement(context, "endif", "failure") conditional = Conditional(test, success, failure) log.log(log.PARSER, log.DEBUG, "Parsed if, AST is %s" % (conditional)) return conditional def parse_directive(self, context): token = self.peek() if token.type != "keyword" and token.type != "symbol": raise ParseError(context, "Expected keyword or symbol, got %s" % (token.type)) if token.type == "keyword": self.next() if token.value == "set": return self.parse_set(context) elif token.value == "if": return self.parse_if(context) else: raise ParseError(context, "Unexpected keyword %s" % (token.value)) else: ast = self.parse_statement(context, "done", "command") return ast def parse_file(self): log.log(log.PARSER, log.TRACE, "Parsing file...") context = ParseContext(None, "parsing file") ast = [] version = self.parse_version(context) while self.peek().type != "EOF": log.log(log.PARSER, log.TRACE, "Parsing next directive in file...") ast.append(self.parse_directive(context)) log.log(log.PARSER, log.DEBUG, "Parsed file, AST is %s" % (ast)) return ast def parse_file(code): tokenizer = Tokenizer(code) tokens = tokenizer.tokenize() try: parser = Parser(tokens) return parser.parse_file() except ParseError as e: print("Parse error: %s" % (e.error)) context = e.context while context: print("While %s " % (context.context)) context = context.parent return None