# SPDX-License-Identifier: LGPL-2.1-or-later
# Copyright 2021 Jookia <contact@jookia.org>
from src import log
from src import ast_types


class ParseLocation:
    """A source position (line, column, filename) used for error reporting."""

    def __init__(self, line, column, file):
        self.line = line
        self.column = column
        self.file = file

    def __repr__(self):
        return "ParseLocation(line %i, column %i, file '%s')" % (
            self.line,
            self.column,
            self.file,
        )


class ParseContext:
    """One link in a chain of "while doing X at location Y" entries.

    `parent` points at the enclosing context (or None at the outermost
    level), so errors can print the full chain of what was being parsed.
    """

    def __init__(self, parent, context, location):
        self.parent = parent
        self.context = context
        self.location = location

    def __repr__(self):
        return "ParseContext(parent %s, context '%s', location %s)" % (
            self.parent,
            self.context,
            self.location,
        )


# Derive from Exception rather than BaseException: BaseException is reserved
# for exit-style exceptions (SystemExit, KeyboardInterrupt) and would let
# ParseError slip past generic `except Exception` handlers.
class ParseError(Exception):
    """Raised by the tokenizer/parser; carries a ParseContext chain."""

    def __init__(self, context, error):
        self.context = context
        self.error = error

    def __repr__(self):
        return "ParseError(context %s, error '%s')" % (self.context, self.error)


class Token:
    """A lexed token: a type tag, a value, and its source location."""

    def __init__(self, type, value, location):
        self.type = type
        self.value = value
        self.location = location

    def __repr__(self):
        return "Token(type %s, value '%s', location %s)" % (
            self.type,
            self.value,
            self.location,
        )


class Word:
    """A whitespace-delimited word plus its absolute position and line/column."""

    def __init__(self, value, position, line, column):
        self.value = value
        self.position = position
        self.line = line
        self.column = column

    def __repr__(self):
        return "Word(Value '%s', position %i, line %i, column %i)" % (
            self.value,
            self.position,
            self.line,
            self.column,
        )


def is_whitespace(symbol):
    """Return True if symbol is a space, tab or newline."""
    return symbol == " " or symbol == "\t" or symbol == "\n"


class Tokenizer:
    """Turns source text into a list of Tokens (see tokenize())."""

    def __init__(self, input, filename):
        self.code = input
        # pos is the index of the last character read; -1 means nothing read yet.
        self.pos = -1
        self.pos_line = 1
        self.pos_column = 0
        self.filename = filename

    def next(self):
        """Read and return the next character, or None at end of file.

        Keeps pos_line/pos_column in sync with the character just read.
        """
        if self.pos == len(self.code) - 1:
            log.log(log.LEXER, log.TRACE, "Reached end of file")
            return None
        else:
            self.pos += 1
            symbol = self.code[self.pos]
            if symbol == "\n":
                self.pos_line += 1
                self.pos_column = 0
            else:
                self.pos_column += 1
            pos_string = "line %i column %i" % (self.pos_line, self.pos_column)
            symbol_string = symbol
            if is_whitespace(symbol):
                # repr() makes whitespace visible in trace output.
                symbol_string = repr(symbol)
            log.log(
                log.LEXER,
                log.TRACE,
                "Read character %s at %s" % (symbol_string, pos_string),
            )
            return symbol

    def read_word(self):
        """Skip whitespace and read one word; return a Word or None at EOF."""
        value = ""
        symbol = self.next()
        while is_whitespace(symbol):
            log.log(log.LEXER, log.TRACE, "Skipping whitespace")
            symbol = self.next()
        if not symbol:
            log.log(log.LEXER, log.TRACE, "No word to read")
            return None
        # Remember where the word started for position reporting.
        pos = self.pos
        line = self.pos_line
        column = self.pos_column
        while symbol and not is_whitespace(symbol):
            value += symbol
            symbol = self.next()
        word = Word(value, pos, line, column)
        log.log(log.LEXER, log.TRACE, "Read %s" % (word))
        return word

    def skip_note(self, line, column):
        """Discard words until EndNote; raise ParseError on EOF."""
        log.log(log.LEXER, log.TRACE, "Skipping words until EndNote")
        context = ParseContext(
            None, "reading note", ParseLocation(line, column, self.filename)
        )
        word = self.read_word()
        while word and word.value != "EndNote":
            word = self.read_word()
        if not word:
            raise ParseError(context, "Hit end of file before EndNote")

    def read_text(self, line, column):
        """Read raw text until the EndText word; raise ParseError on EOF.

        Returns the characters strictly between BeginText and EndText,
        excluding the single separating whitespace on each side.
        """
        log.log(log.LEXER, log.TRACE, "Reading characters until EndText")
        context = ParseContext(
            None, "reading text", ParseLocation(line, column, self.filename)
        )
        start = self.pos
        word = self.read_word()
        while word and word.value != "EndText":
            word = self.read_word()
        if not word:
            raise ParseError(context, "Hit end of file before EndText")
        else:
            return self.code[start + 1 : word.position - 1]

    def skip_shebang(self):
        """Drop a leading #! line so the tokenizer starts on line 2."""
        log.log(log.LEXER, log.TRACE, "Skipping shebang")
        next_line = self.code.find("\n") + 1
        self.code = self.code[next_line:]
        self.pos_line = 2

    def tokenize(self):
        """Tokenize the whole input and return a list of Tokens ending in EOF.

        Raises ParseError on stray EndNote/EndText or unterminated
        notes/text blocks.
        """
        if self.code[0:2] == "#!":
            self.skip_shebang()
        keywords = [
            "NewLang",
            "Done",
            "Set",
            "To",
            "EndSet",
            "If",
            "Then",
            "Else",
            "EndIf",
        ]
        tokens = []
        word = self.read_word()
        while word:
            token = word.value
            line = word.line
            column = word.column
            context = ParseContext(
                None, "reading word", ParseLocation(line, column, self.filename)
            )
            if token == "BeginNote":
                # Notes are comments: skip them entirely, emit no token.
                self.skip_note(line, column)
                word = self.read_word()
                continue
            elif token == "EndNote":
                raise ParseError(context, "Found stray EndNote")
            elif token == "BeginText":
                token_type = "text"
                token_value = self.read_text(line, column)
            elif token == "EndText":
                raise ParseError(context, "Found stray EndText")
            elif token in ["True", "False"]:
                token_type = "bool"
                token_value = token == "True"
            elif token in keywords:
                token_type = "keyword"
                token_value = token
            else:
                token_type = "symbol"
                token_value = token
            tok = Token(token_type, token_value, ParseLocation(line, column, self.filename))
            log.log(log.LEXER, log.DEBUG, "Appending %s" % (tok))
            tokens.append(tok)
            word = self.read_word()
        log.log(log.LEXER, log.TRACE, "Done tokenizing, adding EOF")
        tokens.append(
            Token(
                "EOF",
                None,
                ParseLocation(self.pos_line, self.pos_column, self.filename),
            )
        )
        log.log(log.LEXER, log.DEBUG, "Tokens are %s" % (tokens))
        return tokens


class Parser:
    """Recursive-descent parser over a Token list produced by Tokenizer."""

    def __init__(self, tokens):
        self.tokens = tokens
        self.pos = 0

    def next(self):
        """Return the current token and advance (stops at the last token)."""
        token = self.tokens[self.pos]
        if self.pos < (len(self.tokens) - 1):
            self.pos += 1
        log.log(log.PARSER, log.TRACE, "Read %s" % (token))
        return token

    def peek(self):
        """Return the current token without advancing."""
        token = self.tokens[self.pos]
        log.log(log.PARSER, log.TRACE, "Peeked %s" % (token))
        return token

    def eof(self):
        """Return True when the current token is the EOF marker."""
        return self.tokens[self.pos].type == "EOF"

    def create_context(self, context, text):
        """Build a ParseContext at the current token's location."""
        token = self.tokens[self.pos]
        return ParseContext(context, text, token.location)

    def parse_version(self, context):
        """Parse the mandatory 'NewLang 0' header; return the version string."""
        log.log(log.PARSER, log.TRACE, "Parsing version identifier...")
        context = self.create_context(context, "parsing version identifier")
        token = self.next()
        if token.type != "keyword" or token.value != "NewLang":
            raise ParseError(
                context, "Expected 'NewLang' keyword, got '%s'" % (token.value)
            )
        token = self.next()
        version = token.value
        if version != "0":
            raise ParseError(context, "Unknown version '%s'" % (version))
        log.log(log.PARSER, log.DEBUG, "Parsed version %s" % (version))
        return version

    def parse_value(self, context, subject, type, value):
        """Convert a token (type, value) into a value AST node.

        Raises ParseError for token types that are not valid values.
        """
        log.log(log.PARSER, log.TRACE, "Parsing value...")
        if type == "symbol":
            ret = ast_types.Reference(value)
        elif type == "text":
            ret = ast_types.Text(value)
        elif type == "bool":
            ret = ast_types.Bool(value)
        else:
            raise ParseError(context, "Unexpected value type %s" % (type))
        log.log(log.PARSER, log.TRACE, "Parsed value, AST is %s" % (ret))
        return ret

    def parse_arguments(self, meta_context, terminator):
        """Parse value arguments until the terminator keyword; return a list."""
        log.log(log.PARSER, log.TRACE, "Parsing arguments until '%s'..." % (terminator))
        context = self.create_context(meta_context, "parsing statement arguments")
        args = []
        arg_num = 1
        while True:
            log.log(log.PARSER, log.TRACE, "Parsing argument %i..." % (arg_num))
            arg_context = self.create_context(
                context, "parsing argument %i" % (arg_num)
            )
            end_context = self.create_context(context, "parsing terminator")
            token = self.next()
            arg_num += 1
            if token.type == "keyword":
                if token.value == terminator:
                    log.log(
                        log.PARSER, log.TRACE, "Parsed arguments, AST is %s" % (args)
                    )
                    return args
                else:
                    # Any other keyword here means the statement was malformed.
                    raise ParseError(
                        end_context, "Expected %s, got %s" % (terminator, token.value)
                    )
            else:
                arg = self.parse_value(arg_context, "argument", token.type, token.value)
                log.log(log.PARSER, log.TRACE, "Parsed argument %s" % (arg))
                args.append(arg)

    def parse_statement(self, context, terminator, type):
        """Parse 'subject [verb args...] terminator'; return a Statement node."""
        log.log(
            log.PARSER,
            log.TRACE,
            "Parsing %s statement until '%s'..." % (type, terminator),
        )
        meta_context = self.create_context(context, "parsing %s statement" % (type))
        log.log(log.PARSER, log.TRACE, "Parsing statement subject...")
        context = self.create_context(meta_context, "parsing subject")
        token = self.next()
        subject = self.parse_value(context, "subject", token.type, token.value)
        log.log(log.PARSER, log.TRACE, "Parsing statement verb...")
        context = self.create_context(meta_context, "parsing statement verb")
        end_context = self.create_context(context, "parsing terminator")
        token = self.next()
        if token.type == "keyword":
            if token.value == terminator:
                # Bare statement: subject only, no verb or arguments.
                verb = None
            else:
                raise ParseError(
                    end_context, "Expected %s, got %s" % (terminator, token.value)
                )
        elif token.type == "symbol":
            verb = token.value
        else:
            raise ParseError(context, "Expected symbol, got %s" % (token.type))
        log.log(log.PARSER, log.TRACE, "Parsing statement arguments...")
        if verb:
            arguments = self.parse_arguments(meta_context, terminator)
        else:
            arguments = []
        statement = ast_types.Statement(subject, verb, arguments)
        log.log(log.PARSER, log.DEBUG, "Parsed statement, AST is %s" % (statement))
        return statement

    def parse_set(self, context):
        """Parse 'Set name To statement EndSet'; return a Set node."""
        log.log(log.PARSER, log.TRACE, "Parsing set directive...")
        meta_context = self.create_context(context, "parsing set directive")
        self.next()  # Skip 'Set'
        log.log(log.PARSER, log.TRACE, "Parsing set subject...")
        context = self.create_context(meta_context, "parsing subject")
        token = self.next()
        if token.type != "symbol":
            raise ParseError(context, "Expected symbol, got %s" % (token.type))
        subject = token.value
        log.log(log.PARSER, log.TRACE, "Parsing set separator...")
        context = self.create_context(meta_context, "parsing set separator")
        token = self.next()
        if token.type != "keyword" or token.value != "To":
            pretty_value = token.value
            if token.type != "keyword":
                # Quote non-keyword values so the error reads naturally.
                pretty_value = "'%s'" % (pretty_value)
            raise ParseError(context, "Expected To, got %s" % (pretty_value))
        log.log(log.PARSER, log.TRACE, "Parsing set value...")
        ast = self.parse_statement(meta_context, "EndSet", "set value")
        # Renamed from 'set' to avoid shadowing the builtin.
        set_node = ast_types.Set(subject, ast)
        log.log(log.PARSER, log.DEBUG, "Parsed set, AST is %s" % (set_node))
        return set_node

    def parse_if(self, context):
        """Parse 'If test Then success Else failure EndIf'; return a Conditional."""
        log.log(log.PARSER, log.TRACE, "Parsing if directive...")
        context = self.create_context(context, "parsing if directive")
        self.next()  # Skip 'If'
        test = self.parse_statement(context, "Then", "test condition")
        log.log(log.PARSER, log.TRACE, "Parsing if success statement...")
        success = self.parse_statement(context, "Else", "success")
        log.log(log.PARSER, log.TRACE, "Parsing if failure statement...")
        failure = self.parse_statement(context, "EndIf", "failure")
        conditional = ast_types.Conditional(test, success, failure)
        log.log(log.PARSER, log.DEBUG, "Parsed if, AST is %s" % (conditional))
        return conditional

    def parse_directive(self, context):
        """Dispatch on the next token: Set, If, or a plain command statement."""
        token = self.peek()
        if token.type != "keyword" and token.type != "symbol" and token.type != "bool":
            raise ParseError(
                context, "Expected keyword, symbol or bool, got %s" % (token.type)
            )
        if token.type == "keyword":
            if token.value == "Set":
                return self.parse_set(context)
            elif token.value == "If":
                return self.parse_if(context)
            else:
                raise ParseError(context, "Unexpected keyword %s" % (token.value))
        else:
            ast = self.parse_statement(context, "Done", "command")
            return ast

    def parse_file(self):
        """Parse the version header then all directives; return the AST list."""
        log.log(log.PARSER, log.TRACE, "Parsing file...")
        ast = []
        # Validates the 'NewLang 0' header; the version itself is not needed here.
        self.parse_version(None)
        while not self.eof():
            log.log(log.PARSER, log.TRACE, "Parsing next directive in file...")
            ast.append(self.parse_directive(None))
        log.log(log.PARSER, log.DEBUG, "Parsed file, AST is %s" % (ast))
        return ast


def parse_file(filename):
    """Tokenize and parse a NewLang file.

    Returns the AST list, or None after printing a diagnostic when the file
    is not valid UTF-8 or fails to parse (the full context chain is printed).
    """
    try:
        # 'with' ensures the file handle is closed even if read() raises.
        with open(filename, encoding="utf-8") as file:
            code = file.read()
    except UnicodeDecodeError:
        print("Parse error: %s is not valid UTF-8" % (filename))
        return None
    try:
        tokenizer = Tokenizer(code, filename)
        tokens = tokenizer.tokenize()
        parser = Parser(tokens)
        return parser.parse_file()
    except ParseError as e:
        print("Parse error: %s" % (e.error))
        context = e.context
        while context:
            line = context.location.line
            column = context.location.column
            print("While %s at line %i column %i" % (context.context, line, column))
            context = context.parent
        print("While parsing file %s" % (filename))
        return None