diff --git a/ast_types.py b/ast_types.py deleted file mode 100644 index 4398021..0000000 --- a/ast_types.py +++ /dev/null @@ -1,49 +0,0 @@ -# SPDX-License-Identifier: LGPL-2.1-or-later -# Copyright 2021 Jookia - -class Reference: - def __init__(self, value): - self.value = value - - def __repr__(self): - return "Reference('%s')" % (self.value) - -class Bool: - def __init__(self, value): - self.value = value - - def __repr__(self): - return "Bool(%s)" % (self.value) - -class Text: - def __init__(self, value): - self.value = value - - def __repr__(self): - return "Text('%s')" % (self.value) - -class Statement: - def __init__(self, subject, verb, arguments): - self.subject = subject - self.verb = verb - self.arguments = arguments - - def __repr__(self): - return "Statement(subject %s, verb '%s', arguments %s)" % (self.subject, self.verb, self.arguments) - -class Set: - def __init__(self, subject, statement): - self.subject = subject - self.statement = statement - - def __repr__(self): - return "Set(subject %s, statement %s)" % (self.subject, self.statement) - -class Conditional: - def __init__(self, test, success, failure): - self.test = test - self.success = success - self.failure = failure - - def __repr__(self): - return "Conditional(test %s, success %s, failure %s)" % (self.test, self.success, self.failure) diff --git a/code.txt b/code.txt deleted file mode 100755 index 4a954dd..0000000 --- a/code.txt +++ /dev/null @@ -1,21 +0,0 @@ -#!./main.py -NewLang 0 - -BeginNote -Copyright 2021 Jookia -SPDX-License-Identifier: LGPL-2.1-or-later -This is a comment and should be ignored by the parser -EndNote - -System Print BeginText Hello, there! EndText Done -System Print BeginText Please enter your name: EndText Done -Set Name To System Read EndSet BeginNote Reads an entire line EndNote -Set Prefix To BeginText Hi there, EndText EndSet -Set Greeting To Prefix Append Name EndSet - -If Name Equals BeginText Jookia EndText -Then System Print BeginText Hi creator! 
EndText -Else System Print Greeting -EndIf - -System Exit Done diff --git a/examples/code.txt b/examples/code.txt new file mode 100755 index 0000000..dd2ae41 --- /dev/null +++ b/examples/code.txt @@ -0,0 +1,21 @@ +#!../src/main.py +NewLang 0 + +BeginNote +Copyright 2021 Jookia +SPDX-License-Identifier: LGPL-2.1-or-later +This is a comment and should be ignored by the parser +EndNote + +System Print BeginText Hello, there! EndText Done +System Print BeginText Please enter your name: EndText Done +Set Name To System Read EndSet BeginNote Reads an entire line EndNote +Set Prefix To BeginText Hi there, EndText EndSet +Set Greeting To Prefix Append Name EndSet + +If Name Equals BeginText Jookia EndText +Then System Print BeginText Hi creator! EndText +Else System Print Greeting +EndIf + +System Exit Done diff --git a/interp.py b/interp.py deleted file mode 100644 index 4b45c89..0000000 --- a/interp.py +++ /dev/null @@ -1,137 +0,0 @@ -# SPDX-License-Identifier: LGPL-2.1-or-later -# Copyright 2021 Jookia - -import ast_types -import sys - -class Bool: - def __init__(self, value): - self.value = value - - def __repr__(self): - return "Bool(%s)" % (self.value) - - def verb_ToText(self, args): - return Text(self._value) - -class Text: - def __init__(self, value): - self.value = value - - def __repr__(self): - return "Text('%s')" % (self.value) - - def verb_Append(self, args): - args_count = len(args) - if args_count != 1: - raise InterpreterError("Invalid argument count %i, expected 1" % (args_count)) - appendix = args[0] - if appendix.__class__ != Text: - raise InterpreterError("Invalid argument type %s, expected Text" % (appendix.__class__.__name__)) - return Text(self.value + " " + appendix.value) - - def verb_Equals(self, args): - args_count = len(args) - if args_count != 1: - raise InterpreterError("Invalid argument count %i, expected 1" % (args_count)) - compare = args[0] - if compare.__class__ != Text: - raise InterpreterError("Invalid argument type %s, expected Text" 
% (appendix.__class__.__name__)) - return Bool(self.value == compare.value) - -class Module_System: - def verb_Print(self, args): - args_count = len(args) - if args_count != 1: - raise InterpreterError("Invalid argument count %i, expected 1" % (args_count)) - line = args[0] - if line.__class__ != Text: - raise InterpreterError("Invalid argument type %s, expected Text" % (line.__class__.__name__)) - print(line.value) - - def verb_Read(self, args): - try: - return Text(input()) - except KeyboardInterrupt: - return Text("") - except EOFError: - return Text("") - - def verb_Exit(self, args): - sys.exit(0) - -class InterpreterError(BaseException): - def __init__(self, error): - self.error = error - - def __repr__(self): - return "InterpreterError(error '%s')" % (self.error) - -class Interpreter: - def __init__(self, env): - self.env = env - - def resolve_value(self, value): - if value.__class__ == ast_types.Reference: - if value.value in self.env: - return self.env[value.value] - else: - raise InterpreterError("Unknown environment value %s" % (value.value)) - elif value.__class__ == ast_types.Text: - return Text(value.value) - elif value.__class__ == ast_types.Bool: - return Bool(value.value) - else: - raise InterpreterError("Unknown value type %s" % (value.__class__.__name__)) - - def run_statement(self, ast): - subject = self.resolve_value(ast.subject) - if ast.verb == None: - return subject - args = [] - for arg in ast.arguments: - args.append(self.resolve_value(arg)) - verb = getattr(subject, "verb_" + ast.verb, None) - if verb: - return verb(args) - else: - raise InterpreterError("Unknown verb %s for subject %s" % (ast.verb, subject.__class__.__name__)) - - def run_set(self, ast): - self.env[ast.subject] = self.run_statement(ast.statement) - return self.env[ast.subject] - - def run_conditional(self, ast): - test = self.run_statement(ast.test) - if test.__class__ != Bool: - raise InterpreterError("Test condition didn't return a boolean") - if test.value: - ret = 
self.run_statement(ast.success) - else: - ret = self.run_statement(ast.failure) - return ret - - def run_command(self, ast): - if ast.__class__ == ast_types.Statement: - return self.run_statement(ast) - elif ast.__class__ == ast_types.Set: - return self.run_set(ast) - elif ast.__class__ == ast_types.Conditional: - return self.run_conditional(ast) - else: - raise InterpreterError("Unknown command type %s" % (ast.__class__.__name__)) - - def run(self, ast): - while True: - for command in ast: - ret = self.run_command(command) - -def run_ast(ast): - env = { - "System": Module_System(), - } - try: - return Interpreter(env).run(ast) - except InterpreterError as e: - print("Interpreter error: %s" % (e)) - return None diff --git a/log.py b/log.py deleted file mode 100644 index 64896ee..0000000 --- a/log.py +++ /dev/null @@ -1,47 +0,0 @@ -# SPDX-License-Identifier: LGPL-2.1-or-later -# Copyright 2021 Jookia - -import os - -LEXER = 0 -PARSER = 1 - -NORMAL = 0 # messages always shown -DEBUG = 1 # high-level information about what's happening -TRACE = 2 # trace output - -log_level = 0 - -def log(module, level, text): - if level > log_level: - return - - if module == LEXER: - module_name = "LEXER" - elif module == PARSER: - module_name = "PARSER" - else: - module_name = "UNKNOWN" - - if level == NORMAL: - level_name = "NORMAL" - elif level == DEBUG: - level_name = "DEBUG" - elif level == TRACE: - level_name = "TRACE" - else: - level_name = "UNKNOWN" - - print("%s %s: %s" % (level_name, module_name, text)) - -def set_default_log_level(): - level = os.getenv("NEWLANG_LOG_LEVEL") - global log_level - if not level or level == "NORMAL": - log_level = 0 - elif level == "DEBUG": - log_level = 1 - elif level == "TRACE": - log_level = 2 - else: - print("ERROR: Unknown log level %s" % (level)) diff --git a/main.py b/main.py deleted file mode 100755 index be678ee..0000000 --- a/main.py +++ /dev/null @@ -1,22 +0,0 @@ -#!/usr/bin/env python3 -# SPDX-License-Identifier: LGPL-2.1-or-later -# 
Copyright 2021 Jookia - -import log -import parse -import interp - -def main(args): - if len(args) != 2: - print("Usage: main.py FILENAME") - return 1 - log.set_default_log_level() - ast = parse.parse_file(args[1]) - if not ast: - return 1 - interp.run_ast(ast) - return 0 - -if __name__ == "__main__": - import sys - sys.exit(main(sys.argv)) diff --git a/parse.py b/parse.py deleted file mode 100644 index 1865515..0000000 --- a/parse.py +++ /dev/null @@ -1,331 +0,0 @@ -# SPDX-License-Identifier: LGPL-2.1-or-later -# Copyright 2021 Jookia - -import log -import ast_types - -class ParseLocation: - def __init__(self, line, column, file): - self.line = line - self.column = column - self.file = file - - def __repr__(self): - return "ParseLocation(line %i, column %i, file '%s')" % (self.line, self.column, self.file) - -class ParseContext: - def __init__(self, parent, context, location): - self.parent = parent - self.context = context - self.location = location - - def __repr__(self): - return "ParseContext(parent %s, context '%s', location %s)" % (self.parent, self.context, self.location) - -class ParseError(BaseException): - def __init__(self, context, error): - self.context = context - self.error = error - - def __repr__(self): - return "ParseError(context %s, error '%s')" % (self.context, self.error) - -class Token: - def __init__(self, type, value, location): - self.type = type - self.value = value - self.location = location - - def __repr__(self): - return "Token(type %s, value '%s', location %s)" % (self.type, self.value, self.location) - -def is_whitespace(symbol): - return symbol == " " or symbol == "\t" or symbol == "\n" - -class Tokenizer: - def __init__(self, input, filename): - self.code = input - self.pos = 0 - self.pos_line = 1 - self.pos_column = 1 - self.filename = filename - - def next(self): - if self.pos >= len(self.code): - log.log(log.LEXER, log.TRACE, "Reached end of file") - return None - else: - symbol = self.code[self.pos] - pos_string = "line %i 
column %i" % (self.pos_line, self.pos_column) - if symbol == '\n': - log.log(log.LEXER, log.TRACE, "Read new line at %s" % (pos_string)) - self.pos_line += 1 - self.pos_column = 1 - else: - log.log(log.LEXER, log.TRACE, "Read character %s at %s" % (symbol, pos_string)) - self.pos_column += 1 - self.pos += 1 - return symbol - - def read_token(self): - token = "" - symbol = self.next() - while is_whitespace(symbol): - log.log(log.LEXER, log.TRACE, "Skipping whitespace") - symbol = self.next() - if not symbol: - log.log(log.LEXER, log.TRACE, "No token to read") - return (None, self.pos_line, self.pos_column) - line = self.pos_line - column = self.pos_column - 1 # pos_column is one ahead - while not is_whitespace(symbol): - token += symbol - symbol = self.next() - log.log(log.LEXER, log.TRACE, "Read token '%s' at line %i column %i" % (token, line, column)) - return (token, line, column) - - def skip_note(self, line, column): - log.log(log.LEXER, log.TRACE, "Skipping tokens until EndNote") - context = ParseContext(None, "reading note", ParseLocation(line, column, self.filename)) - (token, _, _) = self.read_token() - while token and token != "EndNote": - (token, _, _) = self.read_token() - if not token: - raise ParseError(context, "Hit end of file before EndNote") - - def read_text(self, line, column): - log.log(log.LEXER, log.TRACE, "Reading characters until EndText") - context = ParseContext(None, "reading text", ParseLocation(line, column, self.filename)) - start = self.pos - (token, _, _) = self.read_token() - while token and token != "EndText": - (token, _, _) = self.read_token() - if not token: - raise ParseError(context, "Hit end of file before EndText") - else: - return self.code[start:self.pos - len(" EndText") - 1] - - def skip_shebang(self): - log.log(log.LEXER, log.TRACE, "Skipping shebang") - next_line = self.code.find('\n') + 1 - self.code = self.code[next_line:] - self.pos_line = 2 - - def tokenize(self): - if self.code[0:2] == '#!': - self.skip_shebang() 
- keywords = ["NewLang", "Done", "Set", "To", "EndSet", - "If", "Then", "Else", "EndIf"] - tokens = [] - (token, line, column) = self.read_token() - while token: - context = ParseContext(None, "reading token", ParseLocation(line, column, self.filename)) - if token == "BeginNote": - self.skip_note(line, column) - (token, line, column) = self.read_token() - continue - elif token == "EndNote": - raise ParseError(context, "Found stray EndNote") - elif token == "BeginText": - type = "text" - value = self.read_text(line, column) - elif token == "EndText": - raise ParseError(context, "Found stray EndText") - elif token in ["True", "False"]: - type = "bool" - value = (token == "True") - elif token in keywords: - type = "keyword" - value = token - else: - type = "symbol" - value = token - tok = Token(type, value, ParseLocation(line, column, self.filename)) - log.log(log.LEXER, log.DEBUG, "Appending %s" % (tok)) - tokens.append(tok) - (token, line, column) = self.read_token() - log.log(log.LEXER, log.TRACE, "Done tokenizing, adding EOF") - tokens.append(Token("EOF", None, ParseLocation(self.pos_line, self.pos_column, self.filename))) - log.log(log.LEXER, log.DEBUG, "Tokens are %s" % (tokens)) - return tokens - -class Parser: - def __init__(self, tokens): - self.tokens = tokens - self.pos = 0 - - def next(self): - token = self.tokens[self.pos] - if self.pos < (len(self.tokens) - 1): - self.pos += 1 - log.log(log.PARSER, log.TRACE, "Read %s" % (token)) - return token - - def peek(self): - token = self.tokens[self.pos] - log.log(log.PARSER, log.TRACE, "Peeked %s" % (token)) - return token - - def eof(self): - return self.tokens[self.pos].type == "EOF" - - def create_context(self, context, text): - token = self.tokens[self.pos] - return ParseContext(context, text, token.location) - - def parse_version(self, context): - log.log(log.PARSER, log.TRACE, "Parsing version identifier...") - context = self.create_context(context, "parsing version identifier") - token = self.next() - if 
token.type != "keyword" or token.value != "NewLang": - raise ParseError(context, "Expected 'NewLang' keyword, got '%s'" % (token.value)) - token = self.next() - version = token.value - if version != "0": - raise ParseError(context, "Unknown version '%s'" % (version)) - log.log(log.PARSER, log.DEBUG, "Parsed version %s" % (version)) - return version - - def parse_value(self, context, subject, type, value): - log.log(log.PARSER, log.TRACE, "Parsing value...") - if type == "symbol": - ret = ast_types.Reference(value) - elif type == "text": - ret = ast_types.Text(value) - elif type == "bool": - ret = ast_types.Bool(value) - else: - raise ParseError(context, "Unexpected value type %s" % (type)) - log.log(log.PARSER, log.TRACE, "Parsed value, AST is %s" % (ret)) - return ret - - def parse_arguments(self, meta_context, terminator): - log.log(log.PARSER, log.TRACE, "Parsing arguments until '%s'..." % (terminator)) - context = self.create_context(meta_context, "parsing statement arguments") - args = [] - arg_num = 1 - while True: - log.log(log.PARSER, log.TRACE, "Parsing argument %i..." % (arg_num)) - arg_context = self.create_context(context, "parsing argument %i" % (arg_num)) - end_context = self.create_context(context, "parsing terminator") - token = self.next() - arg_num += 1 - if token.type == "keyword": - if token.value == terminator: - log.log(log.PARSER, log.TRACE, "Parsed arguments, AST is %s" % (args)) - return args - else: - raise ParseError(end_context, "Expected %s, got %s" % (terminator, token.value)) - else: - arg = self.parse_value(arg_context, "argument", token.type, token.value) - log.log(log.PARSER, log.TRACE, "Parsed argument %s" % (arg)) - args.append(arg) - - def parse_statement(self, context, terminator, type): - log.log(log.PARSER, log.TRACE, "Parsing %s statement until '%s'..." 
% (type, terminator)) - meta_context = self.create_context(context, "parsing %s statement" % (type)) - log.log(log.PARSER, log.TRACE, "Parsing statement subject...") - context = self.create_context(meta_context, "parsing subject") - token = self.next() - subject = self.parse_value(context, "subject", token.type, token.value) - log.log(log.PARSER, log.TRACE, "Parsing statement verb...") - context = self.create_context(meta_context, "parsing statement verb") - end_context = self.create_context(context, "parsing terminator") - token = self.next() - if token.type == "keyword": - if token.value == terminator: - verb = None - else: - raise ParseError(end_context, "Expected %s, got %s" % (terminator, token.value)) - elif token.type == "symbol": - verb = token.value - else: - raise ParseError(context, "Expected symbol, got %s" % (token.type)) - log.log(log.PARSER, log.TRACE, "Parsing statement arguments...") - if verb: - arguments = self.parse_arguments(meta_context, terminator) - else: - arguments = [] - statement = ast_types.Statement(subject, verb, arguments) - log.log(log.PARSER, log.DEBUG, "Parsed statement, AST is %s" % (statement)) - return statement - - def parse_set(self, context): - log.log(log.PARSER, log.TRACE, "Parsing set directive...") - meta_context = self.create_context(context, "parsing set directive") - self.next() # Skip 'Set' - log.log(log.PARSER, log.TRACE, "Parsing set subject...") - context = self.create_context(meta_context, "parsing subject") - token = self.next() - if token.type != "symbol": - raise ParseError(context, "Expected symbol, got %s" % (token.type)) - subject = token.value - log.log(log.PARSER, log.TRACE, "Parsing set separator...") - context = self.create_context(meta_context, "parsing set separator") - token = self.next() - if token.type != "keyword" or token.value != "To": - pretty_value = token.value - if token.type != "keyword": - pretty_value = "'%s'" % (pretty_value) - raise ParseError(context, "Expected To, got %s" % 
(pretty_value)) - log.log(log.PARSER, log.TRACE, "Parsing set value...") - ast = self.parse_statement(meta_context, "EndSet", "set value") - set = ast_types.Set(subject, ast) - log.log(log.PARSER, log.DEBUG, "Parsed set, AST is %s" % (set)) - return set - - def parse_if(self, context): - log.log(log.PARSER, log.TRACE, "Parsing if directive...") - context = self.create_context(context, "parsing if directive") - self.next() # Skip 'If' - test = self.parse_statement(context, "Then", "test condition") - log.log(log.PARSER, log.TRACE, "Parsing if success statement...") - success = self.parse_statement(context, "Else", "success") - log.log(log.PARSER, log.TRACE, "Parsing if failure statement...") - failure = self.parse_statement(context, "EndIf", "failure") - conditional = ast_types.Conditional(test, success, failure) - log.log(log.PARSER, log.DEBUG, "Parsed if, AST is %s" % (conditional)) - return conditional - - def parse_directive(self, context): - token = self.peek() - if token.type != "keyword" and token.type != "symbol" and token.type != "bool": - raise ParseError(context, "Expected keyword, symbol or bool, got %s" % (token.type)) - if token.type == "keyword": - if token.value == "Set": - return self.parse_set(context) - elif token.value == "If": - return self.parse_if(context) - else: - raise ParseError(context, "Unexpected keyword %s" % (token.value)) - else: - ast = self.parse_statement(context, "Done", "command") - return ast - - def parse_file(self): - log.log(log.PARSER, log.TRACE, "Parsing file...") - ast = [] - version = self.parse_version(None) - while not self.eof(): - log.log(log.PARSER, log.TRACE, "Parsing next directive in file...") - ast.append(self.parse_directive(None)) - log.log(log.PARSER, log.DEBUG, "Parsed file, AST is %s" % (ast)) - return ast - -def parse_file(filename): - code = open(filename).read() - try: - tokenizer = Tokenizer(code, filename) - tokens = tokenizer.tokenize() - parser = Parser(tokens) - return parser.parse_file() - except 
ParseError as e: - print("Parse error: %s" % (e.error)) - context = e.context - while context: - line = context.location.line - column = context.location.column - print("While %s at line %i column %i" % (context.context, line, column)) - context = context.parent - print("While parsing file %s" % (filename)) - return None diff --git a/src/ast_types.py b/src/ast_types.py new file mode 100644 index 0000000..4398021 --- /dev/null +++ b/src/ast_types.py @@ -0,0 +1,49 @@ +# SPDX-License-Identifier: LGPL-2.1-or-later +# Copyright 2021 Jookia + +class Reference: + def __init__(self, value): + self.value = value + + def __repr__(self): + return "Reference('%s')" % (self.value) + +class Bool: + def __init__(self, value): + self.value = value + + def __repr__(self): + return "Bool(%s)" % (self.value) + +class Text: + def __init__(self, value): + self.value = value + + def __repr__(self): + return "Text('%s')" % (self.value) + +class Statement: + def __init__(self, subject, verb, arguments): + self.subject = subject + self.verb = verb + self.arguments = arguments + + def __repr__(self): + return "Statement(subject %s, verb '%s', arguments %s)" % (self.subject, self.verb, self.arguments) + +class Set: + def __init__(self, subject, statement): + self.subject = subject + self.statement = statement + + def __repr__(self): + return "Set(subject %s, statement %s)" % (self.subject, self.statement) + +class Conditional: + def __init__(self, test, success, failure): + self.test = test + self.success = success + self.failure = failure + + def __repr__(self): + return "Conditional(test %s, success %s, failure %s)" % (self.test, self.success, self.failure) diff --git a/src/interp.py b/src/interp.py new file mode 100644 index 0000000..4b45c89 --- /dev/null +++ b/src/interp.py @@ -0,0 +1,137 @@ +# SPDX-License-Identifier: LGPL-2.1-or-later +# Copyright 2021 Jookia + +import ast_types +import sys + +class Bool: + def __init__(self, value): + self.value = value + + def __repr__(self): + 
return "Bool(%s)" % (self.value) + + def verb_ToText(self, args): + return Text(self._value) + +class Text: + def __init__(self, value): + self.value = value + + def __repr__(self): + return "Text('%s')" % (self.value) + + def verb_Append(self, args): + args_count = len(args) + if args_count != 1: + raise InterpreterError("Invalid argument count %i, expected 1" % (args_count)) + appendix = args[0] + if appendix.__class__ != Text: + raise InterpreterError("Invalid argument type %s, expected Text" % (appendix.__class__.__name__)) + return Text(self.value + " " + appendix.value) + + def verb_Equals(self, args): + args_count = len(args) + if args_count != 1: + raise InterpreterError("Invalid argument count %i, expected 1" % (args_count)) + compare = args[0] + if compare.__class__ != Text: + raise InterpreterError("Invalid argument type %s, expected Text" % (appendix.__class__.__name__)) + return Bool(self.value == compare.value) + +class Module_System: + def verb_Print(self, args): + args_count = len(args) + if args_count != 1: + raise InterpreterError("Invalid argument count %i, expected 1" % (args_count)) + line = args[0] + if line.__class__ != Text: + raise InterpreterError("Invalid argument type %s, expected Text" % (line.__class__.__name__)) + print(line.value) + + def verb_Read(self, args): + try: + return Text(input()) + except KeyboardInterrupt: + return Text("") + except EOFError: + return Text("") + + def verb_Exit(self, args): + sys.exit(0) + +class InterpreterError(BaseException): + def __init__(self, error): + self.error = error + + def __repr__(self): + return "InterpreterError(error '%s')" % (self.error) + +class Interpreter: + def __init__(self, env): + self.env = env + + def resolve_value(self, value): + if value.__class__ == ast_types.Reference: + if value.value in self.env: + return self.env[value.value] + else: + raise InterpreterError("Unknown environment value %s" % (value.value)) + elif value.__class__ == ast_types.Text: + return 
Text(value.value) + elif value.__class__ == ast_types.Bool: + return Bool(value.value) + else: + raise InterpreterError("Unknown value type %s" % (value.__class__.__name__)) + + def run_statement(self, ast): + subject = self.resolve_value(ast.subject) + if ast.verb == None: + return subject + args = [] + for arg in ast.arguments: + args.append(self.resolve_value(arg)) + verb = getattr(subject, "verb_" + ast.verb, None) + if verb: + return verb(args) + else: + raise InterpreterError("Unknown verb %s for subject %s" % (ast.verb, subject.__class__.__name__)) + + def run_set(self, ast): + self.env[ast.subject] = self.run_statement(ast.statement) + return self.env[ast.subject] + + def run_conditional(self, ast): + test = self.run_statement(ast.test) + if test.__class__ != Bool: + raise InterpreterError("Test condition didn't return a boolean") + if test.value: + ret = self.run_statement(ast.success) + else: + ret = self.run_statement(ast.failure) + return ret + + def run_command(self, ast): + if ast.__class__ == ast_types.Statement: + return self.run_statement(ast) + elif ast.__class__ == ast_types.Set: + return self.run_set(ast) + elif ast.__class__ == ast_types.Conditional: + return self.run_conditional(ast) + else: + raise InterpreterError("Unknown command type %s" % (ast.__class__.__name__)) + + def run(self, ast): + while True: + for command in ast: + ret = self.run_command(command) + +def run_ast(ast): + env = { + "System": Module_System(), + } + try: + return Interpreter(env).run(ast) + except InterpreterError as e: + print("Interpreter error: %s" % (e)) + return None diff --git a/src/log.py b/src/log.py new file mode 100644 index 0000000..64896ee --- /dev/null +++ b/src/log.py @@ -0,0 +1,47 @@ +# SPDX-License-Identifier: LGPL-2.1-or-later +# Copyright 2021 Jookia + +import os + +LEXER = 0 +PARSER = 1 + +NORMAL = 0 # messages always shown +DEBUG = 1 # high-level information about what's happening +TRACE = 2 # trace output + +log_level = 0 + +def log(module, 
level, text): + if level > log_level: + return + + if module == LEXER: + module_name = "LEXER" + elif module == PARSER: + module_name = "PARSER" + else: + module_name = "UNKNOWN" + + if level == NORMAL: + level_name = "NORMAL" + elif level == DEBUG: + level_name = "DEBUG" + elif level == TRACE: + level_name = "TRACE" + else: + level_name = "UNKNOWN" + + print("%s %s: %s" % (level_name, module_name, text)) + +def set_default_log_level(): + level = os.getenv("NEWLANG_LOG_LEVEL") + global log_level + if not level or level == "NORMAL": + log_level = 0 + elif level == "DEBUG": + log_level = 1 + elif level == "TRACE": + log_level = 2 + else: + print("ERROR: Unknown log level %s" % (level)) diff --git a/src/main.py b/src/main.py new file mode 100755 index 0000000..be678ee --- /dev/null +++ b/src/main.py @@ -0,0 +1,22 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: LGPL-2.1-or-later +# Copyright 2021 Jookia + +import log +import parse +import interp + +def main(args): + if len(args) != 2: + print("Usage: main.py FILENAME") + return 1 + log.set_default_log_level() + ast = parse.parse_file(args[1]) + if not ast: + return 1 + interp.run_ast(ast) + return 0 + +if __name__ == "__main__": + import sys + sys.exit(main(sys.argv)) diff --git a/src/parse.py b/src/parse.py new file mode 100644 index 0000000..1865515 --- /dev/null +++ b/src/parse.py @@ -0,0 +1,331 @@ +# SPDX-License-Identifier: LGPL-2.1-or-later +# Copyright 2021 Jookia + +import log +import ast_types + +class ParseLocation: + def __init__(self, line, column, file): + self.line = line + self.column = column + self.file = file + + def __repr__(self): + return "ParseLocation(line %i, column %i, file '%s')" % (self.line, self.column, self.file) + +class ParseContext: + def __init__(self, parent, context, location): + self.parent = parent + self.context = context + self.location = location + + def __repr__(self): + return "ParseContext(parent %s, context '%s', location %s)" % (self.parent, self.context, 
self.location) + +class ParseError(BaseException): + def __init__(self, context, error): + self.context = context + self.error = error + + def __repr__(self): + return "ParseError(context %s, error '%s')" % (self.context, self.error) + +class Token: + def __init__(self, type, value, location): + self.type = type + self.value = value + self.location = location + + def __repr__(self): + return "Token(type %s, value '%s', location %s)" % (self.type, self.value, self.location) + +def is_whitespace(symbol): + return symbol == " " or symbol == "\t" or symbol == "\n" + +class Tokenizer: + def __init__(self, input, filename): + self.code = input + self.pos = 0 + self.pos_line = 1 + self.pos_column = 1 + self.filename = filename + + def next(self): + if self.pos >= len(self.code): + log.log(log.LEXER, log.TRACE, "Reached end of file") + return None + else: + symbol = self.code[self.pos] + pos_string = "line %i column %i" % (self.pos_line, self.pos_column) + if symbol == '\n': + log.log(log.LEXER, log.TRACE, "Read new line at %s" % (pos_string)) + self.pos_line += 1 + self.pos_column = 1 + else: + log.log(log.LEXER, log.TRACE, "Read character %s at %s" % (symbol, pos_string)) + self.pos_column += 1 + self.pos += 1 + return symbol + + def read_token(self): + token = "" + symbol = self.next() + while is_whitespace(symbol): + log.log(log.LEXER, log.TRACE, "Skipping whitespace") + symbol = self.next() + if not symbol: + log.log(log.LEXER, log.TRACE, "No token to read") + return (None, self.pos_line, self.pos_column) + line = self.pos_line + column = self.pos_column - 1 # pos_column is one ahead + while not is_whitespace(symbol): + token += symbol + symbol = self.next() + log.log(log.LEXER, log.TRACE, "Read token '%s' at line %i column %i" % (token, line, column)) + return (token, line, column) + + def skip_note(self, line, column): + log.log(log.LEXER, log.TRACE, "Skipping tokens until EndNote") + context = ParseContext(None, "reading note", ParseLocation(line, column, 
self.filename)) + (token, _, _) = self.read_token() + while token and token != "EndNote": + (token, _, _) = self.read_token() + if not token: + raise ParseError(context, "Hit end of file before EndNote") + + def read_text(self, line, column): + log.log(log.LEXER, log.TRACE, "Reading characters until EndText") + context = ParseContext(None, "reading text", ParseLocation(line, column, self.filename)) + start = self.pos + (token, _, _) = self.read_token() + while token and token != "EndText": + (token, _, _) = self.read_token() + if not token: + raise ParseError(context, "Hit end of file before EndText") + else: + return self.code[start:self.pos - len(" EndText") - 1] + + def skip_shebang(self): + log.log(log.LEXER, log.TRACE, "Skipping shebang") + next_line = self.code.find('\n') + 1 + self.code = self.code[next_line:] + self.pos_line = 2 + + def tokenize(self): + if self.code[0:2] == '#!': + self.skip_shebang() + keywords = ["NewLang", "Done", "Set", "To", "EndSet", + "If", "Then", "Else", "EndIf"] + tokens = [] + (token, line, column) = self.read_token() + while token: + context = ParseContext(None, "reading token", ParseLocation(line, column, self.filename)) + if token == "BeginNote": + self.skip_note(line, column) + (token, line, column) = self.read_token() + continue + elif token == "EndNote": + raise ParseError(context, "Found stray EndNote") + elif token == "BeginText": + type = "text" + value = self.read_text(line, column) + elif token == "EndText": + raise ParseError(context, "Found stray EndText") + elif token in ["True", "False"]: + type = "bool" + value = (token == "True") + elif token in keywords: + type = "keyword" + value = token + else: + type = "symbol" + value = token + tok = Token(type, value, ParseLocation(line, column, self.filename)) + log.log(log.LEXER, log.DEBUG, "Appending %s" % (tok)) + tokens.append(tok) + (token, line, column) = self.read_token() + log.log(log.LEXER, log.TRACE, "Done tokenizing, adding EOF") + tokens.append(Token("EOF", 
def parse_arguments(self, meta_context, terminator):
    """Collect statement arguments up to the ``terminator`` keyword.

    Tokens are consumed one at a time.  Every non-keyword token is turned
    into an AST value through ``parse_value`` and appended to the result.
    The keyword equal to ``terminator`` ends the list; any other keyword
    raises ``ParseError``.

    Returns the list of parsed argument values (possibly empty).
    """
    log.log(log.PARSER, log.TRACE, "Parsing arguments until '%s'..." % (terminator))
    outer = self.create_context(meta_context, "parsing statement arguments")
    parsed = []
    index = 0
    while True:
        index += 1
        log.log(log.PARSER, log.TRACE, "Parsing argument %i..." % (index))
        # Both contexts are created before the token is consumed because
        # create_context() records the location of the current token.
        value_context = self.create_context(outer, "parsing argument %i" % (index))
        terminator_context = self.create_context(outer, "parsing terminator")
        current = self.next()
        if current.type != "keyword":
            value = self.parse_value(value_context, "argument", current.type, current.value)
            log.log(log.PARSER, log.TRACE, "Parsed argument %s" % (value))
            parsed.append(value)
            continue
        if current.value != terminator:
            raise ParseError(terminator_context, "Expected %s, got %s" % (terminator, current.value))
        log.log(log.PARSER, log.TRACE, "Parsed arguments, AST is %s" % (parsed))
        return parsed
def parse_set(self, context):
    """Parse a ``Set <symbol> To <statement> EndSet`` directive.

    The current token is consumed as the 'Set' keyword without being
    inspected.  The next token must be a symbol naming the subject, and
    the one after that must be the keyword 'To'.  The value is then
    parsed by ``parse_statement`` with 'EndSet' as its terminator.

    Raises ``ParseError`` when the subject is not a symbol or when the
    separator is not 'To'.  Returns an ``ast_types.Set`` node.
    """
    log.log(log.PARSER, log.TRACE, "Parsing set directive...")
    directive_context = self.create_context(context, "parsing set directive")
    self.next()  # consume the 'Set' keyword itself

    log.log(log.PARSER, log.TRACE, "Parsing set subject...")
    subject_context = self.create_context(directive_context, "parsing subject")
    subject_token = self.next()
    if subject_token.type != "symbol":
        raise ParseError(subject_context, "Expected symbol, got %s" % (subject_token.type))

    log.log(log.PARSER, log.TRACE, "Parsing set separator...")
    separator_context = self.create_context(directive_context, "parsing set separator")
    separator = self.next()
    is_keyword = separator.type == "keyword"
    if not (is_keyword and separator.value == "To"):
        # Keywords are reported bare; any other token value is quoted.
        if is_keyword:
            shown = separator.value
        else:
            shown = "'%s'" % (separator.value)
        raise ParseError(separator_context, "Expected To, got %s" % (shown))

    log.log(log.PARSER, log.TRACE, "Parsing set value...")
    value_statement = self.parse_statement(directive_context, "EndSet", "set value")
    node = ast_types.Set(subject_token.value, value_statement)
    log.log(log.PARSER, log.DEBUG, "Parsed set, AST is %s" % (node))
    return node
def parse_file(filename):
    """Read, tokenize and parse the source file at ``filename``.

    Returns whatever ``Parser.parse_file()`` produces on success.  If a
    ``ParseError`` is raised while tokenizing or parsing, the error and
    its chain of contexts (innermost first) are printed to standard
    output and ``None`` is returned.

    Errors raised while opening or reading the file are not caught here
    and propagate to the caller.
    """
    # Use a context manager so the file handle is closed deterministically
    # instead of being left for the garbage collector.
    with open(filename) as source:
        code = source.read()
    try:
        tokenizer = Tokenizer(code, filename)
        tokens = tokenizer.tokenize()
        parser = Parser(tokens)
        return parser.parse_file()
    except ParseError as e:
        print("Parse error: %s" % (e.error))
        # Walk outwards through the nested contexts recorded on the error.
        context = e.context
        while context:
            line = context.location.line
            column = context.location.column
            print("While %s at line %i column %i" % (context.context, line, column))
            context = context.parent
        print("While parsing file %s" % (filename))
        return None