diff --git a/build.py b/build.py index 596725c..5c20934 100644 --- a/build.py +++ b/build.py @@ -38,7 +38,7 @@ def find_all_src(srcpath): - paths = srcpath.glob("**/*") + paths = srcpath.rglob("*") new_paths = [srcpath] for p in paths: if "__pycache__" in p.as_posix(): @@ -57,33 +57,33 @@ def write_main(zip, date): name = "__main__.py" - data = "import src.main\nsrc.main.wait_main()" + data = "import newlang.main\nnewlang.main.wait_main()" write_zip_entry(zip, name, data, date) -def write_dir(zip, path, date): +def write_dir(zip, path, name, date): # For some reason Python needs empty files with the names of directories ending in / - name = path.as_posix() + "/" + name = name + "/" data = "" write_zip_entry(zip, name, data, date) -def write_file(zip, path, date): +def write_file(zip, path, name, date): file = path.open("rb") data = file.read() new_data = data.replace(b"\r\n", b"\n") file.close() - name = path.as_posix() write_zip_entry(zip, name, new_data, date) def write_src(zip, date): - srcs = sorted(find_all_src(pathlib.Path("src"))) + srcs = sorted(find_all_src(pathlib.Path("src/newlang"))) for p in srcs: + name = p.relative_to("src").as_posix() if p.is_dir(): - write_dir(zip, p, date) + write_dir(zip, p, name, date) else: - write_file(zip, p, date) + write_file(zip, p, name, date) def main(): diff --git a/env.ps1 b/env.ps1 index 5820c9b..47592b6 100644 --- a/env.ps1 +++ b/env.ps1 @@ -30,6 +30,8 @@ If (Test-Path -Path venv\.stamp) { Activate } Else { Setup } +$env:PYTHONPATH += ";$PWD\src"; + # NOTE: PowerShell functions always return success even if the commands inside # failed. This makes it infeasible to do proper error checking. # See https://github.com/PowerShell/PowerShell/issues/12218 diff --git a/env.sh b/env.sh index 460e1fc..5d80915 100644 --- a/env.sh +++ b/env.sh @@ -23,6 +23,8 @@ if test -e venv/.stamp; then activate; else setup; fi +export PYTHONPATH="$PYTHONPATH:$PWD/src" + unset -f activate die setup resetenv() { rm -r venv; . 
./env.sh; } diff --git a/run.py b/run.py index 0facbe1..0283f97 100755 --- a/run.py +++ b/run.py @@ -2,5 +2,5 @@ # SPDX-License-Identifier: LGPL-2.1-only # Copyright 2022 Jookia -import src.main -src.main.wait_main() +import newlang.main +newlang.main.wait_main() diff --git a/src/ast_types.py b/src/ast_types.py deleted file mode 100644 index 0b16c7f..0000000 --- a/src/ast_types.py +++ /dev/null @@ -1,101 +0,0 @@ -# SPDX-License-Identifier: LGPL-2.1-only -# Copyright 2022 Jookia - - -class Reference: - def __init__(self, value): - self.value = value - - def __repr__(self): - return "Reference('%s')" % (self.value) - - def __eq__(self, other): - if other is None: - return False - return self.value == other.value - - -class Bool: - def __init__(self, value): - self.value = value - - def __repr__(self): - return "Bool(%s)" % (self.value) - - def __eq__(self, other): - if other is None: - return False - return self.value == other.value - - -class Text: - def __init__(self, value): - self.value = value - - def __repr__(self): - return "Text('%s')" % (self.value) - - def __eq__(self, other): - if other is None: - return False - return self.value == other.value - - -class Statement: - def __init__(self, subject, verb, arguments): - self.subject = subject - self.verb = verb - self.arguments = arguments - - def __repr__(self): - return "Statement(subject %s, verb '%s', arguments %s)" % ( - self.subject, - self.verb, - self.arguments, - ) - - def __eq__(self, other): - if other is None: - return False - return ( - self.subject == other.subject - and self.verb == other.verb - and self.arguments == other.arguments - ) - - -class Set: - def __init__(self, subject, statement): - self.subject = subject - self.statement = statement - - def __repr__(self): - return "Set(subject %s, statement %s)" % (self.subject, self.statement) - - def __eq__(self, other): - if other is None: - return False - return self.subject == other.subject and self.statement == other.statement - - -class Conditional: - def __init__(self, test, success, failure): - self.test = test - self.success = success - self.failure = failure - - def __repr__(self): - return "Conditional(test %s, success %s, failure %s)" % ( - self.test, - self.success, - self.failure, - ) - - def __eq__(self, other): - if other is None: - return False - return ( - self.test == other.test - and self.success == other.success - and self.failure == other.failure - ) diff --git a/src/i18n.py b/src/i18n.py deleted file mode 100644 index a5243f6..0000000 --- a/src/i18n.py +++ /dev/null @@ -1,16 +0,0 @@ -# SPDX-License-Identifier: LGPL-2.1-only -# Copyright 2022 Jookia - - -class Message: - def __init__(self, id, values): - self.id = id - self.values = values - - def __repr__(self): - return "Message('%s', %s)" % (self.id, self.values) # pragma: no mutate - - def __eq__(self, other): - if other is None: - return False - return self.id == other.id and self.values == other.values diff --git a/src/interp.py b/src/interp.py deleted file mode 100644 index a88cb02..0000000 --- a/src/interp.py +++ /dev/null @@ -1,159 +0,0 @@ -# SPDX-License-Identifier: LGPL-2.1-only -# Copyright 2021 Jookia - -from src import ast_types -import sys - - -class Bool: - def __init__(self, value): - self.value = value - - def __repr__(self): - return "Bool(%s)" % (self.value) - - def verb_ToText(self, args): - return Text(self._value) - - -class Text: - def __init__(self, value): - self.value = value - - def __repr__(self): - return "Text('%s')" % (self.value) - - def verb_Append(self, args): - 
args_count = len(args) - if args_count != 1: - raise InterpreterError( - "Invalid argument count %i, expected 1" % (args_count) - ) - appendix = args[0] - if appendix.__class__ != Text: - raise InterpreterError( - "Invalid argument type %s, expected Text" - % (appendix.__class__.__name__) - ) - return Text(self.value + " " + appendix.value) - - def verb_Equals(self, args): - args_count = len(args) - if args_count != 1: - raise InterpreterError( - "Invalid argument count %i, expected 1" % (args_count) - ) - compare = args[0] - if compare.__class__ != Text: - raise InterpreterError( - "Invalid argument type %s, expected Text" % (compare.__class__.__name__) - ) - return Bool(self.value == compare.value) - - -class Module_System: - def verb_Print(self, args): - args_count = len(args) - if args_count != 1: - raise InterpreterError( - "Invalid argument count %i, expected 1" % (args_count) - ) - line = args[0] - if line.__class__ != Text: - raise InterpreterError( - "Invalid argument type %s, expected Text" % (line.__class__.__name__) - ) - print(line.value) - - def verb_Read(self, args): - try: - return Text(input()) - except KeyboardInterrupt: - return Text("") - except EOFError: - return Text("") - - def verb_Exit(self, args): - sys.exit(0) - - -class InterpreterError(BaseException): - def __init__(self, error): - self.error = error - - def __repr__(self): - return "InterpreterError(error '%s')" % (self.error) - - -class Interpreter: - def __init__(self, env): - self.env = env - - def resolve_value(self, value): - if value.__class__ == ast_types.Reference: - if value.value in self.env: - return self.env[value.value] - else: - raise InterpreterError("Unknown environment value %s" % (value.value)) - elif value.__class__ == ast_types.Text: - return Text(value.value) - elif value.__class__ == ast_types.Bool: - return Bool(value.value) - else: - raise InterpreterError("Unknown value type %s" % (value.__class__.__name__)) - - def run_statement(self, ast): - subject = self.resolve_value(ast.subject) - if not ast.verb: - return subject - args = [] - for arg in ast.arguments: - args.append(self.resolve_value(arg)) - verb = getattr(subject, "verb_" + ast.verb, None) - if verb: - return verb(args) - else: - raise InterpreterError( - "Unknown verb %s for subject %s" - % (ast.verb, subject.__class__.__name__) - ) - - def run_set(self, ast): - self.env[ast.subject] = self.run_statement(ast.statement) - return self.env[ast.subject] - - def run_conditional(self, ast): - test = self.run_statement(ast.test) - if test.__class__ != Bool: - raise InterpreterError("Test condition didn't return a boolean") - if test.value: - ret = self.run_statement(ast.success) - else: - ret = self.run_statement(ast.failure) - return ret - - def run_command(self, ast): - if ast.__class__ == ast_types.Statement: - return self.run_statement(ast) - elif ast.__class__ == ast_types.Set: - return self.run_set(ast) - elif ast.__class__ == ast_types.Conditional: - return self.run_conditional(ast) - else: - raise InterpreterError("Unknown command type %s" % (ast.__class__.__name__)) - - def run(self, ast): - while True: - for command in ast: - self.run_command(command) - - -def run_ast(ast): - env = { - "System": Module_System(), - } - try: - return Interpreter(env).run(ast) - except InterpreterError as e: - print("Interpreter error: %s" % (e)) - return None diff --git a/src/log.py b/src/log.py deleted file mode 100644 index 4a41adc..0000000 --- a/src/log.py +++ /dev/null @@ -1,46 +0,0 @@ -# SPDX-License-Identifier: LGPL-2.1-only -# 
Copyright 2022 Jookia - -LEXER = 0 -PARSER = 1 - -NORMAL = 0 # messages always shown -DEBUG = 1 # high-level information about what's happening -TRACE = 2 # trace output - -log_level = 0 - - -def log(module, level, text): - if level > log_level: - return - - if module == LEXER: - module_name = "LEXER" - elif module == PARSER: - module_name = "PARSER" - else: - module_name = "UNKNOWN" - - if level == NORMAL: - level_name = "NORMAL" - elif level == DEBUG: - level_name = "DEBUG" - elif level == TRACE: - level_name = "TRACE" - else: - level_name = "UNKNOWN" - - print("%s %s: %s" % (level_name, module_name, text)) - - -def set_default_log_level(level): - global log_level - if not level or level == "NORMAL": - log_level = 0 - elif level == "DEBUG": - log_level = 1 - elif level == "TRACE": - log_level = 2 - else: - print("ERROR: Unknown log level %s" % (level)) diff --git a/src/main.py b/src/main.py deleted file mode 100644 index 0fa0c4f..0000000 --- a/src/main.py +++ /dev/null @@ -1,54 +0,0 @@ -# SPDX-License-Identifier: LGPL-2.1-only -# Copyright 2022 Jookia - -import os -import sys - -from src import port -from src import log -from src.parse2 import tokenize as tokenize2 -from src.parse2 import parse as parse2 -from src import parse -from src import interp - - -def run_file(file, log_level=log.NORMAL): - log.set_default_log_level(log_level) - ast = None - if os.getenv("NEWLANG_PARSE2"): - try: - code = open(file, encoding="utf-8").read() - tokens = tokenize2.tokenize(code, file) - ast = parse2.parse(tokens, None) - except UnicodeError: - pass - else: - ast = parse.parse_file(file) - if not ast: - return 1 - interp.run_ast(ast) - return 0 - - -def main(): - args = sys.argv - log_level = os.getenv("NEWLANG_LOG_LEVEL") - exe = "NewLang" - if len(args) == 1: - exe = args[0] - if len(args) != 2: - print("Usage: %s FILENAME" % (exe)) - return 1 - return run_file(args[1], log_level) - - -def unexpected_exit(): - input("Unexpected exit. Press any key to continue.") - - -def wait_main(): - port.atexit_register_one(unexpected_exit) - code = main() - port.atexit_unregister() - input("Exited with code %s. Press any key to continue." 
% (code)) - sys.exit(code) diff --git a/src/newlang/ast_types.py b/src/newlang/ast_types.py new file mode 100644 index 0000000..0b16c7f --- /dev/null +++ b/src/newlang/ast_types.py @@ -0,0 +1,101 @@ +# SPDX-License-Identifier: LGPL-2.1-only +# Copyright 2022 Jookia + + +class Reference: + def __init__(self, value): + self.value = value + + def __repr__(self): + return "Reference('%s')" % (self.value) + + def __eq__(self, other): + if other is None: + return False + return self.value == other.value + + +class Bool: + def __init__(self, value): + self.value = value + + def __repr__(self): + return "Bool(%s)" % (self.value) + + def __eq__(self, other): + if other is None: + return False + return self.value == other.value + + +class Text: + def __init__(self, value): + self.value = value + + def __repr__(self): + return "Text('%s')" % (self.value) + + def __eq__(self, other): + if other is None: + return False + return self.value == other.value + + +class Statement: + def __init__(self, subject, verb, arguments): + self.subject = subject + self.verb = verb + self.arguments = arguments + + def __repr__(self): + return "Statement(subject %s, verb '%s', arguments %s)" % ( + self.subject, + self.verb, + self.arguments, + ) + + def __eq__(self, other): + if other is None: + return False + return ( + self.subject == other.subject + and self.verb == other.verb + and self.arguments == other.arguments + ) + + +class Set: + def __init__(self, subject, statement): + self.subject = subject + self.statement = statement + + def __repr__(self): + return "Set(subject %s, statement %s)" % (self.subject, self.statement) + + def __eq__(self, other): + if other is None: + return False + return self.subject == other.subject and self.statement == other.statement + + +class Conditional: + def __init__(self, test, success, failure): + self.test = test + self.success = success + self.failure = failure + + def __repr__(self): + return "Conditional(test %s, success %s, failure %s)" % ( + self.test, + self.success, + self.failure, + ) + + def __eq__(self, other): + if other is None: + return False + return ( + self.test == other.test + and self.success == other.success + and self.failure == other.failure + ) diff --git a/src/newlang/i18n.py b/src/newlang/i18n.py new file mode 100644 index 0000000..a5243f6 --- /dev/null +++ b/src/newlang/i18n.py @@ -0,0 +1,16 @@ +# SPDX-License-Identifier: LGPL-2.1-only +# Copyright 2022 Jookia + + +class Message: + def __init__(self, id, values): + self.id = id + self.values = values + + def __repr__(self): + return "Message('%s', %s)" % (self.id, self.values) # pragma: no mutate + + def __eq__(self, other): + if other is None: + return False + return self.id == other.id and self.values == other.values diff --git a/src/newlang/interp.py b/src/newlang/interp.py new file mode 100644 index 0000000..fd6c685 --- /dev/null +++ b/src/newlang/interp.py @@ -0,0 +1,159 @@ +# SPDX-License-Identifier: LGPL-2.1-only +# Copyright 2021 Jookia + +from newlang import ast_types +import sys + + +class Bool: + def __init__(self, value): + self.value = value + + def __repr__(self): + return "Bool(%s)" % (self.value) + + def verb_ToText(self, args): + return Text(self._value) + + +class Text: + def __init__(self, value): + self.value = value + + def __repr__(self): + return "Text('%s')" % (self.value) + + def verb_Append(self, args): + args_count = len(args) + if args_count != 1: + raise InterpreterError( + "Invalid argument count %i, expected 1" % (args_count) + ) + appendix = args[0] + if appendix.__class__ 
!= Text: + raise InterpreterError( + "Invalid argument type %s, expected Text" + % (appendix.__class__.__name__) + ) + return Text(self.value + " " + appendix.value) + + def verb_Equals(self, args): + args_count = len(args) + if args_count != 1: + raise InterpreterError( + "Invalid argument count %i, expected 1" % (args_count) + ) + compare = args[0] + if compare.__class__ != Text: + raise InterpreterError( + "Invalid argument type %s, expected Text" % (compare.__class__.__name__) + ) + return Bool(self.value == compare.value) + + +class Module_System: + def verb_Print(self, args): + args_count = len(args) + if args_count != 1: + raise InterpreterError( + "Invalid argument count %i, expected 1" % (args_count) + ) + line = args[0] + if line.__class__ != Text: + raise InterpreterError( + "Invalid argument type %s, expected Text" % (line.__class__.__name__) + ) + print(line.value) + + def verb_Read(self, args): + try: + return Text(input()) + except KeyboardInterrupt: + return Text("") + except EOFError: + return Text("") + + def verb_Exit(self, args): + sys.exit(0) + + +class InterpreterError(BaseException): + def __init__(self, error): + self.error = error + + def __repr__(self): + return "InterpreterError(error '%s')" % (self.error) + + +class Interpreter: + def __init__(self, env): + self.env = env + + def resolve_value(self, value): + if value.__class__ == ast_types.Reference: + if value.value in self.env: + return self.env[value.value] + else: + raise InterpreterError("Unknown environment value %s" % (value.value)) + elif value.__class__ == ast_types.Text: + return Text(value.value) + elif value.__class__ == ast_types.Bool: + return Bool(value.value) + else: + raise InterpreterError("Unknown value type %s" % (value.__class__.__name__)) + + def run_statement(self, ast): + subject = self.resolve_value(ast.subject) + if not ast.verb: + return subject + args = [] + for arg in ast.arguments: + args.append(self.resolve_value(arg)) + verb = getattr(subject, "verb_" + ast.verb, None) + if verb: + return verb(args) + else: + raise InterpreterError( + "Unknown verb %s for subject %s" + % (ast.verb, subject.__class__.__name__) + ) + + def run_set(self, ast): + self.env[ast.subject] = self.run_statement(ast.statement) + return self.env[ast.subject] + + def run_conditional(self, ast): + test = self.run_statement(ast.test) + if test.__class__ != Bool: + raise InterpreterError("Test condition didn't return a boolean") + if test.value: + ret = self.run_statement(ast.success) + else: + ret = self.run_statement(ast.failure) + return ret + + def run_command(self, ast): + if ast.__class__ == ast_types.Statement: + return self.run_statement(ast) + elif ast.__class__ == ast_types.Set: + return self.run_set(ast) + elif ast.__class__ == ast_types.Conditional: + return self.run_conditional(ast) + else: + raise InterpreterError("Unknown command type %s" % (ast.__class__.__name__)) + + def run(self, ast): + while True: + for command in ast: + self.run_command(command) + + +def run_ast(ast): + env = { + "System": Module_System(), + } + try: + return Interpreter(env).run(ast) + except InterpreterError as e: + print("Interpreter error: %s" % (e)) + return None diff --git a/src/newlang/log.py b/src/newlang/log.py new file mode 100644 index 0000000..4a41adc --- /dev/null +++ b/src/newlang/log.py @@ -0,0 +1,46 @@ +# SPDX-License-Identifier: LGPL-2.1-only +# Copyright 2022 Jookia + +LEXER = 0 +PARSER = 1 + +NORMAL = 0 # messages always shown +DEBUG = 1 # high-level information about what's happening +TRACE = 2 # trace 
output + +log_level = 0 + + +def log(module, level, text): + if level > log_level: + return + + if module == LEXER: + module_name = "LEXER" + elif module == PARSER: + module_name = "PARSER" + else: + module_name = "UNKNOWN" + + if level == NORMAL: + level_name = "NORMAL" + elif level == DEBUG: + level_name = "DEBUG" + elif level == TRACE: + level_name = "TRACE" + else: + level_name = "UNKNOWN" + + print("%s %s: %s" % (level_name, module_name, text)) + + +def set_default_log_level(level): + global log_level + if not level or level == "NORMAL": + log_level = 0 + elif level == "DEBUG": + log_level = 1 + elif level == "TRACE": + log_level = 2 + else: + print("ERROR: Unknown log level %s" % (level)) diff --git a/src/newlang/main.py b/src/newlang/main.py new file mode 100644 index 0000000..3b57a52 --- /dev/null +++ b/src/newlang/main.py @@ -0,0 +1,54 @@ +# SPDX-License-Identifier: LGPL-2.1-only +# Copyright 2022 Jookia + +import os +import sys + +from newlang import port +from newlang import log +from newlang.parse2 import tokenize as tokenize2 +from newlang.parse2 import parse as parse2 +from newlang import parse +from newlang import interp + + +def run_file(file, log_level=log.NORMAL): + log.set_default_log_level(log_level) + ast = None + if os.getenv("NEWLANG_PARSE2"): + try: + code = open(file, encoding="utf-8").read() + tokens = tokenize2.tokenize(code, file) + ast = parse2.parse(tokens, None) + except UnicodeError: + pass + else: + ast = parse.parse_file(file) + if not ast: + return 1 + interp.run_ast(ast) + return 0 + + +def main(): + args = sys.argv + log_level = os.getenv("NEWLANG_LOG_LEVEL") + exe = "NewLang" + if len(args) == 1: + exe = args[0] + if len(args) != 2: + print("Usage: %s FILENAME" % (exe)) + return 1 + return run_file(args[1], log_level) + + +def unexpected_exit(): + input("Unexpected exit. Press any key to continue.") + + +def wait_main(): + port.atexit_register_one(unexpected_exit) + code = main() + port.atexit_unregister() + input("Exited with code %s. Press any key to continue." 
% (code)) + sys.exit(code) diff --git a/src/newlang/parse.py b/src/newlang/parse.py new file mode 100644 index 0000000..713cd36 --- /dev/null +++ b/src/newlang/parse.py @@ -0,0 +1,403 @@ +# SPDX-License-Identifier: LGPL-2.1-only +# Copyright 2021 Jookia + +from newlang import log +from newlang import ast_types + + +class ParseLocation: + def __init__(self, line, column, file): + self.line = line + self.column = column + self.file = file + + def __repr__(self): + return "ParseLocation(line %i, column %i, file '%s')" % ( + self.line, + self.column, + self.file, + ) + + def __eq__(self, other): + if other is None: + return False + return ( + self.line == other.line + and self.column == other.column + and self.file == other.file + ) + + +class ParseContext: + def __init__(self, parent, context, location): + self.parent = parent + self.context = context + self.location = location + + def __repr__(self): + return "ParseContext(parent %s, context '%s', location %s)" % ( + self.parent, + self.context, + self.location, + ) + + +class ParseError(BaseException): + def __init__(self, context, error): + self.context = context + self.error = error + + def __str__(self): + return "ParseError(context %s, error '%s')" % (self.context, self.error) + + +class Token: + def __init__(self, type, value, location): + self.type = type + self.value = value + self.location = location + + def __repr__(self): + return "Token(type %s, value '%s', location %s)" % ( + self.type, + self.value, + self.location, + ) + + +class Word: + def __init__(self, value, position, line, column): + self.value = value + self.position = position + self.line = line + self.column = column + + def __repr__(self): + return "Word(value '%s', position %i, line %i, column %i)" % ( + self.value, + self.position, + self.line, + self.column, + ) + + +def is_whitespace(symbol): + return symbol == " " or symbol == "\t" or symbol == "\n" + + +class Tokenizer: + def __init__(self, input, filename): + self.code = input + self.pos = -1 + self.pos_line = 1 + self.pos_column = 0 + self.filename = filename + + def next(self): + if self.pos == len(self.code) - 1: + log.log(log.LEXER, log.TRACE, "Reached end of file") + return None + else: + self.pos += 1 + symbol = self.code[self.pos] + if symbol == "\n": + self.pos_line += 1 + self.pos_column = 0 + else: + self.pos_column += 1 + pos_string = "line %i column %i" % (self.pos_line, self.pos_column) + symbol_string = symbol + if is_whitespace(symbol): + symbol_string = repr(symbol) + log.log( + log.LEXER, + log.TRACE, + "Read character %s at %s" % (symbol_string, pos_string), + ) + return symbol + + def read_word(self): + value = "" + symbol = self.next() + while is_whitespace(symbol): + log.log(log.LEXER, log.TRACE, "Skipping whitespace") + symbol = self.next() + if not symbol: + log.log(log.LEXER, log.TRACE, "No word to read") + return None + pos = self.pos + line = self.pos_line + column = self.pos_column + while symbol and not is_whitespace(symbol): + value += symbol + symbol = self.next() + word = Word(value, pos, line, column) + log.log(log.LEXER, log.TRACE, "Read %s" % (word)) + return word + + def skip_note(self, line, column): + log.log(log.LEXER, log.TRACE, "Skipping words until EndNote") + context = ParseContext( + None, "reading note", ParseLocation(line, column, self.filename) + ) + word = self.read_word() + while word and word.value != "EndNote": + word = self.read_word() + if not word: + raise ParseError(context, "Hit end of file before EndNote") + + def read_text(self, line, column): + 
log.log(log.LEXER, log.TRACE, "Reading characters until EndText") + context = ParseContext( + None, "reading text", ParseLocation(line, column, self.filename) + ) + start = self.pos + word = self.read_word() + while word and word.value != "EndText": + word = self.read_word() + if not word: + raise ParseError(context, "Hit end of file before EndText") + else: + return self.code[start + 1 : word.position - 1].strip("\n\t ") + + def tokenize(self): + keywords = [ + "Done", + "Set", + "To", + "EndSet", + "If", + "Then", + "Else", + "EndIf", + ] + tokens = [] + word = self.read_word() + while word: + token = word.value + line = word.line + column = word.column + context = ParseContext( + None, "reading word", ParseLocation(line, column, self.filename) + ) + if token == "StartNote": + self.skip_note(line, column) + word = self.read_word() + continue + elif token == "EndNote": + raise ParseError(context, "Found stray EndNote") + elif token == "StartText": + type = "text" + value = self.read_text(line, column) + elif token == "EndText": + raise ParseError(context, "Found stray EndText") + elif token in ["True", "False"]: + type = "bool" + value = token == "True" + elif token in keywords: + type = "keyword" + value = token + else: + type = "symbol" + value = token + tok = Token(type, value, ParseLocation(line, column, self.filename)) + log.log(log.LEXER, log.DEBUG, "Appending %s" % (tok)) + tokens.append(tok) + word = self.read_word() + log.log(log.LEXER, log.TRACE, "Done tokenizing, adding EOF") + tokens.append( + Token( + "EOF", + None, + ParseLocation(self.pos_line, self.pos_column, self.filename), + ) + ) + log.log(log.LEXER, log.DEBUG, "Tokens are %s" % (tokens)) + return tokens + + +class Parser: + def __init__(self, tokens): + self.tokens = tokens + self.pos = 0 + + def next(self): + token = self.tokens[self.pos] + if self.pos < (len(self.tokens) - 1): + self.pos += 1 + log.log(log.PARSER, log.TRACE, "Read %s" % (token)) + return token + + def peek(self): + token = self.tokens[self.pos] + log.log(log.PARSER, log.TRACE, "Peeked %s" % (token)) + return token + + def eof(self): + return self.tokens[self.pos].type == "EOF" + + def create_context(self, context, text): + token = self.tokens[self.pos] + return ParseContext(context, text, token.location) + + def parse_value(self, context, subject, type, value): + log.log(log.PARSER, log.TRACE, "Parsing value...") + if type == "symbol": + ret = ast_types.Reference(value) + elif type == "text": + ret = ast_types.Text(value) + elif type == "bool": + ret = ast_types.Bool(value) + else: + raise ParseError(context, "Unexpected value type %s" % (type)) + log.log(log.PARSER, log.TRACE, "Parsed value, AST is %s" % (ret)) + return ret + + def parse_arguments(self, meta_context, terminator): + log.log(log.PARSER, log.TRACE, "Parsing arguments until '%s'..." % (terminator)) + context = self.create_context(meta_context, "parsing statement arguments") + args = [] + arg_num = 1 + while True: + log.log(log.PARSER, log.TRACE, "Parsing argument %i..." 
% (arg_num)) + arg_context = self.create_context( + context, "parsing argument %i" % (arg_num) + ) + end_context = self.create_context(context, "parsing terminator") + token = self.next() + arg_num += 1 + if token.type == "keyword": + if token.value == terminator: + log.log( + log.PARSER, log.TRACE, "Parsed arguments, AST is %s" % (args) + ) + return args + else: + raise ParseError( + end_context, "Expected %s, got %s" % (terminator, token.value) + ) + else: + arg = self.parse_value(arg_context, "argument", token.type, token.value) + log.log(log.PARSER, log.TRACE, "Parsed argument %s" % (arg)) + args.append(arg) + + def parse_statement(self, context, terminator, type): + log.log( + log.PARSER, + log.TRACE, + "Parsing %s statement until '%s'..." % (type, terminator), + ) + meta_context = self.create_context(context, "parsing %s statement" % (type)) + log.log(log.PARSER, log.TRACE, "Parsing statement subject...") + context = self.create_context(meta_context, "parsing subject") + token = self.next() + subject = self.parse_value(context, "subject", token.type, token.value) + log.log(log.PARSER, log.TRACE, "Parsing statement verb...") + context = self.create_context(meta_context, "parsing statement verb") + end_context = self.create_context(context, "parsing terminator") + token = self.next() + if token.type == "keyword": + if token.value == terminator: + verb = None + else: + raise ParseError( + end_context, "Expected %s, got %s" % (terminator, token.value) + ) + elif token.type == "symbol": + verb = token.value + else: + raise ParseError(context, "Expected symbol, got %s" % (token.type)) + log.log(log.PARSER, log.TRACE, "Parsing statement arguments...") + if verb: + arguments = self.parse_arguments(meta_context, terminator) + else: + arguments = [] + statement = ast_types.Statement(subject, verb, arguments) + log.log(log.PARSER, log.DEBUG, "Parsed statement, AST is %s" % (statement)) + return statement + + def parse_set(self, context): + log.log(log.PARSER, log.TRACE, "Parsing set directive...") + meta_context = self.create_context(context, "parsing set directive") + self.next() # Skip 'Set' + log.log(log.PARSER, log.TRACE, "Parsing set subject...") + context = self.create_context(meta_context, "parsing subject") + token = self.next() + if token.type != "symbol": + raise ParseError(context, "Expected symbol, got %s" % (token.type)) + subject = token.value + log.log(log.PARSER, log.TRACE, "Parsing set separator...") + context = self.create_context(meta_context, "parsing set separator") + token = self.next() + if token.type != "keyword" or token.value != "To": + pretty_value = token.value + if token.type != "keyword": + pretty_value = "'%s'" % (pretty_value) + raise ParseError(context, "Expected To, got %s" % (pretty_value)) + log.log(log.PARSER, log.TRACE, "Parsing set value...") + ast = self.parse_statement(meta_context, "EndSet", "set value") + set = ast_types.Set(subject, ast) + log.log(log.PARSER, log.DEBUG, "Parsed set, AST is %s" % (set)) + return set + + def parse_if(self, context): + log.log(log.PARSER, log.TRACE, "Parsing if directive...") + context = self.create_context(context, "parsing if directive") + self.next() # Skip 'If' + test = self.parse_statement(context, "Then", "test condition") + log.log(log.PARSER, log.TRACE, "Parsing if success statement...") + success = self.parse_statement(context, "Else", "success") + log.log(log.PARSER, log.TRACE, "Parsing if failure statement...") + failure = self.parse_statement(context, "EndIf", "failure") + conditional = 
ast_types.Conditional(test, success, failure) + log.log(log.PARSER, log.DEBUG, "Parsed if, AST is %s" % (conditional)) + return conditional + + def parse_directive(self, context): + token = self.peek() + if token.type != "keyword" and token.type != "symbol" and token.type != "bool": + raise ParseError( + context, "Expected keyword, symbol or bool, got %s" % (token.type) + ) + if token.type == "keyword": + if token.value == "Set": + return self.parse_set(context) + elif token.value == "If": + return self.parse_if(context) + else: + raise ParseError(context, "Unexpected keyword %s" % (token.value)) + else: + ast = self.parse_statement(context, "Done", "command") + return ast + + def parse_file(self): + log.log(log.PARSER, log.TRACE, "Parsing file...") + ast = [] + while not self.eof(): + log.log(log.PARSER, log.TRACE, "Parsing next directive in file...") + ast.append(self.parse_directive(None)) + log.log(log.PARSER, log.DEBUG, "Parsed file, AST is %s" % (ast)) + return ast + + +def parse_file(filename): + try: + code = open(filename, encoding="utf-8").read() + except UnicodeError: + print("Parse error: %s is not valid UTF-8" % (filename)) + return None + try: + tokenizer = Tokenizer(code, filename) + tokens = tokenizer.tokenize() + parser = Parser(tokens) + return parser.parse_file() + except ParseError as e: + print("Parse error: %s" % (e.error)) + context = e.context + while context: + line = context.location.line + column = context.location.column + print("While %s at line %i column %i" % (context.context, line, column)) + context = context.parent + print("While parsing file %s" % (filename)) + return None diff --git a/src/newlang/parse2/parse.py b/src/newlang/parse2/parse.py new file mode 100644 index 0000000..89e8927 --- /dev/null +++ b/src/newlang/parse2/parse.py @@ -0,0 +1,400 @@ +# SPDX-License-Identifier: LGPL-2.1-only +# Copyright 2022 Jookia + +from newlang.i18n import Message +from newlang.ast_types import Bool, Conditional, Reference, Set, Statement, Text +from newlang.parse2.token import TokenStream + + +# Words that can't be used as references +# This should include keywords and literals +reserved_names = [ + "Done", + "Set", + "To", + "EndSet", + "If", + "Then", + "Else", + "EndIf", + "StartNote", + "EndNote", + "StartText", + "EndText", + "True", + "False", +] + + +# Tasks that happen during parsing +class ParseTask: + TEST_TASK = 1 # pragma: no mutate + PARSE_NOTE = 2 # pragma: no mutate + CLEAR_NOTES = 3 # pragma: no mutate + PARSE_TEXT = 4 # pragma: no mutate + PARSE_BOOL = 5 # pragma: no mutate + PARSE_REFERENCE = 6 # pragma: no mutate + PARSE_VALUE = 7 # pragma: no mutate + PARSE_STATEMENT = 8 # pragma: no mutate + PARSE_SUBJECT = 9 # pragma: no mutate + PARSE_VERB = 10 # pragma: no mutate + PARSE_ARGUMENT = 11 # pragma: no mutate + PARSE_SET = 12 # pragma: no mutate + PARSE_CONDITIONAL = 13 # pragma: no mutate + PARSE_TEST = 14 # pragma: no mutate + PARSE_SUCCESS = 15 # pragma: no mutate + PARSE_FAILURE = 16 # pragma: no mutate + PARSE_DIRECTIVE = 17 # pragma: no mutate + PARSE_FILE = 18 # pragma: no mutate + MAX = 19 # pragma: no mutate + + # Returns a list of all tasks + def list(): + return list(range(1, ParseTask.MAX)) # pragma: no mutate + + +# Message identifiers for ParseTasks +ParseTaskMessageIDs = { + ParseTask.TEST_TASK: "ParseTaskTestTask", + ParseTask.PARSE_NOTE: "ParseTaskNote", + ParseTask.CLEAR_NOTES: "ParseTaskClearNotes", + ParseTask.PARSE_TEXT: "ParseTaskText", + ParseTask.PARSE_BOOL: "ParseTaskBool", + ParseTask.PARSE_REFERENCE: "ParseTaskReference", 
+ ParseTask.PARSE_VALUE: "ParseTaskValue", + ParseTask.PARSE_STATEMENT: "ParseTaskStatement", + ParseTask.PARSE_SUBJECT: "ParseTaskSubject", + ParseTask.PARSE_VERB: "ParseTaskVerb", + ParseTask.PARSE_ARGUMENT: "ParseTaskArgument", + ParseTask.PARSE_SET: "ParseTaskSet", + ParseTask.PARSE_CONDITIONAL: "ParseTaskConditional", + ParseTask.PARSE_TEST: "ParseTaskTest", + ParseTask.PARSE_SUCCESS: "ParseTaskSuccess", + ParseTask.PARSE_FAILURE: "ParseTaskFailure", + ParseTask.PARSE_DIRECTIVE: "ParseTaskDirective", + ParseTask.PARSE_FILE: "ParseTaskFile", +} + + +# Context used for parse error exception +class ParseContext: + def __init__(self, task, token, parent): + self.task = task + self.token = token + self.parent = parent + + def __repr__(self): + return ( + "ParseContext(task %s, token %s, parent\n %s)" # pragma: no mutate + % ( # pragma: no mutate + self.task, + self.token, + self.parent, + ) + ) + + def __eq__(self, other): + if other is None: + return False + return ( + self.task == other.task + and self.token == other.token + and self.parent == other.parent + ) + + +# Errors that can happen when parsing +class ParseError: + TEST_ERROR = 1 # pragma: no mutate + NO_TOKEN = 2 # pragma: no mutate + WRONG_TOKEN = 3 # pragma: no mutate + FOUND_STARTTEXT = 4 # pragma: no mutate + FOUND_STARTNOTE = 5 # pragma: no mutate + NOT_BOOL = 6 # pragma: no mutate + FOUND_ENDNOTE = 7 # pragma: no mutate + RESERVED_NAME = 8 # pragma: no mutate + FOUND_TERMINATOR = 9 # pragma: no mutate + MAX = 10 # pragma: no mutate + + # Returns a list of all errors + def list(): + return list(range(1, ParseError.MAX)) # pragma: no mutate + + +# Message identifiers for ParseErrors +ParseErrorMessageIDs = { + ParseError.TEST_ERROR: "ParseErrorTestError", + ParseError.NO_TOKEN: "ParseErrorNoToken", + ParseError.WRONG_TOKEN: "ParseErrorWrongToken", + ParseError.FOUND_STARTTEXT: "ParseErrorFoundStartText", + ParseError.FOUND_STARTNOTE: "ParseErrorFoundStartNote", + ParseError.NOT_BOOL: "ParseErrorNotBool", + ParseError.FOUND_ENDNOTE: "ParseErrorFoundEndNote", + ParseError.RESERVED_NAME: "ParseErrorReservedName", + ParseError.FOUND_TERMINATOR: "ParseErrorFoundTerminator", +} + + +# Exception thrown when a parse error is encountered +class ParseErrorException(BaseException): + def __init__(self, error, token, expected, context): + self.error = error + self.token = token + self.expected = expected + self.context = context + + def __repr__(self): + return ( + "ParseErrorException(error %s, token %s, expected %s, context %s)" # pragma: no mutate + % ( # pragma: no mutate + self.error, + self.token, + self.expected, + self.context, + ) + ) + + def __eq__(self, other): + if other is None: + return False + return ( + self.error == other.error + and self.token == other.token + and self.expected == other.expected + and self.context == other.context + ) + + +# Reads a token, possibly of a certain value +def read_token(stream, value, context): + t = stream.pop() + if t is None: + raise ParseErrorException(ParseError.NO_TOKEN, None, None, context) + elif value is not None and t.value != value: + raise ParseErrorException(ParseError.WRONG_TOKEN, t, value, context) + return t + + +# Skip a note +def skip_note(stream, parent_context): + context = ParseContext(ParseTask.PARSE_NOTE, stream.peek(), parent_context) + read_token(stream, "StartNote", context) + while True: + t = read_token(stream, None, context) + # Don't allow StartNote in notes + if t.value in ["StartNote"]: + raise ParseErrorException(ParseError.FOUND_STARTNOTE, t, None, 
context) + # EndNote found, end things + elif t.value == "EndNote": + break + return None + + +# Clear notes +def clear_notes(stream, parent_context): + context = ParseContext(ParseTask.CLEAR_NOTES, stream.peek(), parent_context) + tokens = [] + token = stream.peek() + while token is not None: + # Found a note, skip it + if token.value == "StartNote": + skip_note(stream, context) + # EndNote found outside note + elif token.value == "EndNote": + raise ParseErrorException(ParseError.FOUND_ENDNOTE, token, None, context) + # Add the token if it's not note related + else: + tokens.append(stream.pop()) + token = stream.peek() + return tokens + + +# The recursive descent parser in a wrapper class for easy testing +class Parser: + # Parses a text node + def parse_text(self, stream, parent_context): + context = ParseContext(ParseTask.PARSE_TEXT, stream.peek(), parent_context) + buffer = "" + t = read_token(stream, "StartText", context) + # Parse following tokens + while True: + t = read_token(stream, None, context) + # Don't allow StartText in text + if t.value in ["StartText"]: + raise ParseErrorException(ParseError.FOUND_STARTTEXT, t, None, context) + # EndText found, end things + elif t.value == "EndText": + break + else: + buffer += t.value + " " + value = buffer[:-1] # Drop trailing space + return Text(value) + + # Parses a boolean node + def parse_bool(self, stream, parent_context): + context = ParseContext(ParseTask.PARSE_BOOL, stream.peek(), parent_context) + t = read_token(stream, None, context) + if t.value == "True": + return Bool(True) + elif t.value == "False": + return Bool(False) + else: + raise ParseErrorException(ParseError.NOT_BOOL, t, None, context) + + # Parses a reference node + def parse_reference(self, stream, parent_context): + context = ParseContext(ParseTask.PARSE_REFERENCE, stream.peek(), parent_context) + t = read_token(stream, None, context) + if t.value in reserved_names: + raise ParseErrorException(ParseError.RESERVED_NAME, t, None, context) + return Reference(t.value) + + # Parses a value + def parse_value(self, stream, parent_context): + context = ParseContext(ParseTask.PARSE_VALUE, stream.peek(), parent_context) + t = stream.peek() + if t is None: + raise ParseErrorException(ParseError.NO_TOKEN, None, None, context) + elif t.value in ["True", "False"]: + return self.parse_bool(stream, context) + elif t.value == "StartText": + return self.parse_text(stream, context) + elif t.value in reserved_names: + raise ParseErrorException(ParseError.RESERVED_NAME, t, None, context) + else: + return self.parse_reference(stream, context) + + # Parses a statement until a specified terminator + def parse_statement(self, stream, parent_context, terminator): + context = ParseContext(ParseTask.PARSE_STATEMENT, stream.peek(), parent_context) + peeked_subject = stream.peek() + context_subject = ParseContext(ParseTask.PARSE_SUBJECT, peeked_subject, context) + if peeked_subject is not None and peeked_subject.value == terminator: + raise ParseErrorException( + ParseError.FOUND_TERMINATOR, peeked_subject, None, context_subject + ) + subject = self.parse_value(stream, context_subject) + context_verb = ParseContext(ParseTask.PARSE_VERB, stream.peek(), context) + verb = read_token(stream, None, context_verb) + if verb.value == terminator: + return Statement(subject, None, []) + elif verb.value in reserved_names: + raise ParseErrorException( + ParseError.RESERVED_NAME, verb, None, context_verb + ) + arguments = [] + # Parse following arguments + while True: + peeked_arg = stream.peek() + 
context_arg = ParseContext(ParseTask.PARSE_ARGUMENT, peeked_arg, context) + if peeked_arg is not None and peeked_arg.value == terminator: + stream.pop() + return Statement(subject, verb.value, arguments) + arg = self.parse_value(stream, context_arg) # pragma: no mutate + arguments.append(arg) + + # Parses a set node + def parse_set(self, stream, parent_context): + context = ParseContext(ParseTask.PARSE_SET, stream.peek(), parent_context) + read_token(stream, "Set", context) + subcontext = ParseContext(ParseTask.PARSE_SUBJECT, stream.peek(), context) + subject = read_token(stream, None, subcontext) + if subject.value in reserved_names: + raise ParseErrorException( + ParseError.RESERVED_NAME, subject, None, subcontext + ) + read_token(stream, "To", context) + statement = self.parse_statement(stream, context, "EndSet") + return Set(subject.value, statement) + + # Parses a conditional node + def parse_conditional(self, stream, parent_context): + context = ParseContext( + ParseTask.PARSE_CONDITIONAL, stream.peek(), parent_context + ) + read_token(stream, "If", context) + test_context = ParseContext(ParseTask.PARSE_TEST, stream.peek(), context) + test = self.parse_statement(stream, test_context, "Then") + success_context = ParseContext(ParseTask.PARSE_SUCCESS, stream.peek(), context) + success = self.parse_statement(stream, success_context, "Else") + failure_context = ParseContext(ParseTask.PARSE_FAILURE, stream.peek(), context) + failure = self.parse_statement(stream, failure_context, "EndIf") + return Conditional(test, success, failure) + + # Parses a directive + def parse_directive(self, stream, parent_context): + context = ParseContext(ParseTask.PARSE_DIRECTIVE, stream.peek(), parent_context) + t = stream.peek() + if t is None: + raise ParseErrorException(ParseError.NO_TOKEN, None, None, context) + elif t.value == "Set": + return self.parse_set(stream, context) + elif t.value == "If": + return self.parse_conditional(stream, context) + else: + return self.parse_statement(stream, context, "Done") + + +# Parses a file +def parse_file(stream, parent_context): + context = ParseContext(ParseTask.PARSE_FILE, stream.peek(), parent_context) + directives = [] + next = stream.peek() + while next is not None: + dir = Parser().parse_directive(stream, context) # pragma: no mutate + directives.append(dir) + next = stream.peek() + return directives + + +# Parses tokens +def parse(tokens, context): + stream = TokenStream(tokens) + cleared = clear_notes(stream, context) + stream2 = TokenStream(cleared) + parsed = parse_file(stream2, context) + return parsed + + +# Formats a ParseContext +def format_context(context): + task = Message(ParseTaskMessageIDs[context.task], []) + if context.token: + file = context.token.location.file + line = context.token.location.line + offset = context.token.location.offset + return Message("ParseContextAt", [task, file, line, offset]) + else: + return Message("ParseContext", [task]) + + +# Formats a ParseErrorException +def format_exception(exception): + has_expected = exception.expected is not None + has_token = exception.token is not None + error = Message(ParseErrorMessageIDs[exception.error], []) + if has_expected: + args = [exception.expected] + else: + args = [error] + if has_token: + file = exception.token.location.file + line = exception.token.location.line + offset = exception.token.location.offset + args = args + [file, line, offset] + ids = [ + ["ParserError", "ParserErrorAt"], + ["ParserErrorExpected", "ParserErrorExpectedAt"], + ] + id = ids[has_expected][has_token] 
+ return Message(id, args) + + +# Formats a ParseErrorException and its contexts +def format_full_error(exception): + formatted = [format_exception(exception)] + context = exception.context + while context is not None: + formatted.append(format_context(context)) + context = context.parent + return formatted diff --git a/src/newlang/parse2/token.py b/src/newlang/parse2/token.py new file mode 100644 index 0000000..cdf9ec9 --- /dev/null +++ b/src/newlang/parse2/token.py @@ -0,0 +1,65 @@ +# SPDX-License-Identifier: LGPL-2.1-only +# Copyright 2022 Jookia + + +# Represents a token +class Token: + def __init__(self, value, location): + self.value = value + self.location = location + + def __repr__(self): + return "Token(value %s, location %s)" % ( # pragma: no mutate + repr(self.value), + repr(self.location), + ) + + def __eq__(self, other): + if other is None: + return False + return self.value == other.value and self.location == other.location + + +# Location of a token +class TokenLocation: + def __init__(self, line, offset, file): + self.line = line + self.offset = offset + self.file = file + + def __repr__(self): + return "TokenLocation(line %i, offset %i, file '%s')" % ( # pragma: no mutate + self.line, + self.offset, + self.file, + ) + + def __eq__(self, other): + if other is None: + return False + return ( + self.line == other.line + and self.offset == other.offset + and self.file == other.file + ) + + +# Represents a stream of consumable tokens +class TokenStream: + def __init__(self, tokens): + self.tokens = tokens + + def __repr__(self): + return "TokenStream(%s)" % (self.tokens) # pragma: no mutate + + def pop(self): + if self.tokens: + return self.tokens.pop(0) + else: + return None + + def peek(self): + if self.tokens: + return self.tokens[0] + else: + return None diff --git a/src/newlang/parse2/tokenize.py b/src/newlang/parse2/tokenize.py new file mode 100644 index 0000000..36f0cad --- /dev/null +++ b/src/newlang/parse2/tokenize.py @@ -0,0 +1,87 @@ +# SPDX-License-Identifier: LGPL-2.1-only +# Copyright 2022 Jookia + +from newlang.parse2.token import Token, TokenLocation + +# Valid space code points +spaces = [ + "\t", # U+0009 HORIZONTAL TAB + " ", # U+0020 SPACE +] + +# Valid new line tokens +newlines = [ + "\n", # U+000A LINE FEED + "\v", # U+000B VERTICAL TAB + "\f", # U+000C FORM FEED + "\r", # U+000D CARRIAGE RETURN + "\r\n", # U+000A U+000D CARRIAGE RETURN then LINE FEED + "\u0085", # U+0085 NEXT LINE + "\u2028", # U+2028 LINE SEPARATOR + "\u2029", # U+2029 PARAGRAPH SEPARATOR +] + + +# Checks whether a symbol is general whitespace +def is_whitespace(symbol): + return symbol in spaces or symbol in newlines + + +# Splits text in to a list of tokens and whitespace +def split_tokens(input): + if input == "": + return [] + tokens = [] + prev = input[0] + buffer = prev + location = TokenLocation(1, 1, "") + for curr in input[1:]: + curr_space = is_whitespace(curr) + prev_space = is_whitespace(prev) + switching = curr_space != prev_space + crlf = prev == "\r" and curr == "\n" + # Flush if we switch between whitespace and non-whitespace code points + # Flush if we're working with a stream of whitespace + # Don't flush if we're in the middle of a CR LF sequence + flush = switching or (curr_space and not crlf) + if flush: + tokens.append(Token(buffer, location)) + buffer = "" + buffer += curr + prev = curr + tokens.append(Token(buffer, location)) + return tokens + + +# Generates a list of tokens with locations +def locate_tokens(tokens, filename): + new_tokens = [] + line = 1 + offset 
= 1 + for t in tokens: + location = TokenLocation(line, offset, filename) + new = Token(t.value, location) + new_tokens.append(new) + if t.value in newlines: + line = line + 1 + offset = 1 + else: + offset += len(t.value) + return new_tokens + + +# Removes whitespace tokens +def strip_whitespace(tokens): + output = [] + for t in tokens: + if not is_whitespace(t.value): + output.append(t) + return output + + +# Tokenizes source code +def tokenize(source, filename): + split = split_tokens(source) + located = locate_tokens(split, filename) + stripped = strip_whitespace(located) + return stripped diff --git a/src/newlang/port.py b/src/newlang/port.py new file mode 100644 index 0000000..7747999 --- /dev/null +++ b/src/newlang/port.py @@ -0,0 +1,26 @@ +# SPDX-License-Identifier: LGPL-2.1-only +# Copyright 2022 Jookia + +import sys + +if sys.implementation.name in ["micropython", "circuitpython"]: + + def atexit_register_one(function): + sys.atexit(function) + + def atexit_unregister(): + sys.atexit(None) + +else: + import atexit + + registered = None + + def atexit_register_one(function): + global registered + registered = function + atexit.register(function) + + def atexit_unregister(): + global registered + atexit.unregister(registered) diff --git a/src/parse.py b/src/parse.py deleted file mode 100644 index 5e27787..0000000 --- a/src/parse.py +++ /dev/null @@ -1,403 +0,0 @@ -# SPDX-License-Identifier: LGPL-2.1-only -# Copyright 2021 Jookia - -from src import log -from src import ast_types - - -class ParseLocation: - def __init__(self, line, column, file): - self.line = line - self.column = column - self.file = file - - def __repr__(self): - return "ParseLocation(line %i, column %i, file '%s')" % ( - self.line, - self.column, - self.file, - ) - - def __eq__(self, other): - if other is None: - return False - return ( - self.line == other.line - and self.column == other.column - and self.file == other.file - ) - - -class ParseContext: - def __init__(self, parent, context, location): - self.parent = parent - self.context = context - self.location = location - - def __repr__(self): - return "ParseContext(parent %s, context '%s', location %s)" % ( - self.parent, - self.context, - self.location, - ) - - -class ParseError(BaseException): - def __init__(self, context, error): - self.context = context - self.error = error - - def __str__(self): - return "ParseError(context %s, error '%s')" % (self.context, self.error) - - -class Token: - def __init__(self, type, value, location): - self.type = type - self.value = value - self.location = location - - def __repr__(self): - return "Token(type %s, value '%s', location %s)" % ( - self.type, - self.value, - self.location, - ) - - -class Word: - def __init__(self, value, position, line, column): - self.value = value - self.position = position - self.line = line - self.column = column - - def __repr__(self): - return "Word(value '%s', position %i, line %i, column %i)" % ( - self.value, - self.position, - self.line, - self.column, - ) - - -def is_whitespace(symbol): - return symbol == " " or symbol == "\t" or symbol == "\n" - - -class Tokenizer: - def __init__(self, input, filename): - self.code = input - self.pos = -1 - self.pos_line = 1 - self.pos_column = 0 - self.filename = filename - - def next(self): - if self.pos == len(self.code) - 1: - log.log(log.LEXER, log.TRACE, "Reached end of file") - return None - else: - self.pos += 1 - symbol = self.code[self.pos] - if symbol == "\n": - self.pos_line += 1 - self.pos_column = 0 - else: - self.pos_column += 1 - 
pos_string = "line %i column %i" % (self.pos_line, self.pos_column) - symbol_string = symbol - if is_whitespace(symbol): - symbol_string = repr(symbol) - log.log( - log.LEXER, - log.TRACE, - "Read character %s at %s" % (symbol_string, pos_string), - ) - return symbol - - def read_word(self): - value = "" - symbol = self.next() - while is_whitespace(symbol): - log.log(log.LEXER, log.TRACE, "Skipping whitespace") - symbol = self.next() - if not symbol: - log.log(log.LEXER, log.TRACE, "No word to read") - return None - pos = self.pos - line = self.pos_line - column = self.pos_column - while symbol and not is_whitespace(symbol): - value += symbol - symbol = self.next() - word = Word(value, pos, line, column) - log.log(log.LEXER, log.TRACE, "Read %s" % (word)) - return word - - def skip_note(self, line, column): - log.log(log.LEXER, log.TRACE, "Skipping words until EndNote") - context = ParseContext( - None, "reading note", ParseLocation(line, column, self.filename) - ) - word = self.read_word() - while word and word.value != "EndNote": - word = self.read_word() - if not word: - raise ParseError(context, "Hit end of file before EndNote") - - def read_text(self, line, column): - log.log(log.LEXER, log.TRACE, "Reading characters until EndText") - context = ParseContext( - None, "reading text", ParseLocation(line, column, self.filename) - ) - start = self.pos - word = self.read_word() - while word and word.value != "EndText": - word = self.read_word() - if not word: - raise ParseError(context, "Hit end of file before EndText") - else: - return self.code[start + 1 : word.position - 1].strip("\n\t ") - - def tokenize(self): - keywords = [ - "Done", - "Set", - "To", - "EndSet", - "If", - "Then", - "Else", - "EndIf", - ] - tokens = [] - word = self.read_word() - while word: - token = word.value - line = word.line - column = word.column - context = ParseContext( - None, "reading word", ParseLocation(line, column, self.filename) - ) - if token == "StartNote": - self.skip_note(line, column) - word = self.read_word() - continue - elif token == "EndNote": - raise ParseError(context, "Found stray EndNote") - elif token == "StartText": - type = "text" - value = self.read_text(line, column) - elif token == "EndText": - raise ParseError(context, "Found stray EndText") - elif token in ["True", "False"]: - type = "bool" - value = token == "True" - elif token in keywords: - type = "keyword" - value = token - else: - type = "symbol" - value = token - tok = Token(type, value, ParseLocation(line, column, self.filename)) - log.log(log.LEXER, log.DEBUG, "Appending %s" % (tok)) - tokens.append(tok) - word = self.read_word() - log.log(log.LEXER, log.TRACE, "Done tokenizing, adding EOF") - tokens.append( - Token( - "EOF", - None, - ParseLocation(self.pos_line, self.pos_column, self.filename), - ) - ) - log.log(log.LEXER, log.DEBUG, "Tokens are %s" % (tokens)) - return tokens - - -class Parser: - def __init__(self, tokens): - self.tokens = tokens - self.pos = 0 - - def next(self): - token = self.tokens[self.pos] - if self.pos < (len(self.tokens) - 1): - self.pos += 1 - log.log(log.PARSER, log.TRACE, "Read %s" % (token)) - return token - - def peek(self): - token = self.tokens[self.pos] - log.log(log.PARSER, log.TRACE, "Peeked %s" % (token)) - return token - - def eof(self): - return self.tokens[self.pos].type == "EOF" - - def create_context(self, context, text): - token = self.tokens[self.pos] - return ParseContext(context, text, token.location) - - def parse_value(self, context, subject, type, value): - 
-        log.log(log.PARSER, log.TRACE, "Parsing value...")
-        if type == "symbol":
-            ret = ast_types.Reference(value)
-        elif type == "text":
-            ret = ast_types.Text(value)
-        elif type == "bool":
-            ret = ast_types.Bool(value)
-        else:
-            raise ParseError(context, "Unexpected value type %s" % (type))
-        log.log(log.PARSER, log.TRACE, "Parsed value, AST is %s" % (ret))
-        return ret
-
-    def parse_arguments(self, meta_context, terminator):
-        log.log(log.PARSER, log.TRACE, "Parsing arguments until '%s'..." % (terminator))
-        context = self.create_context(meta_context, "parsing statement arguments")
-        args = []
-        arg_num = 1
-        while True:
-            log.log(log.PARSER, log.TRACE, "Parsing argument %i..." % (arg_num))
-            arg_context = self.create_context(
-                context, "parsing argument %i" % (arg_num)
-            )
-            end_context = self.create_context(context, "parsing terminator")
-            token = self.next()
-            arg_num += 1
-            if token.type == "keyword":
-                if token.value == terminator:
-                    log.log(
-                        log.PARSER, log.TRACE, "Parsed arguments, AST is %s" % (args)
-                    )
-                    return args
-                else:
-                    raise ParseError(
-                        end_context, "Expected %s, got %s" % (terminator, token.value)
-                    )
-            else:
-                arg = self.parse_value(arg_context, "argument", token.type, token.value)
-                log.log(log.PARSER, log.TRACE, "Parsed argument %s" % (arg))
-                args.append(arg)
-
-    def parse_statement(self, context, terminator, type):
-        log.log(
-            log.PARSER,
-            log.TRACE,
-            "Parsing %s statement until '%s'..." % (type, terminator),
-        )
-        meta_context = self.create_context(context, "parsing %s statement" % (type))
-        log.log(log.PARSER, log.TRACE, "Parsing statement subject...")
-        context = self.create_context(meta_context, "parsing subject")
-        token = self.next()
-        subject = self.parse_value(context, "subject", token.type, token.value)
-        log.log(log.PARSER, log.TRACE, "Parsing statement verb...")
-        context = self.create_context(meta_context, "parsing statement verb")
-        end_context = self.create_context(context, "parsing terminator")
-        token = self.next()
-        if token.type == "keyword":
-            if token.value == terminator:
-                verb = None
-            else:
-                raise ParseError(
-                    end_context, "Expected %s, got %s" % (terminator, token.value)
-                )
-        elif token.type == "symbol":
-            verb = token.value
-        else:
-            raise ParseError(context, "Expected symbol, got %s" % (token.type))
-        log.log(log.PARSER, log.TRACE, "Parsing statement arguments...")
-        if verb:
-            arguments = self.parse_arguments(meta_context, terminator)
-        else:
-            arguments = []
-        statement = ast_types.Statement(subject, verb, arguments)
-        log.log(log.PARSER, log.DEBUG, "Parsed statement, AST is %s" % (statement))
-        return statement
-
-    def parse_set(self, context):
-        log.log(log.PARSER, log.TRACE, "Parsing set directive...")
-        meta_context = self.create_context(context, "parsing set directive")
-        self.next()  # Skip 'Set'
-        log.log(log.PARSER, log.TRACE, "Parsing set subject...")
-        context = self.create_context(meta_context, "parsing subject")
-        token = self.next()
-        if token.type != "symbol":
-            raise ParseError(context, "Expected symbol, got %s" % (token.type))
-        subject = token.value
-        log.log(log.PARSER, log.TRACE, "Parsing set separator...")
-        context = self.create_context(meta_context, "parsing set separator")
-        token = self.next()
-        if token.type != "keyword" or token.value != "To":
-            pretty_value = token.value
-            if token.type != "keyword":
-                pretty_value = "'%s'" % (pretty_value)
-            raise ParseError(context, "Expected To, got %s" % (pretty_value))
-        log.log(log.PARSER, log.TRACE, "Parsing set value...")
-        ast = self.parse_statement(meta_context, "EndSet", "set value")
-        set = ast_types.Set(subject, ast)
-        log.log(log.PARSER, log.DEBUG, "Parsed set, AST is %s" % (set))
-        return set
-
-    def parse_if(self, context):
-        log.log(log.PARSER, log.TRACE, "Parsing if directive...")
-        context = self.create_context(context, "parsing if directive")
-        self.next()  # Skip 'If'
-        test = self.parse_statement(context, "Then", "test condition")
-        log.log(log.PARSER, log.TRACE, "Parsing if success statement...")
-        success = self.parse_statement(context, "Else", "success")
-        log.log(log.PARSER, log.TRACE, "Parsing if failure statement...")
-        failure = self.parse_statement(context, "EndIf", "failure")
-        conditional = ast_types.Conditional(test, success, failure)
-        log.log(log.PARSER, log.DEBUG, "Parsed if, AST is %s" % (conditional))
-        return conditional
-
-    def parse_directive(self, context):
-        token = self.peek()
-        if token.type != "keyword" and token.type != "symbol" and token.type != "bool":
-            raise ParseError(
-                context, "Expected keyword, symbol or bool, got %s" % (token.type)
-            )
-        if token.type == "keyword":
-            if token.value == "Set":
-                return self.parse_set(context)
-            elif token.value == "If":
-                return self.parse_if(context)
-            else:
-                raise ParseError(context, "Unexpected keyword %s" % (token.value))
-        else:
-            ast = self.parse_statement(context, "Done", "command")
-            return ast
-
-    def parse_file(self):
-        log.log(log.PARSER, log.TRACE, "Parsing file...")
-        ast = []
-        while not self.eof():
-            log.log(log.PARSER, log.TRACE, "Parsing next directive in file...")
-            ast.append(self.parse_directive(None))
-        log.log(log.PARSER, log.DEBUG, "Parsed file, AST is %s" % (ast))
-        return ast
-
-
-def parse_file(filename):
-    try:
-        code = open(filename, encoding="utf-8").read()
-    except UnicodeError:
-        print("Parse error: %s is not valid UTF-8" % (filename))
-        return None
-    try:
-        tokenizer = Tokenizer(code, filename)
-        tokens = tokenizer.tokenize()
-        parser = Parser(tokens)
-        return parser.parse_file()
-    except ParseError as e:
-        print("Parse error: %s" % (e.error))
-        context = e.context
-        while context:
-            line = context.location.line
-            column = context.location.column
-            print("While %s at line %i column %i" % (context.context, line, column))
-            context = context.parent
-        print("While parsing file %s" % (filename))
-        return None
diff --git a/src/parse2/parse.py b/src/parse2/parse.py
deleted file mode 100644
index 70f867e..0000000
--- a/src/parse2/parse.py
+++ /dev/null
@@ -1,400 +0,0 @@
-# SPDX-License-Identifier: LGPL-2.1-only
-# Copyright 2022 Jookia
-
-from src.i18n import Message
-from src.ast_types import Bool, Conditional, Reference, Set, Statement, Text
-from src.parse2.token import TokenStream
-
-
-# Words that can't be used as references
-# This should include keywords and literals
-reserved_names = [
-    "Done",
-    "Set",
-    "To",
-    "EndSet",
-    "If",
-    "Then",
-    "Else",
-    "EndIf",
-    "StartNote",
-    "EndNote",
-    "StartText",
-    "EndText",
-    "True",
-    "False",
-]
-
-
-# Tasks that happen during parsing
-class ParseTask:
-    TEST_TASK = 1  # pragma: no mutate
-    PARSE_NOTE = 2  # pragma: no mutate
-    CLEAR_NOTES = 3  # pragma: no mutate
-    PARSE_TEXT = 4  # pragma: no mutate
-    PARSE_BOOL = 5  # pragma: no mutate
-    PARSE_REFERENCE = 6  # pragma: no mutate
-    PARSE_VALUE = 7  # pragma: no mutate
-    PARSE_STATEMENT = 8  # pragma: no mutate
-    PARSE_SUBJECT = 9  # pragma: no mutate
-    PARSE_VERB = 10  # pragma: no mutate
-    PARSE_ARGUMENT = 11  # pragma: no mutate
-    PARSE_SET = 12  # pragma: no mutate
-    PARSE_CONDITIONAL = 13  # pragma: no mutate
-    PARSE_TEST = 14  # pragma: no mutate
-    PARSE_SUCCESS = 15  # pragma: no mutate
-    PARSE_FAILURE = 16  # pragma: no mutate
-    PARSE_DIRECTIVE = 17  # pragma: no mutate
-    PARSE_FILE = 18  # pragma: no mutate
-    MAX = 19  # pragma: no mutate
-
-    # Returns a list of all tasks
-    def list():
-        return list(range(1, ParseTask.MAX))  # pragma: no mutate
-
-
-# Message identifiers for ParseTasks
-ParseTaskMessageIDs = {
-    ParseTask.TEST_TASK: "ParseTaskTestTask",
-    ParseTask.PARSE_NOTE: "ParseTaskNote",
-    ParseTask.CLEAR_NOTES: "ParseTaskClearNotes",
-    ParseTask.PARSE_TEXT: "ParseTaskText",
-    ParseTask.PARSE_BOOL: "ParseTaskBool",
-    ParseTask.PARSE_REFERENCE: "ParseTaskReference",
-    ParseTask.PARSE_VALUE: "ParseTaskValue",
-    ParseTask.PARSE_STATEMENT: "ParseTaskStatement",
-    ParseTask.PARSE_SUBJECT: "ParseTaskSubject",
-    ParseTask.PARSE_VERB: "ParseTaskVerb",
-    ParseTask.PARSE_ARGUMENT: "ParseTaskArgument",
-    ParseTask.PARSE_SET: "ParseTaskSet",
-    ParseTask.PARSE_CONDITIONAL: "ParseTaskConditional",
-    ParseTask.PARSE_TEST: "ParseTaskTest",
-    ParseTask.PARSE_SUCCESS: "ParseTaskSuccess",
-    ParseTask.PARSE_FAILURE: "ParseTaskFailure",
-    ParseTask.PARSE_DIRECTIVE: "ParseTaskDirective",
-    ParseTask.PARSE_FILE: "ParseTaskFile",
-}
-
-
-# Context used for parse error exception
-class ParseContext:
-    def __init__(self, task, token, parent):
-        self.task = task
-        self.token = token
-        self.parent = parent
-
-    def __repr__(self):
-        return (
-            "ParseContext(task %s, token %s, parent\n %s)"  # pragma: no mutate
-            % (  # pragma: no mutate
-                self.task,
-                self.token,
-                self.parent,
-            )
-        )
-
-    def __eq__(self, other):
-        if other is None:
-            return False
-        return (
-            self.task == other.task
-            and self.token == other.token
-            and self.parent == other.parent
-        )
-
-
-# Errors that can happen when parsing
-class ParseError:
-    TEST_ERROR = 1  # pragma: no mutate
-    NO_TOKEN = 2  # pragma: no mutate
-    WRONG_TOKEN = 3  # pragma: no mutate
-    FOUND_STARTTEXT = 4  # pragma: no mutate
-    FOUND_STARTNOTE = 5  # pragma: no mutate
-    NOT_BOOL = 6  # pragma: no mutate
-    FOUND_ENDNOTE = 7  # pragma: no mutate
-    RESERVED_NAME = 8  # pragma: no mutate
-    FOUND_TERMINATOR = 9  # pragma: no mutate
-    MAX = 10  # pragma: no mutate
-
-    # Returns a list of all errors
-    def list():
-        return list(range(1, ParseError.MAX))  # pragma: no mutate
-
-
-# Message identifiers for ParseErrors
-ParseErrorMessageIDs = {
-    ParseError.TEST_ERROR: "ParseErrorTestError",
-    ParseError.NO_TOKEN: "ParseErrorNoToken",
-    ParseError.WRONG_TOKEN: "ParseErrorWrongToken",
-    ParseError.FOUND_STARTTEXT: "ParseErrorFoundStartText",
-    ParseError.FOUND_STARTNOTE: "ParseErrorFoundStartNote",
-    ParseError.NOT_BOOL: "ParseErrorNotBool",
-    ParseError.FOUND_ENDNOTE: "ParseErrorFoundEndNote",
-    ParseError.RESERVED_NAME: "ParseErrorReservedName",
-    ParseError.FOUND_TERMINATOR: "ParseErrorFoundTerminator",
-}
-
-
-# Exception thrown when a parse error is encountered
-class ParseErrorException(BaseException):
-    def __init__(self, error, token, expected, context):
-        self.error = error
-        self.token = token
-        self.expected = expected
-        self.context = context
-
-    def __repr__(self):
-        return (
-            "ParseErrorException(error %s, token %s, expected %s, context %s)"  # pragma: no mutate
-            % (  # pragma: no mutate
-                self.error,
-                self.token,
-                self.expected,
-                self.context,
-            )
-        )
-
-    def __eq__(self, other):
-        if other is None:
-            return False
-        return (
-            self.error == other.error
-            and self.token == other.token
-            and self.expected == other.expected
-            and self.context == other.context
-        )
-
-
-# Reads a token, possibly of a certain value
-def read_token(stream, value, context):
-    t = stream.pop()
-    if t is None:
-        raise ParseErrorException(ParseError.NO_TOKEN, None, None, context)
-    elif value is not None and t.value != value:
-        raise ParseErrorException(ParseError.WRONG_TOKEN, t, value, context)
-    return t
-
-
-# Skip a note
-def skip_note(stream, parent_context):
-    context = ParseContext(ParseTask.PARSE_NOTE, stream.peek(), parent_context)
-    read_token(stream, "StartNote", context)
-    while True:
-        t = read_token(stream, None, context)
-        # Don't allow StartNote in notes
-        if t.value in ["StartNote"]:
-            raise ParseErrorException(ParseError.FOUND_STARTNOTE, t, None, context)
-        # EndNote found, end things
-        elif t.value == "EndNote":
-            break
-    return None
-
-
-# Clear notes
-def clear_notes(stream, parent_context):
-    context = ParseContext(ParseTask.CLEAR_NOTES, stream.peek(), parent_context)
-    tokens = []
-    token = stream.peek()
-    while token is not None:
-        # Found a note, skip it
-        if token.value == "StartNote":
-            skip_note(stream, context)
-        # EndNote found outside note
-        elif token.value == "EndNote":
-            raise ParseErrorException(ParseError.FOUND_ENDNOTE, token, None, context)
-        # Add the token if it's not note related
-        else:
-            tokens.append(stream.pop())
-        token = stream.peek()
-    return tokens
-
-
-# The recursive descent parser in a wrapper class for easy testing
-class Parser:
-    # Parses a text node
-    def parse_text(self, stream, parent_context):
-        context = ParseContext(ParseTask.PARSE_TEXT, stream.peek(), parent_context)
-        buffer = ""
-        t = read_token(stream, "StartText", context)
-        # Parse following tokens
-        while True:
-            t = read_token(stream, None, context)
-            # Don't allow StartText in text
-            if t.value in ["StartText"]:
-                raise ParseErrorException(ParseError.FOUND_STARTTEXT, t, None, context)
-            # EndText found, end things
-            elif t.value == "EndText":
-                break
-            else:
-                buffer += t.value + " "
-        value = buffer[:-1]  # Drop trailing space
-        return Text(value)
-
-    # Parses a boolean node
-    def parse_bool(self, stream, parent_context):
-        context = ParseContext(ParseTask.PARSE_BOOL, stream.peek(), parent_context)
-        t = read_token(stream, None, context)
-        if t.value == "True":
-            return Bool(True)
-        elif t.value == "False":
-            return Bool(False)
-        else:
-            raise ParseErrorException(ParseError.NOT_BOOL, t, None, context)
-
-    # Parses a reference node
-    def parse_reference(self, stream, parent_context):
-        context = ParseContext(ParseTask.PARSE_REFERENCE, stream.peek(), parent_context)
-        t = read_token(stream, None, context)
-        if t.value in reserved_names:
-            raise ParseErrorException(ParseError.RESERVED_NAME, t, None, context)
-        return Reference(t.value)
-
-    # Parses a value
-    def parse_value(self, stream, parent_context):
-        context = ParseContext(ParseTask.PARSE_VALUE, stream.peek(), parent_context)
-        t = stream.peek()
-        if t is None:
-            raise ParseErrorException(ParseError.NO_TOKEN, None, None, context)
-        elif t.value in ["True", "False"]:
-            return self.parse_bool(stream, context)
-        elif t.value == "StartText":
-            return self.parse_text(stream, context)
-        elif t.value in reserved_names:
-            raise ParseErrorException(ParseError.RESERVED_NAME, t, None, context)
-        else:
-            return self.parse_reference(stream, context)
-
-    # Parses a statement until a specified terminator
-    def parse_statement(self, stream, parent_context, terminator):
-        context = ParseContext(ParseTask.PARSE_STATEMENT, stream.peek(), parent_context)
-        peeked_subject = stream.peek()
-        context_subject = ParseContext(ParseTask.PARSE_SUBJECT, peeked_subject, context)
-        if peeked_subject is not None and peeked_subject.value == terminator:
-            raise ParseErrorException(
-                ParseError.FOUND_TERMINATOR, peeked_subject, None, context_subject
-            )
-        subject = self.parse_value(stream, context_subject)
-        context_verb = ParseContext(ParseTask.PARSE_VERB, stream.peek(), context)
-        verb = read_token(stream, None, context_verb)
-        if verb.value == terminator:
-            return Statement(subject, None, [])
-        elif verb.value in reserved_names:
-            raise ParseErrorException(
-                ParseError.RESERVED_NAME, verb, None, context_verb
-            )
-        arguments = []
-        # Parse following arguments
-        while True:
-            peeked_arg = stream.peek()
-            context_arg = ParseContext(ParseTask.PARSE_ARGUMENT, peeked_arg, context)
-            if peeked_arg is not None and peeked_arg.value == terminator:
-                stream.pop()
-                return Statement(subject, verb.value, arguments)
-            arg = self.parse_value(stream, context_arg)  # pragma: no mutate
-            arguments.append(arg)
-
-    # Parses a set node
-    def parse_set(self, stream, parent_context):
-        context = ParseContext(ParseTask.PARSE_SET, stream.peek(), parent_context)
-        read_token(stream, "Set", context)
-        subcontext = ParseContext(ParseTask.PARSE_SUBJECT, stream.peek(), context)
-        subject = read_token(stream, None, subcontext)
-        if subject.value in reserved_names:
-            raise ParseErrorException(
-                ParseError.RESERVED_NAME, subject, None, subcontext
-            )
-        read_token(stream, "To", context)
-        statement = self.parse_statement(stream, context, "EndSet")
-        return Set(subject.value, statement)
-
-    # Parses a conditional node
-    def parse_conditional(self, stream, parent_context):
-        context = ParseContext(
-            ParseTask.PARSE_CONDITIONAL, stream.peek(), parent_context
-        )
-        read_token(stream, "If", context)
-        test_context = ParseContext(ParseTask.PARSE_TEST, stream.peek(), context)
-        test = self.parse_statement(stream, test_context, "Then")
-        success_context = ParseContext(ParseTask.PARSE_SUCCESS, stream.peek(), context)
-        success = self.parse_statement(stream, success_context, "Else")
-        failure_context = ParseContext(ParseTask.PARSE_FAILURE, stream.peek(), context)
-        failure = self.parse_statement(stream, failure_context, "EndIf")
-        return Conditional(test, success, failure)
-
-    # Parses a directive
-    def parse_directive(self, stream, parent_context):
-        context = ParseContext(ParseTask.PARSE_DIRECTIVE, stream.peek(), parent_context)
-        t = stream.peek()
-        if t is None:
-            raise ParseErrorException(ParseError.NO_TOKEN, None, None, context)
-        elif t.value == "Set":
-            return self.parse_set(stream, context)
-        elif t.value == "If":
-            return self.parse_conditional(stream, context)
-        else:
-            return self.parse_statement(stream, context, "Done")
-
-
-# Parses a file
-def parse_file(stream, parent_context):
-    context = ParseContext(ParseTask.PARSE_FILE, stream.peek(), parent_context)
-    directives = []
-    next = stream.peek()
-    while next is not None:
-        dir = Parser().parse_directive(stream, context)  # pragma: no mutate
-        directives.append(dir)
-        next = stream.peek()
-    return directives
-
-
-# Parses tokens
-def parse(tokens, context):
-    stream = TokenStream(tokens)
-    cleared = clear_notes(stream, context)
-    stream2 = TokenStream(cleared)
-    parsed = parse_file(stream2, context)
-    return parsed
-
-
-# Formats a ParseContext
-def format_context(context):
-    task = Message(ParseTaskMessageIDs[context.task], [])
-    if context.token:
-        file = context.token.location.file
-        line = context.token.location.line
-        offset = context.token.location.offset
-        return Message("ParseContextAt", [task, file, line, offset])
-    else:
-        return Message("ParseContext", [task])
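The parse2 module deleted above replaced printed diagnostics with structured
ones: parse() raises ParseErrorException, and format_full_error() turns the
exception and its ParseContext chain into i18n Message values. A minimal
sketch of driving that pipeline as it stood before this commit; the source
string and file name are hypothetical:

    from src.parse2 import tokenize
    from src.parse2.parse import ParseErrorException, format_full_error, parse

    source = "Set greeting To StartText Hello EndText EndSet"  # hypothetical
    tokens = tokenize.tokenize(source, "hello.txt")
    try:
        ast = parse(tokens, None)  # no parent context at the top level
        print(ast)  # a Set node wrapping a Statement with a Text subject
    except ParseErrorException as e:
        for message in format_full_error(e):
            print(message)  # Message values, ready for translation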
Message("ParseContext", [task]) - - -# Formats a ParseErrorException -def format_exception(exception): - has_expected = exception.expected is not None - has_token = exception.token is not None - error = Message(ParseErrorMessageIDs[exception.error], []) - if has_expected: - args = [exception.expected] - else: - args = [error] - if has_token: - file = exception.token.location.file - line = exception.token.location.line - offset = exception.token.location.offset - args = args + [file, line, offset] - ids = [ - ["ParserError", "ParserErrorAt"], - ["ParserErrorExpected", "ParserErrorExpectedAt"], - ] - id = ids[has_expected][has_token] - return Message(id, args) - - -# Formats a ParseErrorException and its contexts -def format_full_error(exception): - formatted = [format_exception(exception)] - context = exception.context - while context is not None: - formatted.append(format_context(context)) - context = context.parent - return formatted diff --git a/src/parse2/token.py b/src/parse2/token.py deleted file mode 100644 index cdf9ec9..0000000 --- a/src/parse2/token.py +++ /dev/null @@ -1,65 +0,0 @@ -# SPDX-License-Identifier: LGPL-2.1-only -# Copyright 2022 Jookia - - -# Represents a token -class Token: - def __init__(self, value, location): - self.value = value - self.location = location - - def __repr__(self): - return "Token(value %s, location %s)" % ( # pragma: no mutate - repr(self.value), - repr(self.location), - ) - - def __eq__(self, other): - if other is None: - return False - return self.value == other.value and self.location == other.location - - -# Location of a token -class TokenLocation: - def __init__(self, line, offset, file): - self.line = line - self.offset = offset - self.file = file - - def __repr__(self): - return "TokenLocation(line %i, offset %i, file '%s')" % ( # pragma: no mutate - self.line, - self.offset, - self.file, - ) - - def __eq__(self, other): - if other is None: - return False - return ( - self.line == other.line - and self.offset == other.offset - and self.file == other.file - ) - - -# Represents a stream of consumable tokens -class TokenStream: - def __init__(self, tokens): - self.tokens = tokens - - def __repr__(self): - return "TokenStream(%s)" % (self.tokens) # pragma: no mutate - - def pop(self): - if self.tokens: - return self.tokens.pop(0) - else: - return None - - def peek(self): - if self.tokens: - return self.tokens[0] - else: - return None diff --git a/src/parse2/tokenize.py b/src/parse2/tokenize.py deleted file mode 100644 index 1549701..0000000 --- a/src/parse2/tokenize.py +++ /dev/null @@ -1,87 +0,0 @@ -# SPDX-License-Identifier: LGPL-2.1-only -# Copyright 2022 Jookia - -from src.parse2.token import Token, TokenLocation - -# Valid space code points -spaces = [ - "\t", # U+0009 HORIZONTAL TAB - " ", # U+0020 SPACE -] - -# Valid new line tokens -newlines = [ - "\n", # U+000A LINE FEED - "\v", # U+000B VERTICAL TAB - "\f", # U+000C FORM FEED - "\r", # U+000D CARRIAGE RETURN - "\r\n", # U+000A U+000D CARRIAGE RETURN then LINE FEED - "\u0085", # U+0085 NEXT LINE - "\u2028", # U+2028 LINE SEPARATOR - "\u2029", # U+2029 PARAGRAPH SEPARATOR -] - - -# Checks whether a symbol is general whitespace -def is_whitespace(symbol): - return symbol in spaces or symbol in newlines - - -# Splits text in to a list of tokens and whitespace -def split_tokens(input): - if input == "": - return [] - tokens = [] - prev = input[0] - buffer = prev - location = TokenLocation(1, 1, "") - for curr in input[1:]: - curr_space = is_whitespace(curr) - prev_space = 
diff --git a/src/parse2/tokenize.py b/src/parse2/tokenize.py
deleted file mode 100644
index 1549701..0000000
--- a/src/parse2/tokenize.py
+++ /dev/null
@@ -1,87 +0,0 @@
-# SPDX-License-Identifier: LGPL-2.1-only
-# Copyright 2022 Jookia
-
-from src.parse2.token import Token, TokenLocation
-
-# Valid space code points
-spaces = [
-    "\t",  # U+0009 HORIZONTAL TAB
-    " ",  # U+0020 SPACE
-]
-
-# Valid new line tokens
-newlines = [
-    "\n",  # U+000A LINE FEED
-    "\v",  # U+000B VERTICAL TAB
-    "\f",  # U+000C FORM FEED
-    "\r",  # U+000D CARRIAGE RETURN
-    "\r\n",  # U+000D U+000A CARRIAGE RETURN then LINE FEED
-    "\u0085",  # U+0085 NEXT LINE
-    "\u2028",  # U+2028 LINE SEPARATOR
-    "\u2029",  # U+2029 PARAGRAPH SEPARATOR
-]
-
-
-# Checks whether a symbol is general whitespace
-def is_whitespace(symbol):
-    return symbol in spaces or symbol in newlines
-
-
-# Splits text into a list of tokens and whitespace
-def split_tokens(input):
-    if input == "":
-        return []
-    tokens = []
-    prev = input[0]
-    buffer = prev
-    location = TokenLocation(1, 1, "")
-    for curr in input[1:]:
-        curr_space = is_whitespace(curr)
-        prev_space = is_whitespace(prev)
-        switching = curr_space != prev_space
-        crlf = prev == "\r" and curr == "\n"
-        # Flush if we switch between whitespace and non-whitespace code points
-        # Flush if we're working with a stream of whitespace
-        # Don't flush if we're in the middle of a CR LF sequence
-        flush = switching or (curr_space and not crlf)
-        if flush:
-            tokens.append(Token(buffer, location))
-            buffer = ""
-        buffer += curr
-        prev = curr
-    tokens.append(Token(buffer, location))
-    return tokens
-
-
-# Generates a list of tokens with locations
-def locate_tokens(tokens, filename):
-    new_tokens = []
-    line = 1
-    offset = 1
-    for t in tokens:
-        location = TokenLocation(line, offset, filename)
-        new = Token(t.value, location)
-        new_tokens.append(new)
-        if t.value in newlines:
-            line = line + 1
-            offset = 1
-        else:
-            offset += len(t.value)
-    return new_tokens
-
-
-# Removes whitespace tokens
-def strip_whitespace(tokens):
-    output = []
-    for t in tokens:
-        if not is_whitespace(t.value):
-            output.append(t)
-    return output
-
-
-# Tokenizes source code
-def tokenize(source, filename):
-    split = split_tokens(source)
-    located = locate_tokens(split, filename)
-    stripped = strip_whitespace(located)
-    return stripped
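The tokenizer deleted above works in three passes: split_tokens() splits the
source into alternating word and whitespace tokens (keeping a CR LF pair
together), locate_tokens() stamps each token with its line and offset, and
strip_whitespace() drops the whitespace tokens. A small sketch of the net
effect, with a hypothetical source string and file name:

    from src.parse2 import tokenize

    tokens = tokenize.tokenize("Set x To True EndSet", "hello.txt")
    print([t.value for t in tokens])
    # ['Set', 'x', 'To', 'True', 'EndSet']
    print(tokens[1].location)
    # TokenLocation(line 1, offset 5, file 'hello.txt')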
diff --git a/src/port.py b/src/port.py
deleted file mode 100644
index 7747999..0000000
--- a/src/port.py
+++ /dev/null
@@ -1,26 +0,0 @@
-# SPDX-License-Identifier: LGPL-2.1-only
-# Copyright 2022 Jookia
-
-import sys
-
-if sys.implementation.name in ["micropython", "circuitpython"]:
-
-    def atexit_register_one(function):
-        sys.atexit(function)
-
-    def atexit_unregister():
-        sys.atexit(None)
-
-else:
-    import atexit
-
-    registered = None
-
-    def atexit_register_one(function):
-        global registered
-        registered = function
-        atexit.register(function)
-
-    def atexit_unregister():
-        global registered
-        atexit.unregister(registered)
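port.py, deleted above, papered over the difference between CPython's atexit
module and the sys.atexit() hook on MicroPython and CircuitPython, tracking at
most one callback at a time. Roughly how it was meant to be driven:

    from src import port

    def goodbye():
        print("Exiting")

    port.atexit_register_one(goodbye)  # run goodbye() at interpreter exit
    port.atexit_unregister()           # cancel it again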
diff --git a/tests/__init__.py b/tests/__init__.py
index 37db15a..da174bf 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -1 +1 @@
-# Dummy file to help pytest find the src module
+# Dummy file to help pytest find the tests module
diff --git a/tests/parse2/templates.py b/tests/parse2/templates.py
index 89dde15..3a8b98b 100644
--- a/tests/parse2/templates.py
+++ b/tests/parse2/templates.py
@@ -3,8 +3,8 @@
 
 from hypothesis.strategies import composite, integers
 
-from src.parse2.parse import ParseErrorException
-from src.parse2.token import TokenStream
+from newlang.parse2.parse import ParseErrorException
+from newlang.parse2.token import TokenStream
 
 from tests.parse2.test_token import static_token_by_value
diff --git a/tests/parse2/test_bool.py b/tests/parse2/test_bool.py
index 22e32c6..533bd3a 100644
--- a/tests/parse2/test_bool.py
+++ b/tests/parse2/test_bool.py
@@ -4,8 +4,8 @@
 from hypothesis import assume, given
 from hypothesis.strategies import composite
 
-from src.ast_types import Bool
-from src.parse2.parse import (
+from newlang.ast_types import Bool
+from newlang.parse2.parse import (
     ParseContext,
     ParseError,
     ParseErrorException,
diff --git a/tests/parse2/test_clear_notes.py b/tests/parse2/test_clear_notes.py
index b3c7a4e..e5d11e6 100644
--- a/tests/parse2/test_clear_notes.py
+++ b/tests/parse2/test_clear_notes.py
@@ -16,14 +16,14 @@
 from hypothesis import given
 from hypothesis.strategies import composite, just, lists, one_of
 
-from src.parse2.parse import (
+from newlang.parse2.parse import (
     clear_notes,
     ParseContext,
     ParseError,
     ParseErrorException,
     ParseTask,
 )
-from src.parse2.token import TokenStream
+from newlang.parse2.token import TokenStream
 from tests.parse2.templates import template_test_invalid
 from tests.parse2.test_error import static_parse_context
 from tests.parse2.test_note import (
diff --git a/tests/parse2/test_conditional.py b/tests/parse2/test_conditional.py
index dd43e93..5b351fe 100644
--- a/tests/parse2/test_conditional.py
+++ b/tests/parse2/test_conditional.py
@@ -39,8 +39,8 @@
 from hypothesis import assume, given
 from hypothesis.strategies import composite, data, integers, just, one_of
 
-from src.ast_types import Conditional
-from src.parse2.parse import (
+from newlang.ast_types import Conditional
+from newlang.parse2.parse import (
     ParseContext,
     ParseError,
     ParseErrorException,
diff --git a/tests/parse2/test_directive.py b/tests/parse2/test_directive.py
index aaec738..e87f3e5 100644
--- a/tests/parse2/test_directive.py
+++ b/tests/parse2/test_directive.py
@@ -26,7 +26,7 @@
 from hypothesis import given
 from hypothesis.strategies import composite, just, one_of
 
-from src.parse2.parse import (
+from newlang.parse2.parse import (
     ParseContext,
     ParseError,
     ParseErrorException,
@@ -42,7 +42,7 @@
     static_token_by_value,
 )
 from tests.parse2.test_error import static_parse_context
-from src.ast_types import Bool, Statement
+from newlang.ast_types import Bool, Statement
 
 #
 # Helper functions
diff --git a/tests/parse2/test_error.py b/tests/parse2/test_error.py
index 5266948..86d6646 100644
--- a/tests/parse2/test_error.py
+++ b/tests/parse2/test_error.py
@@ -27,8 +27,8 @@
 from hypothesis import given
 from hypothesis.strategies import composite, integers, sampled_from, text
 
-from src.i18n import Message
-from src.parse2.parse import (
+from newlang.i18n import Message
+from newlang.parse2.parse import (
     ParseContext,
     ParseError,
     ParseErrorException,
diff --git a/tests/parse2/test_file.py b/tests/parse2/test_file.py
index ccc3e7c..371ce7e 100644
--- a/tests/parse2/test_file.py
+++ b/tests/parse2/test_file.py
@@ -15,8 +15,8 @@
 from hypothesis import given
 from hypothesis.strategies import composite, just, lists
 
-from src.parse2.token import TokenStream
-from src.parse2.parse import (
+from newlang.parse2.token import TokenStream
+from newlang.parse2.parse import (
     ParseContext,
     ParseTask,
     parse_file,
diff --git a/tests/parse2/test_note.py b/tests/parse2/test_note.py
index c971249..77114e9 100644
--- a/tests/parse2/test_note.py
+++ b/tests/parse2/test_note.py
@@ -21,7 +21,7 @@
     lists,
 )
 
-from src.parse2.parse import (
+from newlang.parse2.parse import (
     skip_note,
     ParseContext,
     ParseError,
diff --git a/tests/parse2/test_parse.py b/tests/parse2/test_parse.py
index c2d3a59..edb1223 100644
--- a/tests/parse2/test_parse.py
+++ b/tests/parse2/test_parse.py
@@ -4,13 +4,13 @@
 from hypothesis import given
 from hypothesis.strategies import lists
 
-from src.parse2.parse import (
+from newlang.parse2.parse import (
     clear_notes,
     ParseErrorException,
     parse,
     parse_file,
 )
-from src.parse2.token import TokenStream
+from newlang.parse2.token import TokenStream
 from tests.parse2.test_token import draw_token_random
 from tests.parse2.test_error import draw_parse_context
diff --git a/tests/parse2/test_reference.py b/tests/parse2/test_reference.py
index d388da8..148e8ab 100644
--- a/tests/parse2/test_reference.py
+++ b/tests/parse2/test_reference.py
@@ -4,8 +4,8 @@
 from hypothesis import given
 from hypothesis.strategies import composite
 
-from src.ast_types import Reference
-from src.parse2.parse import (
+from newlang.ast_types import Reference
+from newlang.parse2.parse import (
     ParseContext,
     ParseError,
     ParseErrorException,
diff --git a/tests/parse2/test_set.py b/tests/parse2/test_set.py
index 3a74d4b..9b9531c 100644
--- a/tests/parse2/test_set.py
+++ b/tests/parse2/test_set.py
@@ -36,8 +36,8 @@
 from hypothesis import assume, given
 from hypothesis.strategies import composite, data, integers
 
-from src.ast_types import Set
-from src.parse2.parse import (
+from newlang.ast_types import Set
+from newlang.parse2.parse import (
     ParseContext,
     ParseError,
     ParseErrorException,
diff --git a/tests/parse2/test_statement.py b/tests/parse2/test_statement.py
index e89621b..45cdf96 100644
--- a/tests/parse2/test_statement.py
+++ b/tests/parse2/test_statement.py
@@ -6,8 +6,8 @@
 from hypothesis import assume, given
 from hypothesis.strategies import composite, integers, lists
 
-from src.ast_types import Statement
-from src.parse2.parse import (
+from newlang.ast_types import Statement
+from newlang.parse2.parse import (
     ParseContext,
     ParseError,
     ParseErrorException,
diff --git a/tests/parse2/test_text.py b/tests/parse2/test_text.py
index 17ef21a..4648883 100644
--- a/tests/parse2/test_text.py
+++ b/tests/parse2/test_text.py
@@ -7,8 +7,8 @@
     lists,
 )
 
-from src.ast_types import Text
-from src.parse2.parse import (
+from newlang.ast_types import Text
+from newlang.parse2.parse import (
     ParseContext,
     ParseError,
     ParseErrorException,
diff --git a/tests/parse2/test_token.py b/tests/parse2/test_token.py
index 3e2de0e..e6f9865 100644
--- a/tests/parse2/test_token.py
+++ b/tests/parse2/test_token.py
@@ -12,7 +12,7 @@
     text,
 )
 
-from src.parse2.token import Token, TokenLocation, TokenStream
+from newlang.parse2.token import Token, TokenLocation, TokenStream
 from tests.templates import template_test_structure
 
 # Keywords recognized by the language
diff --git a/tests/parse2/test_tokenize.py b/tests/parse2/test_tokenize.py
index 43b5541..2cb51e3 100644
--- a/tests/parse2/test_tokenize.py
+++ b/tests/parse2/test_tokenize.py
@@ -13,8 +13,8 @@
     text,
 )
 
-from src.parse2 import tokenize
-from src.parse2.token import Token, TokenLocation
+from newlang.parse2 import tokenize
+from newlang.parse2.token import Token, TokenLocation
 
 from tests.parse2.test_token import static_token_location
diff --git a/tests/parse2/test_value.py b/tests/parse2/test_value.py
index 2b38651..c4863b0 100644
--- a/tests/parse2/test_value.py
+++ b/tests/parse2/test_value.py
@@ -6,7 +6,7 @@
 from hypothesis import assume, given
 from hypothesis.strategies import composite, just, one_of
 
-from src.parse2.parse import (
+from newlang.parse2.parse import (
     ParseContext,
     ParseError,
     ParseErrorException,
diff --git a/tests/test_i18n.py b/tests/test_i18n.py
index 5356fc0..1949ea3 100644
--- a/tests/test_i18n.py
+++ b/tests/test_i18n.py
@@ -15,7 +15,7 @@
     text,
 )
 
-from src.i18n import Message
+from newlang.i18n import Message
 from tests.templates import template_test_structure
diff --git a/tests/test_parse.py b/tests/test_parse.py
index d6fe1fb..4e70db5 100644
--- a/tests/test_parse.py
+++ b/tests/test_parse.py
@@ -14,7 +14,7 @@
     just,
 )
 
-from src import parse
+from newlang import parse
 
 # Whitespace that separates lexer words
 lexer_whitespace = "\n\t "
diff --git a/tests/test_parse_regress.py b/tests/test_parse_regress.py
index 650907f..b33430f 100644
--- a/tests/test_parse_regress.py
+++ b/tests/test_parse_regress.py
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: LGPL-2.1-only
 # Copyright 2022 Jookia
 
-from src import parse
+from newlang import parse
 
 # The parser had some logic along the lines of 'read token until whitespace',