diff --git a/main.py b/main.py index 288fe39..0b9a702 100755 --- a/main.py +++ b/main.py @@ -2,218 +2,7 @@ # SPDX-License-Identifier: MIT # Copyright 2021 Jookia -def tokenizer(code): - tokens = [] - token = "" - text = "" - mode = "normal" # normal/note/text - for symbol in code: - if symbol == " " or symbol == "\t" or symbol == "\n": - lowered = token.lower() - if lowered == "": - pass - elif lowered == "beginnote": - mode = "note" - elif lowered == "endnote": - mode = "normal" - elif lowered == "begintext": - mode = "text" - elif lowered == "endtext": - tokens.append(("text", text[1:-8])) - mode = "normal" - text = "" - elif token != "": - if mode == "normal": - keywords = ["newlang", "done", "set", "to", "endset", - "if", "then", "else", "endif"] - if lowered in keywords: - type = "keyword" - else: - type = "symbol" - tokens.append((type, lowered)) - token = "" - else: - token += symbol - if mode == "text": - text += symbol - return tokens - -parser_tokens = None -parser_pos = 0 - -def parser_reset(tokens): - global parser_pos - global parser_tokens - parser_tokens = tokens - parser_pos = 0 - -def parser_eof(): - global parser_pos - global parser_tokens - return parser_pos >= len(parser_tokens) - -def parser_next(): - global parser_pos - global parser_tokens - if parser_eof(): - print("Reached end of file early") - sys.exit(1) - (type, value) = parser_tokens[parser_pos] - parser_pos += 1 - print("Read %s %s" % (type, value)) - return (type, value) - -def parser_peek(): - global parser_pos - global parser_tokens - (type, value) = parser_tokens[parser_pos] - print("Peeked %s %s" % (type, value)) - return (type, value) - -def parser_skip(): - global parser_pos - global parser_tokens - parser_pos += 1 - -def parse_version(): - (type, value) = parser_next() - if type != "keyword" or value != "newlang": - print("Expected NewLang keyword") - return None - (type, value) = parser_next() - print("Parsed language version %s" % (value)) - return value - -def parse_subject(): - (type, value) = parser_next() - if type != "symbol": - print("Expected symbol, got %s" % (type)) - return None - return value - -def parse_verb(): - (type, value) = parser_next() - if type != "symbol": - print("Expected symbol, got %s" % (type)) - return None - return value - -def parse_arguments(terminator): - args = [] - while True: - (type, value) = parser_next() - if type == "keyword": - if value == terminator: - return args - else: - print("Unexpected keyword %s" % (value)) - return None - elif type == "text" or type == "symbol": - args.append((type, value)) - else: - print("Unexpected type %s" % (type)) - return None - -def parse_statement(terminator): - subject = parse_subject() - if not subject: - print("While parsing subject") - return None - verb = parse_verb() - if not verb: - print("While parsing verb") - return None - arguments = parse_arguments(terminator) - if arguments is None: - print("While parsing arguments") - return None - print("Parsed statement: subject %s verb %s args %s" % (subject, verb, arguments)) - return ('statement', subject, verb, arguments) - -def parse_set(): - subject = parse_subject() - if not subject: - print("While parsing subject") - return None - (type, value) = parser_next() - if type != "keyword" or value != "to": - print("Expect to, got %s %s" % (type, value)) - return None - print("Parsing set value...") - ast = parse_statement("endset") - if not ast: - print("While parsing statement") - return None - print("Parsed set for %s" % (subject)) - return ('set', subject, ast) - -def parse_if(): - print("Parsing if test condition...") - test = parse_statement("then") - if not test: - print("While parsing test condition") - return None - print("Parsing if success statement...") - success = parse_statement("else") - if not success: - print("While parsing success statement") - return None - print("Parsing if failure statement...") - failure = parse_statement("endif") - if not failure: - print("While parsing failure statement") - return None - print("Parsed conditional") - return ('if', test, success, failure) - -def parse_directive(): - (type, value) = parser_peek() - if type != "keyword" and type != "symbol": - print("Expected keyword or symbol here, got %s" % (type)) - return None - if type == "keyword": - parser_skip() - if value == "set": - ast = parse_set() - if not ast: - print("While parsing set directive") - return None - return ast - elif value == "if": - ast = parse_if() - if not ast: - print("While parsing set directive") - return None - return ast - else: - print("Unexpected keyword %s" % (value)) - return None - else: - ast = parse_statement("done") - if not ast: - print("While parsing statement") - return None - return ast - -def parse_file(): - print("Parsing file...") - ast = [] - version = parse_version() - if not version: - print("While parsing version identifier at start of file") - return None - if version != "0": - print("Invalid version identifier %s" % (version)) - return None - while not parser_eof(): - directive = parse_directive() - if directive == None: - print("While parsing directive in file") - return None - else: - ast.append(directive) - print("Parsed file") - return ast +import parse def do_system_print(env, args): (text_type, text_value) = args[0] @@ -270,9 +59,9 @@ if code[0:2] == '#!': next_line = code.find('\n') + 1 code = code[next_line:] - tokens = tokenizer(code) - parser_reset(tokens) - ast = parse_file() + tokens = parse.tokenizer(code) + parse.parser_reset(tokens) + ast = parse.parse_file() if not ast: return 1 for command in ast: diff --git a/parse.py b/parse.py new file mode 100644 index 0000000..c6e9517 --- /dev/null +++ b/parse.py @@ -0,0 +1,217 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: MIT +# Copyright 2021 Jookia + +parser_tokens = None +parser_pos = 0 + +def tokenizer(code): + tokens = [] + token = "" + text = "" + mode = "normal" # normal/note/text + for symbol in code: + if symbol == " " or symbol == "\t" or symbol == "\n": + lowered = token.lower() + if lowered == "": + pass + elif lowered == "beginnote": + mode = "note" + elif lowered == "endnote": + mode = "normal" + elif lowered == "begintext": + mode = "text" + elif lowered == "endtext": + tokens.append(("text", text[1:-8])) + mode = "normal" + text = "" + elif token != "": + if mode == "normal": + keywords = ["newlang", "done", "set", "to", "endset", + "if", "then", "else", "endif"] + if lowered in keywords: + type = "keyword" + else: + type = "symbol" + tokens.append((type, lowered)) + token = "" + else: + token += symbol + if mode == "text": + text += symbol + return tokens + +def parser_reset(tokens): + global parser_pos + global parser_tokens + parser_tokens = tokens + parser_pos = 0 + +def parser_eof(): + global parser_pos + global parser_tokens + return parser_pos >= len(parser_tokens) + +def parser_next(): + global parser_pos + global parser_tokens + if parser_eof(): + print("Reached end of file early") + sys.exit(1) + (type, value) = parser_tokens[parser_pos] + parser_pos += 1 + print("Read %s %s" % (type, value)) + return (type, value) + +def parser_peek(): + global parser_pos + global parser_tokens + (type, value) = parser_tokens[parser_pos] + print("Peeked %s %s" % (type, value)) + return (type, value) + +def parser_skip(): + global parser_pos + global parser_tokens + parser_pos += 1 + +def parse_version(): + (type, value) = parser_next() + if type != "keyword" or value != "newlang": + print("Expected NewLang keyword") + return None + (type, value) = parser_next() + print("Parsed language version %s" % (value)) + return value + +def parse_subject(): + (type, value) = parser_next() + if type != "symbol": + print("Expected symbol, got %s" % (type)) + return None + return value + +def parse_verb(): + (type, value) = parser_next() + if type != "symbol": + print("Expected symbol, got %s" % (type)) + return None + return value + +def parse_arguments(terminator): + args = [] + while True: + (type, value) = parser_next() + if type == "keyword": + if value == terminator: + return args + else: + print("Unexpected keyword %s" % (value)) + return None + elif type == "text" or type == "symbol": + args.append((type, value)) + else: + print("Unexpected type %s" % (type)) + return None + +def parse_statement(terminator): + subject = parse_subject() + if not subject: + print("While parsing subject") + return None + verb = parse_verb() + if not verb: + print("While parsing verb") + return None + arguments = parse_arguments(terminator) + if arguments is None: + print("While parsing arguments") + return None + print("Parsed statement: subject %s verb %s args %s" % (subject, verb, arguments)) + return ('statement', subject, verb, arguments) + +def parse_set(): + subject = parse_subject() + if not subject: + print("While parsing subject") + return None + (type, value) = parser_next() + if type != "keyword" or value != "to": + print("Expect to, got %s %s" % (type, value)) + return None + print("Parsing set value...") + ast = parse_statement("endset") + if not ast: + print("While parsing statement") + return None + print("Parsed set for %s" % (subject)) + return ('set', subject, ast) + +def parse_if(): + print("Parsing if test condition...") + test = parse_statement("then") + if not test: + print("While parsing test condition") + return None + print("Parsing if success statement...") + success = parse_statement("else") + if not success: + print("While parsing success statement") + return None + print("Parsing if failure statement...") + failure = parse_statement("endif") + if not failure: + print("While parsing failure statement") + return None + print("Parsed conditional") + return ('if', test, success, failure) + +def parse_directive(): + (type, value) = parser_peek() + if type != "keyword" and type != "symbol": + print("Expected keyword or symbol here, got %s" % (type)) + return None + if type == "keyword": + parser_skip() + if value == "set": + ast = parse_set() + if not ast: + print("While parsing set directive") + return None + return ast + elif value == "if": + ast = parse_if() + if not ast: + print("While parsing set directive") + return None + return ast + else: + print("Unexpected keyword %s" % (value)) + return None + else: + ast = parse_statement("done") + if not ast: + print("While parsing statement") + return None + return ast + +def parse_file(): + print("Parsing file...") + ast = [] + version = parse_version() + if not version: + print("While parsing version identifier at start of file") + return None + if version != "0": + print("Invalid version identifier %s" % (version)) + return None + while not parser_eof(): + directive = parse_directive() + if directive == None: + print("While parsing directive in file") + return None + else: + ast.append(directive) + print("Parsed file") + return ast +