Newer
Older
NewLang / parse.py
#!/usr/bin/env python3
# SPDX-License-Identifier: MIT
# Copyright 2021 Jookia <contact@jookia.org>

def tokenize(code):
    """Split NewLang source into a flat list of ``(type, value)`` tokens.

    Words are separated by spaces, tabs and newlines.  The tokenizer is
    always in one of three modes:

    * ``normal`` - a word becomes ``("keyword", word.lower())`` when it is
      one of the reserved words, otherwise ``("symbol", word)``.
    * ``note`` - entered by ``BeginNote``, left by ``EndNote``; ordinary
      words produce no tokens.
    * ``text`` - entered by ``BeginText``; raw characters are collected
      until ``EndText``, which emits one ``("text", contents)`` token.

    The final word is tokenized even when the input does not end with
    whitespace.

    Args:
        code: The source to tokenize, iterated one character at a time.

    Returns:
        A list of ``(type, value)`` tuples, always ending with
        ``("EOF", None)``.
    """
    from itertools import chain

    keywords = ("NewLang", "Done", "Set", "To", "EndSet",
                "If", "Then", "Else", "EndIf")
    whitespace = (" ", "\t", "\n")
    tokens = []
    token = ""
    text = ""
    mode = "normal"  # normal/note/text
    # A word is only processed when the whitespace following it is seen, so
    # feed one extra newline to flush a word sitting at the very end of the
    # input.  For input that already ends in whitespace this is a no-op.
    for symbol in chain(code, "\n"):
        if symbol in whitespace:
            # NOTE(review): the four mode words below are recognised in
            # every mode (e.g. "BeginNote" inside a text block switches to
            # note mode) - confirm whether that is intended.
            if token == "BeginNote":
                mode = "note"
            elif token == "EndNote":
                mode = "normal"
            elif token == "BeginText":
                mode = "text"
            elif token == "EndText":
                # text holds the separator that followed BeginText, then the
                # contents, then a separator plus "EndText" (8 characters).
                tokens.append(("text", text[1:-8]))
                mode = "normal"
                text = ""
            elif token and mode == "normal":
                if token in keywords:
                    tokens.append(("keyword", token.lower()))
                else:
                    tokens.append(("symbol", token))
            token = ""
        else:
            token += symbol
        # Checked after the mode switch so the separator right after
        # BeginText is collected and the one after EndText is not.
        if mode == "text":
            text += symbol
    tokens.append(('EOF', None))
    return tokens

class Parser:
    def __init__(self, tokens):
        self.tokens = tokens
        self.pos = 0

    def next(self):
        (type, value) = self.tokens[self.pos]
        if self.pos < (len(self.tokens) - 1):
            self.pos += 1
        print("Read %s %s" % (type, value))
        return (type, value)

    def peek(self):
        (type, value) = self.tokens[self.pos]
        print("Peeked %s %s" % (type, value))
        return (type, value)

    def parse_version(self):
        (type, value) = self.next()
        if type != "keyword" or value != "newlang":
            print("Expected NewLang keyword")
            return None
        (type, value) = self.next()
        print("Parsed language version %s" % (value))
        return value

    def parse_value(self, type, value):
        if type == "symbol":
            return ('reference', value)
        elif type == "text":
            return ('text', value)
        else:
            print("Unexpected type %s" % (type))
            return None

    def parse_arguments(self, terminator):
        args = []
        while True:
            (type, value) = self.next()
            if type == "keyword":
                if value == terminator:
                    return args
                else:
                    print("Unexpected keyword %s" % (value))
                    return None
            else:
                arg = self.parse_value(type, value)
                if not arg:
                    print("While parsing argument")
                    return None
                args.append(arg)

    def parse_statement(self, terminator):
        (type, value) = self.next()
        subject = self.parse_value(type, value)
        if not subject:
            print("While parsing subject")
            return None
        (type, value) = self.next()
        if type == "keyword":
            if value == terminator:
                verb = None
            else:
                print("Unexpected keyword %s" % (value))
                return None
        elif type == "symbol":
            verb = value
        else:
            verb = value
        if verb:
            arguments = self.parse_arguments(terminator)
            if arguments is None:
                print("While parsing arguments")
                return None
        else:
            arguments = []
        print("Parsed statement: subject %s verb %s args %s" % (subject, verb, arguments))
        return ('statement', subject, verb, arguments)

    def parse_set(self):
        (type, value) = self.next()
        if type != "symbol":
            print("Expect symbol, got %s" % (type))
            return None
        subject = value
        (type, value) = self.next()
        if type != "keyword" or value != "to":
            print("Expected To, got %s %s" % (type, value))
            return None
        print("Parsing set value...")
        ast = self.parse_statement("endset")
        if not ast:
            print("While parsing statement")
            return None
        print("Parsed set for %s" % (subject))
        return ('set', subject, ast)

    def parse_if(self):
        print("Parsing if test condition...")
        test = self.parse_statement("then")
        if not test:
            print("While parsing test condition")
            return None
        print("Parsing if success statement...")
        success = self.parse_statement("else")
        if not success:
            print("While parsing success statement")
            return None
        print("Parsing if failure statement...")
        failure = self.parse_statement("endif")
        if not failure:
            print("While parsing failure statement")
            return None
        print("Parsed conditional")
        return ('if', test, success, failure)

    def parse_directive(self):
        (type, value) = self.peek()
        if type != "keyword" and type != "symbol":
            print("Expected keyword or symbol here, got %s" % (type))
            return None
        if type == "keyword":
            self.next()
            if value == "set":
                ast = self.parse_set()
                if not ast:
                    print("While parsing set directive")
                    return None
                return ast
            elif value == "if":
                ast = self.parse_if()
                if not ast:
                    print("While parsing set directive")
                    return None
                return ast
            else:
                print("Unexpected keyword %s" % (value))
                return None
        else:
            ast = self.parse_statement("done")
            if not ast:
                print("While parsing statement")
                return None
            return ast

    def parse_file(self):
        print("Parsing file...")
        ast = []
        version = self.parse_version()
        if not version:
            print("While parsing version identifier at start of file")
            return None
        if version != "0":
            print("Invalid version identifier %s" % (version))
            return None
        while self.peek()[0] != 'EOF':
            directive = self.parse_directive()
            if directive == None:
                print("While parsing directive in file")
                return None
            else:
                ast.append(directive)
        print("Parsed file")
        return ast