Newer
Older
NewLang / parse.py
#!/usr/bin/env python3
# SPDX-License-Identifier: MIT
# Copyright 2021 Jookia <contact@jookia.org>

# Module-level parser state shared by the parser_* helpers below.
# Token list currently being parsed; replaced by parser_reset().
parser_tokens = None
# Index into parser_tokens of the next token to be read.
parser_pos = 0

def tokenize(code):
    """Split NewLang source text into a list of (type, value) tokens.

    Words are separated by spaces, tabs and newlines.  Three token types
    are produced:

    * ("keyword", name) -- a reserved word, with the name lower-cased.
    * ("symbol", word)  -- any other word, kept verbatim.
    * ("text", body)    -- everything between ``BeginText`` and ``EndText``,
      excluding the single whitespace character next to each marker.

    Words between ``BeginNote`` and ``EndNote`` are discarded.  Block
    markers are only recognised in the mode where they make sense, so a
    note may mention ``BeginText`` and a text may contain ``BeginNote``
    without changing mode.  An unterminated note or text produces no token.

    Args:
        code: The source text, iterated one character at a time.

    Returns:
        The list of tokens in source order.
    """
    # Imported locally so the block does not rely on a file-level import.
    from itertools import chain

    keywords = ("NewLang", "Done", "Set", "To", "EndSet",
                "If", "Then", "Else", "EndIf")
    whitespace = (" ", "\t", "\n")
    tokens = []
    token = ""
    text = ""
    mode = "normal"  # normal/note/text
    # The trailing newline acts as a sentinel so that a final word which is
    # not followed by whitespace is still flushed into the token list.
    for symbol in chain(code, "\n"):
        if symbol in whitespace:
            if token == "":
                pass
            elif mode == "note":
                # Inside a note only the closing marker matters.
                if token == "EndNote":
                    mode = "normal"
            elif mode == "text":
                # Inside a text only the closing marker matters.
                if token == "EndText":
                    # Drop the whitespace after BeginText (1 character) and
                    # the whitespace plus "EndText" at the end (8 characters).
                    tokens.append(("text", text[1:-8]))
                    mode = "normal"
                    text = ""
            elif token == "BeginNote":
                mode = "note"
            elif token == "BeginText":
                mode = "text"
            elif token == "EndNote":
                # Stray terminator outside a note: ignored.
                pass
            elif token == "EndText":
                # Stray terminator outside a text: still yields an empty
                # text token, matching the previous behaviour.
                tokens.append(("text", ""))
            elif token in keywords:
                tokens.append(("keyword", token.lower()))
            else:
                tokens.append(("symbol", token))
            token = ""
        else:
            token += symbol
        if mode == "text":
            # Raw capture, including whitespace and the closing marker,
            # which is trimmed off when EndText is recognised.
            text += symbol
    return tokens

def parser_reset(tokens):
    """Install *tokens* as the parser's input and rewind to its start."""
    global parser_tokens, parser_pos
    parser_tokens, parser_pos = tokens, 0

def parser_eof():
    """Return True when the read position is at or past the last token."""
    remaining = len(parser_tokens) - parser_pos
    return remaining <= 0

def parser_next():
    """Consume the next token and return it as a (type, value) tuple.

    Advances the module-level read position by one and prints a trace
    line for the token that was read.

    Raises:
        SystemExit: with status 1, after printing a message, when no
            tokens remain.
    """
    global parser_pos
    # Imported here because the module has no top-level import of sys;
    # without it the end-of-file path would fail with NameError.
    import sys
    if parser_eof():
        print("Reached end of file early")
        sys.exit(1)
    (kind, value) = parser_tokens[parser_pos]
    parser_pos += 1
    print("Read %s %s" % (kind, value))
    return (kind, value)

def parser_peek():
    """Return the next token as a (type, value) tuple without consuming it.

    Prints a trace line for the token.  The read position is left
    unchanged.
    """
    upcoming = parser_tokens[parser_pos]
    kind, word = upcoming
    print("Peeked %s %s" % (kind, word))
    return (kind, word)

def parser_skip():
    """Move the read position forward by one token without returning it."""
    global parser_pos
    parser_pos = parser_pos + 1

def parse_version():
    """Read the ``NewLang <version>`` header and return the version value.

    Consumes two tokens: the ``newlang`` keyword and whatever token
    follows it, whose value is returned as the version.

    Returns:
        The version token's value, or None (after printing a message)
        when the first token is not the ``newlang`` keyword.
    """
    kind, word = parser_next()
    if not (kind == "keyword" and word == "newlang"):
        print("Expected NewLang keyword")
        return None
    _, version = parser_next()
    print("Parsed language version %s" % (version))
    return version

def parse_value(type, value):
    """Turn a token into a value node.

    Args:
        type: The token type.
        value: The token value.

    Returns:
        ('text', value) for a "text" token, ('reference', value) for a
        "symbol" token, or None (after printing a message) for any other
        token type.
    """
    if type == "text":
        return ('text', value)
    if type == "symbol":
        return ('reference', value)
    # Any other token type cannot be used as a value.
    print("Unexpected type %s" % (type))
    return None

def parse_arguments(terminator):
    """Collect value nodes until the *terminator* keyword is read.

    Args:
        terminator: Lower-cased keyword value that ends the argument list.

    Returns:
        A list of value nodes (possibly empty), or None after printing a
        message when a different keyword or an invalid value is found.
    """
    collected = []
    while True:
        kind, word = parser_next()
        if kind != "keyword":
            node = parse_value(kind, word)
            if not node:
                print("While parsing argument")
                return None
            collected.append(node)
            continue
        # A keyword either closes the list or is an error.
        if word != terminator:
            print("Unexpected keyword %s" % (word))
            return None
        return collected

def parse_statement(terminator):
    """Parse ``subject [verb arguments...] terminator`` into a statement node.

    The subject must be a symbol or text token.  The token after it is
    either the terminator keyword (a statement with no verb) or a verb,
    in which case arguments are read up to the terminator.

    Args:
        terminator: Lower-cased keyword value that ends the statement.

    Returns:
        ('statement', subject, verb, arguments) where verb is None and
        arguments is [] for a bare subject, or None after printing a
        message on a parse error.
    """
    (kind, value) = parser_next()
    subject = parse_value(kind, value)
    if not subject:
        print("While parsing subject")
        return None
    (kind, value) = parser_next()
    if kind != "keyword":
        # Any non-keyword token is taken as the verb.
        verb = value
    elif value == terminator:
        verb = None
    else:
        print("Unexpected keyword %s" % (value))
        return None
    # Compare against None rather than truthiness: an empty text token used
    # as a verb is still a verb, and its terminator must be consumed.
    if verb is None:
        arguments = []
    else:
        arguments = parse_arguments(terminator)
        if arguments is None:
            print("While parsing arguments")
            return None
    print("Parsed statement: subject %s verb %s args %s" % (subject, verb, arguments))
    return ('statement', subject, verb, arguments)

def parse_set():
    """Parse the remainder of a ``Set <symbol> To <statement> EndSet`` form.

    Expects the ``set`` keyword to have been consumed already.

    Returns:
        ('set', subject, statement), or None after printing a message on
        a parse error.
    """
    kind, word = parser_next()
    if kind != "symbol":
        print("Expect symbol, got %s" % (kind))
        return None
    subject = word

    kind, word = parser_next()
    if not (kind == "keyword" and word == "to"):
        print("Expected To, got %s %s" % (kind, word))
        return None

    print("Parsing set value...")
    statement = parse_statement("endset")
    if not statement:
        print("While parsing statement")
        return None

    print("Parsed set for %s" % (subject))
    return ('set', subject, statement)

def parse_if():
    """Parse the remainder of an ``If ... Then ... Else ... EndIf`` form.

    Expects the ``if`` keyword to have been consumed already.  Three
    statements are read in order, terminated by ``then``, ``else`` and
    ``endif`` respectively.

    Returns:
        ('if', test, success, failure), or None after printing a message
        when any of the three statements fails to parse.
    """
    # (announcement, terminator keyword, failure message) for each part.
    stages = (
        ("Parsing if test condition...", "then",
         "While parsing test condition"),
        ("Parsing if success statement...", "else",
         "While parsing success statement"),
        ("Parsing if failure statement...", "endif",
         "While parsing failure statement"),
    )
    parts = []
    for announcement, terminator, failure_message in stages:
        print(announcement)
        statement = parse_statement(terminator)
        if not statement:
            print(failure_message)
            return None
        parts.append(statement)
    test, success, failure = parts
    print("Parsed conditional")
    return ('if', test, success, failure)

def parse_directive():
    """Parse one top-level directive: a set, an if, or a plain statement.

    The next token is peeked to decide what follows.  A keyword is
    consumed and dispatched to parse_set() or parse_if(); a symbol is left
    in place and parsed as a statement terminated by ``done``.

    Returns:
        The parsed node, or None after printing a message on a parse
        error.
    """
    (kind, value) = parser_peek()
    if kind not in ("keyword", "symbol"):
        print("Expected keyword or symbol here, got %s" % (kind))
        return None
    if kind == "symbol":
        # The symbol is the statement's subject, so it is not skipped.
        ast = parse_statement("done")
        if not ast:
            print("While parsing statement")
            return None
        return ast
    # Consume the keyword before dispatching on it.
    parser_skip()
    if value == "set":
        ast = parse_set()
        if not ast:
            print("While parsing set directive")
            return None
        return ast
    if value == "if":
        ast = parse_if()
        if not ast:
            # Previously misreported as a "set" directive.
            print("While parsing if directive")
            return None
        return ast
    print("Unexpected keyword %s" % (value))
    return None

def parse_file():
    """Parse the whole token stream into a list of directive nodes.

    Reads the tokens held in the module-level parser state; this function
    does not reset that state itself.  The stream must start with the
    ``NewLang`` keyword followed by the version ``0``.

    Returns:
        A list of directive nodes in source order, or None after printing
        a message when the version header or any directive is invalid.
    """
    print("Parsing file...")
    version = parse_version()
    if not version:
        print("While parsing version identifier at start of file")
        return None
    if version != "0":
        print("Invalid version identifier %s" % (version))
        return None
    ast = []
    while not parser_eof():
        directive = parse_directive()
        # Identity comparison is the correct test for the None singleton.
        if directive is None:
            print("While parsing directive in file")
            return None
        ast.append(directive)
    print("Parsed file")
    return ast