Newer
Older
NewLang / parse.py
# SPDX-License-Identifier: MIT
# Copyright 2021 Jookia <contact@jookia.org>

import log

class Token:
    """A lexical token: a type tag paired with the value it carries.

    The tokenizer in this file emits the type tags "keyword", "symbol",
    "text" and "EOF"; the EOF token carries None as its value.
    """

    def __init__(self, type, value):
        # `type` shadows the builtin, but it is part of the constructor's
        # signature and so is kept as-is.
        self.value = value
        self.type = type

    def __repr__(self):
        # Debug form used in log messages, e.g. Token(type symbol, value 'x').
        template = "Token(type {!s}, value '{!s}')"
        return template.format(self.type, self.value)

def is_whitespace(symbol):
    """Return True when symbol is a space, a tab or a newline.

    Anything else -- including None and multi-character strings -- yields
    False.
    """
    separators = (" ", "\t", "\n")
    return symbol in separators

class Tokenizer:
    """Splits source text into Token objects, one whitespace-delimited word
    at a time.

    Attributes:
        code: the text being tokenized.
        pos: index into ``code`` of the next character to read.

    NOTE(review): next() and read_token() pass log.LEXER as the second
    argument of log.log, where tokenize() passes a level such as log.TRACE
    -- confirm this is intended.
    """

    # Words emitted as "keyword" tokens (lower-cased) while in normal mode.
    KEYWORDS = ("NewLang", "Done", "Set", "To", "EndSet",
                "If", "Then", "Else", "EndIf")

    # Every text run starts with this marker word followed by a space; it is
    # stripped again when the run is closed by EndText.
    TEXT_PREFIX = "BeginText "

    def __init__(self, input):
        self.code = input
        self.pos = 0

    def next(self):
        """Return the next character and advance, or None at end of input."""
        if self.pos >= len(self.code):
            log.log(log.LEXER, log.LEXER, "Reached end of file")
            return None
        symbol = self.code[self.pos]
        if symbol == '\n':
            symbol_print = "new line"
        else:
            symbol_print = "character '%s'" % (symbol,)
        log.log(log.LEXER, log.LEXER, "Read %s" % (symbol_print,))
        self.pos += 1
        return symbol

    def read_token(self):
        """Read characters up to the next whitespace or the end of input.

        The terminating whitespace character, if any, is consumed.

        Returns:
            The characters read -- "" when the first character read is
            itself whitespace -- or None when the input was already
            exhausted.
        """
        symbol = self.next()
        if symbol is None:
            log.log(log.LEXER, log.LEXER, "No token to read")
            return None
        chars = []
        # Stop at end of input as well as at whitespace: input that does not
        # end in whitespace would otherwise append None to the token and
        # raise TypeError.
        while symbol is not None and not is_whitespace(symbol):
            chars.append(symbol)
            symbol = self.next()
        token = "".join(chars)
        log.log(log.LEXER, log.LEXER, "Read token '%s'" % (token,))
        return token

    def tokenize(self):
        """Tokenize the whole input.

        Words are handled according to the current mode:
          * normal: keywords become "keyword" tokens (lower-cased), any
            other word becomes a "symbol" token;
          * note (BeginNote ... EndNote): words are skipped;
          * text (BeginText ... EndText): words are collected, joined by
            single spaces, into one "text" token emitted at EndText.

        Returns:
            A list of Token objects, always ending with Token("EOF", None).
        """
        tokens = []
        text = ""
        mode = "normal"  # normal/note/text
        token = self.read_token()
        while token is not None:
            if token == "":
                # Produced by consecutive whitespace; nothing to classify.
                pass
            elif token == "BeginNote":
                log.log(log.LEXER, log.TRACE, "Switching to note mode")
                mode = "note"
            elif token == "EndNote":
                log.log(log.LEXER, log.TRACE, "Ending note mode")
                mode = "normal"
            elif token == "BeginText":
                log.log(log.LEXER, log.TRACE, "Switching to text mode")
                mode = "text"
            elif token == "EndText":
                log.log(log.LEXER, log.TRACE, "Ending text mode")
                # Drop the leading "BeginText " marker and the trailing
                # space added after the last collected word.
                content = text[len(self.TEXT_PREFIX):-1]
                log.log(log.LEXER, log.DEBUG, "Appending text '%s'" % (content,))
                tokens.append(Token("text", content))
                mode = "normal"
                text = ""
            elif mode == "normal":
                if token in self.KEYWORDS:
                    kind = "keyword"
                    token = token.lower()
                else:
                    kind = "symbol"
                tok = Token(kind, token)
                log.log(log.LEXER, log.DEBUG, "Appending %s" % (tok,))
                tokens.append(tok)
            else:
                log.log(log.LEXER, log.TRACE, "Skipping token '%s'" % (token,))
            if mode == "text":
                # Runs for the BeginText word itself too, which is why the
                # marker has to be stripped again at EndText.
                text += token + " "
            token = self.read_token()
        log.log(log.LEXER, log.TRACE, "Done lexing, adding EOF")
        tokens.append(Token("EOF", None))
        return tokens

class Reference:
    """AST node for a value written as a symbol (see Parser.parse_value).

    Attributes:
        value: the symbol's text.
    """

    def __init__(self, value):
        self.value = value

    def __repr__(self):
        # Wrap the operand in a one-element tuple: a bare operand that
        # happens to be a tuple would be unpacked by `%` and either raise
        # TypeError or print only its single element.
        return "Reference('%s')" % (self.value,)

class Text:
    """AST node for a literal text value (see Parser.parse_value).

    Attributes:
        value: the text content.
    """

    def __init__(self, value):
        self.value = value

    def __repr__(self):
        # Wrap the operand in a one-element tuple: a bare operand that
        # happens to be a tuple would be unpacked by `%` and either raise
        # TypeError or print only its single element.
        return "Text('%s')" % (self.value,)

class Statement:
    """AST node for a statement: a subject, a verb and the verb's arguments.

    Attributes:
        subject: the value the statement starts with.
        verb: the verb, or None when the statement has none.
        arguments: list of argument values (empty when there is no verb).
    """

    def __init__(self, subject, verb, arguments):
        self.arguments = arguments
        self.verb = verb
        self.subject = subject

    def __repr__(self):
        template = "Statement(subject {!s}, verb '{!s}', arguments {!s})"
        return template.format(self.subject, self.verb, self.arguments)

class Set:
    """AST node for a set directive: bind a subject name to a statement.

    Attributes:
        subject: the name being set.
        statement: the statement that provides the value.
    """

    def __init__(self, subject, statement):
        self.statement = statement
        self.subject = subject

    def __repr__(self):
        template = "Set(subject {!s}, statement {!s})"
        return template.format(self.subject, self.statement)

class Conditional:
    """AST node for an if directive.

    Attributes:
        test: the statement parsed as the condition.
        success: the statement parsed for the Then branch.
        failure: the statement parsed for the Else branch.
    """

    def __init__(self, test, success, failure):
        self.failure = failure
        self.success = success
        self.test = test

    def __repr__(self):
        template = "Conditional(test {!s}, success {!s}, failure {!s})"
        return template.format(self.test, self.success, self.failure)

class ParseContext:
    """One link in a chain describing what the parser was doing.

    Attributes:
        parent: the enclosing ParseContext, or None for the outermost one.
        context: human-readable description of the current activity.
    """

    def __init__(self, parent, context):
        self.context = context
        self.parent = parent

    def __repr__(self):
        template = "ParseContext(parent {!s}, context '{!s}')"
        return template.format(self.parent, self.context)

class ParseError(Exception):
    """Raised when the token stream cannot be parsed.

    Derives from Exception rather than BaseException so that ordinary
    ``except Exception`` handlers see it; BaseException is reserved for
    things like KeyboardInterrupt and SystemExit.

    Attributes:
        context: the ParseContext chain active when the error occurred.
        error: human-readable description of the problem.
    """

    def __init__(self, context, error):
        self.context = context
        self.error = error

    def __repr__(self):
        return "ParseError(context %s, error '%s')" % (self.context, self.error)

class Parser:
    def __init__(self, tokens):
        self.tokens = tokens
        self.pos = 0

    def next(self):
        token = self.tokens[self.pos]
        if self.pos < (len(self.tokens) - 1):
            self.pos += 1
        log.log(log.PARSER, log.TRACE, "Read %s" % (token))
        return token

    def peek(self):
        token = self.tokens[self.pos]
        log.log(log.PARSER, log.TRACE, "Peeked %s" % (token))
        return token

    def parse_version(self, context):
        log.log(log.PARSER, log.TRACE, "Parsing version identifier...")
        context = ParseContext(context, "parsing version identifier")
        token = self.next()
        if token.type != "keyword" or token.value != "newlang":
            raise ParseError(context, "Expected NewLang keyword")
        token = self.next()
        version = token.value
        if version != "0":
            raise ParseError(context, "Invalid version %s" % (version))
        log.log(log.PARSER, log.DEBUG, "Parsed version %s" % (version))
        return version

    def parse_value(self, context, subject, type, value):
        log.log(log.PARSER, log.TRACE, "Parsing value...")
        context = ParseContext(context, "parsing %s value" % (subject))
        if type == "symbol":
            ret = Reference(value)
        elif type == "text":
            ret = Text(value)
        else:
            raise ParseError(context, "Unexpected value type '%s'" % (type))
        log.log(log.PARSER, log.TRACE, "Parsed value, AST is %s" % (ret))
        return ret

    def parse_arguments(self, context, terminator):
        log.log(log.PARSER, log.TRACE, "Parsing arguments until '%s'..." % (terminator))
        context = ParseContext(context, "parsing arguments until '%s'" % (terminator))
        args = []
        while True:
            log.log(log.PARSER, log.TRACE, "Parsing next argument...")
            token = self.next()
            if token.type == "keyword":
                if token.value == terminator:
                    log.log(log.PARSER, log.TRACE, "Parsed arguments, AST is %s" % (args))
                    return args
                else:
                    raise ParseError(context, "Unexpected keyword '%s' in arguments" % (token.value))
            else:
                arg = self.parse_value(context, "argument", token.type, token.value)
                log.log(log.PARSER, log.TRACE, "Parsed argument %s" % (arg))
                args.append(arg)

    def parse_statement(self, context, terminator, type):
        log.log(log.PARSER, log.TRACE, "Parsing %s statement until '%s'..." % (type, terminator))
        context = ParseContext(context, "parsing %s statement" % (type))
        log.log(log.PARSER, log.TRACE, "Parsing statement subject...")
        token = self.next()
        subject = self.parse_value(context, "subject", token.type, token.value)
        log.log(log.PARSER, log.TRACE, "Parsing statement verb...")
        token = self.next()
        if token.type == "keyword":
            if token.value == terminator:
                verb = None
            else:
                raise ParseError(context, "Unexpected keyword '%s' in statement" % (token.value))
        elif token.type == "symbol":
            verb = token.value
        else:
            verb = token.value
        log.log(log.PARSER, log.TRACE, "Parsing statement arguments...")
        if verb:
            arguments = self.parse_arguments(context, terminator)
        else:
            arguments = []
        statement = Statement(subject, verb, arguments)
        log.log(log.PARSER, log.DEBUG, "Parsed statement, AST is %s" % (statement))
        return statement

    def parse_set(self, context):
        log.log(log.PARSER, log.TRACE, "Parsing set subject...")
        meta_context = ParseContext(context, "parsing set directive")
        context = ParseContext(meta_context, "parsing subject")
        token = self.next()
        if token.type != "symbol":
            raise ParseError(context, "Expected symbol, got %s" % (token.type))
        subject = token.value
        token = self.next()
        if token.type != "keyword" or token.value != "to":
            raise ParseError(meta_context, "Expected To, got %s" % (token))
        log.log(log.PARSER, log.TRACE, "Parsing set value...")
        ast = self.parse_statement(meta_context, "endset", "set value")
        set = Set(subject, ast)
        log.log(log.PARSER, log.DEBUG, "Parsed set, AST is %s" % (set))
        return set

    def parse_if(self, context):
        log.log(log.PARSER, log.TRACE, "Parsing if test condition...")
        context = ParseContext(context, "parsing if directive")
        test = self.parse_statement(context, "then", "test condition")
        log.log(log.PARSER, log.TRACE, "Parsing if success statement...")
        success = self.parse_statement(context, "else", "success")
        log.log(log.PARSER, log.TRACE, "Parsing if failure statement...")
        failure = self.parse_statement(context, "endif", "failure")
        conditional = Conditional(test, success, failure)
        log.log(log.PARSER, log.DEBUG, "Parsed if, AST is %s" % (conditional))
        return conditional

    def parse_directive(self, context):
        token = self.peek()
        if token.type != "keyword" and token.type != "symbol":
            raise ParseError(context, "Expected keyword or symbol, got %s" % (token.type))
        if token.type == "keyword":
            self.next()
            if token.value == "set":
                return self.parse_set(context)
            elif token.value == "if":
                return self.parse_if(context)
            else:
                raise ParseError(context, "Unexpected keyword %s" % (token.value))
        else:
            ast = self.parse_statement(context, "done", "command")
            return ast

    def parse_file(self):
        log.log(log.PARSER, log.TRACE, "Parsing file...")
        context = ParseContext(None, "parsing file")
        ast = []
        version = self.parse_version(context)
        while self.peek().type != "EOF":
            log.log(log.PARSER, log.TRACE, "Parsing next directive in file...")
            ast.append(self.parse_directive(context))
        log.log(log.PARSER, log.DEBUG, "Parsed file, AST is %s" % (ast))
        return ast

def parse_file(code):
    """Tokenize and parse source text.

    Returns the parsed AST on success. On a ParseError the error message
    and each enclosing context, innermost first, are printed and None is
    returned.
    """
    tokens = Tokenizer(code).tokenize()
    try:
        return Parser(tokens).parse_file()
    except ParseError as failure:
        print("Parse error: %s" % failure.error)
        frame = failure.context
        # Walk outwards through the context chain until it runs out.
        while frame:
            print("While %s " % frame.context)
            frame = frame.parent
    return None