#!/usr/bin/env python3 # SPDX-License-Identifier: MIT # Copyright (c) 2023 John Watts and the LuminaSensum contributors ## AST class ASTNumber(): def __init__(self, number): self.number = number def __repr__(self): return 'ASTNumber(number=%i)' % (self.number) class ASTReference(): def __init__(self, name): self.name = name def __repr__(self): return 'ASTReference(name="%s")' % (self.name) class ASTCall(): def __init__(self, subject, verb, args): self.subject = subject self.verb = verb self.args = args def __repr__(self): return '\n\t\tASTCall(subject=%s, verb="%s", args=%s)' % (self.subject, self.verb, self.args) class ASTSet(): def __init__(self, name, command): self.name = name self.command = command def __repr__(self): return '\n\tASTSet(name="%s", command=%s)' % (self.name, self.command) class ASTReturn(): def __init__(self, value): self.value = value def __repr__(self): return '\n\tASTReturn(value=%s)' % (self.value) class ASTFunction(): def __init__(self, name, statements): self.name = name self.statements = statements def __repr__(self): return 'ASTFunction(name="%s", statements=%s)' % (self.name, self.statements) ## Parser def tokenize(code): line = [] lines = [] tok = "" for c in code.strip(): if c == ' ': line.append(tok) tok = "" elif c == '\n': line.append(tok) lines.append(line) line = [] tok = "" else: tok += c line.append(tok) lines.append(line) return lines def parse_value(val): if val[0].isdigit(): return ASTNumber(int(val)) else: return ASTReference(val) def parse_call(line, offset): cut = line[offset-1:] if len(cut) < 1: print("not a call? line: %s" % (' '.join(line))) return None subject = parse_value(cut[0]) if not subject: print("not a subject? line: %s" % (' '.join(line))) return None verb = None if len(cut) > 1: verb = cut[1] args = [] for arg in cut[2:]: val = parse_value(arg) if not val: print("not a arg value? line: %s" % (' '.join(line))) return None args.append(val) return ASTCall(subject, verb, args) def parse_set(line): if len(line) < 4 or line[0] != "Set" or line[2] != "To": print("not a set? line: %s" % (' '.join(line))) return None call = parse_call(line, 4) if not call: return None return ASTSet(line[1], call) def parse_return(line): if len(line) < 2 or line[0] != "Return": print("not a return? line: %s" % (' '.join(line))) return None value = parse_value(line[1]) if not value: print("not a return value? line: %s" % (' '.join(line))) return None return ASTReturn(value) def parse_statement(lines): line = lines[0] instr = line[0] statement = None if instr == "Set": statement = parse_set(line) elif instr == "Return": statement = parse_return(line) else: print("not a statement? line: %s" % (' '.join(line))) return (statement, lines[1:]) if statement: return (statement, lines[1:]) else: return (None, []) def parse_function(lines): line = lines[0] lines = lines[1:] if len(line) != 2 or line[0] != "Function": print("not a function? line: %s" % (' '.join(line))) return (None, []) name = line[1] statements = [] while len(lines) != 0: first_word = lines[0][0] if first_word == "EndFunction": lines = lines[1:] break (statement, lines) = parse_statement(lines) if statement == None: return (None, []) else: statements.append(statement) func = ASTFunction(name, statements) return (func, lines) def parse_toplevel(lines): ast = [] while len(lines) != 0: (func, lines) = parse_function(lines) if func == None: return None else: ast.append(func) return ast ## TODO: Semantic analysis ## IR class IRNumber(): def __init__(self, number): self.number = number def __repr__(self): return '\n\tIRNumber(number=%i)' % (self.number) class IRAllocate(): def __init__(self, size): self.size = size def __repr__(self): return '\n\tIRAllocate(size=%i)' % (self.size) class IRDrop(): def __init__(self, size): self.size = size def __repr__(self): return '\n\tIRDrop(number=%i)' % (self.size) class IRLoad(): def __init__(self, variable): self.variable = variable def __repr__(self): return '\n\tIRLoad(variable=%s)' % (self.variable) class IRStore(): def __init__(self, variable): self.variable = variable def __repr__(self): return '\n\tIRStore(variable=%s)' % (self.variable) class IRCall(): def __init__(self, name, args): self.name = name self.args = args def __repr__(self): return '\n\tIRCall(name="%s", args=%i)' % (self.name, self.args) class IRReturn(): def __repr__(self): return '\n\tIRReturn()' class IRFunction(): def __init__(self, name, statements): self.name = name self.statements = statements def __repr__(self): return 'IRFunction(name="%s", statements=%s)' % (self.name, self.statements) ## IR Generator def generate_ir_value(value): if isinstance(value, ASTNumber): return [IRNumber(value.number)] elif isinstance(value, ASTReference): return [IRLoad(value.name)] else: print("Unknown value ast node: %s" % (node)) return None def generate_ir_call(ast): final_ir = [] subject_ir = generate_ir_value(ast.subject) if not subject_ir: print("Unknown subject ast node: %s" % (node)) return None if not ast.verb: return subject_ir args_ir = [] for arg in ast.args: arg_ir = generate_ir_value(arg) if not arg_ir: print("Unknown arg ast node: %s" % (arg)) return None args_ir = args_ir + arg_ir args_count = len(ast.args) alloc_ir = [IRAllocate(1)] call_ir = [IRCall(ast.verb, args_count)] drop_ir = [IRDrop(args_count)] final_ir = final_ir + alloc_ir final_ir = final_ir + args_ir final_ir = final_ir + subject_ir final_ir = final_ir + call_ir final_ir = final_ir + drop_ir return final_ir def generate_ir_set(ast): command = ast.command sub_ir = generate_ir_call(command) if not sub_ir: print("Unknown set ast node: %s" % (node)) return None store = IRStore(ast.name) return sub_ir + [store] def generate_ir_return(ast): value_ir = generate_ir_value(ast.value) if not value_ir: print("Unknown return ast node: %s" % (node)) return None store_ret = IRStore("Return") ret = IRReturn() return value_ir + [store_ret, ret] def generate_ir_function(ast): name = ast.name ir = [] for node in ast.statements: sub_ir = None if isinstance(node, ASTSet): sub_ir = generate_ir_set(node) elif isinstance(node, ASTReturn): sub_ir = generate_ir_return(node) if not sub_ir: print("Unknown statement ast node: %s" % (node)) return None ir = ir + sub_ir return IRFunction(name, ir) def generate_ir(ast): ir = [] for node in ast: sub_ir = None if isinstance(node, ASTFunction): sub_ir = generate_ir_function(node) if not sub_ir: print("Unknown ast node: %s" % (node)) return None ir.append(sub_ir) return ir ## Register allocation def find_variables(ir): vars = {'Return': 0} var_count = 0 for node in ir: if not isinstance(node, IRLoad): continue var = node.variable if not vars.get(var): var_count += 1 vars[var] = var_count return vars def replace_variables(ir, variables): new_ir = [] var_count = len(variables) - 1 # Ignore Return new_ir.append(IRAllocate(var_count)) for node in ir: if isinstance(node, IRLoad): reg = variables[node.variable] new_ir.append(IRLoad(reg)) elif isinstance(node, IRStore): reg = variables[node.variable] new_ir.append(IRStore(reg)) continue elif isinstance(node, IRReturn): new_ir.append(IRDrop(var_count)) new_ir.append(IRReturn()) else: new_ir.append(node) return new_ir def registers_allocate_func(ir): registers = find_variables(ir.statements) new_ir = replace_variables(ir.statements, registers) name = ir.name return IRFunction(name, new_ir) def registers_allocate(ir): new_ir = [] for node in ir: sub_ir = None if isinstance(node, IRFunction): sub_ir = registers_allocate_func(node) if not sub_ir: print("Unknown registers node: %s" % (node)) return None new_ir.append(sub_ir) return new_ir ## Bytecode generation def generate_bytecode_function(ir): bytes = b"" for node in ir.statements: if isinstance(node, IRNumber): bytes += b"\x01" # OP_NUM bytes += node.number.to_bytes(4, 'little') elif isinstance(node, IRAllocate): for i in range(0, node.size): bytes += b"\x05" # OP_NULL elif isinstance(node, IRDrop): bytes += b"\x08" # OP_DROP bytes += node.size.to_bytes(1) elif isinstance(node, IRLoad): index = int(node.variable) bytes += b"\x06" # OP_GET bytes += index.to_bytes(1) elif isinstance(node, IRStore): index = int(node.variable) bytes += b"\x07" # OP_SET bytes += index.to_bytes(1) elif isinstance(node, IRCall): bytes += b"\x04" # OP_CALL bytes += (node.args + 1).to_bytes(1) bytes += node.name.encode('utf-8') bytes += b"\x00" # NULL terminator elif isinstance(node, IRReturn): bytes += b"\x03" # OP_RET else: print("Unknown bytecode node: %s" % (node)) return None return bytes def generate_c_file(ir): output = "" for node in ir: sub_bytes = None if isinstance(node, IRFunction): sub_bytes = generate_bytecode_function(node) output += "static const unsigned char\n" output += "bytecode_func_%s [] = {\n" % (node.name) col = 0 for b in sub_bytes: output += "0x%02x, " % (b) col += 1 if col == 10: output += "\n" col = 0 output += "\n};" if sub_bytes is None: print("Unknown bytecode node: %s" % (node)) return None return output ## Tests code = """ Function MakeNumber Set Halfish To 618 Set Double To Halfish Add Halfish Set Final To Double Minus 2 Return Final EndFunction """ lines = tokenize(code) ast = parse_toplevel(lines) ir = generate_ir(ast) ir_reg = registers_allocate(ir) print(ir_reg) bytecode = generate_c_file(ir_reg) print(bytecode)