#!/usr/bin/env python3
# SPDX-License-Identifier: MIT
# Copyright 2021 Jookia <contact@jookia.org>

# Words recognized as language keywords when tokenizing in "normal" mode.
# Hoisted to module level so the collection is not rebuilt for every token;
# a frozenset gives O(1) membership with identical results to the old list.
KEYWORDS = frozenset([
    "newlang", "done", "set", "to", "endset",
    "if", "then", "else", "endif",
])


def tokenizer(code):
    """Split *code* into a list of ``(type, value)`` token tuples.

    Tokens are separated by whitespace (space, tab, newline). Three modes:

    - ``normal``: each whitespace-delimited word becomes a ``("keyword", w)``
      tuple if its lowercase form is in KEYWORDS, else ``("symbol", w)``.
      Words are lowercased in the output.
    - ``note`` (between ``beginNote``/``endNote``, case-insensitive):
      words are consumed and discarded.
    - ``text`` (between ``beginText``/``endText``, case-insensitive):
      raw characters are collected verbatim and emitted as a single
      ``("text", contents)`` token.

    NOTE(review): a trailing word not followed by whitespace at end of
    input is silently dropped; the grammar appears to require trailing
    whitespace — confirm before relying on this at EOF.
    """
    tokens = []
    token = ""   # current whitespace-delimited word being accumulated
    text = ""    # raw character buffer used while in "text" mode
    mode = "normal"  # normal/note/text
    for symbol in code:
        if symbol in " \t\n":
            # A word just ended; classify it (mode switches are recognized
            # in every mode, which is why e.g. "endText" terminates text).
            lowered = token.lower()
            if lowered == "":
                pass
            elif lowered == "beginnote":
                mode = "note"
            elif lowered == "endnote":
                mode = "normal"
            elif lowered == "begintext":
                mode = "text"
            elif lowered == "endtext":
                # text looks like " ...contents endText": strip the single
                # separator after beginText ([1:]) and the trailing
                # " endText" (8 chars, [:-8]) to recover the contents.
                tokens.append(("text", text[1:-8]))
                mode = "normal"
                text = ""
            elif token != "":
                # Ordinary word: only emitted in normal mode; note mode
                # discards it, text mode already captured it via `text`.
                if mode == "normal":
                    if lowered in KEYWORDS:
                        kind = "keyword"
                    else:
                        kind = "symbol"
                    tokens.append((kind, lowered))
            token = ""
        else:
            token += symbol
        # In text mode every character (including separators) is kept raw.
        if mode == "text":
            text += symbol
    return tokens


def main(args):
    """Tokenize the file named in args[1] and print one line per token.

    Returns 0 on success, 1 on usage error (process exit codes).
    """
    if len(args) != 2:
        print("Usage: main.py FILENAME")
        return 1
    filename = args[1]
    # Context manager guarantees the file handle is closed (the old code
    # leaked it until garbage collection).
    with open(filename) as f:
        code = f.read()
    # Skip an optional shebang line; if no newline follows, find() returns
    # -1 so next_line is 0 and the code is left untouched (as before).
    if code.startswith("#!"):
        next_line = code.find("\n") + 1
        code = code[next_line:]
    tokens = tokenizer(code)
    for kind, value in tokens:
        print(f"TOKEN {kind}: {value}")
    return 0


if __name__ == "__main__":
    import sys

    sys.exit(main(sys.argv))