# SPDX-License-Identifier: MIT
# Copyright 2021 Jookia <contact@jookia.org>

"""Tokenizer for whitespace-separated source with note and text sections."""

from itertools import chain


def tokenizer(code):
    """Split *code* into a list of ``(kind, value)`` token tuples.

    Words are delimited by a space or a newline character. Four words act
    as mode switches and never produce a ``Symbol`` token themselves:

    * ``BeginNote`` / ``EndNote`` -- words between them are discarded.
    * ``BeginText`` / ``EndText`` -- the raw characters between them
      (inner whitespace preserved) are emitted as one ``("Text", ...)``
      token.

    Any other word seen in normal mode becomes ``("Symbol", word)``.

    The mode keywords are acted on regardless of the current mode (for
    example ``BeginNote`` inside a text section leaves text mode, and an
    ``EndText`` with no open text section emits an empty Text token).
    An unterminated text or note section produces no token.

    Args:
        code: The source to tokenize; a string or any iterable of
            single-character strings.

    Returns:
        A list of ``(kind, value)`` tuples where *kind* is ``"Symbol"``
        or ``"Text"``.
    """
    # While in text mode, ``text`` holds the separator that followed
    # "BeginText", then the content, then a separator and "EndText".
    # The Text token strips the first character and this many trailing
    # characters ("EndText" plus the one separator before it).
    end_text_trim = len("EndText") + 1

    tokens = []
    token = ""
    text = ""
    mode = "normal"

    # Words are only flushed when a separator is seen, so feed one extra
    # newline after the input; otherwise the last word would be dropped
    # whenever the input does not end in whitespace.
    for symbol in chain(code, "\n"):
        if symbol in (" ", "\n"):
            if token == "":
                # Consecutive separators: no word to flush.
                pass
            elif token == "BeginNote":
                mode = "note"
            elif token == "EndNote":
                mode = "normal"
            elif token == "BeginText":
                mode = "text"
            elif token == "EndText":
                tokens.append(("Text", text[1:-end_text_trim]))
                mode = "normal"
                text = ""
            elif mode == "normal":
                tokens.append(("Symbol", token))
            token = ""
        else:
            token += symbol
        # Runs after the mode switch above, so the separator that follows
        # "BeginText" is the first character captured and the separator
        # that follows "EndText" is not captured at all.
        if mode == "text":
            text += symbol
    return tokens


def main():
    """Tokenize ``code.txt`` from the working directory and print each token."""
    # Use a context manager so the file handle is closed deterministically.
    with open("code.txt") as source:
        testcode = source.read()
    for kind, value in tokenizer(testcode):
        print("TOKEN {0}: {1}".format(kind, value))


if __name__ == "__main__":
    main()