Newer
Older
NewLang / src / tokenize.py
# SPDX-License-Identifier: LGPL-2.1-only
# Copyright 2022 Jookia <contact@jookia.org>

# Reports whether a symbol is exactly one of the whitespace symbols:
# a single space, tab, or newline
def is_whitespace(symbol):
    return symbol in (" ", "\t", "\n")


# Splits text into a list of alternating runs: each entry is either an
# unbroken run of whitespace or an unbroken run of non-whitespace.
# Every character of the text ends up in exactly one entry, in order,
# and empty text gives an empty list.
def split_symbols(input):
    if input == "":
        return []
    runs = []
    pending = []
    # The first run takes its kind from the first character
    pending_is_space = is_whitespace(input[0])
    for char in input:
        char_is_space = is_whitespace(char)
        if char_is_space != pending_is_space:
            # The kind flipped, so close the finished run and start a new one
            runs.append("".join(pending))
            pending = []
            pending_is_space = char_is_space
        pending.append(char)
    # The last run is never closed inside the loop
    runs.append("".join(pending))
    return runs