# SPDX-License-Identifier: LGPL-2.1-only
# Copyright 2022 Jookia <contact@jookia.org>


# Checks whether a symbol is whitespace
def is_whitespace(symbol):
    """Return True if *symbol* is exactly a space, a tab or a newline."""
    return symbol in (" ", "\t", "\n")


# Location of a symbol
class SymbolLocation:
    """Line, column and file associated with a symbol."""

    def __init__(self, line, column, file):
        self.line = line
        self.column = column
        self.file = file

    def __repr__(self):
        return "SymbolLocation(line %i, column %i, file '%s')" % (  # pragma: no mutate
            self.line,
            self.column,
            self.file,
        )

    def __eq__(self, other):
        """Compare field by field; defer to Python for foreign types."""
        # Returning NotImplemented (rather than touching other.line) keeps
        # comparisons such as `location == None` from raising AttributeError.
        if not isinstance(other, SymbolLocation):
            return NotImplemented
        return (
            self.line == other.line
            and self.column == other.column
            and self.file == other.file
        )


# Represents a tokenizer symbol
class Symbol:
    """A piece of split text (`value`) paired with its `location`."""

    def __init__(self, value, location):
        self.value = value
        self.location = location

    def __repr__(self):
        return "Symbol(value %s, location %s)" % (  # pragma: no mutate
            repr(self.value),
            repr(self.location),
        )

    def __eq__(self, other):
        """Compare value and location; defer to Python for foreign types."""
        # Same reasoning as SymbolLocation.__eq__: `symbol in [1, 2]` must
        # evaluate to False instead of raising AttributeError.
        if not isinstance(other, Symbol):
            return NotImplemented
        return self.value == other.value and self.location == other.location


# Splits text in to a list of characters and whitespace
def split_symbols(input):
    """Split *input* into a list of Symbol objects.

    Consecutive non-whitespace characters are grouped into one symbol, while
    every whitespace character (as defined by is_whitespace) becomes a symbol
    of its own. An empty input yields an empty list.

    Every returned symbol carries the same SymbolLocation(1, 1, "") object;
    this function does not compute real positions.
    """
    if input == "":
        return []
    symbols = []
    # Single location instance shared by all symbols produced below.
    location = SymbolLocation(1, 1, "")
    current = input[0]
    curr_whitespace = is_whitespace(current)
    for c in input[1:]:
        c_whitespace = is_whitespace(c)
        if c_whitespace or curr_whitespace:
            # A boundary exists whenever either side is whitespace: either
            # the mode switches, or we are in whitespace mode where each
            # character is emitted separately.
            symbols.append(Symbol(current, location))
            current = c
            curr_whitespace = c_whitespace
        else:
            # Symbol mode builds the current buffer
            current += c
    # Flush whatever is left in the buffer.
    symbols.append(Symbol(current, location))
    return symbols