diff --git a/docs/syntax.md b/docs/syntax.md index c671ab9..dbc451c 100644 --- a/docs/syntax.md +++ b/docs/syntax.md @@ -90,11 +90,11 @@ Syntax in NewLang is formed using alphanumeric tokens separated by whitespace. -Whitespace can be the following characters: +Whitespace can be the following Unicode code points: -- A space -- A tab -- A new line +- U+000A LINE FEED +- U+0009 HORIZONTAL TAB +- U+0020 SPACE For example, the following code snippet: diff --git a/src/tokenize.py b/src/tokenize.py index bb73f63..2365a96 100644 --- a/src/tokenize.py +++ b/src/tokenize.py @@ -19,7 +19,7 @@ return is_space(symbol) or is_newline(symbol) -# Splits text in to a list of characters and whitespace +# Splits text into non-whitespace and whitespace def split_tokens(input): if input == "": return [] @@ -35,7 +35,7 @@ current = c curr_whitespace = c_whitespace elif curr_whitespace: - # Whitespace mode appends each character + # Whitespace mode appends each code point tokens.append(Syntax(current, location, SyntaxType.TOKEN)) current = c else: