from hypothesis import given, assume
from hypothesis.strategies import text, booleans, sampled_from, characters, lists

from src import parse


# Test with no data at all
def test_parser_empty():
    tokenizer = parse.Tokenizer("", "")
    tokens = tokenizer.tokenize()
    assert tokens[0].type == "EOF"
    assert tokens[0].location.line == 1
    assert tokens[0].location.column == 0
    assert tokens[0].location.file == ""


# General fuzz test: make sure the parser doesn't fall apart and spew
# uncontrolled errors.
@given(text(), text())
def test_parser_fuzz(code, filename):
    try:
        tokenizer = parse.Tokenizer(code, filename)
        tokens = tokenizer.tokenize()
        parser = parse.Parser(tokens)
        parser.parse_file()
    except parse.ParseError:
        pass


# Helper that splits a string on a set of separator characters, used to check
# whether generated text contains reserved words as standalone tokens
def split_by(string, separators):
    tokens = []
    curr_token = ""
    for c in string:
        if c in separators:
            if curr_token != "":
                tokens.append(curr_token)
            curr_token = ""
        else:
            curr_token += c
    if curr_token != "":
        tokens.append(curr_token)
    return tokens


# Test that we can make string literals using the BeginText and EndText syntax.
@given(text(), sampled_from("\n\t "), sampled_from("\n\t "))
def test_lexer_text(text, space1, space2):
    text_tokens = split_by(text, "\n\t ")
    assume("BeginText" not in text_tokens and "EndText" not in text_tokens)
    code = "BeginText" + space1 + text + space2 + "EndText"
    tokenizer = parse.Tokenizer(code, "")
    tokens = tokenizer.tokenize()
    assert tokens[0].type == "text"
    assert tokens[0].value == text
    assert tokens[0].location.line == 1
    assert tokens[0].location.column == 1
    assert tokens[0].location.file == ""
    assert tokens[1].type == "EOF"


# Test that we can make notes using the BeginNote and EndNote syntax.
@given(text(), sampled_from("\n\t "), sampled_from("\n\t "))
def test_lexer_note(text, space1, space2):
    text_tokens = split_by(text, "\n\t ")
    assume("BeginNote" not in text_tokens and "EndNote" not in text_tokens)
    code = "BeginNote" + space1 + text + space2 + "EndNote"
    tokenizer = parse.Tokenizer(code, "")
    tokens = tokenizer.tokenize()
    assert tokens[0].type == "EOF"


# Test that we can make booleans using True and False
@given(booleans())
def test_lexer_boolean(value):
    code = "True" if value else "False"
    tokenizer = parse.Tokenizer(code, "")
    tokens = tokenizer.tokenize()
    assert tokens[0].type == "bool"
    assert tokens[0].value == value
    assert tokens[0].location.line == 1
    assert tokens[0].location.column == 1
    assert tokens[0].location.file == ""
    assert tokens[1].type == "EOF"


# List of keywords the lexer understands
keywords = [
    "NewLang",
    "Done",
    "Set",
    "To",
    "EndSet",
    "If",
    "Then",
    "Else",
    "EndIf",
]


# Test that we can read keywords properly
@given(sampled_from(keywords))
def test_lexer_keyword(keyword):
    code = keyword
    tokenizer = parse.Tokenizer(code, "")
    tokens = tokenizer.tokenize()
    assert tokens[0].type == "keyword"
    assert tokens[0].value == keyword
    assert tokens[0].location.line == 1
    assert tokens[0].location.column == 1
    assert tokens[0].location.file == ""
    assert tokens[1].type == "EOF"


# List of words the lexer understands
reserved_words = keywords + ["BeginText", "EndText", "BeginNote", "EndNote"]


# Test that we can make symbols
@given(text(alphabet=characters(blacklist_characters="\n\t "), min_size=1))
def test_lexer_symbols(symbol):
    assume(symbol not in reserved_words)  # Reserved words aren't symbols
    assume(symbol not in ("True", "False"))  # Booleans aren't symbols
    assume(not symbol.startswith("#!"))  # Shebangs aren't symbols
    code = symbol
    tokenizer = parse.Tokenizer(code, "")
    tokens = tokenizer.tokenize()
    assert tokens[0].type == "symbol"
    assert tokens[0].value == symbol
    assert tokens[0].location.line == 1
    assert tokens[0].location.column == 1
    assert tokens[0].location.file == ""
    assert tokens[1].type == "EOF"
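
# A hedged extra property (not part of the original suite): a symbol separated
# from a keyword by whitespace should lex as two independent tokens. The test
# name is new, and the expectation that whitespace-separated chunks tokenize
# independently is an assumption extrapolated from the symbol and keyword
# tests above; it only uses the Tokenizer API those tests already exercise.
@given(
    text(alphabet=characters(blacklist_characters="\n\t "), min_size=1),
    sampled_from(keywords),
    sampled_from("\n\t "),
)
def test_lexer_symbol_then_keyword(symbol, keyword, space):
    assume(symbol not in reserved_words)  # Reserved words aren't symbols
    assume(symbol not in ("True", "False"))  # Booleans aren't symbols
    assume(not symbol.startswith("#!"))  # Shebangs aren't symbols
    code = symbol + space + keyword
    tokenizer = parse.Tokenizer(code, "")
    tokens = tokenizer.tokenize()
    assert tokens[0].type == "symbol"
    assert tokens[0].value == symbol
    assert tokens[1].type == "keyword"
    assert tokens[1].value == keyword
    assert tokens[2].type == "EOF"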
# Test that reserved words aren't recognized when conjoined without whitespace
@given(lists(sampled_from(reserved_words), min_size=2))
def test_lexer_conjoined_words(words):
    word = "".join(words)
    tokenizer = parse.Tokenizer(word, "")
    tokens = tokenizer.tokenize()
    assert tokens[0].type == "symbol"
    assert tokens[0].value == word
    assert tokens[0].location.line == 1
    assert tokens[0].location.column == 1
    assert tokens[0].location.file == ""
    assert tokens[1].type == "EOF"


# Test that shebangs are skipped
@given(text(alphabet=characters(blacklist_characters="\n")))
def test_lexer_shebang(shebang):
    code = "#!" + shebang + "\n"
    tokenizer = parse.Tokenizer(code, "")
    tokens = tokenizer.tokenize()
    assert tokens[0].type == "EOF"
    assert tokens[0].location.line == 2
    assert tokens[0].location.column == 0
    assert tokens[0].location.file == ""
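
# A hedged extra property (not part of the original suite): token locations
# should advance across newlines. The line-number expectations are assumptions
# extrapolated from test_lexer_shebang, where the EOF after "\n" lands on
# line 2; adjust them if the real tokenizer counts lines differently.
@given(booleans(), booleans())
def test_lexer_locations_across_lines(first, second):
    code = str(first) + "\n" + str(second)  # e.g. "True\nFalse"
    tokenizer = parse.Tokenizer(code, "")
    tokens = tokenizer.tokenize()
    assert tokens[0].type == "bool"
    assert tokens[0].location.line == 1
    assert tokens[1].type == "bool"
    assert tokens[1].location.line == 2  # assumed: "\n" advances the line counter
    assert tokens[2].type == "EOF"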