diff --git a/src/parse.py b/src/parse.py index 1af5010..3ab3b36 100644 --- a/src/parse.py +++ b/src/parse.py @@ -378,7 +378,11 @@ def parse_file(filename): - code = open(filename).read() + try: + code = open(filename, encoding="utf-8").read() + except UnicodeDecodeError: + print("Parse error: %s is not valid UTF-8" % (filename)) + return None try: tokenizer = Tokenizer(code, filename) tokens = tokenizer.tokenize() diff --git a/tests/test_parse.py b/tests/test_parse.py index 22e1588..45e149b 100644 --- a/tests/test_parse.py +++ b/tests/test_parse.py @@ -1,10 +1,10 @@ from hypothesis import given -from hypothesis.strategies import binary +from hypothesis.strategies import text from src import parse -@given(binary(), binary()) +@given(text(), text()) def test_parser_fuzz(code, filename): try: tokenizer = parse.Tokenizer(code, filename)