Skip to content

Commit c2c3dc3

Browse files
committed
#72 - support NaN and Infinity parsing in Python tokenizer
1 parent 974f8e6 commit c2c3dc3

2 files changed

Lines changed: 82 additions & 1 deletion

File tree

src/json_stream/tests/test_tokenizer.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
66
Copyright (c) 2019 Daniel Yule
77
"""
8+
import math
89
import re
910
from io import StringIO
1011
from unittest import TestCase
@@ -59,6 +60,22 @@ def test_number_parsing(self):
5960
self.assertRaises(ValueError, self.tokenize_sequence, "3.6ea")
6061
self.assertRaises(ValueError, self.tokenize_sequence, "67.8e+a")
6162

63+
def test_nan_infinity_parsing(self):
64+
token_list = self.tokenize_sequence("NaN")
65+
self.assertEqual(1, len(token_list))
66+
ttype, token = token_list[0]
67+
self.assertTrue(math.isnan(token))
68+
self.assertEqual(ttype, TokenType.NUMBER)
69+
70+
self.assertNumberEquals(float("inf"), "Infinity")
71+
self.assertNumberEquals(float("-inf"), "-Infinity")
72+
73+
self.assertRaises(ValueError, self.tokenize_sequence, "Na")
74+
self.assertRaises(ValueError, self.tokenize_sequence, "Nax")
75+
self.assertRaises(ValueError, self.tokenize_sequence, "In")
76+
self.assertRaises(ValueError, self.tokenize_sequence, "Infinit")
77+
self.assertRaises(ValueError, self.tokenize_sequence, "-In")
78+
6279
def test_operator_parsing(self):
6380
self.assertOperatorEquals("{", "{")
6481
self.assertOperatorEquals("}", "}")

src/json_stream/tokenizer.py

Lines changed: 65 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,15 @@ class State:
4545
UNICODE_SURROGATE_START = 23
4646
UNICODE_SURROGATE_STRING_ESCAPE = 24
4747
UNICODE_SURROGATE = 25
48+
NAN_1 = 26
49+
NAN_2 = 27
50+
INF_1 = 28
51+
INF_2 = 29
52+
INF_3 = 30
53+
INF_4 = 31
54+
INF_5 = 32
55+
INF_6 = 33
56+
INF_7 = 34
4857

4958

5059
class SpecialChar:
@@ -130,6 +139,10 @@ def process_char(char):
130139
next_state = State.TRUE_1
131140
elif char == "n":
132141
next_state = State.NULL_1
142+
elif char == "N":
143+
next_state = State.NAN_1
144+
elif char == "I":
145+
next_state = State.INF_1
133146
elif not char.isspace() and not char == SpecialChar.EOF:
134147
raise ValueError("Invalid JSON character: '{0}'".format(char))
135148
elif state == State.INTEGER:
@@ -169,8 +182,10 @@ def process_char(char):
169182
elif char in "123456789":
170183
next_state = State.INTEGER
171184
add_char = True
185+
elif char == "I":
186+
next_state = State.INF_1
172187
else:
173-
raise ValueError("A - must be followed by a digit. Got '{0}'".format(char))
188+
raise ValueError("A - must be followed by a digit or Infinity. Got '{0}'".format(char))
174189
elif state == State.INTEGER_EXP_0:
175190
if char == "+" or char == "-" or char in "0123456789":
176191
next_state = State.INTEGER_EXP
@@ -262,6 +277,55 @@ def process_char(char):
262277
now_token = (TokenType.NULL, None)
263278
else:
264279
raise ValueError("Invalid JSON character: '{0}'".format(char))
280+
elif state == State.NAN_1:
281+
if char == "a":
282+
next_state = State.NAN_2
283+
else:
284+
raise ValueError("Invalid JSON character: '{0}'".format(char))
285+
elif state == State.NAN_2:
286+
if char == "N":
287+
next_state = State.WHITESPACE
288+
completed = True
289+
now_token = (TokenType.NUMBER, float("NaN"))
290+
else:
291+
raise ValueError("Invalid JSON character: '{0}'".format(char))
292+
elif state == State.INF_1:
293+
if char == "n":
294+
next_state = State.INF_2
295+
else:
296+
raise ValueError("Invalid JSON character: '{0}'".format(char))
297+
elif state == State.INF_2:
298+
if char == "f":
299+
next_state = State.INF_3
300+
else:
301+
raise ValueError("Invalid JSON character: '{0}'".format(char))
302+
elif state == State.INF_3:
303+
if char == "i":
304+
next_state = State.INF_4
305+
else:
306+
raise ValueError("Invalid JSON character: '{0}'".format(char))
307+
elif state == State.INF_4:
308+
if char == "n":
309+
next_state = State.INF_5
310+
else:
311+
raise ValueError("Invalid JSON character: '{0}'".format(char))
312+
elif state == State.INF_5:
313+
if char == "i":
314+
next_state = State.INF_6
315+
else:
316+
raise ValueError("Invalid JSON character: '{0}'".format(char))
317+
elif state == State.INF_6:
318+
if char == "t":
319+
next_state = State.INF_7
320+
else:
321+
raise ValueError("Invalid JSON character: '{0}'".format(char))
322+
elif state == State.INF_7:
323+
if char == "y":
324+
next_state = State.WHITESPACE
325+
completed = True
326+
now_token = (TokenType.NUMBER, float("".join(token) + "Infinity"))
327+
else:
328+
raise ValueError("Invalid JSON character: '{0}'".format(char))
265329
elif state == State.STRING:
266330
if char == "\"":
267331
completed = True

0 commit comments

Comments
 (0)