1 /*******************************************************************************
2
3 Copyright:
4 Copyright (C) 2008 Aaron Craelius & Kris Bell.
5 Some parts copyright (c) 2009-2016 dunnhumby Germany GmbH.
6 All rights reserved.
7
8 License:
9 Tango Dual License: 3-Clause BSD License / Academic Free License v3.0.
10 See LICENSE_TANGO.txt for details.
11
12 Version: Initial release: July 2008
13
14 Authors: Aaron, Kris
15
16 *******************************************************************************/
17
18 module ocean.text.json.JsonParser;
19
20 import ocean.meta.types.Qualifiers;
21 import ocean.core.Exception;
22 import ocean.util.container.more.Stack;
23
24 version (unittest) import ocean.core.Test;
25
26 /*******************************************************************************
27
28 If AllowNaN is true, then NaN, Infinity, and -Infinity are parsed.
29
    NaN, Infinity, and -Infinity are technically not part of
    the JSON specification, but Javascript writes them by default, so they are
    by far the most common cause of invalid JSON. Practically all
    JSON parsers (e.g. Google GSON, Jackson, Ruby's JSON, simplejson,
    JSON.net, Lua CJson) have an option to accept NaN.
35
36 *******************************************************************************/
37
class JsonParser(T, bool AllowNaN = false)
{
    /***************************************************************************

        JSON tokens. The last three are used only if AllowNaN is true

    ***************************************************************************/

    public enum Token
    {
        Empty, Name, String, Number, BeginObject, EndObject,
        BeginArray, EndArray, True, False, Null,
        NaN, Infinity, NegInfinity
    }

    /// Kind of container the parser is currently positioned in
    private enum State
    {
        Object,
        Array
    }

    /***************************************************************************

        Cursor over the input text.

        `ptr` is the current read position, `end` points one past the last
        character of `text`. `ptr` must never be dereferenced once it has
        reached `end`.

    ***************************************************************************/

    private struct Iterator
    {
        const(T)* ptr;
        const(T)* end;
        const(T)[] text;

        /// Points the cursor at the beginning of `text`
        void reset (const(T)[] text)
        {
            this.text = text;
            this.ptr = text.ptr;
            this.end = this.ptr + text.length;
        }
    }

    /// Input being parsed and the current read position in it
    protected Iterator str;

    /// Container states of the enclosing (not yet closed) objects / arrays
    private Stack!(State, 16) state;

    /// Start of the current token's text, see `value`
    private const(T)* curLoc;

    /// Length of the current token's text, see `value`
    private ptrdiff_t curLen;

    /// Kind of container the current token lives in
    private State curState;

    /// Type of the current token, see `type`
    protected Token curType;

    /// Exception instance, allocated once and re-used for every error
    protected JsonParserException exception;

    /***************************************************************************

        Construct a parser from a string

        Params:
            text = Text to initialize this parser to. Can be `null`.

    ***************************************************************************/

    this (const(T)[] text = null)
    {
        this.exception = new JsonParserException();
        this.reset(text);
    }


    /***************************************************************************

        Advances to the next token.

        Returns:
            `true` if there is a next element, `false` otherwise

        Throws:
            `JsonParserException` on malformed or truncated input

    ***************************************************************************/

    final bool next ()
    {
        if (this.str.ptr is null || this.str.end is null)
            return false;

        this.str.ptr = this.skipSpace(this.str.ptr);

        // Only whitespace (or nothing) left: no more tokens
        if (this.str.ptr >= this.str.end)
            return false;

        if (this.curState is State.Array)
            return this.parseArrayValue();

        // Inside an object a name is always followed by its value,
        // anything else is followed by a name (or the closing '}')
        if (this.curType is Token.Name)
            return this.parseMemberValue();

        return this.parseMemberName();
    }

    /// Returns: The `Token` type of the current token
    final Token type ()
    {
        return this.curType;
    }

    /***************************************************************************

        Returns:
            The text of the current token as a slice of the input. Names and
            strings are returned without the enclosing quotes (escape
            sequences are not decoded); structural tokens yield an empty
            slice.

    ***************************************************************************/

    final const(T)[] value ()
    {
        return this.curLoc[0 .. this.curLen];
    }

    /***************************************************************************

        Reset the parser to a new string

        Params:
            json = new string to process

        Returns:
            `true` if the document starts with a '{' or a '[',
            `false` if it is empty or consists of whitespace only

        Throws:
            `JsonParserException` if the first non-whitespace character is
            neither '{' nor '['

    ***************************************************************************/

    bool reset (const(T)[] json = null)
    {
        this.state.clear();
        this.str.reset(json);
        this.curType = Token.Empty;
        this.curState = State.Object;

        if (json.length)
        {
            auto p = this.skipSpace(this.str.ptr);

            if (p < this.str.end)
                return this.start(*(this.str.ptr = p));
        }
        return false;
    }


    /// Throws: a new exception with "expected `token`" as message
    protected final void expected (cstring token)
    {
        throw this.exception.set("expected ").append(token);
    }

    /***************************************************************************

        Report error about an expected token not being found

        Params:
            token = the token that was expected to be found
            point = Where the token was expected

        Throws:
            Always ends up throwing an exception

    ***************************************************************************/

    protected final void expected (cstring token, const(T)* point)
    {
        // Offset of `point` from the start of the input, for the message
        auto diff = cast(int) (point - this.str.text.ptr);
        throw this.exception.set("expected ").append(token).append(" @input[")
            .append(diff).append("]");
    }

    /// Throws: An exception with "unexpected end-of-input: msg" as message
    private void unexpectedEOF (cstring msg)
    {
        throw this.exception.set("unexpected end-of-input: ").append(msg);
    }

    /***************************************************************************

        Skips whitespace (any character <= 32) without leaving the input.

        Params:
            p = position to start at, must be within `[str.ptr .. str.end]`

        Returns:
            the first position at or after `p` that either holds a
            non-whitespace character or equals `str.end`

    ***************************************************************************/

    private const(T)* skipSpace (const(T)* p)
    {
        auto e = this.str.end;

        // The bounds check has to come first: `e` is not readable
        while (p < e && *p <= 32)
            ++p;

        return p;
    }

    /// Returns: `true` if `c` is an ASCII decimal digit
    private static bool isDigit (T c)
    {
        return c >= '0' && c <= '9';
    }


    /// Called by `reset`, ensure the document starts with '{' or '['
    private bool start (T c)
    {
        if (c is '{')
            return this.push(Token.BeginObject, State.Object);

        if (c is '[')
            return this.push(Token.BeginArray, State.Array);

        this.expected("'{' or '[' at start of document");

        assert(0);
    }

    /***************************************************************************

        Parses either the name of the next object member or the '}' closing
        the current object. A ',' in front of the name is skipped if present
        but not required.

        Expects `str.ptr` to point at a non-whitespace character inside the
        input (guaranteed by `next`).

        Returns:
            `true`

        Throws:
            `JsonParserException` if no quoted name is found or if the input
            ends before the name is complete

    ***************************************************************************/

    private bool parseMemberName ()
    {
        auto p = this.str.ptr;
        auto e = this.str.end;

        if (*p is '}')
            return this.pop(Token.EndObject);

        if (*p is ',')
            ++p;

        p = this.skipSpace(p);

        // Input ended right after the separator
        if (p >= e)
            this.unexpectedEOF("before attribute-name");

        if (*p != '"')
        {
            if (*p == '}')
                this.expected("an attribute-name after (a potentially trailing) ','", p);
            else
                this.expected("'\"' before attribute-name", p);
        }

        this.curLoc = p + 1;
        this.curType = Token.Name;

        // Look for the closing, non-escaped quote
        while (++p < e)
            if (*p is '"' && !this.escaped(p))
                break;

        if (p < e)
            this.curLen = p - this.curLoc;
        else
            this.unexpectedEOF("in attribute-name");

        this.str.ptr = p + 1;
        return true;
    }

    /***************************************************************************

        Parses the ':' following a member name and the value after it.

        Expects `str.ptr` to point at a non-whitespace character inside the
        input (guaranteed by `next`).

        Returns:
            the result of `parseValue`

        Throws:
            `JsonParserException` if the ':' is missing, if the input ends
            after it, or if the value is malformed

    ***************************************************************************/

    private bool parseMemberValue ()
    {
        auto p = this.str.ptr;

        if (*p != ':')
            this.expected("':' before attribute-value", p);

        p = this.skipSpace(p + 1);

        if (p >= this.str.end)
            this.unexpectedEOF("before attribute-value");

        return this.parseValue(*(this.str.ptr = p));
    }

    /***************************************************************************

        Parses the value starting at `str.ptr`, dispatching on its first
        character. Everything that is not recognised as object, array,
        string or literal is handed to `parseNumber`.

        Params:
            c = the character at `str.ptr`

        Returns:
            `true` if a token was parsed, `false` if `parseNumber` was tried
            and did not consume anything

        Throws:
            `JsonParserException` on a misspelled literal or truncated input

    ***************************************************************************/

    private bool parseValue (T c)
    {
        switch (c)
        {
            case '{':
                return this.push(Token.BeginObject, State.Object);

            case '[':
                return this.push(Token.BeginArray, State.Array);

            case '"':
                return this.doString();

            case 'n':
                if (this.match("null", Token.Null))
                    return true;
                this.expected("'null'", this.str.ptr);
                assert(false);

            case 't':
                if (this.match("true", Token.True))
                    return true;
                this.expected("'true'", this.str.ptr);
                assert(false);

            case 'f':
                if (this.match("false", Token.False))
                    return true;
                this.expected("'false'", this.str.ptr);
                assert(false);

            static if (AllowNaN)
            {
                case 'N':
                    if (this.match("NaN", Token.NaN))
                        return true;
                    this.expected ("'NaN'", this.str.ptr);
                    assert(false);

                case 'I':
                    if (this.match("Infinity", Token.Infinity))
                        return true;
                    this.expected ("'Infinity'", this.str.ptr);
                    assert(false);

                case '-':
                    if (this.match("-Infinity", Token.NegInfinity))
                        return true;
                    // Not "-Infinity": treat it as a negative number
                    break;
            }

            default:
                break;
        }

        return this.parseNumber();
    }

    /***************************************************************************

        Parses a string value; `str.ptr` must point at the opening quote.
        The token's value is the text between the quotes.

        Returns:
            `true`

        Throws:
            `JsonParserException` if the input ends before the closing quote

    ***************************************************************************/

    private bool doString ()
    {
        auto p = this.str.ptr;
        auto e = this.str.end;

        this.curLoc = p+1;
        this.curType = Token.String;

        // Look for the closing, non-escaped quote
        while (++p < e)
            if (*p is '"' && !this.escaped(p))
                break;

        if (p < e)
            this.curLen = p - this.curLoc;
        else
            this.unexpectedEOF("in string");

        this.str.ptr = p + 1;
        return true;
    }

    /***************************************************************************

        Parses a number starting at `str.ptr`.

        The accepted form is lenient: an optional sign ('-' or '+'), digits,
        an optional '.' followed by digits, and an optional exponent ('e' or
        'E', optional sign, digits). None of the digit runs is required to be
        non-empty.

        Returns:
            `true` if at least one character was consumed, `false` otherwise

        Throws:
            `JsonParserException` if the number runs up to the end of the
            input (a document cannot end with a number)

    ***************************************************************************/

    private bool parseNumber ()
    {
        auto p = this.str.ptr;
        auto e = this.str.end;

        this.curLoc = p;
        this.curType = Token.Number;

        // Every read below is guarded by `p < e` so that a number at the
        // very end of the input cannot make the parser read past it

        if (p < e && (*p is '-' || *p is '+'))
            ++p;

        while (p < e && isDigit(*p))
            ++p;

        if (p < e && *p is '.')
        {
            do
                ++p;
            while (p < e && isDigit(*p));
        }

        if (p < e && (*p is 'e' || *p is 'E'))
        {
            ++p;

            if (p < e && (*p is '-' || *p is '+'))
                ++p;

            while (p < e && isDigit(*p))
                ++p;
        }

        if (p < e)
            this.curLen = p - this.curLoc;
        else
            this.unexpectedEOF("after number");

        this.str.ptr = p;
        return this.curLen > 0;
    }

    /***************************************************************************

        Checks whether the input at `str.ptr` starts with `name` and, if so,
        makes it the current token and moves past it.

        Params:
            name = literal to look for
            token = token type to set on success

        Returns:
            `true` on a match, `false` otherwise (including when fewer than
            `name.length` characters are left in the input)

    ***************************************************************************/

    private bool match (const(T)[] name, Token token)
    {
        auto len = name.length;
        auto left = cast(size_t) (this.str.end - this.str.ptr);

        // Slicing a raw pointer is not bounds-checked, so make sure the
        // input is long enough before comparing
        if (left < len || this.str.ptr[0 .. len] != name)
            return false;

        this.curLoc = this.str.ptr;
        this.curType = token;
        this.str.ptr += len;
        this.curLen = len;
        return true;
    }

    /***************************************************************************

        Enters an object or array: consumes the opening character at
        `str.ptr`, remembers the current container state and switches
        to `next`.

        Params:
            token = token type to set (BeginObject / BeginArray)
            next = state matching the container being entered

        Returns:
            `true`

    ***************************************************************************/

    private bool push (Token token, State next)
    {
        this.curLen = 0;
        this.curType = token;
        this.curLoc = this.str.ptr++;
        this.state.push(this.curState);
        this.curState = next;
        return true;
    }

    /***************************************************************************

        Leaves an object or array: consumes the closing character at
        `str.ptr` and restores the state of the enclosing container.

        Params:
            token = token type to set (EndObject / EndArray)

        Returns:
            `true`

    ***************************************************************************/

    private bool pop (Token token)
    {
        this.curLen = 0;
        this.curType = token;
        this.curLoc = this.str.ptr++;
        this.curState = this.state.pop;
        return true;
    }

    /***************************************************************************

        Parses either the next array element or the ']' closing the current
        array. A ',' in front of the element is skipped if present but not
        required.

        Expects `str.ptr` to point at a non-whitespace character inside the
        input (guaranteed by `next`).

        Returns:
            `true` for ']', the result of `parseValue` otherwise

        Throws:
            `JsonParserException` if the input ends after the separator or
            if the element is malformed

    ***************************************************************************/

    private bool parseArrayValue ()
    {
        auto p = this.str.ptr;

        if (*p is ']')
            return this.pop(Token.EndArray);

        if (*p is ',')
            ++p;

        p = this.skipSpace(p);

        if (p >= this.str.end)
            this.unexpectedEOF("before array value");

        return this.parseValue(*(this.str.ptr = p));
    }

    /***************************************************************************

        Tells whether the character at `p` is escaped, i.e. preceded by an
        odd number of backslashes.

        Only called for positions behind the opening quote of a name or
        string, so the backwards scan stops at that quote at the latest.

        Params:
            p = position of the character to check

        Returns:
            1 if the character is escaped, 0 if it is not

    ***************************************************************************/

    private int escaped (const(T)* p)
    {
        int i;

        while (*--p is '\\')
            ++i;
        return i & 1;
    }
}
454
/*******************************************************************************

    Exception thrown by `JsonParser` when the input cannot be parsed.

    The parser allocates a single instance in its constructor and re-uses it
    for every error, building the message through the `set` / `append`
    methods provided by the mixed-in implementation.

*******************************************************************************/

public class JsonParserException : Exception
{
    mixin ReusableExceptionImplementation!() R;
}
459
460
// Walks a complete document token by token. Note that the members following
// "GlossSee" are deliberately left as in the original fixture, i.e. without
// separating commas, which the parser tolerates.
unittest
{
    static immutable istring json =
    `{
        "glossary": {
            "title": "example glossary",
            "GlossDiv": {
                "title": "S",
                "GlossList": {
                    "GlossEntry": {
                        "ID": "SGML",
                        "SortAs": "SGML",
                        "GlossTerm": "Standard Generalized Markup Language",
                        "Acronym": "SGML",
                        "Abbrev": "ISO 8879:1986",
                        "GlossDef": {
                            "para": "A meta-markup language, used to create markup languages such as DocBook.",
                            "GlossSeeAlso": [
                                "GML",
                                "XML"
                            ]
                        },
                        "GlossSee": "markup",
                        "ANumber": 12345.6e7
                        "BNumber": 12345.6e+7
                        "CNumber": 12345.6e-7
                        "DNumber": 12345.6E7
                        "ENumber": 12345.6E+7
                        "FNumber": 12345.6E-7
                        "True": true
                        "False": false
                        "Null": null
                    }
                }
            }
        }
    }`;

    alias JsonParser!(char) Parser;
    alias Parser.Token Token;

    auto parser = new Parser(json);

    // Advances the parser and checks the type of the token it arrives at
    void expectToken (Token type)
    {
        test(parser.next());
        test(parser.type == type);
    }

    // Like `expectToken`, additionally checks the text of the token
    void expectText (Token type, cstring text)
    {
        expectToken(type);
        test(parser.value == text, parser.value);
    }

    // The constructor already consumed the opening brace of the document
    test(parser !is null);
    test(parser.type == Token.BeginObject);

    expectText(Token.Name, "glossary");
    expectText(Token.BeginObject, "");

    expectText(Token.Name, "title");
    expectText(Token.String, "example glossary");

    expectText(Token.Name, "GlossDiv");
    expectToken(Token.BeginObject);

    expectText(Token.Name, "title");
    expectText(Token.String, "S");

    expectText(Token.Name, "GlossList");
    expectToken(Token.BeginObject);

    expectText(Token.Name, "GlossEntry");
    expectToken(Token.BeginObject);

    expectText(Token.Name, "ID");
    expectText(Token.String, "SGML");

    expectText(Token.Name, "SortAs");
    expectText(Token.String, "SGML");

    expectText(Token.Name, "GlossTerm");
    expectText(Token.String, "Standard Generalized Markup Language");

    expectText(Token.Name, "Acronym");
    expectText(Token.String, "SGML");

    expectText(Token.Name, "Abbrev");
    expectText(Token.String, "ISO 8879:1986");

    expectText(Token.Name, "GlossDef");
    expectToken(Token.BeginObject);

    expectText(Token.Name, "para");
    expectText(Token.String,
        "A meta-markup language, used to create markup languages such as DocBook.");

    expectText(Token.Name, "GlossSeeAlso");
    expectToken(Token.BeginArray);
    expectText(Token.String, "GML");
    expectText(Token.String, "XML");
    expectToken(Token.EndArray);

    // End of "GlossDef"
    expectToken(Token.EndObject);

    expectText(Token.Name, "GlossSee");
    expectText(Token.String, "markup");

    // All supported exponent notations
    expectText(Token.Name, "ANumber");
    expectText(Token.Number, "12345.6e7");

    expectText(Token.Name, "BNumber");
    expectText(Token.Number, "12345.6e+7");

    expectText(Token.Name, "CNumber");
    expectText(Token.Number, "12345.6e-7");

    expectText(Token.Name, "DNumber");
    expectText(Token.Number, "12345.6E7");

    expectText(Token.Name, "ENumber");
    expectText(Token.Number, "12345.6E+7");

    expectText(Token.Name, "FNumber");
    expectText(Token.Number, "12345.6E-7");

    // Literals
    expectText(Token.Name, "True");
    expectToken(Token.True);

    expectText(Token.Name, "False");
    expectToken(Token.False);

    expectText(Token.Name, "Null");
    expectToken(Token.Null);

    // "GlossEntry", "GlossList", "GlossDiv", "glossary" and the document
    expectToken(Token.EndObject);
    expectToken(Token.EndObject);
    expectToken(Token.EndObject);
    expectToken(Token.EndObject);
    expectToken(Token.EndObject);

    // Nothing left, and every container that was entered has been left
    test(!parser.next());
    test(parser.state.size == 0);
}