/*******************************************************************************

    Copyright:
        Copyright (C) 2008 Aaron Craelius & Kris Bell.
        Some parts copyright (c) 2009-2016 dunnhumby Germany GmbH.
        All rights reserved.

    License:
        Tango Dual License: 3-Clause BSD License / Academic Free License v3.0.
        See LICENSE_TANGO.txt for details.

    Version: Initial release: July 2008

    Authors: Aaron, Kris

*******************************************************************************/

module ocean.text.json.JsonParser;

import ocean.meta.types.Qualifiers;
import ocean.core.Exception;
import ocean.util.container.more.Stack;

version (unittest) import ocean.core.Test;

/*******************************************************************************

    Pull-style JSON tokenizer: every call to `next` advances to the following
    token, which can then be inspected through `type` and `value`.

    If AllowNaN is true, then NaN, Infinity, and -Infinity are parsed.

    NaN, Infinity, and -Infinity are technically not part of
    the JSON specification, but Javascript writes them by default, so they are
    by far the most common cause of invalid JSON. Practically all
    JSON parsers (eg, Google GSON, Jackson, Ruby's JSON, simplejson,
    JSON.net, Lua CJson) have an option to accept NaN.

    The parser never reads outside of the slice it was given: input that ends
    in the middle of a token is reported through a `JsonParserException`.

*******************************************************************************/

class JsonParser(T, bool AllowNaN = false)
{
    /***************************************************************************

        JSON tokens. The last three are used only if AllowNaN is true

    ***************************************************************************/

    public enum Token
    {
        Empty, Name, String, Number, BeginObject, EndObject,
        BeginArray, EndArray, True, False, Null,
        NaN, Infinity, NegInfinity
    }

    /// Kind of container the parser is currently inside of
    private enum State
    {
        Object,
        Array
    }

    /// Cursor over the input text
    private struct Iterator
    {
        /// Current read position
        const(T)* ptr;
        /// One past the last element of `text`
        const(T)* end;
        /// The complete input, kept to compute error offsets
        const(T)[] text;

        /// Points the cursor at the beginning of `text`
        void reset (const(T)[] text)
        {
            this.text = text;
            this.ptr = text.ptr;
            this.end = this.ptr + text.length;
        }
    }

    /// Input cursor
    protected Iterator str;
    /// Enclosing container states, pushed on '{' / '[' and popped on '}' / ']'
    private Stack!(State, 16) state;
    /// Start of the current token's value inside the input
    private const(T)* curLoc;
    /// Length of the current token's value
    private ptrdiff_t curLen;
    /// Container the parser is currently in
    private State curState;
    /// Type of the current token
    protected Token curType;
    /// Exception instance which is re-used for every reported error
    protected JsonParserException exception;

    /***************************************************************************

        Construct a parser from a string

        Params:
            text = Text to initialize this parser to. Can be `null`.

    ***************************************************************************/

    this (const(T)[] text = null)
    {
        this.exception = new JsonParserException();
        this.reset(text);
    }


    /***************************************************************************

        Advances to the next token.

        Returns:
            `true` if there is a next element, `false` otherwise

        Throws:
            `JsonParserException` on malformed or truncated input

    ***************************************************************************/

    final bool next ()
    {
        if (this.str.ptr is null || this.str.end is null)
            return false;

        auto p = this.str.ptr;
        auto e = this.str.end;

        // The bounds check has to come first so `*p` is never read at `e`
        while (p < e && *p <= 32)
            ++p;

        if ((this.str.ptr = p) >= e)
            return false;

        if (this.curState is State.Array)
            return this.parseArrayValue();

        // Inside an object a name is always followed by its value
        if (this.curType is Token.Name)
            return this.parseMemberValue();

        return this.parseMemberName();
    }

    /// Returns: The `Token` type of the current token
    final Token type ()
    {
        return this.curType;
    }

    /// Returns: The current value of the token
    final const(T)[] value ()
    {
        return this.curLoc[0 .. this.curLen];
    }

    /***************************************************************************

        Reset the parser to a new string

        Params:
            json = new string to process

        Returns:
            `true` if the document starts with a '{' or a '['

        Throws:
            `JsonParserException` if the first non-blank character is neither
            '{' nor '['

    ***************************************************************************/

    bool reset (const(T)[] json = null)
    {
        this.state.clear();
        this.str.reset(json);
        this.curType = Token.Empty;
        this.curState = State.Object;
        // Do not keep `value()` pointing into the previous document
        this.curLoc = null;
        this.curLen = 0;

        if (json.length)
        {
            auto p = this.str.ptr;
            auto e = this.str.end;

            // Bounds check first: never dereference `p` at `e`
            while (p < e && *p <= 32)
                ++p;
            if (p < e)
                return this.start(*(this.str.ptr = p));
        }
        return false;
    }


    /// Throws: the parser's exception with "expected `token`" as message
    protected final void expected (cstring token)
    {
        throw this.exception.set("expected ").append(token);
    }

    /***************************************************************************

        Report error about an expected token not being found

        Params:
            token = the token that was expected to be found
            point = Where the token was expected

        Throws:
            Always ends up throwing the parser's exception

    ***************************************************************************/

    protected final void expected (cstring token, const(T)* point)
    {
        auto diff = cast(int) (point - this.str.text.ptr);
        throw this.exception.set("expected ").append(token).append(" @input[")
            .append(diff).append("]");
    }

    /// Throws: the parser's exception with "unexpected end-of-input: msg" as message
    private void unexpectedEOF (cstring msg)
    {
        throw this.exception.set("unexpected end-of-input: ").append(msg);
    }


    /// Called by `reset`, ensure the document starts with '{' or '['
    private bool start (T c)
    {
        if (c is '{')
            return this.push(Token.BeginObject, State.Object);

        if (c is '[')
            return this.push(Token.BeginArray, State.Array);

        this.expected("'{' or '[' at start of document");

        assert(0);
    }

    /// Parses either the end of the current object or the next member name.
    /// The caller guarantees that `this.str.ptr` is inside the input.
    private bool parseMemberName ()
    {
        auto p = this.str.ptr;
        auto e = this.str.end;

        if (*p is '}')
            return this.pop(Token.EndObject);

        if (*p is ',')
            ++p;

        while (p < e && *p <= 32)
            ++p;

        // Running out of input is reported like any other missing quote
        if (p >= e || *p != '"')
        {
            if (p < e && *p == '}')
                this.expected("an attribute-name after (a potentially trailing) ','", p);
            else
                this.expected("'\"' before attribute-name", p);
        }

        this.curLoc = p + 1;
        this.curType = Token.Name;

        while (++p < e)
            if (*p is '"' && !this.escaped(p))
                break;

        if (p < e)
            this.curLen = p - this.curLoc;
        else
            this.unexpectedEOF("in attribute-name");

        this.str.ptr = p + 1;
        return true;
    }

    /// Parses the ':' separator and the value which follows a member name
    private bool parseMemberValue ()
    {
        auto p = this.str.ptr;

        if (*p != ':')
            this.expected("':' before attribute-value", p);

        auto e = this.str.end;
        while (++p < e && *p <= 32) {}

        this.str.ptr = p;
        if (p >= e)
            this.unexpectedEOF("in attribute-value");

        return this.parseValue(*p);
    }

    /// Dispatches on the first character `c` of a value located at
    /// `this.str.ptr`; anything unrecognized is handed to `parseNumber`
    private bool parseValue (T c)
    {
        switch (c)
        {
            case '{':
                return this.push(Token.BeginObject, State.Object);

            case '[':
                return this.push(Token.BeginArray, State.Array);

            case '"':
                return this.doString();

            case 'n':
                if (this.match("null", Token.Null))
                    return true;
                this.expected("'null'", this.str.ptr);
                assert(false);

            case 't':
                if (this.match("true", Token.True))
                    return true;
                this.expected("'true'", this.str.ptr);
                assert(false);

            case 'f':
                if (this.match("false", Token.False))
                    return true;
                this.expected("'false'", this.str.ptr);
                assert(false);

            static if (AllowNaN)
            {
                case 'N':
                    if (this.match("NaN", Token.NaN))
                        return true;
                    this.expected ("'NaN'", this.str.ptr);
                    assert(false);

                case 'I':
                    if (this.match("Infinity", Token.Infinity))
                        return true;
                    this.expected ("'Infinity'", this.str.ptr);
                    assert(false);

                case '-':
                    if (this.match("-Infinity", Token.NegInfinity))
                        return true;
                    // Otherwise this is a regular negative number
                    break;
            }

            default:
                break;
        }

        return this.parseNumber();
    }

    /// Parses a string value; `this.str.ptr` points at the opening quote
    private bool doString ()
    {
        auto p = this.str.ptr;
        auto e = this.str.end;

        this.curLoc = p+1;
        this.curType = Token.String;

        while (++p < e)
            if (*p is '"' && !this.escaped(p))
                break;

        if (p < e)
            this.curLen = p - this.curLoc;
        else
            this.unexpectedEOF("in string");

        this.str.ptr = p + 1;
        return true;
    }

    /// Scans `[+-] digits [. digits] [(e|E) [+-] digits]` starting at
    /// `this.str.ptr`, never moving past the end of the input.
    ///
    /// Returns: `true` if at least one character was consumed
    ///
    /// Throws: if the input ends before the number is terminated
    private bool parseNumber ()
    {
        auto p = this.str.ptr;
        auto e = this.str.end;

        this.curLoc = p;
        this.curType = Token.Number;

        if (p < e && (*p is '-' || *p is '+'))
            ++p;

        while (p < e && *p >= '0' && *p <= '9')
            ++p;

        if (p < e && *p is '.')
        {
            do
                ++p;
            while (p < e && *p >= '0' && *p <= '9');
        }

        if (p < e && (*p is 'e' || *p is 'E'))
        {
            ++p;

            if (p < e && (*p is '-' || *p is '+'))
                ++p;

            while (p < e && *p >= '0' && *p <= '9')
                ++p;
        }

        // A number can never be the last thing in a document
        if (p < e)
            this.curLen = p - this.curLoc;
        else
            this.unexpectedEOF("after number");

        this.str.ptr = p;
        return this.curLen > 0;
    }

    /// Consumes `name` as a token of type `token` if the input continues with
    /// it. The remaining length is checked first so the comparison never
    /// looks past the end of the input.
    private bool match (const(T)[] name, Token token)
    {
        auto i = name.length;
        auto remaining = cast(size_t) (this.str.end - this.str.ptr);

        if (remaining >= i && this.str.ptr[0 .. i] == name)
        {
            this.curLoc = this.str.ptr;
            this.curType = token;
            this.str.ptr += i;
            this.curLen = i;
            return true;
        }
        return false;
    }

    /// Enters a container: consumes one character and saves the current state
    private bool push (Token token, State next)
    {
        this.curLen = 0;
        this.curType = token;
        this.curLoc = this.str.ptr++;
        this.state.push(this.curState);
        this.curState = next;
        return true;
    }

    /// Leaves a container: consumes one character and restores the saved state
    private bool pop (Token token)
    {
        this.curLen = 0;
        this.curType = token;
        this.curLoc = this.str.ptr++;
        this.curState = this.state.pop;
        return true;
    }

    /// Parses either the end of the current array or its next value.
    /// The caller guarantees that `this.str.ptr` is inside the input.
    private bool parseArrayValue ()
    {
        auto p = this.str.ptr;
        if (*p is ']')
            return this.pop(Token.EndArray);

        if (*p is ',')
            ++p;

        auto e = this.str.end;
        while (p < e && *p <= 32)
            ++p;

        this.str.ptr = p;
        if (p >= e)
            this.unexpectedEOF("in array");

        return this.parseValue(*p);
    }

    /// Returns: non-zero if the character at `p` is preceded by an odd number
    /// of backslashes. Only called while scanning a quoted token, so the
    /// backwards walk stops at the opening quote at the latest.
    private int escaped (const(T)* p)
    {
        int i;

        while (*--p is '\\')
            ++i;
        return i & 1;
    }
}

/// Exception thrown by `JsonParser` on malformed input
public class JsonParserException : Exception
{
    mixin ReusableExceptionImplementation!() R;
}


unittest
{
    static immutable istring json =
    `{
        "glossary": {
            "title": "example glossary",
            "GlossDiv": {
                "title": "S",
                "GlossList": {
                    "GlossEntry": {
                        "ID": "SGML",
                        "SortAs": "SGML",
                        "GlossTerm": "Standard Generalized Markup Language",
                        "Acronym": "SGML",
                        "Abbrev": "ISO 8879:1986",
                        "GlossDef": {
                            "para": "A meta-markup language, used to create markup languages such as DocBook.",
                            "GlossSeeAlso": [
                                "GML",
                                "XML"
                            ]
                        },
                        "GlossSee": "markup",
                        "ANumber": 12345.6e7
                        "BNumber": 12345.6e+7
                        "CNumber": 12345.6e-7
                        "DNumber": 12345.6E7
                        "ENumber": 12345.6E+7
                        "FNumber": 12345.6E-7
                        "True": true
                        "False": false
                        "Null": null
                    }
                }
            }
        }
    }`;

    auto p = new JsonParser!(char)(json);
    test(p);
    test(p.type == p.Token.BeginObject);
    test(p.next);
    test(p.type == p.Token.Name);
    test(p.value == "glossary", p.value);
    test(p.next);
    test(p.value == "", p.value);
    test(p.type == p.Token.BeginObject);
    test(p.next);
    test(p.type == p.Token.Name);
    test(p.value == "title", p.value);
    test(p.next);
    test(p.type == p.Token.String);
    test(p.value == "example glossary", p.value);
    test(p.next);
    test(p.type == p.Token.Name);
    test(p.value == "GlossDiv", p.value);
    test(p.next);
    test(p.type == p.Token.BeginObject);
    test(p.next);
    test(p.type == p.Token.Name);
    test(p.value == "title", p.value);
    test(p.next);
    test(p.type == p.Token.String);
    test(p.value == "S", p.value);
    test(p.next);
    test(p.type == p.Token.Name);
    test(p.value == "GlossList", p.value);
    test(p.next);
    test(p.type == p.Token.BeginObject);
    test(p.next);
    test(p.type == p.Token.Name);
    test(p.value == "GlossEntry", p.value);
    test(p.next);
    test(p.type == p.Token.BeginObject);
    test(p.next);
    test(p.type == p.Token.Name);
    test(p.value == "ID", p.value);
    test(p.next);
    test(p.type == p.Token.String);
    test(p.value == "SGML", p.value);
    test(p.next);
    test(p.type == p.Token.Name);
    test(p.value == "SortAs", p.value);
    test(p.next);
    test(p.type == p.Token.String);
    test(p.value == "SGML", p.value);
    test(p.next);
    test(p.type == p.Token.Name);
    test(p.value == "GlossTerm", p.value);
    test(p.next);
    test(p.type == p.Token.String);
    test(p.value == "Standard Generalized Markup Language", p.value);
    test(p.next);
    test(p.type == p.Token.Name);
    test(p.value == "Acronym", p.value);
    test(p.next);
    test(p.type == p.Token.String);
    test(p.value == "SGML", p.value);
    test(p.next);
    test(p.type == p.Token.Name);
    test(p.value == "Abbrev", p.value);
    test(p.next);
    test(p.type == p.Token.String);
    test(p.value == "ISO 8879:1986", p.value);
    test(p.next);
    test(p.type == p.Token.Name);
    test(p.value == "GlossDef", p.value);
    test(p.next);
    test(p.type == p.Token.BeginObject);
    test(p.next);
    test(p.type == p.Token.Name);
    test(p.value == "para", p.value);
    test(p.next);

    test(p.type == p.Token.String);
    test(p.value == "A meta-markup language, used to create markup languages such as DocBook.", p.value);
    test(p.next);
    test(p.type == p.Token.Name);
    test(p.value == "GlossSeeAlso", p.value);
    test(p.next);
    test(p.type == p.Token.BeginArray);
    test(p.next);
    test(p.type == p.Token.String);
    test(p.value == "GML", p.value);
    test(p.next);
    test(p.type == p.Token.String);
    test(p.value == "XML", p.value);
    test(p.next);
    test(p.type == p.Token.EndArray);
    test(p.next);
    test(p.type == p.Token.EndObject);
    test(p.next);
    test(p.type == p.Token.Name);
    test(p.value == "GlossSee", p.value);
    test(p.next);
    test(p.type == p.Token.String);
    test(p.value == "markup", p.value);
    test(p.next);
    test(p.type == p.Token.Name);
    test(p.value == "ANumber", p.value);
    test(p.next);
    test(p.type == p.Token.Number);
    test(p.value == "12345.6e7", p.value);
    test(p.next);
    test(p.type == p.Token.Name);
    test(p.value == "BNumber", p.value);
    test(p.next);
    test(p.type == p.Token.Number);
    test(p.value == "12345.6e+7", p.value);
    test(p.next);
    test(p.type == p.Token.Name);
    test(p.value == "CNumber", p.value);
    test(p.next);
    test(p.type == p.Token.Number);
    test(p.value == "12345.6e-7", p.value);
    test(p.next);
    test(p.type == p.Token.Name);
    test(p.value == "DNumber", p.value);
    test(p.next);
    test(p.type == p.Token.Number);
    test(p.value == "12345.6E7", p.value);
    test(p.next);
    test(p.type == p.Token.Name);
    test(p.value == "ENumber", p.value);
    test(p.next);
    test(p.type == p.Token.Number);
    test(p.value == "12345.6E+7", p.value);
    test(p.next);
    test(p.type == p.Token.Name);
    test(p.value == "FNumber", p.value);
    test(p.next);
    test(p.type == p.Token.Number);
    test(p.value == "12345.6E-7", p.value);
    test(p.next);
    test(p.type == p.Token.Name);
    test(p.value == "True", p.value);
    test(p.next);
    test(p.type == p.Token.True);
    test(p.next);
    test(p.type == p.Token.Name);
    test(p.value == "False", p.value);
    test(p.next);
    test(p.type == p.Token.False);
    test(p.next);
    test(p.type == p.Token.Name);
    test(p.value == "Null", p.value);
    test(p.next);
    test(p.type == p.Token.Null);
    test(p.next);
    test(p.type == p.Token.EndObject);
    test(p.next);
    test(p.type == p.Token.EndObject);
    test(p.next);
    test(p.type == p.Token.EndObject);
    test(p.next);
    test(p.type == p.Token.EndObject);
    test(p.next);
    test(p.type == p.Token.EndObject);
    test(!p.next);

    test(p.state.size == 0);
}

// Input which is a slice of a larger buffer must not be parsed past its end
unittest
{
    // Number cut off by the end of the slice: the '2' and '}' are out of reach
    {
        auto p = new JsonParser!(char)(`{"a":12}`[0 .. 6]);
        test(p.type == p.Token.BeginObject);
        test(p.next);
        test(p.type == p.Token.Name);
        test(p.value == "a", p.value);

        bool caught = false;
        try
            p.next();
        catch (JsonParserException e)
            caught = true;
        test(caught);
    }

    // Literal cut off by the end of the slice: "tr" must not match "true"
    {
        auto p = new JsonParser!(char)(`[true]`[0 .. 3]);
        test(p.type == p.Token.BeginArray);

        bool caught = false;
        try
            p.next();
        catch (JsonParserException e)
            caught = true;
        test(caught);
    }

    // Resetting to empty input must not leave the previous value visible
    {
        auto p = new JsonParser!(char)(`{"a":"b"}`);
        test(p.next);
        test(p.value == "a", p.value);
        test(!p.reset(null));
        test(p.value == "", p.value);
        test(!p.next);
    }
}