JsonCpp project page Classes Namespace JsonCpp home page

json_reader.cpp
Go to the documentation of this file.
1// Copyright 2007-2011 Baptiste Lepilleur and The JsonCpp Authors
2// Copyright (C) 2016 InfoTeCS JSC. All rights reserved.
3// Distributed under MIT license, or public domain if desired and
4// recognized in your jurisdiction.
5// See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE
6
7#if !defined(JSON_IS_AMALGAMATION)
8#include "json_tool.h"
9#include <json/assertions.h>
10#include <json/reader.h>
11#include <json/value.h>
12#endif // if !defined(JSON_IS_AMALGAMATION)
13#include <algorithm>
14#include <cassert>
15#include <cmath>
16#include <cstring>
17#include <iostream>
18#include <istream>
19#include <iterator>
20#include <limits>
21#include <memory>
22#include <set>
23#include <sstream>
24#include <utility>
25
26#include <cstdio>
27
28#if defined(_MSC_VER)
29#if !defined(_CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES)
30#define _CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES 1
31#endif //_CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES
32#endif //_MSC_VER
33
34#if defined(_MSC_VER)
35// Disable warning about strdup being deprecated.
36#pragma warning(disable : 4996)
37#endif
38
39// Define JSONCPP_DEPRECATED_STACK_LIMIT as an appropriate integer at compile
40// time to change the stack limit
41#if !defined(JSONCPP_DEPRECATED_STACK_LIMIT)
42#define JSONCPP_DEPRECATED_STACK_LIMIT 256
43#endif
44
45static size_t const stackLimit_g =
46 JSONCPP_DEPRECATED_STACK_LIMIT; // see readValue()
47
48namespace Json {
49
50using CharReaderPtr = std::unique_ptr<CharReader>;
51
52// Implementation of class Features
53// ////////////////////////////////
54
55Features::Features() = default;
56
57Features Features::all() { return {}; }
58
60 Features features;
61 features.allowComments_ = false;
62 features.strictRoot_ = true;
63 features.allowDroppedNullPlaceholders_ = false;
64 features.allowNumericKeys_ = false;
65 return features;
66}
67
68// Implementation of class Reader
69// ////////////////////////////////
70
71bool Reader::containsNewLine(Reader::Location begin, Reader::Location end) {
72 return std::any_of(begin, end, [](char b) { return b == '\n' || b == '\r'; });
73}
74
75// Class Reader
76// //////////////////////////////////////////////////////////////////
77
78Reader::Reader() : features_(Features::all()) {}
79
80Reader::Reader(const Features& features) : features_(features) {}
81
82bool Reader::parse(const std::string& document, Value& root,
83 bool collectComments) {
84 document_.assign(document.begin(), document.end());
85 const char* begin = document_.c_str();
86 const char* end = begin + document_.length();
87 return parse(begin, end, root, collectComments);
88}
89
90bool Reader::parse(std::istream& is, Value& root, bool collectComments) {
91 document_.assign(std::istreambuf_iterator<char>(is),
92 std::istreambuf_iterator<char>());
93 return parse(document_.data(), document_.data() + document_.size(), root,
94 collectComments);
95}
96
97bool Reader::parse(const char* beginDoc, const char* endDoc, Value& root,
98 bool collectComments) {
99 if (!features_.allowComments_) {
100 collectComments = false;
101 }
102
103 begin_ = beginDoc;
104 end_ = endDoc;
105 collectComments_ = collectComments;
106 current_ = begin_;
107 lastValueEnd_ = nullptr;
108 lastValue_ = nullptr;
109 commentsBefore_.clear();
110 errors_.clear();
111 while (!nodes_.empty())
112 nodes_.pop();
113 nodes_.push(&root);
114
115 bool successful = readValue();
116 Token token;
117 readTokenSkippingComments(token);
118 if (collectComments_ && !commentsBefore_.empty())
119 root.setComment(commentsBefore_, commentAfter);
120 if (features_.strictRoot_) {
121 if (!root.isArray() && !root.isObject()) {
122 // Set error location to start of doc, ideally should be first token found
123 // in doc
124 token.type_ = tokenError;
125 token.start_ = beginDoc;
126 token.end_ = endDoc;
127 addError(
128 "A valid JSON document must be either an array or an object value.",
129 token);
130 return false;
131 }
132 }
133 return successful;
134}
135
136bool Reader::readValue() {
137 // readValue() may call itself only if it calls readObject() or ReadArray().
138 // These methods execute nodes_.push() just before and nodes_.pop)() just
139 // after calling readValue(). parse() executes one nodes_.push(), so > instead
140 // of >=.
141 if (nodes_.size() > stackLimit_g)
142#if JSON_USE_EXCEPTION
143 throwRuntimeError("Exceeded stackLimit in readValue().");
144#else
145 // throwRuntimeError aborts. Don't abort here.
146 return false;
147#endif
148
149 Token token;
150 readTokenSkippingComments(token);
151 bool successful = true;
152
153 if (collectComments_ && !commentsBefore_.empty()) {
154 currentValue().setComment(commentsBefore_, commentBefore);
155 commentsBefore_.clear();
156 }
157
158 switch (token.type_) {
159 case tokenObjectBegin:
160 successful = readObject(token);
161 currentValue().setOffsetLimit(current_ - begin_);
162 break;
163 case tokenArrayBegin:
164 successful = readArray(token);
165 currentValue().setOffsetLimit(current_ - begin_);
166 break;
167 case tokenNumber:
168 successful = decodeNumber(token);
169 break;
170 case tokenString:
171 successful = decodeString(token);
172 break;
173 case tokenTrue: {
174 Value v(true);
175 currentValue().swapPayload(v);
176 currentValue().setOffsetStart(token.start_ - begin_);
177 currentValue().setOffsetLimit(token.end_ - begin_);
178 } break;
179 case tokenFalse: {
180 Value v(false);
181 currentValue().swapPayload(v);
182 currentValue().setOffsetStart(token.start_ - begin_);
183 currentValue().setOffsetLimit(token.end_ - begin_);
184 } break;
185 case tokenNull: {
186 Value v;
187 currentValue().swapPayload(v);
188 currentValue().setOffsetStart(token.start_ - begin_);
189 currentValue().setOffsetLimit(token.end_ - begin_);
190 } break;
191 case tokenArraySeparator:
192 case tokenObjectEnd:
193 case tokenArrayEnd:
194 if (features_.allowDroppedNullPlaceholders_) {
195 // "Un-read" the current token and mark the current value as a null
196 // token.
197 current_--;
198 Value v;
199 currentValue().swapPayload(v);
200 currentValue().setOffsetStart(current_ - begin_ - 1);
201 currentValue().setOffsetLimit(current_ - begin_);
202 break;
203 } // Else, fall through...
204 default:
205 currentValue().setOffsetStart(token.start_ - begin_);
206 currentValue().setOffsetLimit(token.end_ - begin_);
207 return addError("Syntax error: value, object or array expected.", token);
208 }
209
210 if (collectComments_) {
211 lastValueEnd_ = current_;
212 lastValue_ = &currentValue();
213 }
214
215 return successful;
216}
217
218bool Reader::readTokenSkippingComments(Token& token) {
219 bool success = readToken(token);
220 if (features_.allowComments_) {
221 while (success && token.type_ == tokenComment) {
222 success = readToken(token);
223 }
224 }
225 return success;
226}
227
228bool Reader::readToken(Token& token) {
229 skipSpaces();
230 token.start_ = current_;
231 Char c = getNextChar();
232 bool ok = true;
233 switch (c) {
234 case '{':
235 token.type_ = tokenObjectBegin;
236 break;
237 case '}':
238 token.type_ = tokenObjectEnd;
239 break;
240 case '[':
241 token.type_ = tokenArrayBegin;
242 break;
243 case ']':
244 token.type_ = tokenArrayEnd;
245 break;
246 case '"':
247 token.type_ = tokenString;
248 ok = readString();
249 break;
250 case '/':
251 token.type_ = tokenComment;
252 ok = readComment();
253 break;
254 case '0':
255 case '1':
256 case '2':
257 case '3':
258 case '4':
259 case '5':
260 case '6':
261 case '7':
262 case '8':
263 case '9':
264 case '-':
265 token.type_ = tokenNumber;
266 readNumber();
267 break;
268 case 't':
269 token.type_ = tokenTrue;
270 ok = match("rue", 3);
271 break;
272 case 'f':
273 token.type_ = tokenFalse;
274 ok = match("alse", 4);
275 break;
276 case 'n':
277 token.type_ = tokenNull;
278 ok = match("ull", 3);
279 break;
280 case ',':
281 token.type_ = tokenArraySeparator;
282 break;
283 case ':':
284 token.type_ = tokenMemberSeparator;
285 break;
286 case 0:
287 token.type_ = tokenEndOfStream;
288 break;
289 default:
290 ok = false;
291 break;
292 }
293 if (!ok)
294 token.type_ = tokenError;
295 token.end_ = current_;
296 return ok;
297}
298
299void Reader::skipSpaces() {
300 while (current_ != end_) {
301 Char c = *current_;
302 if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
303 ++current_;
304 else
305 break;
306 }
307}
308
309bool Reader::match(const Char* pattern, int patternLength) {
310 if (end_ - current_ < patternLength)
311 return false;
312 int index = patternLength;
313 while (index--)
314 if (current_[index] != pattern[index])
315 return false;
316 current_ += patternLength;
317 return true;
318}
319
320bool Reader::readComment() {
321 Location commentBegin = current_ - 1;
322 Char c = getNextChar();
323 bool successful = false;
324 if (c == '*')
325 successful = readCStyleComment();
326 else if (c == '/')
327 successful = readCppStyleComment();
328 if (!successful)
329 return false;
330
331 if (collectComments_) {
333 if (lastValueEnd_ && !containsNewLine(lastValueEnd_, commentBegin)) {
334 if (c != '*' || !containsNewLine(commentBegin, current_))
335 placement = commentAfterOnSameLine;
336 }
337
338 addComment(commentBegin, current_, placement);
339 }
340 return true;
341}
342
343String Reader::normalizeEOL(Reader::Location begin, Reader::Location end) {
344 String normalized;
345 normalized.reserve(static_cast<size_t>(end - begin));
346 Reader::Location current = begin;
347 while (current != end) {
348 char c = *current++;
349 if (c == '\r') {
350 if (current != end && *current == '\n')
351 // convert dos EOL
352 ++current;
353 // convert Mac EOL
354 normalized += '\n';
355 } else {
356 normalized += c;
357 }
358 }
359 return normalized;
360}
361
362void Reader::addComment(Location begin, Location end,
363 CommentPlacement placement) {
364 assert(collectComments_);
365 const String& normalized = normalizeEOL(begin, end);
366 if (placement == commentAfterOnSameLine) {
367 assert(lastValue_ != nullptr);
368 lastValue_->setComment(normalized, placement);
369 } else {
370 commentsBefore_ += normalized;
371 }
372}
373
374bool Reader::readCStyleComment() {
375 while ((current_ + 1) < end_) {
376 Char c = getNextChar();
377 if (c == '*' && *current_ == '/')
378 break;
379 }
380 return getNextChar() == '/';
381}
382
383bool Reader::readCppStyleComment() {
384 while (current_ != end_) {
385 Char c = getNextChar();
386 if (c == '\n')
387 break;
388 if (c == '\r') {
389 // Consume DOS EOL. It will be normalized in addComment.
390 if (current_ != end_ && *current_ == '\n')
391 getNextChar();
392 // Break on Moc OS 9 EOL.
393 break;
394 }
395 }
396 return true;
397}
398
399void Reader::readNumber() {
400 Location p = current_;
401 char c = '0'; // stopgap for already consumed character
402 // integral part
403 while (c >= '0' && c <= '9')
404 c = (current_ = p) < end_ ? *p++ : '\0';
405 // fractional part
406 if (c == '.') {
407 c = (current_ = p) < end_ ? *p++ : '\0';
408 while (c >= '0' && c <= '9')
409 c = (current_ = p) < end_ ? *p++ : '\0';
410 }
411 // exponential part
412 if (c == 'e' || c == 'E') {
413 c = (current_ = p) < end_ ? *p++ : '\0';
414 if (c == '+' || c == '-')
415 c = (current_ = p) < end_ ? *p++ : '\0';
416 while (c >= '0' && c <= '9')
417 c = (current_ = p) < end_ ? *p++ : '\0';
418 }
419}
420
421bool Reader::readString() {
422 Char c = '\0';
423 while (current_ != end_) {
424 c = getNextChar();
425 if (c == '\\')
426 getNextChar();
427 else if (c == '"')
428 break;
429 }
430 return c == '"';
431}
432
433bool Reader::readObject(Token& token) {
434 Token tokenName;
435 String name;
436 Value init(objectValue);
437 currentValue().swapPayload(init);
438 currentValue().setOffsetStart(token.start_ - begin_);
439 while (readTokenSkippingComments(tokenName)) {
440 if (tokenName.type_ == tokenObjectEnd && name.empty()) // empty object
441 return true;
442 name.clear();
443 if (tokenName.type_ == tokenString) {
444 if (!decodeString(tokenName, name))
445 return recoverFromError(tokenObjectEnd);
446 } else if (tokenName.type_ == tokenNumber && features_.allowNumericKeys_) {
447 Value numberName;
448 if (!decodeNumber(tokenName, numberName))
449 return recoverFromError(tokenObjectEnd);
450 name = numberName.asString();
451 } else {
452 break;
453 }
454
455 Token colon;
456 if (!readToken(colon) || colon.type_ != tokenMemberSeparator) {
457 return addErrorAndRecover("Missing ':' after object member name", colon,
458 tokenObjectEnd);
459 }
460 Value& value = currentValue()[name];
461 nodes_.push(&value);
462 bool ok = readValue();
463 nodes_.pop();
464 if (!ok) // error already set
465 return recoverFromError(tokenObjectEnd);
466
467 Token comma;
468 if (!readTokenSkippingComments(comma) ||
469 (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator)) {
470 return addErrorAndRecover("Missing ',' or '}' in object declaration",
471 comma, tokenObjectEnd);
472 }
473 if (comma.type_ == tokenObjectEnd)
474 return true;
475 }
476 return addErrorAndRecover("Missing '}' or object member name", tokenName,
477 tokenObjectEnd);
478}
479
480bool Reader::readArray(Token& token) {
481 Value init(arrayValue);
482 currentValue().swapPayload(init);
483 currentValue().setOffsetStart(token.start_ - begin_);
484 skipSpaces();
485 if (current_ != end_ && *current_ == ']') // empty array
486 {
487 Token endArray;
488 readToken(endArray);
489 return true;
490 }
491 int index = 0;
492 for (;;) {
493 Value& value = currentValue()[index++];
494 nodes_.push(&value);
495 bool ok = readValue();
496 nodes_.pop();
497 if (!ok) // error already set
498 return recoverFromError(tokenArrayEnd);
499
500 Token currentToken;
501 // Accept Comment after last item in the array.
502 ok = readTokenSkippingComments(currentToken);
503 bool badTokenType = (currentToken.type_ != tokenArraySeparator &&
504 currentToken.type_ != tokenArrayEnd);
505 if (!ok || badTokenType) {
506 return addErrorAndRecover("Missing ',' or ']' in array declaration",
507 currentToken, tokenArrayEnd);
508 }
509 if (currentToken.type_ == tokenArrayEnd)
510 break;
511 }
512 return true;
513}
514
515bool Reader::decodeNumber(Token& token) {
516 Value decoded;
517 if (!decodeNumber(token, decoded))
518 return false;
519 currentValue().swapPayload(decoded);
520 currentValue().setOffsetStart(token.start_ - begin_);
521 currentValue().setOffsetLimit(token.end_ - begin_);
522 return true;
523}
524
525bool Reader::decodeNumber(Token& token, Value& decoded) {
526 // Attempts to parse the number as an integer. If the number is
527 // larger than the maximum supported value of an integer then
528 // we decode the number as a double.
529 Location current = token.start_;
530 bool isNegative = *current == '-';
531 if (isNegative)
532 ++current;
533 // TODO: Help the compiler do the div and mod at compile time or get rid of
534 // them.
535 Value::LargestUInt maxIntegerValue =
538 Value::LargestUInt threshold = maxIntegerValue / 10;
539 Value::LargestUInt value = 0;
540 while (current < token.end_) {
541 Char c = *current++;
542 if (c < '0' || c > '9')
543 return decodeDouble(token, decoded);
544 auto digit(static_cast<Value::UInt>(c - '0'));
545 if (value >= threshold) {
546 // We've hit or exceeded the max value divided by 10 (rounded down). If
547 // a) we've only just touched the limit, b) this is the last digit, and
548 // c) it's small enough to fit in that rounding delta, we're okay.
549 // Otherwise treat this number as a double to avoid overflow.
550 if (value > threshold || current != token.end_ ||
551 digit > maxIntegerValue % 10) {
552 return decodeDouble(token, decoded);
553 }
554 }
555 value = value * 10 + digit;
556 }
557 if (isNegative && value == maxIntegerValue)
558 decoded = Value::minLargestInt;
559 else if (isNegative)
560 decoded = -Value::LargestInt(value);
561 else if (value <= Value::LargestUInt(Value::maxInt))
562 decoded = Value::LargestInt(value);
563 else
564 decoded = value;
565 return true;
566}
567
568bool Reader::decodeDouble(Token& token) {
569 Value decoded;
570 if (!decodeDouble(token, decoded))
571 return false;
572 currentValue().swapPayload(decoded);
573 currentValue().setOffsetStart(token.start_ - begin_);
574 currentValue().setOffsetLimit(token.end_ - begin_);
575 return true;
576}
577
578bool Reader::decodeDouble(Token& token, Value& decoded) {
579 double value = 0;
580 IStringStream is(String(token.start_, token.end_));
581 is.imbue(std::locale::classic());
582 if (!(is >> value)) {
583 if (value == std::numeric_limits<double>::max())
584 value = std::numeric_limits<double>::infinity();
585 else if (value == std::numeric_limits<double>::lowest())
586 value = -std::numeric_limits<double>::infinity();
587 else if (!std::isinf(value))
588 return addError(
589 "'" + String(token.start_, token.end_) + "' is not a number.", token);
590 }
591 decoded = value;
592 return true;
593}
594
595bool Reader::decodeString(Token& token) {
596 String decoded_string;
597 if (!decodeString(token, decoded_string))
598 return false;
599 Value decoded(decoded_string);
600 currentValue().swapPayload(decoded);
601 currentValue().setOffsetStart(token.start_ - begin_);
602 currentValue().setOffsetLimit(token.end_ - begin_);
603 return true;
604}
605
606bool Reader::decodeString(Token& token, String& decoded) {
607 decoded.reserve(static_cast<size_t>(token.end_ - token.start_ - 2));
608 Location current = token.start_ + 1; // skip '"'
609 Location end = token.end_ - 1; // do not include '"'
610 while (current != end) {
611 Char c = *current++;
612 if (c == '"')
613 break;
614 if (c == '\\') {
615 if (current == end)
616 return addError("Empty escape sequence in string", token, current);
617 Char escape = *current++;
618 switch (escape) {
619 case '"':
620 decoded += '"';
621 break;
622 case '/':
623 decoded += '/';
624 break;
625 case '\\':
626 decoded += '\\';
627 break;
628 case 'b':
629 decoded += '\b';
630 break;
631 case 'f':
632 decoded += '\f';
633 break;
634 case 'n':
635 decoded += '\n';
636 break;
637 case 'r':
638 decoded += '\r';
639 break;
640 case 't':
641 decoded += '\t';
642 break;
643 case 'u': {
644 unsigned int unicode;
645 if (!decodeUnicodeCodePoint(token, current, end, unicode))
646 return false;
647 decoded += codePointToUTF8(unicode);
648 } break;
649 default:
650 return addError("Bad escape sequence in string", token, current);
651 }
652 } else {
653 if (static_cast<unsigned char>(c) < 0x20)
654 return addError("Control character in string", token, current - 1);
655 decoded += c;
656 }
657 }
658 return true;
659}
660
661bool Reader::decodeUnicodeCodePoint(Token& token, Location& current,
662 Location end, unsigned int& unicode) {
663
664 if (!decodeUnicodeEscapeSequence(token, current, end, unicode))
665 return false;
666 if (unicode >= 0xD800 && unicode <= 0xDBFF) {
667 // surrogate pairs
668 if (end - current < 6)
669 return addError(
670 "additional six characters expected to parse unicode surrogate pair.",
671 token, current);
672 if (*(current++) == '\\' && *(current++) == 'u') {
673 unsigned int surrogatePair;
674 if (decodeUnicodeEscapeSequence(token, current, end, surrogatePair)) {
675 unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
676 } else
677 return false;
678 } else
679 return addError("expecting another \\u token to begin the second half of "
680 "a unicode surrogate pair",
681 token, current);
682 }
683 return true;
684}
685
686bool Reader::decodeUnicodeEscapeSequence(Token& token, Location& current,
687 Location end,
688 unsigned int& ret_unicode) {
689 if (end - current < 4)
690 return addError(
691 "Bad unicode escape sequence in string: four digits expected.", token,
692 current);
693 int unicode = 0;
694 for (int index = 0; index < 4; ++index) {
695 Char c = *current++;
696 unicode *= 16;
697 if (c >= '0' && c <= '9')
698 unicode += c - '0';
699 else if (c >= 'a' && c <= 'f')
700 unicode += c - 'a' + 10;
701 else if (c >= 'A' && c <= 'F')
702 unicode += c - 'A' + 10;
703 else
704 return addError(
705 "Bad unicode escape sequence in string: hexadecimal digit expected.",
706 token, current);
707 }
708 ret_unicode = static_cast<unsigned int>(unicode);
709 return true;
710}
711
712bool Reader::addError(const String& message, Token& token, Location extra) {
713 ErrorInfo info;
714 info.token_ = token;
715 info.message_ = message;
716 info.extra_ = extra;
717 errors_.push_back(info);
718 return false;
719}
720
721bool Reader::recoverFromError(TokenType skipUntilToken) {
722 size_t const errorCount = errors_.size();
723 Token skip;
724 for (;;) {
725 if (!readToken(skip))
726 errors_.resize(errorCount); // discard errors caused by recovery
727 if (skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream)
728 break;
729 }
730 errors_.resize(errorCount);
731 return false;
732}
733
734bool Reader::addErrorAndRecover(const String& message, Token& token,
735 TokenType skipUntilToken) {
736 addError(message, token);
737 return recoverFromError(skipUntilToken);
738}
739
740Value& Reader::currentValue() { return *(nodes_.top()); }
741
742Reader::Char Reader::getNextChar() {
743 if (current_ == end_)
744 return 0;
745 return *current_++;
746}
747
748void Reader::getLocationLineAndColumn(Location location, int& line,
749 int& column) const {
750 Location current = begin_;
751 Location lastLineStart = current;
752 line = 0;
753 while (current < location && current != end_) {
754 Char c = *current++;
755 if (c == '\r') {
756 if (current != end_ && *current == '\n')
757 ++current;
758 lastLineStart = current;
759 ++line;
760 } else if (c == '\n') {
761 lastLineStart = current;
762 ++line;
763 }
764 }
765 // column & line start at 1
766 column = int(location - lastLineStart) + 1;
767 ++line;
768}
769
770String Reader::getLocationLineAndColumn(Location location) const {
771 int line, column;
772 getLocationLineAndColumn(location, line, column);
773 char buffer[18 + 16 + 16 + 1];
774 jsoncpp_snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
775 return buffer;
776}
777
778// Deprecated. Preserved for backward compatibility
779String Reader::getFormatedErrorMessages() const {
781}
782
784 String formattedMessage;
785 for (const auto& error : errors_) {
786 formattedMessage +=
787 "* " + getLocationLineAndColumn(error.token_.start_) + "\n";
788 formattedMessage += " " + error.message_ + "\n";
789 if (error.extra_)
790 formattedMessage +=
791 "See " + getLocationLineAndColumn(error.extra_) + " for detail.\n";
792 }
793 return formattedMessage;
794}
795
796std::vector<Reader::StructuredError> Reader::getStructuredErrors() const {
797 std::vector<Reader::StructuredError> allErrors;
798 for (const auto& error : errors_) {
799 Reader::StructuredError structured;
800 structured.offset_start = error.token_.start_ - begin_;
801 structured.offset_limit = error.token_.end_ - begin_;
802 structured.message = error.message_;
803 allErrors.push_back(structured);
804 }
805 return allErrors;
806}
807
808bool Reader::pushError(const Value& value, const String& message) {
809 ptrdiff_t const length = end_ - begin_;
810 if (value.getOffsetStart() > length || value.getOffsetLimit() > length)
811 return false;
812 Token token;
813 token.type_ = tokenError;
814 token.start_ = begin_ + value.getOffsetStart();
815 token.end_ = begin_ + value.getOffsetLimit();
816 ErrorInfo info;
817 info.token_ = token;
818 info.message_ = message;
819 info.extra_ = nullptr;
820 errors_.push_back(info);
821 return true;
822}
823
824bool Reader::pushError(const Value& value, const String& message,
825 const Value& extra) {
826 ptrdiff_t const length = end_ - begin_;
827 if (value.getOffsetStart() > length || value.getOffsetLimit() > length ||
828 extra.getOffsetLimit() > length)
829 return false;
830 Token token;
831 token.type_ = tokenError;
832 token.start_ = begin_ + value.getOffsetStart();
833 token.end_ = begin_ + value.getOffsetLimit();
834 ErrorInfo info;
835 info.token_ = token;
836 info.message_ = message;
837 info.extra_ = begin_ + extra.getOffsetStart();
838 errors_.push_back(info);
839 return true;
840}
841
842bool Reader::good() const { return errors_.empty(); }
843
844// Originally copied from the Features class (now deprecated), used internally
845// for features implementation.
// Set of switches that configure OurReader. The fields carry no in-class
// initializers; all() returns a value-initialized instance.
class OurFeatures {
public:
  // Returns a value-initialized feature set.
  static OurFeatures all();

  // When false, OurReader::parse() never collects comments and comment tokens
  // are not skipped by readTokenSkippingComments().
  bool allowComments_;
  bool allowTrailingCommas_;
  // When true, the root value must be an array or an object.
  bool strictRoot_;
  // When true, a ',', '}' or ']' found where a value is expected yields null.
  bool allowDroppedNullPlaceholders_;
  bool allowNumericKeys_;
  // When true, a token starting with '\'' is read as a string.
  bool allowSingleQuotes_;
  // When true, anything but end-of-stream after the root value is an error.
  bool failIfExtra_;
  bool rejectDupKeys_;
  bool allowSpecialFloats_;
  // Passed to OurReader::skipBom() at the start of parse().
  bool skipBom_;
  // Node-stack depth above which OurReader::readValue() throws.
  size_t stackLimit_;
}; // OurFeatures
861
862OurFeatures OurFeatures::all() { return {}; }
863
864// Implementation of class Reader
865// ////////////////////////////////
866
867// Originally copied from the Reader class (now deprecated), used internally
868// for implementing JSON reading.
869class OurReader {
870public:
871 using Char = char;
872 using Location = const Char*;
873
874 explicit OurReader(OurFeatures const& features);
875 bool parse(const char* beginDoc, const char* endDoc, Value& root,
876 bool collectComments = true);
877 String getFormattedErrorMessages() const;
878 std::vector<CharReader::StructuredError> getStructuredErrors() const;
879
880private:
881 OurReader(OurReader const&); // no impl
882 void operator=(OurReader const&); // no impl
883
884 enum TokenType {
885 tokenEndOfStream = 0,
886 tokenObjectBegin,
887 tokenObjectEnd,
888 tokenArrayBegin,
889 tokenArrayEnd,
890 tokenString,
891 tokenNumber,
892 tokenTrue,
893 tokenFalse,
894 tokenNull,
895 tokenNaN,
896 tokenPosInf,
897 tokenNegInf,
898 tokenArraySeparator,
899 tokenMemberSeparator,
900 tokenComment,
901 tokenError
902 };
903
904 class Token {
905 public:
906 TokenType type_;
907 Location start_;
908 Location end_;
909 };
910
911 class ErrorInfo {
912 public:
913 Token token_;
914 String message_;
915 Location extra_;
916 };
917
918 using Errors = std::deque<ErrorInfo>;
919
920 bool readToken(Token& token);
921 bool readTokenSkippingComments(Token& token);
922 void skipSpaces();
923 void skipBom(bool skipBom);
924 bool match(const Char* pattern, int patternLength);
925 bool readComment();
926 bool readCStyleComment(bool* containsNewLineResult);
927 bool readCppStyleComment();
928 bool readString();
929 bool readStringSingleQuote();
930 bool readNumber(bool checkInf);
931 bool readValue();
932 bool readObject(Token& token);
933 bool readArray(Token& token);
934 bool decodeNumber(Token& token);
935 bool decodeNumber(Token& token, Value& decoded);
936 bool decodeString(Token& token);
937 bool decodeString(Token& token, String& decoded);
938 bool decodeDouble(Token& token);
939 bool decodeDouble(Token& token, Value& decoded);
940 bool decodeUnicodeCodePoint(Token& token, Location& current, Location end,
941 unsigned int& unicode);
942 bool decodeUnicodeEscapeSequence(Token& token, Location& current,
943 Location end, unsigned int& unicode);
944 bool addError(const String& message, Token& token, Location extra = nullptr);
945 bool recoverFromError(TokenType skipUntilToken);
946 bool addErrorAndRecover(const String& message, Token& token,
947 TokenType skipUntilToken);
948 void skipUntilSpace();
949 Value& currentValue();
950 Char getNextChar();
951 void getLocationLineAndColumn(Location location, int& line,
952 int& column) const;
953 String getLocationLineAndColumn(Location location) const;
954 void addComment(Location begin, Location end, CommentPlacement placement);
955
956 static String normalizeEOL(Location begin, Location end);
957 static bool containsNewLine(Location begin, Location end);
958
959 using Nodes = std::stack<Value*>;
960
961 Nodes nodes_{};
962 Errors errors_{};
963 String document_{};
964 Location begin_ = nullptr;
965 Location end_ = nullptr;
966 Location current_ = nullptr;
967 Location lastValueEnd_ = nullptr;
968 Value* lastValue_ = nullptr;
969 bool lastValueHasAComment_ = false;
970 String commentsBefore_{};
971
972 OurFeatures const features_;
973 bool collectComments_ = false;
974}; // OurReader
975
976// complete copy of Read impl, for OurReader
977
978bool OurReader::containsNewLine(OurReader::Location begin,
979 OurReader::Location end) {
980 return std::any_of(begin, end, [](char b) { return b == '\n' || b == '\r'; });
981}
982
983OurReader::OurReader(OurFeatures const& features) : features_(features) {}
984
985bool OurReader::parse(const char* beginDoc, const char* endDoc, Value& root,
986 bool collectComments) {
987 if (!features_.allowComments_) {
988 collectComments = false;
989 }
990
991 begin_ = beginDoc;
992 end_ = endDoc;
993 collectComments_ = collectComments;
994 current_ = begin_;
995 lastValueEnd_ = nullptr;
996 lastValue_ = nullptr;
997 commentsBefore_.clear();
998 errors_.clear();
999 while (!nodes_.empty())
1000 nodes_.pop();
1001 nodes_.push(&root);
1002
1003 // skip byte order mark if it exists at the beginning of the UTF-8 text.
1004 skipBom(features_.skipBom_);
1005 bool successful = readValue();
1006 nodes_.pop();
1007 Token token;
1008 readTokenSkippingComments(token);
1009 if (features_.failIfExtra_ && (token.type_ != tokenEndOfStream)) {
1010 addError("Extra non-whitespace after JSON value.", token);
1011 return false;
1012 }
1013 if (collectComments_ && !commentsBefore_.empty())
1014 root.setComment(commentsBefore_, commentAfter);
1015 if (features_.strictRoot_) {
1016 if (!root.isArray() && !root.isObject()) {
1017 // Set error location to start of doc, ideally should be first token found
1018 // in doc
1019 token.type_ = tokenError;
1020 token.start_ = beginDoc;
1021 token.end_ = endDoc;
1022 addError(
1023 "A valid JSON document must be either an array or an object value.",
1024 token);
1025 return false;
1026 }
1027 }
1028 return successful;
1029}
1030
1031bool OurReader::readValue() {
1032 // To preserve the old behaviour we cast size_t to int.
1033 if (nodes_.size() > features_.stackLimit_)
1034 throwRuntimeError("Exceeded stackLimit in readValue().");
1035 Token token;
1036 readTokenSkippingComments(token);
1037 bool successful = true;
1038
1039 if (collectComments_ && !commentsBefore_.empty()) {
1040 currentValue().setComment(commentsBefore_, commentBefore);
1041 commentsBefore_.clear();
1042 }
1043
1044 switch (token.type_) {
1045 case tokenObjectBegin:
1046 successful = readObject(token);
1047 currentValue().setOffsetLimit(current_ - begin_);
1048 break;
1049 case tokenArrayBegin:
1050 successful = readArray(token);
1051 currentValue().setOffsetLimit(current_ - begin_);
1052 break;
1053 case tokenNumber:
1054 successful = decodeNumber(token);
1055 break;
1056 case tokenString:
1057 successful = decodeString(token);
1058 break;
1059 case tokenTrue: {
1060 Value v(true);
1061 currentValue().swapPayload(v);
1062 currentValue().setOffsetStart(token.start_ - begin_);
1063 currentValue().setOffsetLimit(token.end_ - begin_);
1064 } break;
1065 case tokenFalse: {
1066 Value v(false);
1067 currentValue().swapPayload(v);
1068 currentValue().setOffsetStart(token.start_ - begin_);
1069 currentValue().setOffsetLimit(token.end_ - begin_);
1070 } break;
1071 case tokenNull: {
1072 Value v;
1073 currentValue().swapPayload(v);
1074 currentValue().setOffsetStart(token.start_ - begin_);
1075 currentValue().setOffsetLimit(token.end_ - begin_);
1076 } break;
1077 case tokenNaN: {
1078 Value v(std::numeric_limits<double>::quiet_NaN());
1079 currentValue().swapPayload(v);
1080 currentValue().setOffsetStart(token.start_ - begin_);
1081 currentValue().setOffsetLimit(token.end_ - begin_);
1082 } break;
1083 case tokenPosInf: {
1084 Value v(std::numeric_limits<double>::infinity());
1085 currentValue().swapPayload(v);
1086 currentValue().setOffsetStart(token.start_ - begin_);
1087 currentValue().setOffsetLimit(token.end_ - begin_);
1088 } break;
1089 case tokenNegInf: {
1090 Value v(-std::numeric_limits<double>::infinity());
1091 currentValue().swapPayload(v);
1092 currentValue().setOffsetStart(token.start_ - begin_);
1093 currentValue().setOffsetLimit(token.end_ - begin_);
1094 } break;
1095 case tokenArraySeparator:
1096 case tokenObjectEnd:
1097 case tokenArrayEnd:
1098 if (features_.allowDroppedNullPlaceholders_) {
1099 // "Un-read" the current token and mark the current value as a null
1100 // token.
1101 current_--;
1102 Value v;
1103 currentValue().swapPayload(v);
1104 currentValue().setOffsetStart(current_ - begin_ - 1);
1105 currentValue().setOffsetLimit(current_ - begin_);
1106 break;
1107 } // else, fall through ...
1108 default:
1109 currentValue().setOffsetStart(token.start_ - begin_);
1110 currentValue().setOffsetLimit(token.end_ - begin_);
1111 return addError("Syntax error: value, object or array expected.", token);
1112 }
1113
1114 if (collectComments_) {
1115 lastValueEnd_ = current_;
1116 lastValueHasAComment_ = false;
1117 lastValue_ = &currentValue();
1118 }
1119
1120 return successful;
1121}
1122
1123bool OurReader::readTokenSkippingComments(Token& token) {
1124 bool success = readToken(token);
1125 if (features_.allowComments_) {
1126 while (success && token.type_ == tokenComment) {
1127 success = readToken(token);
1128 }
1129 }
1130 return success;
1131}
1132
1133bool OurReader::readToken(Token& token) {
1134 skipSpaces();
1135 token.start_ = current_;
1136 Char c = getNextChar();
1137 bool ok = true;
1138 switch (c) {
1139 case '{':
1140 token.type_ = tokenObjectBegin;
1141 break;
1142 case '}':
1143 token.type_ = tokenObjectEnd;
1144 break;
1145 case '[':
1146 token.type_ = tokenArrayBegin;
1147 break;
1148 case ']':
1149 token.type_ = tokenArrayEnd;
1150 break;
1151 case '"':
1152 token.type_ = tokenString;
1153 ok = readString();
1154 break;
1155 case '\'':
1156 if (features_.allowSingleQuotes_) {
1157 token.type_ = tokenString;
1158 ok = readStringSingleQuote();
1159 } else {
1160 // If we don't allow single quotes, this is a failure case.
1161 ok = false;
1162 }
1163 break;
1164 case '/':
1165 token.type_ = tokenComment;
1166 ok = readComment();
1167 break;
1168 case '0':
1169 case '1':
1170 case '2':
1171 case '3':
1172 case '4':
1173 case '5':
1174 case '6':
1175 case '7':
1176 case '8':
1177 case '9':
1178 token.type_ = tokenNumber;
1179 readNumber(false);
1180 break;
1181 case '-':
1182 if (readNumber(true)) {
1183 token.type_ = tokenNumber;
1184 } else {
1185 token.type_ = tokenNegInf;
1186 ok = features_.allowSpecialFloats_ && match("nfinity", 7);
1187 }
1188 break;
1189 case '+':
1190 if (readNumber(true)) {
1191 token.type_ = tokenNumber;
1192 } else {
1193 token.type_ = tokenPosInf;
1194 ok = features_.allowSpecialFloats_ && match("nfinity", 7);
1195 }
1196 break;
1197 case 't':
1198 token.type_ = tokenTrue;
1199 ok = match("rue", 3);
1200 break;
1201 case 'f':
1202 token.type_ = tokenFalse;
1203 ok = match("alse", 4);
1204 break;
1205 case 'n':
1206 token.type_ = tokenNull;
1207 ok = match("ull", 3);
1208 break;
1209 case 'N':
1210 if (features_.allowSpecialFloats_) {
1211 token.type_ = tokenNaN;
1212 ok = match("aN", 2);
1213 } else {
1214 ok = false;
1215 }
1216 break;
1217 case 'I':
1218 if (features_.allowSpecialFloats_) {
1219 token.type_ = tokenPosInf;
1220 ok = match("nfinity", 7);
1221 } else {
1222 ok = false;
1223 }
1224 break;
1225 case ',':
1226 token.type_ = tokenArraySeparator;
1227 break;
1228 case ':':
1229 token.type_ = tokenMemberSeparator;
1230 break;
1231 case 0:
1232 token.type_ = tokenEndOfStream;
1233 break;
1234 default:
1235 ok = false;
1236 break;
1237 }
1238 if (!ok)
1239 token.type_ = tokenError;
1240 token.end_ = current_;
1241 return ok;
1242}
1243
1244void OurReader::skipSpaces() {
1245 while (current_ != end_) {
1246 Char c = *current_;
1247 if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
1248 ++current_;
1249 else
1250 break;
1251 }
1252}
1253
1254void OurReader::skipBom(bool skipBom) {
1255 // The default behavior is to skip BOM.
1256 if (skipBom) {
1257 if ((end_ - begin_) >= 3 && strncmp(begin_, "\xEF\xBB\xBF", 3) == 0) {
1258 begin_ += 3;
1259 current_ = begin_;
1260 }
1261 }
1262}
1263
1264bool OurReader::match(const Char* pattern, int patternLength) {
1265 if (end_ - current_ < patternLength)
1266 return false;
1267 int index = patternLength;
1268 while (index--)
1269 if (current_[index] != pattern[index])
1270 return false;
1271 current_ += patternLength;
1272 return true;
1273}
1274
1275bool OurReader::readComment() {
1276 const Location commentBegin = current_ - 1;
1277 const Char c = getNextChar();
1278 bool successful = false;
1279 bool cStyleWithEmbeddedNewline = false;
1280
1281 const bool isCStyleComment = (c == '*');
1282 const bool isCppStyleComment = (c == '/');
1283 if (isCStyleComment) {
1284 successful = readCStyleComment(&cStyleWithEmbeddedNewline);
1285 } else if (isCppStyleComment) {
1286 successful = readCppStyleComment();
1287 }
1288
1289 if (!successful)
1290 return false;
1291
1292 if (collectComments_) {
1293 CommentPlacement placement = commentBefore;
1294
1295 if (!lastValueHasAComment_) {
1296 if (lastValueEnd_ && !containsNewLine(lastValueEnd_, commentBegin)) {
1297 if (isCppStyleComment || !cStyleWithEmbeddedNewline) {
1298 placement = commentAfterOnSameLine;
1299 lastValueHasAComment_ = true;
1300 }
1301 }
1302 }
1303
1304 addComment(commentBegin, current_, placement);
1305 }
1306 return true;
1307}
1308
1309String OurReader::normalizeEOL(OurReader::Location begin,
1310 OurReader::Location end) {
1311 String normalized;
1312 normalized.reserve(static_cast<size_t>(end - begin));
1313 OurReader::Location current = begin;
1314 while (current != end) {
1315 char c = *current++;
1316 if (c == '\r') {
1317 if (current != end && *current == '\n')
1318 // convert dos EOL
1319 ++current;
1320 // convert Mac EOL
1321 normalized += '\n';
1322 } else {
1323 normalized += c;
1324 }
1325 }
1326 return normalized;
1327}
1328
1329void OurReader::addComment(Location begin, Location end,
1330 CommentPlacement placement) {
1331 assert(collectComments_);
1332 const String& normalized = normalizeEOL(begin, end);
1333 if (placement == commentAfterOnSameLine) {
1334 assert(lastValue_ != nullptr);
1335 lastValue_->setComment(normalized, placement);
1336 } else {
1337 commentsBefore_ += normalized;
1338 }
1339}
1340
1341bool OurReader::readCStyleComment(bool* containsNewLineResult) {
1342 *containsNewLineResult = false;
1343
1344 while ((current_ + 1) < end_) {
1345 Char c = getNextChar();
1346 if (c == '*' && *current_ == '/')
1347 break;
1348 if (c == '\n')
1349 *containsNewLineResult = true;
1350 }
1351
1352 return getNextChar() == '/';
1353}
1354
1355bool OurReader::readCppStyleComment() {
1356 while (current_ != end_) {
1357 Char c = getNextChar();
1358 if (c == '\n')
1359 break;
1360 if (c == '\r') {
1361 // Consume DOS EOL. It will be normalized in addComment.
1362 if (current_ != end_ && *current_ == '\n')
1363 getNextChar();
1364 // Break on Moc OS 9 EOL.
1365 break;
1366 }
1367 }
1368 return true;
1369}
1370
1371bool OurReader::readNumber(bool checkInf) {
1372 Location p = current_;
1373 if (checkInf && p != end_ && *p == 'I') {
1374 current_ = ++p;
1375 return false;
1376 }
1377 char c = '0'; // stopgap for already consumed character
1378 // integral part
1379 while (c >= '0' && c <= '9')
1380 c = (current_ = p) < end_ ? *p++ : '\0';
1381 // fractional part
1382 if (c == '.') {
1383 c = (current_ = p) < end_ ? *p++ : '\0';
1384 while (c >= '0' && c <= '9')
1385 c = (current_ = p) < end_ ? *p++ : '\0';
1386 }
1387 // exponential part
1388 if (c == 'e' || c == 'E') {
1389 c = (current_ = p) < end_ ? *p++ : '\0';
1390 if (c == '+' || c == '-')
1391 c = (current_ = p) < end_ ? *p++ : '\0';
1392 while (c >= '0' && c <= '9')
1393 c = (current_ = p) < end_ ? *p++ : '\0';
1394 }
1395 return true;
1396}
1397bool OurReader::readString() {
1398 Char c = 0;
1399 while (current_ != end_) {
1400 c = getNextChar();
1401 if (c == '\\')
1402 getNextChar();
1403 else if (c == '"')
1404 break;
1405 }
1406 return c == '"';
1407}
1408
1409bool OurReader::readStringSingleQuote() {
1410 Char c = 0;
1411 while (current_ != end_) {
1412 c = getNextChar();
1413 if (c == '\\')
1414 getNextChar();
1415 else if (c == '\'')
1416 break;
1417 }
1418 return c == '\'';
1419}
1420
1421bool OurReader::readObject(Token& token) {
1422 Token tokenName;
1423 String name;
1424 Value init(objectValue);
1425 currentValue().swapPayload(init);
1426 currentValue().setOffsetStart(token.start_ - begin_);
1427 while (readTokenSkippingComments(tokenName)) {
1428 if (tokenName.type_ == tokenObjectEnd &&
1429 (name.empty() ||
1430 features_.allowTrailingCommas_)) // empty object or trailing comma
1431 return true;
1432 name.clear();
1433 if (tokenName.type_ == tokenString) {
1434 if (!decodeString(tokenName, name))
1435 return recoverFromError(tokenObjectEnd);
1436 } else if (tokenName.type_ == tokenNumber && features_.allowNumericKeys_) {
1437 Value numberName;
1438 if (!decodeNumber(tokenName, numberName))
1439 return recoverFromError(tokenObjectEnd);
1440 name = numberName.asString();
1441 } else {
1442 break;
1443 }
1444 if (name.length() >= (1U << 30))
1445 throwRuntimeError("keylength >= 2^30");
1446 if (features_.rejectDupKeys_ && currentValue().isMember(name)) {
1447 String msg = "Duplicate key: '" + name + "'";
1448 return addErrorAndRecover(msg, tokenName, tokenObjectEnd);
1449 }
1450
1451 Token colon;
1452 if (!readToken(colon) || colon.type_ != tokenMemberSeparator) {
1453 return addErrorAndRecover("Missing ':' after object member name", colon,
1454 tokenObjectEnd);
1455 }
1456 Value& value = currentValue()[name];
1457 nodes_.push(&value);
1458 bool ok = readValue();
1459 nodes_.pop();
1460 if (!ok) // error already set
1461 return recoverFromError(tokenObjectEnd);
1462
1463 Token comma;
1464 if (!readTokenSkippingComments(comma) ||
1465 (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator)) {
1466 return addErrorAndRecover("Missing ',' or '}' in object declaration",
1467 comma, tokenObjectEnd);
1468 }
1469 if (comma.type_ == tokenObjectEnd)
1470 return true;
1471 }
1472 return addErrorAndRecover("Missing '}' or object member name", tokenName,
1473 tokenObjectEnd);
1474}
1475
1476bool OurReader::readArray(Token& token) {
1477 Value init(arrayValue);
1478 currentValue().swapPayload(init);
1479 currentValue().setOffsetStart(token.start_ - begin_);
1480 int index = 0;
1481 for (;;) {
1482 skipSpaces();
1483 if (current_ != end_ && *current_ == ']' &&
1484 (index == 0 ||
1485 (features_.allowTrailingCommas_ &&
1486 !features_.allowDroppedNullPlaceholders_))) // empty array or trailing
1487 // comma
1488 {
1489 Token endArray;
1490 readToken(endArray);
1491 return true;
1492 }
1493 Value& value = currentValue()[index++];
1494 nodes_.push(&value);
1495 bool ok = readValue();
1496 nodes_.pop();
1497 if (!ok) // error already set
1498 return recoverFromError(tokenArrayEnd);
1499
1500 Token currentToken;
1501 // Accept Comment after last item in the array.
1502 ok = readTokenSkippingComments(currentToken);
1503 bool badTokenType = (currentToken.type_ != tokenArraySeparator &&
1504 currentToken.type_ != tokenArrayEnd);
1505 if (!ok || badTokenType) {
1506 return addErrorAndRecover("Missing ',' or ']' in array declaration",
1507 currentToken, tokenArrayEnd);
1508 }
1509 if (currentToken.type_ == tokenArrayEnd)
1510 break;
1511 }
1512 return true;
1513}
1514
1515bool OurReader::decodeNumber(Token& token) {
1516 Value decoded;
1517 if (!decodeNumber(token, decoded))
1518 return false;
1519 currentValue().swapPayload(decoded);
1520 currentValue().setOffsetStart(token.start_ - begin_);
1521 currentValue().setOffsetLimit(token.end_ - begin_);
1522 return true;
1523}
1524
1525bool OurReader::decodeNumber(Token& token, Value& decoded) {
1526 // Attempts to parse the number as an integer. If the number is
1527 // larger than the maximum supported value of an integer then
1528 // we decode the number as a double.
1529 Location current = token.start_;
1530 const bool isNegative = *current == '-';
1531 if (isNegative) {
1532 ++current;
1533 }
1534
1535 // We assume we can represent the largest and smallest integer types as
1536 // unsigned integers with separate sign. This is only true if they can fit
1537 // into an unsigned integer.
1539 "Int must be smaller than UInt");
1540
1541 // We need to convert minLargestInt into a positive number. The easiest way
1542 // to do this conversion is to assume our "threshold" value of minLargestInt
1543 // divided by 10 can fit in maxLargestInt when absolute valued. This should
1544 // be a safe assumption.
1546 "The absolute value of minLargestInt must be greater than or "
1547 "equal to maxLargestInt");
1548 static_assert(Value::minLargestInt / 10 >= -Value::maxLargestInt,
1549 "The absolute value of minLargestInt must be only 1 magnitude "
1550 "larger than maxLargest Int");
1551
1552 static constexpr Value::LargestUInt positive_threshold =
1554 static constexpr Value::UInt positive_last_digit = Value::maxLargestUInt % 10;
1555
1556 // For the negative values, we have to be more careful. Since typically
1557 // -Value::minLargestInt will cause an overflow, we first divide by 10 and
1558 // then take the inverse. This assumes that minLargestInt is only a single
1559 // power of 10 different in magnitude, which we check above. For the last
1560 // digit, we take the modulus before negating for the same reason.
1561 static constexpr auto negative_threshold =
1563 static constexpr auto negative_last_digit =
1565
1566 const Value::LargestUInt threshold =
1567 isNegative ? negative_threshold : positive_threshold;
1568 const Value::UInt max_last_digit =
1569 isNegative ? negative_last_digit : positive_last_digit;
1570
1571 Value::LargestUInt value = 0;
1572 while (current < token.end_) {
1573 Char c = *current++;
1574 if (c < '0' || c > '9')
1575 return decodeDouble(token, decoded);
1576
1577 const auto digit(static_cast<Value::UInt>(c - '0'));
1578 if (value >= threshold) {
1579 // We've hit or exceeded the max value divided by 10 (rounded down). If
1580 // a) we've only just touched the limit, meaning value == threshold,
1581 // b) this is the last digit, or
1582 // c) it's small enough to fit in that rounding delta, we're okay.
1583 // Otherwise treat this number as a double to avoid overflow.
1584 if (value > threshold || current != token.end_ ||
1585 digit > max_last_digit) {
1586 return decodeDouble(token, decoded);
1587 }
1588 }
1589 value = value * 10 + digit;
1590 }
1591
1592 if (isNegative) {
1593 // We use the same magnitude assumption here, just in case.
1594 const auto last_digit = static_cast<Value::UInt>(value % 10);
1595 decoded = -Value::LargestInt(value / 10) * 10 - last_digit;
1596 } else if (value <= Value::LargestUInt(Value::maxLargestInt)) {
1597 decoded = Value::LargestInt(value);
1598 } else {
1599 decoded = value;
1600 }
1601
1602 return true;
1603}
1604
1605bool OurReader::decodeDouble(Token& token) {
1606 Value decoded;
1607 if (!decodeDouble(token, decoded))
1608 return false;
1609 currentValue().swapPayload(decoded);
1610 currentValue().setOffsetStart(token.start_ - begin_);
1611 currentValue().setOffsetLimit(token.end_ - begin_);
1612 return true;
1613}
1614
1615bool OurReader::decodeDouble(Token& token, Value& decoded) {
1616 double value = 0;
1617 IStringStream is(String(token.start_, token.end_));
1618 is.imbue(std::locale::classic());
1619 if (!(is >> value)) {
1620 if (value == std::numeric_limits<double>::max())
1621 value = std::numeric_limits<double>::infinity();
1622 else if (value == std::numeric_limits<double>::lowest())
1623 value = -std::numeric_limits<double>::infinity();
1624 else if (!std::isinf(value))
1625 return addError(
1626 "'" + String(token.start_, token.end_) + "' is not a number.", token);
1627 }
1628 decoded = value;
1629 return true;
1630}
1631
1632bool OurReader::decodeString(Token& token) {
1633 String decoded_string;
1634 if (!decodeString(token, decoded_string))
1635 return false;
1636 Value decoded(decoded_string);
1637 currentValue().swapPayload(decoded);
1638 currentValue().setOffsetStart(token.start_ - begin_);
1639 currentValue().setOffsetLimit(token.end_ - begin_);
1640 return true;
1641}
1642
1643bool OurReader::decodeString(Token& token, String& decoded) {
1644 decoded.reserve(static_cast<size_t>(token.end_ - token.start_ - 2));
1645 Location current = token.start_ + 1; // skip '"'
1646 Location end = token.end_ - 1; // do not include '"'
1647 while (current != end) {
1648 Char c = *current++;
1649 if (c == '"')
1650 break;
1651 if (c == '\\') {
1652 if (current == end)
1653 return addError("Empty escape sequence in string", token, current);
1654 Char escape = *current++;
1655 switch (escape) {
1656 case '"':
1657 decoded += '"';
1658 break;
1659 case '/':
1660 decoded += '/';
1661 break;
1662 case '\\':
1663 decoded += '\\';
1664 break;
1665 case 'b':
1666 decoded += '\b';
1667 break;
1668 case 'f':
1669 decoded += '\f';
1670 break;
1671 case 'n':
1672 decoded += '\n';
1673 break;
1674 case 'r':
1675 decoded += '\r';
1676 break;
1677 case 't':
1678 decoded += '\t';
1679 break;
1680 case 'u': {
1681 unsigned int unicode;
1682 if (!decodeUnicodeCodePoint(token, current, end, unicode))
1683 return false;
1684 decoded += codePointToUTF8(unicode);
1685 } break;
1686 default:
1687 return addError("Bad escape sequence in string", token, current);
1688 }
1689 } else {
1690 if (static_cast<unsigned char>(c) < 0x20)
1691 return addError("Control character in string", token, current - 1);
1692 decoded += c;
1693 }
1694 }
1695 return true;
1696}
1697
1698bool OurReader::decodeUnicodeCodePoint(Token& token, Location& current,
1699 Location end, unsigned int& unicode) {
1700
1701 if (!decodeUnicodeEscapeSequence(token, current, end, unicode))
1702 return false;
1703 if (unicode >= 0xD800 && unicode <= 0xDBFF) {
1704 // surrogate pairs
1705 if (end - current < 6)
1706 return addError(
1707 "additional six characters expected to parse unicode surrogate pair.",
1708 token, current);
1709 if (*(current++) == '\\' && *(current++) == 'u') {
1710 unsigned int surrogatePair;
1711 if (decodeUnicodeEscapeSequence(token, current, end, surrogatePair)) {
1712 unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
1713 } else
1714 return false;
1715 } else
1716 return addError("expecting another \\u token to begin the second half of "
1717 "a unicode surrogate pair",
1718 token, current);
1719 }
1720 return true;
1721}
1722
1723bool OurReader::decodeUnicodeEscapeSequence(Token& token, Location& current,
1724 Location end,
1725 unsigned int& ret_unicode) {
1726 if (end - current < 4)
1727 return addError(
1728 "Bad unicode escape sequence in string: four digits expected.", token,
1729 current);
1730 int unicode = 0;
1731 for (int index = 0; index < 4; ++index) {
1732 Char c = *current++;
1733 unicode *= 16;
1734 if (c >= '0' && c <= '9')
1735 unicode += c - '0';
1736 else if (c >= 'a' && c <= 'f')
1737 unicode += c - 'a' + 10;
1738 else if (c >= 'A' && c <= 'F')
1739 unicode += c - 'A' + 10;
1740 else
1741 return addError(
1742 "Bad unicode escape sequence in string: hexadecimal digit expected.",
1743 token, current);
1744 }
1745 ret_unicode = static_cast<unsigned int>(unicode);
1746 return true;
1747}
1748
1749bool OurReader::addError(const String& message, Token& token, Location extra) {
1750 ErrorInfo info;
1751 info.token_ = token;
1752 info.message_ = message;
1753 info.extra_ = extra;
1754 errors_.push_back(info);
1755 return false;
1756}
1757
1758bool OurReader::recoverFromError(TokenType skipUntilToken) {
1759 size_t errorCount = errors_.size();
1760 Token skip;
1761 for (;;) {
1762 if (!readToken(skip))
1763 errors_.resize(errorCount); // discard errors caused by recovery
1764 if (skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream)
1765 break;
1766 }
1767 errors_.resize(errorCount);
1768 return false;
1769}
1770
1771bool OurReader::addErrorAndRecover(const String& message, Token& token,
1772 TokenType skipUntilToken) {
1773 addError(message, token);
1774 return recoverFromError(skipUntilToken);
1775}
1776
1777Value& OurReader::currentValue() { return *(nodes_.top()); }
1778
1779OurReader::Char OurReader::getNextChar() {
1780 if (current_ == end_)
1781 return 0;
1782 return *current_++;
1783}
1784
1785void OurReader::getLocationLineAndColumn(Location location, int& line,
1786 int& column) const {
1787 Location current = begin_;
1788 Location lastLineStart = current;
1789 line = 0;
1790 while (current < location && current != end_) {
1791 Char c = *current++;
1792 if (c == '\r') {
1793 if (current != end_ && *current == '\n')
1794 ++current;
1795 lastLineStart = current;
1796 ++line;
1797 } else if (c == '\n') {
1798 lastLineStart = current;
1799 ++line;
1800 }
1801 }
1802 // column & line start at 1
1803 column = int(location - lastLineStart) + 1;
1804 ++line;
1805}
1806
1807String OurReader::getLocationLineAndColumn(Location location) const {
1808 int line, column;
1809 getLocationLineAndColumn(location, line, column);
1810 char buffer[18 + 16 + 16 + 1];
1811 jsoncpp_snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
1812 return buffer;
1813}
1814
1815String OurReader::getFormattedErrorMessages() const {
1816 String formattedMessage;
1817 for (const auto& error : errors_) {
1818 formattedMessage +=
1819 "* " + getLocationLineAndColumn(error.token_.start_) + "\n";
1820 formattedMessage += " " + error.message_ + "\n";
1821 if (error.extra_)
1822 formattedMessage +=
1823 "See " + getLocationLineAndColumn(error.extra_) + " for detail.\n";
1824 }
1825 return formattedMessage;
1826}
1827
1828std::vector<CharReader::StructuredError>
1829OurReader::getStructuredErrors() const {
1830 std::vector<CharReader::StructuredError> allErrors;
1831 for (const auto& error : errors_) {
1832 CharReader::StructuredError structured;
1833 structured.offset_start = error.token_.start_ - begin_;
1834 structured.offset_limit = error.token_.end_ - begin_;
1835 structured.message = error.message_;
1836 allErrors.push_back(structured);
1837 }
1838 return allErrors;
1839}
1840
1841class OurCharReader : public CharReader {
1842
1843public:
1844 OurCharReader(bool collectComments, OurFeatures const& features)
1845 : CharReader(
1846 std::unique_ptr<OurImpl>(new OurImpl(collectComments, features))) {}
1847
1848protected:
1849 class OurImpl : public Impl {
1850 public:
1851 OurImpl(bool collectComments, OurFeatures const& features)
1852 : collectComments_(collectComments), reader_(features) {}
1853
1854 bool parse(char const* beginDoc, char const* endDoc, Value* root,
1855 String* errs) override {
1856 bool ok = reader_.parse(beginDoc, endDoc, *root, collectComments_);
1857 if (errs) {
1858 *errs = reader_.getFormattedErrorMessages();
1859 }
1860 return ok;
1861 }
1862
1863 std::vector<CharReader::StructuredError>
1864 getStructuredErrors() const override {
1865 return reader_.getStructuredErrors();
1866 }
1867
1868 private:
1869 bool const collectComments_;
1870 OurReader reader_;
1871 };
1872};
1873
1874CharReaderBuilder::CharReaderBuilder() { setDefaults(&settings_); }
1875CharReaderBuilder::~CharReaderBuilder() = default;
1877 bool collectComments = settings_["collectComments"].asBool();
1878 OurFeatures features = OurFeatures::all();
1879 features.allowComments_ = settings_["allowComments"].asBool();
1880 features.allowTrailingCommas_ = settings_["allowTrailingCommas"].asBool();
1881 features.strictRoot_ = settings_["strictRoot"].asBool();
1882 features.allowDroppedNullPlaceholders_ =
1883 settings_["allowDroppedNullPlaceholders"].asBool();
1884 features.allowNumericKeys_ = settings_["allowNumericKeys"].asBool();
1885 features.allowSingleQuotes_ = settings_["allowSingleQuotes"].asBool();
1886
1887 // Stack limit is always a size_t, so we get this as an unsigned int
1888 // regardless of it we have 64-bit integer support enabled.
1889 features.stackLimit_ = static_cast<size_t>(settings_["stackLimit"].asUInt());
1890 features.failIfExtra_ = settings_["failIfExtra"].asBool();
1891 features.rejectDupKeys_ = settings_["rejectDupKeys"].asBool();
1892 features.allowSpecialFloats_ = settings_["allowSpecialFloats"].asBool();
1893 features.skipBom_ = settings_["skipBom"].asBool();
1894 return new OurCharReader(collectComments, features);
1895}
1896
1898 static const auto& valid_keys = *new std::set<String>{
1899 "collectComments",
1900 "allowComments",
1901 "allowTrailingCommas",
1902 "strictRoot",
1903 "allowDroppedNullPlaceholders",
1904 "allowNumericKeys",
1905 "allowSingleQuotes",
1906 "stackLimit",
1907 "failIfExtra",
1908 "rejectDupKeys",
1909 "allowSpecialFloats",
1910 "skipBom",
1911 };
1912 for (auto si = settings_.begin(); si != settings_.end(); ++si) {
1913 auto key = si.name();
1914 if (valid_keys.count(key))
1915 continue;
1916 if (invalid)
1917 (*invalid)[key] = *si;
1918 else
1919 return false;
1920 }
1921 return invalid ? invalid->empty() : true;
1922}
1923
1925 return settings_[key];
1926}
1927// static
1930 (*settings)["allowComments"] = false;
1931 (*settings)["allowTrailingCommas"] = false;
1932 (*settings)["strictRoot"] = true;
1933 (*settings)["allowDroppedNullPlaceholders"] = false;
1934 (*settings)["allowNumericKeys"] = false;
1935 (*settings)["allowSingleQuotes"] = false;
1936 (*settings)["stackLimit"] = 256;
1937 (*settings)["failIfExtra"] = true;
1938 (*settings)["rejectDupKeys"] = true;
1939 (*settings)["allowSpecialFloats"] = false;
1940 (*settings)["skipBom"] = true;
1942}
1943// static
1946 (*settings)["collectComments"] = true;
1947 (*settings)["allowComments"] = true;
1948 (*settings)["allowTrailingCommas"] = true;
1949 (*settings)["strictRoot"] = false;
1950 (*settings)["allowDroppedNullPlaceholders"] = false;
1951 (*settings)["allowNumericKeys"] = false;
1952 (*settings)["allowSingleQuotes"] = false;
1953 (*settings)["stackLimit"] = 256;
1954 (*settings)["failIfExtra"] = false;
1955 (*settings)["rejectDupKeys"] = false;
1956 (*settings)["allowSpecialFloats"] = false;
1957 (*settings)["skipBom"] = true;
1959}
1960// static
1963 (*settings)["allowComments"] = false;
1964 (*settings)["allowTrailingCommas"] = false;
1965 (*settings)["strictRoot"] = false;
1966 (*settings)["allowDroppedNullPlaceholders"] = false;
1967 (*settings)["allowNumericKeys"] = false;
1968 (*settings)["allowSingleQuotes"] = false;
1969 (*settings)["stackLimit"] = 256;
1970 (*settings)["failIfExtra"] = true;
1971 (*settings)["rejectDupKeys"] = false;
1972 (*settings)["allowSpecialFloats"] = false;
1973 (*settings)["skipBom"] = false;
1975}
1976
1977std::vector<CharReader::StructuredError>
1979 return _impl->getStructuredErrors();
1980}
1981
1982bool CharReader::parse(char const* beginDoc, char const* endDoc, Value* root,
1983 String* errs) {
1984 return _impl->parse(beginDoc, endDoc, root, errs);
1985}
1986
1988// global functions
1989
1990bool parseFromStream(CharReader::Factory const& fact, IStream& sin, Value* root,
1991 String* errs) {
1992 OStringStream ssin;
1993 ssin << sin.rdbuf();
1994 String doc = std::move(ssin).str();
1995 char const* begin = doc.data();
1996 char const* end = begin + doc.size();
1997 // Note that we do not actually need a null-terminator.
1998 CharReaderPtr const reader(fact.newCharReader());
1999 return reader->parse(begin, end, root, errs);
2000}
2001
2004 String errs;
2005 bool ok = parseFromStream(b, sin, &root, &errs);
2006 if (!ok) {
2007 throwRuntimeError(errs);
2008 }
2009 return sin;
2010}
2011
2012} // namespace Json
virtual CharReader * newCharReader() const =0
Allocate a CharReader via operator new().
virtual std::vector< StructuredError > getStructuredErrors() const =0
Build a CharReader implementation.
Definition reader.h:317
static void setDefaults(Json::Value *settings)
Called by ctor, but you can use this to reset settings_.
static void ecma404Mode(Json::Value *settings)
ECMA-404 mode.
Value & operator[](const String &key)
A simple way to update a specific setting.
static void strictMode(Json::Value *settings)
Same as old Features::strictMode().
bool validate(Json::Value *invalid) const
Configuration of this builder.
Interface for reading JSON from a char array.
Definition reader.h:248
CharReader(std::unique_ptr< Impl > impl)
Definition reader.h:299
std::vector< StructuredError > getStructuredErrors() const
Returns a vector of structured errors encountered while parsing.
virtual bool parse(char const *beginDoc, char const *endDoc, Value *root, String *errs)
Read a Value from a JSON document.
Configuration passed to reader and writer.
bool strictRoot_
true if root must be either an array or an object value.
bool allowComments_
true if comments are allowed. Default: true.
bool allowDroppedNullPlaceholders_
true if dropped null placeholders are allowed. Default: false.
static Features all()
A configuration that allows all features and assumes all strings are UTF-8.
Features()
Initialize the configuration like JsonConfig::allFeatures.
static Features strictMode()
A configuration that is strictly compatible with the JSON specification.
bool allowNumericKeys_
true if numeric object keys are allowed. Default: false.
char Char
Definition reader.h:39
Reader()
Constructs a Reader allowing all features for parsing.
bool pushError(const Value &value, const String &message)
Add a semantic error message.
bool good() const
Return whether there are any errors.
const Char * Location
Definition reader.h:40
std::vector< StructuredError > getStructuredErrors() const
Returns a vector of structured errors encountered while parsing.
bool parse(const std::string &document, Value &root, bool collectComments=true)
Read a Value from a JSON document.
String getFormattedErrorMessages() const
Returns a user-friendly string that lists the errors in the parsed document.
Represents a JSON value.
Definition value.h:207
bool empty() const
Return true if empty array, empty object, or null; otherwise, false.
static constexpr LargestInt maxLargestInt
Maximum signed integer value that can be stored in a Json::Value.
Definition value.h:241
Json::UInt UInt
Definition value.h:215
bool isArray() const
void setComment(const char *comment, size_t len, CommentPlacement placement)
Comments must be //... or /* ... */.
Definition value.h:650
ptrdiff_t getOffsetLimit() const
void swapPayload(Value &other)
Swap values but leave comments and source offsets in place.
void setOffsetLimit(ptrdiff_t limit)
Json::LargestInt LargestInt
Definition value.h:221
Json::LargestUInt LargestUInt
Definition value.h:222
bool isObject() const
void setOffsetStart(ptrdiff_t start)
static constexpr Int maxInt
Maximum signed int value that can be stored in a Json::Value.
Definition value.h:248
static constexpr LargestUInt maxLargestUInt
Maximum unsigned integer value that can be stored in a Json::Value.
Definition value.h:243
static constexpr LargestInt minLargestInt
Minimum signed integer value that can be stored in a Json::Value.
Definition value.h:238
ptrdiff_t getOffsetStart() const
#define jsoncpp_snprintf
Definition config.h:63
#define JSONCPP_DEPRECATED_STACK_LIMIT
static size_t const stackLimit_g
JSON (JavaScript Object Notation).
Definition allocator.h:16
std::basic_ostringstream< String::value_type, String::traits_type, String::allocator_type > OStringStream
Definition config.h:136
CommentPlacement
Definition value.h:132
@ commentAfterOnSameLine
a comment just after a value on the same line
Definition value.h:134
@ commentBefore
a comment placed on the line before a value
Definition value.h:133
@ commentAfter
a comment on the line after a value (only makes sense for the root value)
Definition value.h:135
std::basic_istringstream< String::value_type, String::traits_type, String::allocator_type > IStringStream
Definition config.h:133
std::unique_ptr< CharReader > CharReaderPtr
@ arrayValue
array value (ordered list)
Definition value.h:128
@ objectValue
object value (collection of name/value pairs).
Definition value.h:129
std::istream IStream
Definition config.h:139
static String codePointToUTF8(unsigned int cp)
Converts a unicode code-point to UTF-8.
Definition json_tool.h:39
std::basic_string< char, std::char_traits< char >, Allocator< char > > String
Definition config.h:132
IStream & operator>>(IStream &, Value &)
Read from 'sin' into 'root'.
bool parseFromStream(CharReader::Factory const &, IStream &, Value *root, String *errs)
Consume entire stream and use its begin/end.
An error tagged with where in the JSON text it was encountered.
Definition reader.h:47