All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
reader.h
Go to the documentation of this file.
1 // Tencent is pleased to support the open source community by making RapidJSON available.
2 //
3 // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
4 //
5 // Licensed under the MIT License (the "License"); you may not use this file except
6 // in compliance with the License. You may obtain a copy of the License at
7 //
8 // http://opensource.org/licenses/MIT
9 //
10 // Unless required by applicable law or agreed to in writing, software distributed
11 // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12 // CONDITIONS OF ANY KIND, either express or implied. See the License for the
13 // specific language governing permissions and limitations under the License.
14 
15 #ifndef RAPIDJSON_READER_H_
16 #define RAPIDJSON_READER_H_
17 
18 /*! \file reader.h */
19 
20 #include "allocators.h"
21 #include "stream.h"
22 #include "encodedstream.h"
23 #include "internal/meta.h"
24 #include "internal/stack.h"
25 #include "internal/strtod.h"
26 #include <limits>
27 
28 #if defined(RAPIDJSON_SIMD) && defined(_MSC_VER)
29 #include <intrin.h>
30 #pragma intrinsic(_BitScanForward)
31 #endif
32 #ifdef RAPIDJSON_SSE42
33 #include <nmmintrin.h>
34 #elif defined(RAPIDJSON_SSE2)
35 #include <emmintrin.h>
36 #endif
37 
38 #ifdef _MSC_VER
39 RAPIDJSON_DIAG_PUSH
40 RAPIDJSON_DIAG_OFF(4127) // conditional expression is constant
41 RAPIDJSON_DIAG_OFF(4702) // unreachable code
42 #endif
43 
44 #ifdef __clang__
45 RAPIDJSON_DIAG_PUSH
46 RAPIDJSON_DIAG_OFF(old-style-cast)
47 RAPIDJSON_DIAG_OFF(padded)
48 RAPIDJSON_DIAG_OFF(switch-enum)
49 #endif
50 
51 #ifdef __GNUC__
52 RAPIDJSON_DIAG_PUSH
53 RAPIDJSON_DIAG_OFF(effc++)
54 #endif
55 
//!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN
// Expands to nothing; used as the "value" of a void early return.
#define RAPIDJSON_NOTHING /* deliberately empty */
#ifndef RAPIDJSON_PARSE_ERROR_EARLY_RETURN
// Returns `value` from the enclosing function when HasParseError() is true.
// Overridable: only defined when the user has not supplied a definition.
#define RAPIDJSON_PARSE_ERROR_EARLY_RETURN(value) \
    RAPIDJSON_MULTILINEMACRO_BEGIN \
    if (RAPIDJSON_UNLIKELY(HasParseError())) { return value; } \
    RAPIDJSON_MULTILINEMACRO_END
#endif
// Same check for functions returning void.
#define RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID \
    RAPIDJSON_PARSE_ERROR_EARLY_RETURN(RAPIDJSON_NOTHING)
//!@endcond
67 
/*! \def RAPIDJSON_PARSE_ERROR_NORETURN
    \ingroup RAPIDJSON_ERRORS
    \brief Macro to indicate a parse error.
    \param parseErrorCode \ref rapidjson::ParseErrorCode of the error
    \param offset  position of the error in JSON input (\c size_t)

    This macro can be used as a customization point for the internal
    error handling mechanism of RapidJSON.

    A common usage model is to throw an exception instead of requiring the
    caller to explicitly check the \ref rapidjson::GenericReader::Parse's
    return value:

    \code
    #define RAPIDJSON_PARSE_ERROR_NORETURN(parseErrorCode,offset) \
       throw ParseException(parseErrorCode, #parseErrorCode, offset)

    #include <stdexcept>               // std::runtime_error
    #include "rapidjson/error/error.h" // rapidjson::ParseResult

    struct ParseException : std::runtime_error, rapidjson::ParseResult {
      ParseException(rapidjson::ParseErrorCode code, const char* msg, size_t offset)
        : std::runtime_error(msg), ParseResult(code, offset) {}
    };

    #include "rapidjson/reader.h"
    \endcode

    \see RAPIDJSON_PARSE_ERROR, rapidjson::GenericReader::Parse
 */
#ifndef RAPIDJSON_PARSE_ERROR_NORETURN
// Default behaviour: record the error via SetParseError() and keep executing;
// the caller is expected to follow up with an early-return check.
#define RAPIDJSON_PARSE_ERROR_NORETURN(parseErrorCode, offset) \
    RAPIDJSON_MULTILINEMACRO_BEGIN \
    RAPIDJSON_ASSERT(!HasParseError()); /* Error can only be assigned once */ \
    SetParseError(parseErrorCode, offset); \
    RAPIDJSON_MULTILINEMACRO_END
#endif
105 
/*! \def RAPIDJSON_PARSE_ERROR
    \ingroup RAPIDJSON_ERRORS
    \brief (Internal) macro to indicate and handle a parse error.
    \param parseErrorCode \ref rapidjson::ParseErrorCode of the error
    \param offset  position of the error in JSON input (\c size_t)

    Invokes RAPIDJSON_PARSE_ERROR_NORETURN and stops the parsing by
    returning from the enclosing (void) function.

    \see RAPIDJSON_PARSE_ERROR_NORETURN
    \hideinitializer
 */
#ifndef RAPIDJSON_PARSE_ERROR
#define RAPIDJSON_PARSE_ERROR(parseErrorCode, offset) \
    RAPIDJSON_MULTILINEMACRO_BEGIN \
    RAPIDJSON_PARSE_ERROR_NORETURN(parseErrorCode, offset); \
    RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; \
    RAPIDJSON_MULTILINEMACRO_END
#endif
124 
125 #include "error/error.h" // ParseErrorCode, ParseResult
126 
127 RAPIDJSON_NAMESPACE_BEGIN
128 
129 ///////////////////////////////////////////////////////////////////////////////
130 // ParseFlag
131 
/*! \def RAPIDJSON_PARSE_DEFAULT_FLAGS
    \ingroup RAPIDJSON_CONFIG
    \brief User-defined kParseDefaultFlags definition.

    User can define this as any \c ParseFlag combinations.
*/
#ifndef RAPIDJSON_PARSE_DEFAULT_FLAGS
#define RAPIDJSON_PARSE_DEFAULT_FLAGS kParseNoFlags
#endif

//! Combination of parseFlags
/*! Each flag occupies a distinct bit, so flags can be OR-ed together.
    \see Reader::Parse, Document::Parse, Document::ParseInsitu, Document::ParseStream
 */
enum ParseFlag {
    kParseNoFlags = 0,              //!< No flags are set.
    kParseInsituFlag = 1,           //!< In-situ(destructive) parsing.
    kParseValidateEncodingFlag = 2, //!< Validate encoding of JSON strings.
    kParseIterativeFlag = 4,        //!< Iterative(constant complexity in terms of function call stack size) parsing.
    kParseStopWhenDoneFlag = 8,     //!< After parsing a complete JSON root from stream, stop further processing the rest of stream. When this flag is used, parser will not generate kParseErrorDocumentRootNotSingular error.
    kParseFullPrecisionFlag = 16,   //!< Parse number in full precision (but slower).
    kParseCommentsFlag = 32,        //!< Allow one-line (//) and multi-line (/**/) comments.
    kParseNumbersAsStringsFlag = 64,    //!< Parse all numbers (ints/doubles) as strings.
    kParseTrailingCommasFlag = 128, //!< Allow trailing commas at the end of objects and arrays.
    kParseNanAndInfFlag = 256,      //!< Allow parsing NaN, Inf, Infinity, -Inf and -Infinity as doubles.
    kParseDefaultFlags = RAPIDJSON_PARSE_DEFAULT_FLAGS  //!< Default parse flags. Can be customized by defining RAPIDJSON_PARSE_DEFAULT_FLAGS
};
158 
159 ///////////////////////////////////////////////////////////////////////////////
160 // Handler
161 
/*! \class rapidjson::Handler
    \brief Concept for receiving events from GenericReader upon parsing.
    The functions return true if no error occurs. If they return false,
    the event publisher should terminate the process.
\code
concept Handler {
    typename Ch;

    bool Null();
    bool Bool(bool b);
    bool Int(int i);
    bool Uint(unsigned i);
    bool Int64(int64_t i);
    bool Uint64(uint64_t i);
    bool Double(double d);
    /// enabled via kParseNumbersAsStringsFlag, string is not null-terminated (use length)
    bool RawNumber(const Ch* str, SizeType length, bool copy);
    bool String(const Ch* str, SizeType length, bool copy);
    bool StartObject();
    bool Key(const Ch* str, SizeType length, bool copy);
    bool EndObject(SizeType memberCount);
    bool StartArray();
    bool EndArray(SizeType elementCount);
};
\endcode
*/
188 ///////////////////////////////////////////////////////////////////////////////
189 // BaseReaderHandler
190 
191 //! Default implementation of Handler.
192 /*! This can be used as base class of any reader handler.
193  \note implements Handler concept
194 */
195 template<typename Encoding = UTF8<>, typename Derived = void>
196 struct BaseReaderHandler {
197  typedef typename Encoding::Ch Ch;
198 
199  typedef typename internal::SelectIf<internal::IsSame<Derived, void>, BaseReaderHandler, Derived>::Type Override;
200 
201  bool Default() { return true; }
202  bool Null() { return static_cast<Override&>(*this).Default(); }
203  bool Bool(bool) { return static_cast<Override&>(*this).Default(); }
204  bool Int(int) { return static_cast<Override&>(*this).Default(); }
205  bool Uint(unsigned) { return static_cast<Override&>(*this).Default(); }
206  bool Int64(int64_t) { return static_cast<Override&>(*this).Default(); }
207  bool Uint64(uint64_t) { return static_cast<Override&>(*this).Default(); }
208  bool Double(double) { return static_cast<Override&>(*this).Default(); }
209  /// enabled via kParseNumbersAsStringsFlag, string is not null-terminated (use length)
210  bool RawNumber(const Ch* str, SizeType len, bool copy) { return static_cast<Override&>(*this).String(str, len, copy); }
211  bool String(const Ch*, SizeType, bool) { return static_cast<Override&>(*this).Default(); }
212  bool StartObject() { return static_cast<Override&>(*this).Default(); }
213  bool Key(const Ch* str, SizeType len, bool copy) { return static_cast<Override&>(*this).String(str, len, copy); }
214  bool EndObject(SizeType) { return static_cast<Override&>(*this).Default(); }
215  bool StartArray() { return static_cast<Override&>(*this).Default(); }
216  bool EndArray(SizeType) { return static_cast<Override&>(*this).Default(); }
217 };
218 
219 ///////////////////////////////////////////////////////////////////////////////
220 // StreamLocalCopy
221 
222 namespace internal {
223 
224 template<typename Stream, int = StreamTraits<Stream>::copyOptimization>
225 class StreamLocalCopy;
226 
227 //! Do copy optimization.
228 template<typename Stream>
229 class StreamLocalCopy<Stream, 1> {
230 public:
231  StreamLocalCopy(Stream& original) : s(original), original_(original) {}
232  ~StreamLocalCopy() { original_ = s; }
233 
234  Stream s;
235 
236 private:
237  StreamLocalCopy& operator=(const StreamLocalCopy&) /* = delete */;
238 
239  Stream& original_;
240 };
241 
242 //! Keep reference.
243 template<typename Stream>
244 class StreamLocalCopy<Stream, 0> {
245 public:
246  StreamLocalCopy(Stream& original) : s(original) {}
247 
248  Stream& s;
249 
250 private:
251  StreamLocalCopy& operator=(const StreamLocalCopy&) /* = delete */;
252 };
253 
254 } // namespace internal
255 
256 ///////////////////////////////////////////////////////////////////////////////
257 // SkipWhitespace
258 
259 //! Skip the JSON white spaces in a stream.
260 /*! \param is A input stream for skipping white spaces.
261  \note This function has SSE2/SSE4.2 specialization.
262 */
263 template<typename InputStream>
264 void SkipWhitespace(InputStream& is) {
265  internal::StreamLocalCopy<InputStream> copy(is);
266  InputStream& s(copy.s);
267 
268  typename InputStream::Ch c;
269  while ((c = s.Peek()) == ' ' || c == '\n' || c == '\r' || c == '\t')
270  s.Take();
271 }
272 
//! Skip JSON white spaces in the character range [p, end).
/*! \param p   First character to examine.
    \param end One past the last character that may be examined.
    \return Pointer to the first character that is not ' ', '\\n', '\\r' or '\\t',
            or \c end if the whole range is white space.
*/
inline const char* SkipWhitespace(const char* p, const char* end) {
    while (p != end && (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t'))
        ++p;
    return p;
}
278 
279 #ifdef RAPIDJSON_SSE42
280 //! Skip whitespace with SSE 4.2 pcmpistrm instruction, testing 16 8-byte characters at once.
281 inline const char *SkipWhitespace_SIMD(const char* p) {
282  // Fast return for single non-whitespace
283  if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
284  ++p;
285  else
286  return p;
287 
288  // 16-byte align to the next boundary
289  const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
290  while (p != nextAligned)
291  if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
292  ++p;
293  else
294  return p;
295 
296  // The rest of string using SIMD
297  static const char whitespace[16] = " \n\r\t";
298  const __m128i w = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespace[0]));
299 
300  for (;; p += 16) {
301  const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
302  const int r = _mm_cmpistri(w, s, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_LEAST_SIGNIFICANT | _SIDD_NEGATIVE_POLARITY);
303  if (r != 16) // some of characters is non-whitespace
304  return p + r;
305  }
306 }
307 
308 inline const char *SkipWhitespace_SIMD(const char* p, const char* end) {
309  // Fast return for single non-whitespace
310  if (p != end && (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t'))
311  ++p;
312  else
313  return p;
314 
315  // The middle of string using SIMD
316  static const char whitespace[16] = " \n\r\t";
317  const __m128i w = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespace[0]));
318 
319  for (; p <= end - 16; p += 16) {
320  const __m128i s = _mm_loadu_si128(reinterpret_cast<const __m128i *>(p));
321  const int r = _mm_cmpistri(w, s, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_LEAST_SIGNIFICANT | _SIDD_NEGATIVE_POLARITY);
322  if (r != 16) // some of characters is non-whitespace
323  return p + r;
324  }
325 
326  return SkipWhitespace(p, end);
327 }
328 
329 #elif defined(RAPIDJSON_SSE2)
330 
//! Skip whitespace with SSE2 instructions, testing 16 8-bit characters at once.
/*! \param p Start of the text. The loops have no end bound: scanning stops only
             at the first character that is not ' ', '\\n', '\\r' or '\\t'.
    \return Pointer to that first non-whitespace character.
*/
inline const char *SkipWhitespace_SIMD(const char* p) {
    // Fast return for single non-whitespace
    if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
        ++p;
    else
        return p;

    // 16-byte align to the next boundary
    const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
    while (p != nextAligned)
        if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
            ++p;
        else
            return p;

    // The rest of string
    #define C16(c) { c, c, c, c, c, c, c, c, c, c, c, c, c, c, c, c }
    static const char whitespaces[4][16] = { C16(' '), C16('\n'), C16('\r'), C16('\t') };
    #undef C16

    const __m128i w0 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[0][0]));
    const __m128i w1 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[1][0]));
    const __m128i w2 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[2][0]));
    const __m128i w3 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[3][0]));

    for (;; p += 16) {
        // p is 16-byte aligned here, so the aligned load is valid.
        const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
        __m128i x = _mm_cmpeq_epi8(s, w0);
        x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w1));
        x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w2));
        x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w3));
        // Bit i of r is set when byte i of the chunk is NOT whitespace.
        unsigned short r = static_cast<unsigned short>(~_mm_movemask_epi8(x));
        if (r != 0) {   // some of characters may be non-whitespace
#ifdef _MSC_VER         // Find the index of first non-whitespace
            unsigned long offset;
            _BitScanForward(&offset, r);
            return p + offset;
#else
            return p + __builtin_ffs(r) - 1;
#endif
        }
    }
}
375 
376 inline const char *SkipWhitespace_SIMD(const char* p, const char* end) {
377  // Fast return for single non-whitespace
378  if (p != end && (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t'))
379  ++p;
380  else
381  return p;
382 
383  // The rest of string
384  #define C16(c) { c, c, c, c, c, c, c, c, c, c, c, c, c, c, c, c }
385  static const char whitespaces[4][16] = { C16(' '), C16('\n'), C16('\r'), C16('\t') };
386  #undef C16
387 
388  const __m128i w0 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[0][0]));
389  const __m128i w1 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[1][0]));
390  const __m128i w2 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[2][0]));
391  const __m128i w3 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[3][0]));
392 
393  for (; p <= end - 16; p += 16) {
394  const __m128i s = _mm_loadu_si128(reinterpret_cast<const __m128i *>(p));
395  __m128i x = _mm_cmpeq_epi8(s, w0);
396  x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w1));
397  x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w2));
398  x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w3));
399  unsigned short r = static_cast<unsigned short>(~_mm_movemask_epi8(x));
400  if (r != 0) { // some of characters may be non-whitespace
401 #ifdef _MSC_VER // Find the index of first non-whitespace
402  unsigned long offset;
403  _BitScanForward(&offset, r);
404  return p + offset;
405 #else
406  return p + __builtin_ffs(r) - 1;
407 #endif
408  }
409  }
410 
411  return SkipWhitespace(p, end);
412 }
413 
414 #endif // RAPIDJSON_SSE2
415 
#ifdef RAPIDJSON_SIMD
//! Template function specialization for InsituStringStream
/*! Advances the stream's source pointer with the SIMD routine. */
template<> inline void SkipWhitespace(InsituStringStream& is) {
    is.src_ = const_cast<char*>(SkipWhitespace_SIMD(is.src_));
}

//! Template function specialization for StringStream
/*! Advances the stream's source pointer with the SIMD routine. */
template<> inline void SkipWhitespace(StringStream& is) {
    is.src_ = SkipWhitespace_SIMD(is.src_);
}

//! Template function specialization for a UTF-8 encoded MemoryStream
/*! Uses the bounded SIMD routine, limited by the wrapped stream's \c end_. */
template<> inline void SkipWhitespace(EncodedInputStream<UTF8<>, MemoryStream>& is) {
    is.is_.src_ = SkipWhitespace_SIMD(is.is_.src_, is.is_.end_);
}
#endif // RAPIDJSON_SIMD
431 
432 ///////////////////////////////////////////////////////////////////////////////
433 // GenericReader
434 
//! SAX-style JSON parser. Use \ref Reader for UTF8 encoding and default allocator.
/*! GenericReader parses JSON text from a stream, and sends events synchronously to an
    object implementing the Handler concept.

    It needs to allocate a stack for storing a single decoded string during
    non-destructive parsing.

    For in-situ parsing, the decoded string is directly written to the source
    text string, no temporary buffer is required.

    A GenericReader object can be reused for parsing multiple JSON texts.

    \tparam SourceEncoding Encoding of the input stream.
    \tparam TargetEncoding Encoding of the parse output.
    \tparam StackAllocator Allocator type for stack.
*/
451 template <typename SourceEncoding, typename TargetEncoding, typename StackAllocator = CrtAllocator>
452 class GenericReader {
453 public:
454  typedef typename SourceEncoding::Ch Ch; //!< SourceEncoding character type
455 
456  //! Constructor.
457  /*! \param stackAllocator Optional allocator for allocating stack memory. (Only use for non-destructive parsing)
458  \param stackCapacity stack capacity in bytes for storing a single decoded string. (Only use for non-destructive parsing)
459  */
460  GenericReader(StackAllocator* stackAllocator = 0, size_t stackCapacity = kDefaultStackCapacity) : stack_(stackAllocator, stackCapacity), parseResult_() {}
461 
462  //! Parse JSON text.
463  /*! \tparam parseFlags Combination of \ref ParseFlag.
464  \tparam InputStream Type of input stream, implementing Stream concept.
465  \tparam Handler Type of handler, implementing Handler concept.
466  \param is Input stream to be parsed.
467  \param handler The handler to receive events.
468  \return Whether the parsing is successful.
469  */
470  template <unsigned parseFlags, typename InputStream, typename Handler>
471  ParseResult Parse(InputStream& is, Handler& handler) {
472  if (parseFlags & kParseIterativeFlag)
473  return IterativeParse<parseFlags>(is, handler);
474 
475  parseResult_.Clear();
476 
477  ClearStackOnExit scope(*this);
478 
479  SkipWhitespaceAndComments<parseFlags>(is);
480  RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
481 
482  if (RAPIDJSON_UNLIKELY(is.Peek() == '\0')) {
484  RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
485  }
486  else {
487  ParseValue<parseFlags>(is, handler);
488  RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
489 
490  if (!(parseFlags & kParseStopWhenDoneFlag)) {
491  SkipWhitespaceAndComments<parseFlags>(is);
492  RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
493 
494  if (RAPIDJSON_UNLIKELY(is.Peek() != '\0')) {
496  RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
497  }
498  }
499  }
500 
501  return parseResult_;
502  }
503 
504  //! Parse JSON text (with \ref kParseDefaultFlags)
505  /*! \tparam InputStream Type of input stream, implementing Stream concept
506  \tparam Handler Type of handler, implementing Handler concept.
507  \param is Input stream to be parsed.
508  \param handler The handler to receive events.
509  \return Whether the parsing is successful.
510  */
511  template <typename InputStream, typename Handler>
512  ParseResult Parse(InputStream& is, Handler& handler) {
513  return Parse<kParseDefaultFlags>(is, handler);
514  }
515 
516  //! Initialize JSON text token-by-token parsing
517  /*!
518  */
520  parseResult_.Clear();
521  state_ = IterativeParsingStartState;
522  }
523 
524  //! Parse one token from JSON text
525  /*! \tparam InputStream Type of input stream, implementing Stream concept
526  \tparam Handler Type of handler, implementing Handler concept.
527  \param is Input stream to be parsed.
528  \param handler The handler to receive events.
529  \return Whether the parsing is successful.
530  */
531  template <unsigned parseFlags, typename InputStream, typename Handler>
532  bool IterativeParseNext(InputStream& is, Handler& handler) {
533  while (RAPIDJSON_LIKELY(is.Peek() != '\0')) {
534  SkipWhitespaceAndComments<parseFlags>(is);
535 
536  Token t = Tokenize(is.Peek());
537  IterativeParsingState n = Predict(state_, t);
538  IterativeParsingState d = Transit<parseFlags>(state_, t, n, is, handler);
539 
540  // If we've finished or hit an error...
541  if (RAPIDJSON_UNLIKELY(IsIterativeParsingCompleteState(d))) {
542  // Report errors.
543  if (d == IterativeParsingErrorState) {
544  HandleError(state_, is);
545  return false;
546  }
547 
548  // Transition to the finish state.
549  RAPIDJSON_ASSERT(d == IterativeParsingFinishState);
550  state_ = d;
551 
552  // If StopWhenDone is not set...
553  if (!(parseFlags & kParseStopWhenDoneFlag)) {
554  // ... and extra non-whitespace data is found...
555  SkipWhitespaceAndComments<parseFlags>(is);
556  if (is.Peek() != '\0') {
557  // ... this is considered an error.
558  HandleError(state_, is);
559  return false;
560  }
561  }
562 
563  // Success! We are done!
564  return true;
565  }
566 
567  // Transition to the new state.
568  state_ = d;
569 
570  // If we parsed anything other than a delimiter, we invoked the handler, so we can return true now.
571  if (!IsIterativeParsingDelimiterState(n))
572  return true;
573  }
574 
575  // We reached the end of file.
576  stack_.Clear();
577 
578  if (state_ != IterativeParsingFinishState) {
579  HandleError(state_, is);
580  return false;
581  }
582 
583  return true;
584  }
585 
586  //! Check if token-by-token parsing JSON text is complete
587  /*! \return Whether the JSON has been fully decoded.
588  */
589  RAPIDJSON_FORCEINLINE bool IterativeParseComplete() {
590  return IsIterativeParsingCompleteState(state_);
591  }
592 
593  //! Whether a parse error has occured in the last parsing.
594  bool HasParseError() const { return parseResult_.IsError(); }
595 
596  //! Get the \ref ParseErrorCode of last parsing.
597  ParseErrorCode GetParseErrorCode() const { return parseResult_.Code(); }
598 
599  //! Get the position of last parsing error in input, 0 otherwise.
600  size_t GetErrorOffset() const { return parseResult_.Offset(); }
601 
602 protected:
603  void SetParseError(ParseErrorCode code, size_t offset) { parseResult_.Set(code, offset); }
604 
605 private:
606  // Prohibit copy constructor & assignment operator.
607  GenericReader(const GenericReader&);
608  GenericReader& operator=(const GenericReader&);
609 
610  void ClearStack() { stack_.Clear(); }
611 
612  // clear stack on any exit from ParseStream, e.g. due to exception
613  struct ClearStackOnExit {
614  explicit ClearStackOnExit(GenericReader& r) : r_(r) {}
615  ~ClearStackOnExit() { r_.ClearStack(); }
616  private:
617  GenericReader& r_;
618  ClearStackOnExit(const ClearStackOnExit&);
619  ClearStackOnExit& operator=(const ClearStackOnExit&);
620  };
621 
622  template<unsigned parseFlags, typename InputStream>
623  void SkipWhitespaceAndComments(InputStream& is) {
624  SkipWhitespace(is);
625 
626  if (parseFlags & kParseCommentsFlag) {
627  while (RAPIDJSON_UNLIKELY(Consume(is, '/'))) {
628  if (Consume(is, '*')) {
629  while (true) {
630  if (RAPIDJSON_UNLIKELY(is.Peek() == '\0'))
632  else if (Consume(is, '*')) {
633  if (Consume(is, '/'))
634  break;
635  }
636  else
637  is.Take();
638  }
639  }
640  else if (RAPIDJSON_LIKELY(Consume(is, '/')))
641  while (is.Peek() != '\0' && is.Take() != '\n') {}
642  else
644 
645  SkipWhitespace(is);
646  }
647  }
648  }
649 
650  // Parse object: { string : value, ... }
651  template<unsigned parseFlags, typename InputStream, typename Handler>
652  void ParseObject(InputStream& is, Handler& handler) {
653  RAPIDJSON_ASSERT(is.Peek() == '{');
654  is.Take(); // Skip '{'
655 
656  if (RAPIDJSON_UNLIKELY(!handler.StartObject()))
658 
659  SkipWhitespaceAndComments<parseFlags>(is);
660  RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
661 
662  if (Consume(is, '}')) {
663  if (RAPIDJSON_UNLIKELY(!handler.EndObject(0))) // empty object
665  return;
666  }
667 
668  for (SizeType memberCount = 0;;) {
669  if (RAPIDJSON_UNLIKELY(is.Peek() != '"'))
671 
672  ParseString<parseFlags>(is, handler, true);
673  RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
674 
675  SkipWhitespaceAndComments<parseFlags>(is);
676  RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
677 
678  if (RAPIDJSON_UNLIKELY(!Consume(is, ':')))
680 
681  SkipWhitespaceAndComments<parseFlags>(is);
682  RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
683 
684  ParseValue<parseFlags>(is, handler);
685  RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
686 
687  SkipWhitespaceAndComments<parseFlags>(is);
688  RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
689 
690  ++memberCount;
691 
692  switch (is.Peek()) {
693  case ',':
694  is.Take();
695  SkipWhitespaceAndComments<parseFlags>(is);
696  RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
697  break;
698  case '}':
699  is.Take();
700  if (RAPIDJSON_UNLIKELY(!handler.EndObject(memberCount)))
702  return;
703  default:
704  RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissCommaOrCurlyBracket, is.Tell()); break; // This useless break is only for making warning and coverage happy
705  }
706 
707  if (parseFlags & kParseTrailingCommasFlag) {
708  if (is.Peek() == '}') {
709  if (RAPIDJSON_UNLIKELY(!handler.EndObject(memberCount)))
711  is.Take();
712  return;
713  }
714  }
715  }
716  }
717 
718  // Parse array: [ value, ... ]
719  template<unsigned parseFlags, typename InputStream, typename Handler>
720  void ParseArray(InputStream& is, Handler& handler) {
721  RAPIDJSON_ASSERT(is.Peek() == '[');
722  is.Take(); // Skip '['
723 
724  if (RAPIDJSON_UNLIKELY(!handler.StartArray()))
726 
727  SkipWhitespaceAndComments<parseFlags>(is);
728  RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
729 
730  if (Consume(is, ']')) {
731  if (RAPIDJSON_UNLIKELY(!handler.EndArray(0))) // empty array
733  return;
734  }
735 
736  for (SizeType elementCount = 0;;) {
737  ParseValue<parseFlags>(is, handler);
738  RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
739 
740  ++elementCount;
741  SkipWhitespaceAndComments<parseFlags>(is);
742  RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
743 
744  if (Consume(is, ',')) {
745  SkipWhitespaceAndComments<parseFlags>(is);
746  RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
747  }
748  else if (Consume(is, ']')) {
749  if (RAPIDJSON_UNLIKELY(!handler.EndArray(elementCount)))
751  return;
752  }
753  else
755 
756  if (parseFlags & kParseTrailingCommasFlag) {
757  if (is.Peek() == ']') {
758  if (RAPIDJSON_UNLIKELY(!handler.EndArray(elementCount)))
760  is.Take();
761  return;
762  }
763  }
764  }
765  }
766 
767  template<unsigned parseFlags, typename InputStream, typename Handler>
768  void ParseNull(InputStream& is, Handler& handler) {
769  RAPIDJSON_ASSERT(is.Peek() == 'n');
770  is.Take();
771 
772  if (RAPIDJSON_LIKELY(Consume(is, 'u') && Consume(is, 'l') && Consume(is, 'l'))) {
773  if (RAPIDJSON_UNLIKELY(!handler.Null()))
775  }
776  else
778  }
779 
780  template<unsigned parseFlags, typename InputStream, typename Handler>
781  void ParseTrue(InputStream& is, Handler& handler) {
782  RAPIDJSON_ASSERT(is.Peek() == 't');
783  is.Take();
784 
785  if (RAPIDJSON_LIKELY(Consume(is, 'r') && Consume(is, 'u') && Consume(is, 'e'))) {
786  if (RAPIDJSON_UNLIKELY(!handler.Bool(true)))
788  }
789  else
791  }
792 
793  template<unsigned parseFlags, typename InputStream, typename Handler>
794  void ParseFalse(InputStream& is, Handler& handler) {
795  RAPIDJSON_ASSERT(is.Peek() == 'f');
796  is.Take();
797 
798  if (RAPIDJSON_LIKELY(Consume(is, 'a') && Consume(is, 'l') && Consume(is, 's') && Consume(is, 'e'))) {
799  if (RAPIDJSON_UNLIKELY(!handler.Bool(false)))
801  }
802  else
804  }
805 
806  template<typename InputStream>
807  RAPIDJSON_FORCEINLINE static bool Consume(InputStream& is, typename InputStream::Ch expect) {
808  if (RAPIDJSON_LIKELY(is.Peek() == expect)) {
809  is.Take();
810  return true;
811  }
812  else
813  return false;
814  }
815 
816  // Helper function to parse four hexidecimal digits in \uXXXX in ParseString().
817  template<typename InputStream>
818  unsigned ParseHex4(InputStream& is, size_t escapeOffset) {
819  unsigned codepoint = 0;
820  for (int i = 0; i < 4; i++) {
821  Ch c = is.Peek();
822  codepoint <<= 4;
823  codepoint += static_cast<unsigned>(c);
824  if (c >= '0' && c <= '9')
825  codepoint -= '0';
826  else if (c >= 'A' && c <= 'F')
827  codepoint -= 'A' - 10;
828  else if (c >= 'a' && c <= 'f')
829  codepoint -= 'a' - 10;
830  else {
832  RAPIDJSON_PARSE_ERROR_EARLY_RETURN(0);
833  }
834  is.Take();
835  }
836  return codepoint;
837  }
838 
839  template <typename CharType>
840  class StackStream {
841  public:
842  typedef CharType Ch;
843 
844  StackStream(internal::Stack<StackAllocator>& stack) : stack_(stack), length_(0) {}
845  RAPIDJSON_FORCEINLINE void Put(Ch c) {
846  *stack_.template Push<Ch>() = c;
847  ++length_;
848  }
849 
850  RAPIDJSON_FORCEINLINE void* Push(SizeType count) {
851  length_ += count;
852  return stack_.template Push<Ch>(count);
853  }
854 
855  size_t Length() const { return length_; }
856 
857  Ch* Pop() {
858  return stack_.template Pop<Ch>(length_);
859  }
860 
861  private:
862  StackStream(const StackStream&);
863  StackStream& operator=(const StackStream&);
864 
865  internal::Stack<StackAllocator>& stack_;
866  SizeType length_;
867  };
868 
869  // Parse string and generate String event. Different code paths for kParseInsituFlag.
870  template<unsigned parseFlags, typename InputStream, typename Handler>
871  void ParseString(InputStream& is, Handler& handler, bool isKey = false) {
872  internal::StreamLocalCopy<InputStream> copy(is);
873  InputStream& s(copy.s);
874 
875  RAPIDJSON_ASSERT(s.Peek() == '\"');
876  s.Take(); // Skip '\"'
877 
878  bool success = false;
879  if (parseFlags & kParseInsituFlag) {
880  typename InputStream::Ch *head = s.PutBegin();
881  ParseStringToStream<parseFlags, SourceEncoding, SourceEncoding>(s, s);
882  RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
883  size_t length = s.PutEnd(head) - 1;
884  RAPIDJSON_ASSERT(length <= 0xFFFFFFFF);
885  const typename TargetEncoding::Ch* const str = reinterpret_cast<typename TargetEncoding::Ch*>(head);
886  success = (isKey ? handler.Key(str, SizeType(length), false) : handler.String(str, SizeType(length), false));
887  }
888  else {
889  StackStream<typename TargetEncoding::Ch> stackStream(stack_);
890  ParseStringToStream<parseFlags, SourceEncoding, TargetEncoding>(s, stackStream);
891  RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
892  SizeType length = static_cast<SizeType>(stackStream.Length()) - 1;
893  const typename TargetEncoding::Ch* const str = stackStream.Pop();
894  success = (isKey ? handler.Key(str, length, true) : handler.String(str, length, true));
895  }
896  if (RAPIDJSON_UNLIKELY(!success))
898  }
899 
900  // Parse string to an output is
901  // This function handles the prefix/suffix double quotes, escaping, and optional encoding validation.
902  template<unsigned parseFlags, typename SEncoding, typename TEncoding, typename InputStream, typename OutputStream>
903  RAPIDJSON_FORCEINLINE void ParseStringToStream(InputStream& is, OutputStream& os) {
904 //!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN
905 #define Z16 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
906  static const char escape[256] = {
907  Z16, Z16, 0, 0,'\"', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,'/',
908  Z16, Z16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,'\\', 0, 0, 0,
909  0, 0,'\b', 0, 0, 0,'\f', 0, 0, 0, 0, 0, 0, 0,'\n', 0,
910  0, 0,'\r', 0,'\t', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
911  Z16, Z16, Z16, Z16, Z16, Z16, Z16, Z16
912  };
913 #undef Z16
914 //!@endcond
915 
916  for (;;) {
917  // Scan and copy string before "\\\"" or < 0x20. This is an optional optimzation.
918  if (!(parseFlags & kParseValidateEncodingFlag))
919  ScanCopyUnescapedString(is, os);
920 
921  Ch c = is.Peek();
922  if (RAPIDJSON_UNLIKELY(c == '\\')) { // Escape
923  size_t escapeOffset = is.Tell(); // For invalid escaping, report the inital '\\' as error offset
924  is.Take();
925  Ch e = is.Peek();
926  if ((sizeof(Ch) == 1 || unsigned(e) < 256) && RAPIDJSON_LIKELY(escape[static_cast<unsigned char>(e)])) {
927  is.Take();
928  os.Put(static_cast<typename TEncoding::Ch>(escape[static_cast<unsigned char>(e)]));
929  }
930  else if (RAPIDJSON_LIKELY(e == 'u')) { // Unicode
931  is.Take();
932  unsigned codepoint = ParseHex4(is, escapeOffset);
933  RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
934  if (RAPIDJSON_UNLIKELY(codepoint >= 0xD800 && codepoint <= 0xDBFF)) {
935  // Handle UTF-16 surrogate pair
936  if (RAPIDJSON_UNLIKELY(!Consume(is, '\\') || !Consume(is, 'u')))
938  unsigned codepoint2 = ParseHex4(is, escapeOffset);
939  RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
940  if (RAPIDJSON_UNLIKELY(codepoint2 < 0xDC00 || codepoint2 > 0xDFFF))
942  codepoint = (((codepoint - 0xD800) << 10) | (codepoint2 - 0xDC00)) + 0x10000;
943  }
944  TEncoding::Encode(os, codepoint);
945  }
946  else
948  }
949  else if (RAPIDJSON_UNLIKELY(c == '"')) { // Closing double quote
950  is.Take();
951  os.Put('\0'); // null-terminate the string
952  return;
953  }
954  else if (RAPIDJSON_UNLIKELY(static_cast<unsigned>(c) < 0x20)) { // RFC 4627: unescaped = %x20-21 / %x23-5B / %x5D-10FFFF
955  if (c == '\0')
957  else
959  }
960  else {
961  size_t offset = is.Tell();
962  if (RAPIDJSON_UNLIKELY((parseFlags & kParseValidateEncodingFlag ?
963  !Transcoder<SEncoding, TEncoding>::Validate(is, os) :
964  !Transcoder<SEncoding, TEncoding>::Transcode(is, os))))
966  }
967  }
968  }
969 
970  template<typename InputStream, typename OutputStream>
971  static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(InputStream&, OutputStream&) {
972  // Do nothing for generic version
973  }
974 
975 #if defined(RAPIDJSON_SSE2) || defined(RAPIDJSON_SSE42)
976  // StringStream -> StackStream<char>
977  static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(StringStream& is, StackStream<char>& os) {
978  const char* p = is.src_;
979 
980  // Scan one by one until alignment (unaligned load may cross page boundary and cause crash)
981  const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
982  while (p != nextAligned)
983  if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) {
984  is.src_ = p;
985  return;
986  }
987  else
988  os.Put(*p++);
989 
990  // The rest of string using SIMD
991  static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' };
992  static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' };
993  static const char space[16] = { 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F };
994  const __m128i dq = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&dquote[0]));
995  const __m128i bs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&bslash[0]));
996  const __m128i sp = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&space[0]));
997 
998  for (;; p += 16) {
999  const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
1000  const __m128i t1 = _mm_cmpeq_epi8(s, dq);
1001  const __m128i t2 = _mm_cmpeq_epi8(s, bs);
1002  const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x1F) == 0x1F
1003  const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3);
1004  unsigned short r = static_cast<unsigned short>(_mm_movemask_epi8(x));
1005  if (RAPIDJSON_UNLIKELY(r != 0)) { // some of characters is escaped
1006  SizeType length;
1007  #ifdef _MSC_VER // Find the index of first escaped
1008  unsigned long offset;
1009  _BitScanForward(&offset, r);
1010  length = offset;
1011  #else
1012  length = static_cast<SizeType>(__builtin_ffs(r) - 1);
1013  #endif
1014  if (length != 0) {
1015  char* q = reinterpret_cast<char*>(os.Push(length));
1016  for (size_t i = 0; i < length; i++)
1017  q[i] = p[i];
1018 
1019  p += length;
1020  }
1021  break;
1022  }
1023  _mm_storeu_si128(reinterpret_cast<__m128i *>(os.Push(16)), s);
1024  }
1025 
1026  is.src_ = p;
1027  }
1028 
1029  // InsituStringStream -> InsituStringStream
1030  static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(InsituStringStream& is, InsituStringStream& os) {
1031  RAPIDJSON_ASSERT(&is == &os);
1032  (void)os;
1033 
1034  if (is.src_ == is.dst_) {
1035  SkipUnescapedString(is);
1036  return;
1037  }
1038 
1039  char* p = is.src_;
1040  char *q = is.dst_;
1041 
1042  // Scan one by one until alignment (unaligned load may cross page boundary and cause crash)
1043  const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
1044  while (p != nextAligned)
1045  if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) {
1046  is.src_ = p;
1047  is.dst_ = q;
1048  return;
1049  }
1050  else
1051  *q++ = *p++;
1052 
1053  // The rest of string using SIMD
1054  static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' };
1055  static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' };
1056  static const char space[16] = { 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F };
1057  const __m128i dq = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&dquote[0]));
1058  const __m128i bs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&bslash[0]));
1059  const __m128i sp = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&space[0]));
1060 
1061  for (;; p += 16, q += 16) {
1062  const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
1063  const __m128i t1 = _mm_cmpeq_epi8(s, dq);
1064  const __m128i t2 = _mm_cmpeq_epi8(s, bs);
1065  const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x1F) == 0x1F
1066  const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3);
1067  unsigned short r = static_cast<unsigned short>(_mm_movemask_epi8(x));
1068  if (RAPIDJSON_UNLIKELY(r != 0)) { // some of characters is escaped
1069  size_t length;
1070 #ifdef _MSC_VER // Find the index of first escaped
1071  unsigned long offset;
1072  _BitScanForward(&offset, r);
1073  length = offset;
1074 #else
1075  length = static_cast<size_t>(__builtin_ffs(r) - 1);
1076 #endif
1077  for (const char* pend = p + length; p != pend; )
1078  *q++ = *p++;
1079  break;
1080  }
1081  _mm_storeu_si128(reinterpret_cast<__m128i *>(q), s);
1082  }
1083 
1084  is.src_ = p;
1085  is.dst_ = q;
1086  }
1087 
1088  // When read/write pointers are the same for insitu stream, just skip unescaped characters
1089  static RAPIDJSON_FORCEINLINE void SkipUnescapedString(InsituStringStream& is) {
1090  RAPIDJSON_ASSERT(is.src_ == is.dst_);
1091  char* p = is.src_;
1092 
1093  // Scan one by one until alignment (unaligned load may cross page boundary and cause crash)
1094  const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
1095  for (; p != nextAligned; p++)
1096  if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) {
1097  is.src_ = is.dst_ = p;
1098  return;
1099  }
1100 
1101  // The rest of string using SIMD
1102  static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' };
1103  static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' };
1104  static const char space[16] = { 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F };
1105  const __m128i dq = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&dquote[0]));
1106  const __m128i bs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&bslash[0]));
1107  const __m128i sp = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&space[0]));
1108 
1109  for (;; p += 16) {
1110  const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
1111  const __m128i t1 = _mm_cmpeq_epi8(s, dq);
1112  const __m128i t2 = _mm_cmpeq_epi8(s, bs);
1113  const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x1F) == 0x1F
1114  const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3);
1115  unsigned short r = static_cast<unsigned short>(_mm_movemask_epi8(x));
1116  if (RAPIDJSON_UNLIKELY(r != 0)) { // some of characters is escaped
1117  size_t length;
1118 #ifdef _MSC_VER // Find the index of first escaped
1119  unsigned long offset;
1120  _BitScanForward(&offset, r);
1121  length = offset;
1122 #else
1123  length = static_cast<size_t>(__builtin_ffs(r) - 1);
1124 #endif
1125  p += length;
1126  break;
1127  }
1128  }
1129 
1130  is.src_ = is.dst_ = p;
1131  }
1132 #endif
1133 
1134  template<typename InputStream, bool backup, bool pushOnTake>
1135  class NumberStream;
1136 
1137  template<typename InputStream>
1138  class NumberStream<InputStream, false, false> {
1139  public:
1140  typedef typename InputStream::Ch Ch;
1141 
1142  NumberStream(GenericReader& reader, InputStream& s) : is(s) { (void)reader; }
1143 
1144  RAPIDJSON_FORCEINLINE Ch Peek() const { return is.Peek(); }
1145  RAPIDJSON_FORCEINLINE Ch TakePush() { return is.Take(); }
1146  RAPIDJSON_FORCEINLINE Ch Take() { return is.Take(); }
1147  RAPIDJSON_FORCEINLINE void Push(char) {}
1148 
1149  size_t Tell() { return is.Tell(); }
1150  size_t Length() { return 0; }
1151  const char* Pop() { return 0; }
1152 
1153  protected:
1154  NumberStream& operator=(const NumberStream&);
1155 
1156  InputStream& is;
1157  };
1158 
1159  template<typename InputStream>
1160  class NumberStream<InputStream, true, false> : public NumberStream<InputStream, false, false> {
1161  typedef NumberStream<InputStream, false, false> Base;
1162  public:
1163  NumberStream(GenericReader& reader, InputStream& is) : Base(reader, is), stackStream(reader.stack_) {}
1164 
1165  RAPIDJSON_FORCEINLINE Ch TakePush() {
1166  stackStream.Put(static_cast<char>(Base::is.Peek()));
1167  return Base::is.Take();
1168  }
1169 
1170  RAPIDJSON_FORCEINLINE void Push(char c) {
1171  stackStream.Put(c);
1172  }
1173 
1174  size_t Length() { return stackStream.Length(); }
1175 
1176  const char* Pop() {
1177  stackStream.Put('\0');
1178  return stackStream.Pop();
1179  }
1180 
1181  private:
1182  StackStream<char> stackStream;
1183  };
1184 
1185  template<typename InputStream>
1186  class NumberStream<InputStream, true, true> : public NumberStream<InputStream, true, false> {
1187  typedef NumberStream<InputStream, true, false> Base;
1188  public:
1189  NumberStream(GenericReader& reader, InputStream& is) : Base(reader, is) {}
1190 
1191  RAPIDJSON_FORCEINLINE Ch Take() { return Base::TakePush(); }
1192  };
1193 
1194  template<unsigned parseFlags, typename InputStream, typename Handler>
1195  void ParseNumber(InputStream& is, Handler& handler) {
1196  internal::StreamLocalCopy<InputStream> copy(is);
1197  NumberStream<InputStream,
1198  ((parseFlags & kParseNumbersAsStringsFlag) != 0) ?
1199  ((parseFlags & kParseInsituFlag) == 0) :
1200  ((parseFlags & kParseFullPrecisionFlag) != 0),
1201  (parseFlags & kParseNumbersAsStringsFlag) != 0 &&
1202  (parseFlags & kParseInsituFlag) == 0> s(*this, copy.s);
1203 
1204  size_t startOffset = s.Tell();
1205  double d = 0.0;
1206  bool useNanOrInf = false;
1207 
1208  // Parse minus
1209  bool minus = Consume(s, '-');
1210 
1211  // Parse int: zero / ( digit1-9 *DIGIT )
1212  unsigned i = 0;
1213  uint64_t i64 = 0;
1214  bool use64bit = false;
1215  int significandDigit = 0;
1216  if (RAPIDJSON_UNLIKELY(s.Peek() == '0')) {
1217  i = 0;
1218  s.TakePush();
1219  }
1220  else if (RAPIDJSON_LIKELY(s.Peek() >= '1' && s.Peek() <= '9')) {
1221  i = static_cast<unsigned>(s.TakePush() - '0');
1222 
1223  if (minus)
1224  while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
1225  if (RAPIDJSON_UNLIKELY(i >= 214748364)) { // 2^31 = 2147483648
1226  if (RAPIDJSON_LIKELY(i != 214748364 || s.Peek() > '8')) {
1227  i64 = i;
1228  use64bit = true;
1229  break;
1230  }
1231  }
1232  i = i * 10 + static_cast<unsigned>(s.TakePush() - '0');
1233  significandDigit++;
1234  }
1235  else
1236  while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
1237  if (RAPIDJSON_UNLIKELY(i >= 429496729)) { // 2^32 - 1 = 4294967295
1238  if (RAPIDJSON_LIKELY(i != 429496729 || s.Peek() > '5')) {
1239  i64 = i;
1240  use64bit = true;
1241  break;
1242  }
1243  }
1244  i = i * 10 + static_cast<unsigned>(s.TakePush() - '0');
1245  significandDigit++;
1246  }
1247  }
1248  // Parse NaN or Infinity here
1249  else if ((parseFlags & kParseNanAndInfFlag) && RAPIDJSON_LIKELY((s.Peek() == 'I' || s.Peek() == 'N'))) {
1250  if (Consume(s, 'N')) {
1251  if (Consume(s, 'a') && Consume(s, 'N')) {
1252  d = std::numeric_limits<double>::quiet_NaN();
1253  useNanOrInf = true;
1254  }
1255  }
1256  else if (RAPIDJSON_LIKELY(Consume(s, 'I'))) {
1257  if (Consume(s, 'n') && Consume(s, 'f')) {
1258  d = (minus ? -std::numeric_limits<double>::infinity() : std::numeric_limits<double>::infinity());
1259  useNanOrInf = true;
1260 
1261  if (RAPIDJSON_UNLIKELY(s.Peek() == 'i' && !(Consume(s, 'i') && Consume(s, 'n')
1262  && Consume(s, 'i') && Consume(s, 't') && Consume(s, 'y')))) {
1264  }
1265  }
1266  }
1267 
1268  if (RAPIDJSON_UNLIKELY(!useNanOrInf)) {
1270  }
1271  }
1272  else
1274 
1275  // Parse 64bit int
1276  bool useDouble = false;
1277  if (use64bit) {
1278  if (minus)
1279  while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
1280  if (RAPIDJSON_UNLIKELY(i64 >= RAPIDJSON_UINT64_C2(0x0CCCCCCC, 0xCCCCCCCC))) // 2^63 = 9223372036854775808
1281  if (RAPIDJSON_LIKELY(i64 != RAPIDJSON_UINT64_C2(0x0CCCCCCC, 0xCCCCCCCC) || s.Peek() > '8')) {
1282  d = static_cast<double>(i64);
1283  useDouble = true;
1284  break;
1285  }
1286  i64 = i64 * 10 + static_cast<unsigned>(s.TakePush() - '0');
1287  significandDigit++;
1288  }
1289  else
1290  while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
1291  if (RAPIDJSON_UNLIKELY(i64 >= RAPIDJSON_UINT64_C2(0x19999999, 0x99999999))) // 2^64 - 1 = 18446744073709551615
1292  if (RAPIDJSON_LIKELY(i64 != RAPIDJSON_UINT64_C2(0x19999999, 0x99999999) || s.Peek() > '5')) {
1293  d = static_cast<double>(i64);
1294  useDouble = true;
1295  break;
1296  }
1297  i64 = i64 * 10 + static_cast<unsigned>(s.TakePush() - '0');
1298  significandDigit++;
1299  }
1300  }
1301 
1302  // Force double for big integer
1303  if (useDouble) {
1304  while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
1305  if (RAPIDJSON_UNLIKELY(d >= 1.7976931348623157e307)) // DBL_MAX / 10.0
1307  d = d * 10 + (s.TakePush() - '0');
1308  }
1309  }
1310 
1311  // Parse frac = decimal-point 1*DIGIT
1312  int expFrac = 0;
1313  size_t decimalPosition;
1314  if (Consume(s, '.')) {
1315  decimalPosition = s.Length();
1316 
1317  if (RAPIDJSON_UNLIKELY(!(s.Peek() >= '0' && s.Peek() <= '9')))
1319 
1320  if (!useDouble) {
1321 #if RAPIDJSON_64BIT
1322  // Use i64 to store significand in 64-bit architecture
1323  if (!use64bit)
1324  i64 = i;
1325 
1326  while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
1327  if (i64 > RAPIDJSON_UINT64_C2(0x1FFFFF, 0xFFFFFFFF)) // 2^53 - 1 for fast path
1328  break;
1329  else {
1330  i64 = i64 * 10 + static_cast<unsigned>(s.TakePush() - '0');
1331  --expFrac;
1332  if (i64 != 0)
1333  significandDigit++;
1334  }
1335  }
1336 
1337  d = static_cast<double>(i64);
1338 #else
1339  // Use double to store significand in 32-bit architecture
1340  d = static_cast<double>(use64bit ? i64 : i);
1341 #endif
1342  useDouble = true;
1343  }
1344 
1345  while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
1346  if (significandDigit < 17) {
1347  d = d * 10.0 + (s.TakePush() - '0');
1348  --expFrac;
1349  if (RAPIDJSON_LIKELY(d > 0.0))
1350  significandDigit++;
1351  }
1352  else
1353  s.TakePush();
1354  }
1355  }
1356  else
1357  decimalPosition = s.Length(); // decimal position at the end of integer.
1358 
1359  // Parse exp = e [ minus / plus ] 1*DIGIT
1360  int exp = 0;
1361  if (Consume(s, 'e') || Consume(s, 'E')) {
1362  if (!useDouble) {
1363  d = static_cast<double>(use64bit ? i64 : i);
1364  useDouble = true;
1365  }
1366 
1367  bool expMinus = false;
1368  if (Consume(s, '+'))
1369  ;
1370  else if (Consume(s, '-'))
1371  expMinus = true;
1372 
1373  if (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
1374  exp = static_cast<int>(s.Take() - '0');
1375  if (expMinus) {
1376  while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
1377  exp = exp * 10 + static_cast<int>(s.Take() - '0');
1378  if (exp >= 214748364) { // Issue #313: prevent overflow exponent
1379  while (RAPIDJSON_UNLIKELY(s.Peek() >= '0' && s.Peek() <= '9')) // Consume the rest of exponent
1380  s.Take();
1381  }
1382  }
1383  }
1384  else { // positive exp
1385  int maxExp = 308 - expFrac;
1386  while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
1387  exp = exp * 10 + static_cast<int>(s.Take() - '0');
1388  if (RAPIDJSON_UNLIKELY(exp > maxExp))
1390  }
1391  }
1392  }
1393  else
1395 
1396  if (expMinus)
1397  exp = -exp;
1398  }
1399 
1400  // Finish parsing, call event according to the type of number.
1401  bool cont = true;
1402 
1403  if (parseFlags & kParseNumbersAsStringsFlag) {
1404  if (parseFlags & kParseInsituFlag) {
1405  s.Pop(); // Pop stack no matter if it will be used or not.
1406  typename InputStream::Ch* head = is.PutBegin();
1407  const size_t length = s.Tell() - startOffset;
1408  RAPIDJSON_ASSERT(length <= 0xFFFFFFFF);
1409  // unable to insert the \0 character here, it will erase the comma after this number
1410  const typename TargetEncoding::Ch* const str = reinterpret_cast<typename TargetEncoding::Ch*>(head);
1411  cont = handler.RawNumber(str, SizeType(length), false);
1412  }
1413  else {
1414  SizeType numCharsToCopy = static_cast<SizeType>(s.Length());
1415  StringStream srcStream(s.Pop());
1416  StackStream<typename TargetEncoding::Ch> dstStream(stack_);
1417  while (numCharsToCopy--) {
1418  Transcoder<UTF8<>, TargetEncoding>::Transcode(srcStream, dstStream);
1419  }
1420  dstStream.Put('\0');
1421  const typename TargetEncoding::Ch* str = dstStream.Pop();
1422  const SizeType length = static_cast<SizeType>(dstStream.Length()) - 1;
1423  cont = handler.RawNumber(str, SizeType(length), true);
1424  }
1425  }
1426  else {
1427  size_t length = s.Length();
1428  const char* decimal = s.Pop(); // Pop stack no matter if it will be used or not.
1429 
1430  if (useDouble) {
1431  int p = exp + expFrac;
1432  if (parseFlags & kParseFullPrecisionFlag)
1433  d = internal::StrtodFullPrecision(d, p, decimal, length, decimalPosition, exp);
1434  else
1435  d = internal::StrtodNormalPrecision(d, p);
1436 
1437  cont = handler.Double(minus ? -d : d);
1438  }
1439  else if (useNanOrInf) {
1440  cont = handler.Double(d);
1441  }
1442  else {
1443  if (use64bit) {
1444  if (minus)
1445  cont = handler.Int64(static_cast<int64_t>(~i64 + 1));
1446  else
1447  cont = handler.Uint64(i64);
1448  }
1449  else {
1450  if (minus)
1451  cont = handler.Int(static_cast<int32_t>(~i + 1));
1452  else
1453  cont = handler.Uint(i);
1454  }
1455  }
1456  }
1457  if (RAPIDJSON_UNLIKELY(!cont))
1459  }
1460 
1461  // Parse any JSON value
1462  template<unsigned parseFlags, typename InputStream, typename Handler>
1463  void ParseValue(InputStream& is, Handler& handler) {
1464  switch (is.Peek()) {
1465  case 'n': ParseNull <parseFlags>(is, handler); break;
1466  case 't': ParseTrue <parseFlags>(is, handler); break;
1467  case 'f': ParseFalse <parseFlags>(is, handler); break;
1468  case '"': ParseString<parseFlags>(is, handler); break;
1469  case '{': ParseObject<parseFlags>(is, handler); break;
1470  case '[': ParseArray <parseFlags>(is, handler); break;
1471  default :
1472  ParseNumber<parseFlags>(is, handler);
1473  break;
1474 
1475  }
1476  }
1477 
1478  // Iterative Parsing
1479 
// States of the iterative parser.
// The numeric values are used as row indices of the transition table in Predict(), so the
// order of the enumerators must not change.
enum IterativeParsingState {
    // Sink states (kept at the top)
    IterativeParsingFinishState            = 0,
    IterativeParsingErrorState             = 1,
    IterativeParsingStartState             = 2,

    // Object states
    IterativeParsingObjectInitialState     = 3,
    IterativeParsingMemberKeyState         = 4,
    IterativeParsingMemberValueState       = 5,
    IterativeParsingObjectFinishState      = 6,

    // Array states
    IterativeParsingArrayInitialState      = 7,
    IterativeParsingElementState           = 8,
    IterativeParsingArrayFinishState       = 9,

    // Single value state
    IterativeParsingValueState             = 10,

    // Delimiter states (kept at the bottom)
    IterativeParsingElementDelimiterState  = 11,
    IterativeParsingMemberDelimiterState   = 12,
    IterativeParsingKeyValueDelimiterState = 13,

    // Number of states
    cIterativeParsingStateCount            = 14
};
1507 
// Lookahead tokens of the iterative parser.
// The numeric values are used as column indices of the transition table in Predict() and are
// stored in the lookup table of Tokenize(), so the order of the enumerators must not change.
enum Token {
    LeftBracketToken       = 0,     // [
    RightBracketToken      = 1,     // ]

    LeftCurlyBracketToken  = 2,     // {
    RightCurlyBracketToken = 3,     // }

    CommaToken             = 4,     // ,
    ColonToken             = 5,     // :

    StringToken            = 6,     // "
    FalseToken             = 7,     // f
    TrueToken              = 8,     // t
    NullToken              = 9,     // n
    NumberToken            = 10,    // any other character

    // Number of tokens
    kTokenCount            = 11
};
1527 
1528  RAPIDJSON_FORCEINLINE Token Tokenize(Ch c) {
1529 
1530 //!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN
1531 #define N NumberToken
1532 #define N16 N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N
1533  // Maps from ASCII to Token
1534  static const unsigned char tokenMap[256] = {
1535  N16, // 00~0F
1536  N16, // 10~1F
1537  N, N, StringToken, N, N, N, N, N, N, N, N, N, CommaToken, N, N, N, // 20~2F
1538  N, N, N, N, N, N, N, N, N, N, ColonToken, N, N, N, N, N, // 30~3F
1539  N16, // 40~4F
1540  N, N, N, N, N, N, N, N, N, N, N, LeftBracketToken, N, RightBracketToken, N, N, // 50~5F
1541  N, N, N, N, N, N, FalseToken, N, N, N, N, N, N, N, NullToken, N, // 60~6F
1542  N, N, N, N, TrueToken, N, N, N, N, N, N, LeftCurlyBracketToken, N, RightCurlyBracketToken, N, N, // 70~7F
1543  N16, N16, N16, N16, N16, N16, N16, N16 // 80~FF
1544  };
1545 #undef N
1546 #undef N16
1547 //!@endcond
1548 
1549  if (sizeof(Ch) == 1 || static_cast<unsigned>(c) < 256)
1550  return static_cast<Token>(tokenMap[static_cast<unsigned char>(c)]);
1551  else
1552  return NumberToken;
1553  }
1554 
1555  RAPIDJSON_FORCEINLINE IterativeParsingState Predict(IterativeParsingState state, Token token) {
1556  // current state x one lookahead token -> new state
1557  static const char G[cIterativeParsingStateCount][kTokenCount] = {
1558  // Finish(sink state)
1559  {
1560  IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1561  IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1562  IterativeParsingErrorState
1563  },
1564  // Error(sink state)
1565  {
1566  IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1567  IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1568  IterativeParsingErrorState
1569  },
1570  // Start
1571  {
1572  IterativeParsingArrayInitialState, // Left bracket
1573  IterativeParsingErrorState, // Right bracket
1574  IterativeParsingObjectInitialState, // Left curly bracket
1575  IterativeParsingErrorState, // Right curly bracket
1576  IterativeParsingErrorState, // Comma
1577  IterativeParsingErrorState, // Colon
1578  IterativeParsingValueState, // String
1579  IterativeParsingValueState, // False
1580  IterativeParsingValueState, // True
1581  IterativeParsingValueState, // Null
1582  IterativeParsingValueState // Number
1583  },
1584  // ObjectInitial
1585  {
1586  IterativeParsingErrorState, // Left bracket
1587  IterativeParsingErrorState, // Right bracket
1588  IterativeParsingErrorState, // Left curly bracket
1589  IterativeParsingObjectFinishState, // Right curly bracket
1590  IterativeParsingErrorState, // Comma
1591  IterativeParsingErrorState, // Colon
1592  IterativeParsingMemberKeyState, // String
1593  IterativeParsingErrorState, // False
1594  IterativeParsingErrorState, // True
1595  IterativeParsingErrorState, // Null
1596  IterativeParsingErrorState // Number
1597  },
1598  // MemberKey
1599  {
1600  IterativeParsingErrorState, // Left bracket
1601  IterativeParsingErrorState, // Right bracket
1602  IterativeParsingErrorState, // Left curly bracket
1603  IterativeParsingErrorState, // Right curly bracket
1604  IterativeParsingErrorState, // Comma
1605  IterativeParsingKeyValueDelimiterState, // Colon
1606  IterativeParsingErrorState, // String
1607  IterativeParsingErrorState, // False
1608  IterativeParsingErrorState, // True
1609  IterativeParsingErrorState, // Null
1610  IterativeParsingErrorState // Number
1611  },
1612  // MemberValue
1613  {
1614  IterativeParsingErrorState, // Left bracket
1615  IterativeParsingErrorState, // Right bracket
1616  IterativeParsingErrorState, // Left curly bracket
1617  IterativeParsingObjectFinishState, // Right curly bracket
1618  IterativeParsingMemberDelimiterState, // Comma
1619  IterativeParsingErrorState, // Colon
1620  IterativeParsingErrorState, // String
1621  IterativeParsingErrorState, // False
1622  IterativeParsingErrorState, // True
1623  IterativeParsingErrorState, // Null
1624  IterativeParsingErrorState // Number
1625  },
1626  // ObjectFinish(sink state)
1627  {
1628  IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1629  IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1630  IterativeParsingErrorState
1631  },
1632  // ArrayInitial
1633  {
1634  IterativeParsingArrayInitialState, // Left bracket(push Element state)
1635  IterativeParsingArrayFinishState, // Right bracket
1636  IterativeParsingObjectInitialState, // Left curly bracket(push Element state)
1637  IterativeParsingErrorState, // Right curly bracket
1638  IterativeParsingErrorState, // Comma
1639  IterativeParsingErrorState, // Colon
1640  IterativeParsingElementState, // String
1641  IterativeParsingElementState, // False
1642  IterativeParsingElementState, // True
1643  IterativeParsingElementState, // Null
1644  IterativeParsingElementState // Number
1645  },
1646  // Element
1647  {
1648  IterativeParsingErrorState, // Left bracket
1649  IterativeParsingArrayFinishState, // Right bracket
1650  IterativeParsingErrorState, // Left curly bracket
1651  IterativeParsingErrorState, // Right curly bracket
1652  IterativeParsingElementDelimiterState, // Comma
1653  IterativeParsingErrorState, // Colon
1654  IterativeParsingErrorState, // String
1655  IterativeParsingErrorState, // False
1656  IterativeParsingErrorState, // True
1657  IterativeParsingErrorState, // Null
1658  IterativeParsingErrorState // Number
1659  },
1660  // ArrayFinish(sink state)
1661  {
1662  IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1663  IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1664  IterativeParsingErrorState
1665  },
1666  // Single Value (sink state)
1667  {
1668  IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1669  IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1670  IterativeParsingErrorState
1671  },
1672  // ElementDelimiter
1673  {
1674  IterativeParsingArrayInitialState, // Left bracket(push Element state)
1675  IterativeParsingArrayFinishState, // Right bracket
1676  IterativeParsingObjectInitialState, // Left curly bracket(push Element state)
1677  IterativeParsingErrorState, // Right curly bracket
1678  IterativeParsingErrorState, // Comma
1679  IterativeParsingErrorState, // Colon
1680  IterativeParsingElementState, // String
1681  IterativeParsingElementState, // False
1682  IterativeParsingElementState, // True
1683  IterativeParsingElementState, // Null
1684  IterativeParsingElementState // Number
1685  },
1686  // MemberDelimiter
1687  {
1688  IterativeParsingErrorState, // Left bracket
1689  IterativeParsingErrorState, // Right bracket
1690  IterativeParsingErrorState, // Left curly bracket
1691  IterativeParsingObjectFinishState, // Right curly bracket
1692  IterativeParsingErrorState, // Comma
1693  IterativeParsingErrorState, // Colon
1694  IterativeParsingMemberKeyState, // String
1695  IterativeParsingErrorState, // False
1696  IterativeParsingErrorState, // True
1697  IterativeParsingErrorState, // Null
1698  IterativeParsingErrorState // Number
1699  },
1700  // KeyValueDelimiter
1701  {
1702  IterativeParsingArrayInitialState, // Left bracket(push MemberValue state)
1703  IterativeParsingErrorState, // Right bracket
1704  IterativeParsingObjectInitialState, // Left curly bracket(push MemberValue state)
1705  IterativeParsingErrorState, // Right curly bracket
1706  IterativeParsingErrorState, // Comma
1707  IterativeParsingErrorState, // Colon
1708  IterativeParsingMemberValueState, // String
1709  IterativeParsingMemberValueState, // False
1710  IterativeParsingMemberValueState, // True
1711  IterativeParsingMemberValueState, // Null
1712  IterativeParsingMemberValueState // Number
1713  },
1714  }; // End of G
1715 
1716  return static_cast<IterativeParsingState>(G[state][token]);
1717  }
1718 
1719  // Make an advance in the token stream and state based on the candidate destination state which was returned by Transit().
1720  // May return a new state on state pop.
1721  template <unsigned parseFlags, typename InputStream, typename Handler>
1722  RAPIDJSON_FORCEINLINE IterativeParsingState Transit(IterativeParsingState src, Token token, IterativeParsingState dst, InputStream& is, Handler& handler) {
1723  (void)token;
1724 
1725  switch (dst) {
1726  case IterativeParsingErrorState:
1727  return dst;
1728 
1729  case IterativeParsingObjectInitialState:
1730  case IterativeParsingArrayInitialState:
1731  {
1732  // Push the state(Element or MemeberValue) if we are nested in another array or value of member.
1733  // In this way we can get the correct state on ObjectFinish or ArrayFinish by frame pop.
1734  IterativeParsingState n = src;
1735  if (src == IterativeParsingArrayInitialState || src == IterativeParsingElementDelimiterState)
1736  n = IterativeParsingElementState;
1737  else if (src == IterativeParsingKeyValueDelimiterState)
1738  n = IterativeParsingMemberValueState;
1739  // Push current state.
1740  *stack_.template Push<SizeType>(1) = n;
1741  // Initialize and push the member/element count.
1742  *stack_.template Push<SizeType>(1) = 0;
1743  // Call handler
1744  bool hr = (dst == IterativeParsingObjectInitialState) ? handler.StartObject() : handler.StartArray();
1745  // On handler short circuits the parsing.
1746  if (!hr) {
1748  return IterativeParsingErrorState;
1749  }
1750  else {
1751  is.Take();
1752  return dst;
1753  }
1754  }
1755 
1756  case IterativeParsingMemberKeyState:
1757  ParseString<parseFlags>(is, handler, true);
1758  if (HasParseError())
1759  return IterativeParsingErrorState;
1760  else
1761  return dst;
1762 
1763  case IterativeParsingKeyValueDelimiterState:
1764  RAPIDJSON_ASSERT(token == ColonToken);
1765  is.Take();
1766  return dst;
1767 
1768  case IterativeParsingMemberValueState:
1769  // Must be non-compound value. Or it would be ObjectInitial or ArrayInitial state.
1770  ParseValue<parseFlags>(is, handler);
1771  if (HasParseError()) {
1772  return IterativeParsingErrorState;
1773  }
1774  return dst;
1775 
1776  case IterativeParsingElementState:
1777  // Must be non-compound value. Or it would be ObjectInitial or ArrayInitial state.
1778  ParseValue<parseFlags>(is, handler);
1779  if (HasParseError()) {
1780  return IterativeParsingErrorState;
1781  }
1782  return dst;
1783 
1784  case IterativeParsingMemberDelimiterState:
1785  case IterativeParsingElementDelimiterState:
1786  is.Take();
1787  // Update member/element count.
1788  *stack_.template Top<SizeType>() = *stack_.template Top<SizeType>() + 1;
1789  return dst;
1790 
1791  case IterativeParsingObjectFinishState:
1792  {
1793  // Transit from delimiter is only allowed when trailing commas are enabled
1794  if (!(parseFlags & kParseTrailingCommasFlag) && src == IterativeParsingMemberDelimiterState) {
1796  return IterativeParsingErrorState;
1797  }
1798  // Get member count.
1799  SizeType c = *stack_.template Pop<SizeType>(1);
1800  // If the object is not empty, count the last member.
1801  if (src == IterativeParsingMemberValueState)
1802  ++c;
1803  // Restore the state.
1804  IterativeParsingState n = static_cast<IterativeParsingState>(*stack_.template Pop<SizeType>(1));
1805  // Transit to Finish state if this is the topmost scope.
1806  if (n == IterativeParsingStartState)
1807  n = IterativeParsingFinishState;
1808  // Call handler
1809  bool hr = handler.EndObject(c);
1810  // On handler short circuits the parsing.
1811  if (!hr) {
1813  return IterativeParsingErrorState;
1814  }
1815  else {
1816  is.Take();
1817  return n;
1818  }
1819  }
1820 
1821  case IterativeParsingArrayFinishState:
1822  {
1823  // Transit from delimiter is only allowed when trailing commas are enabled
1824  if (!(parseFlags & kParseTrailingCommasFlag) && src == IterativeParsingElementDelimiterState) {
1826  return IterativeParsingErrorState;
1827  }
1828  // Get element count.
1829  SizeType c = *stack_.template Pop<SizeType>(1);
1830  // If the array is not empty, count the last element.
1831  if (src == IterativeParsingElementState)
1832  ++c;
1833  // Restore the state.
1834  IterativeParsingState n = static_cast<IterativeParsingState>(*stack_.template Pop<SizeType>(1));
1835  // Transit to Finish state if this is the topmost scope.
1836  if (n == IterativeParsingStartState)
1837  n = IterativeParsingFinishState;
1838  // Call handler
1839  bool hr = handler.EndArray(c);
1840  // On handler short circuits the parsing.
1841  if (!hr) {
1843  return IterativeParsingErrorState;
1844  }
1845  else {
1846  is.Take();
1847  return n;
1848  }
1849  }
1850 
1851  default:
1852  // This branch is for IterativeParsingValueState actually.
1853  // Use `default:` rather than
1854  // `case IterativeParsingValueState:` is for code coverage.
1855 
1856  // The IterativeParsingStartState is not enumerated in this switch-case.
1857  // It is impossible for that case. And it can be caught by following assertion.
1858 
1859  // The IterativeParsingFinishState is not enumerated in this switch-case either.
1860  // It is a "derivative" state which cannot triggered from Predict() directly.
1861  // Therefore it cannot happen here. And it can be caught by following assertion.
1862  RAPIDJSON_ASSERT(dst == IterativeParsingValueState);
1863 
1864  // Must be non-compound value. Or it would be ObjectInitial or ArrayInitial state.
1865  ParseValue<parseFlags>(is, handler);
1866  if (HasParseError()) {
1867  return IterativeParsingErrorState;
1868  }
1869  return IterativeParsingFinishState;
1870  }
1871  }
1872 
1873  template <typename InputStream>
1874  void HandleError(IterativeParsingState src, InputStream& is) {
1875  if (HasParseError()) {
1876  // Error flag has been set.
1877  return;
1878  }
1879 
1880  switch (src) {
1881  case IterativeParsingStartState: RAPIDJSON_PARSE_ERROR(kParseErrorDocumentEmpty, is.Tell()); return;
1882  case IterativeParsingFinishState: RAPIDJSON_PARSE_ERROR(kParseErrorDocumentRootNotSingular, is.Tell()); return;
1883  case IterativeParsingObjectInitialState:
1884  case IterativeParsingMemberDelimiterState: RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissName, is.Tell()); return;
1885  case IterativeParsingMemberKeyState: RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissColon, is.Tell()); return;
1886  case IterativeParsingMemberValueState: RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissCommaOrCurlyBracket, is.Tell()); return;
1887  case IterativeParsingKeyValueDelimiterState:
1888  case IterativeParsingArrayInitialState:
1889  case IterativeParsingElementDelimiterState: RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell()); return;
1890  default: RAPIDJSON_ASSERT(src == IterativeParsingElementState); RAPIDJSON_PARSE_ERROR(kParseErrorArrayMissCommaOrSquareBracket, is.Tell()); return;
1891  }
1892  }
1893 
1894  RAPIDJSON_FORCEINLINE bool IsIterativeParsingDelimiterState(IterativeParsingState s) {
1895  return s >= IterativeParsingElementDelimiterState;
1896  }
1897 
1898  RAPIDJSON_FORCEINLINE bool IsIterativeParsingCompleteState(IterativeParsingState s) {
1899  return s <= IterativeParsingErrorState;
1900  }
1901 
1902  template <unsigned parseFlags, typename InputStream, typename Handler>
1903  ParseResult IterativeParse(InputStream& is, Handler& handler) {
1904  parseResult_.Clear();
1905  ClearStackOnExit scope(*this);
1906  IterativeParsingState state = IterativeParsingStartState;
1907 
1908  SkipWhitespaceAndComments<parseFlags>(is);
1909  RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
1910  while (is.Peek() != '\0') {
1911  Token t = Tokenize(is.Peek());
1912  IterativeParsingState n = Predict(state, t);
1913  IterativeParsingState d = Transit<parseFlags>(state, t, n, is, handler);
1914 
1915  if (d == IterativeParsingErrorState) {
1916  HandleError(state, is);
1917  break;
1918  }
1919 
1920  state = d;
1921 
1922  // Do not further consume streams if a root JSON has been parsed.
1923  if ((parseFlags & kParseStopWhenDoneFlag) && state == IterativeParsingFinishState)
1924  break;
1925 
1926  SkipWhitespaceAndComments<parseFlags>(is);
1927  RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
1928  }
1929 
1930  // Handle the end of file.
1931  if (state != IterativeParsingFinishState)
1932  HandleError(state, is);
1933 
1934  return parseResult_;
1935  }
1936 
1937  static const size_t kDefaultStackCapacity = 256; //!< Default stack capacity in bytes for storing a single decoded string.
1938  internal::Stack<StackAllocator> stack_; //!< A stack for storing decoded string temporarily during non-destructive parsing; Transit() also pushes the saved states and member/element counts onto it during iterative parsing.
1939  ParseResult parseResult_; //!< Result of the most recent parse; cleared at the start of IterativeParse() and returned by it.
1940  IterativeParsingState state_; //!< NOTE(review): not used in this part of the file; presumably the current state for token-by-token parsing (IterativeParseInit()/IterativeParseNext()) - confirm.
1941 }; // class GenericReader
1942 
1943 //! Reader with UTF8 encoding and default allocator.
1944 typedef GenericReader<UTF8<>, UTF8<> > Reader;
1945 
1946 RAPIDJSON_NAMESPACE_END
1947 
1948 #ifdef __clang__
1949 RAPIDJSON_DIAG_POP
1950 #endif
1951 
1952 
1953 #ifdef __GNUC__
1954 RAPIDJSON_DIAG_POP
1955 #endif
1956 
1957 #ifdef _MSC_VER
1958 RAPIDJSON_DIAG_POP
1959 #endif
1960 
1961 #endif // RAPIDJSON_READER_H_
Allow parsing NaN, Inf, Infinity, -Inf and -Infinity as doubles.
Definition: reader.h:155
Read-only string stream.
Definition: fwd.h:47
Iterative (constant complexity in terms of function call stack size) parsing.
Definition: reader.h:149
Concept for receiving events from GenericReader upon parsing. The functions return true if no error o...
Parse all numbers (ints/doubles) as strings.
Definition: reader.h:153
ParseResult Parse(InputStream &is, Handler &handler)
Parse JSON text.
Definition: reader.h:471
Validate encoding of JSON strings.
Definition: reader.h:148
Invalid value.
Definition: error.h:70
The surrogate pair in string is invalid.
Definition: error.h:79
#define RAPIDJSON_PARSE_ERROR(parseErrorCode, offset)
(Internal) macro to indicate and handle a parse error.
Definition: reader.h:118
#define RAPIDJSON_UINT64_C2(high32, low32)
Construct a 64-bit literal from a pair of 32-bit integers.
Definition: rapidjson.h:289
Missing a colon after a name of object member.
Definition: error.h:73
Incorrect hex digit after \u escape in string.
Definition: error.h:78
bool HasParseError() const
Whether a parse error has occurred in the last parsing.
Definition: reader.h:594
Missing fraction part in number.
Definition: error.h:85
unsigned SizeType
Size type (for string lengths, array sizes, etc.)
Definition: rapidjson.h:380
bool RawNumber(const Ch *str, SizeType len, bool copy)
enabled via kParseNumbersAsStringsFlag, string is not null-terminated (use length) ...
Definition: reader.h:210
ParseErrorCode
Error code of parsing.
Definition: error.h:64
GenericReader(StackAllocator *stackAllocator=0, size_t stackCapacity=kDefaultStackCapacity)
Constructor.
Definition: reader.h:460
size_t GetErrorOffset() const
Get the position of last parsing error in input, 0 otherwise.
Definition: reader.h:600
RAPIDJSON_FORCEINLINE bool IterativeParseComplete()
Check if token-by-token parsing JSON text is complete.
Definition: reader.h:589
void Clear()
Reset error code.
Definition: error.h:128
Missing a comma or ']' after an array element.
Definition: error.h:76
GenericReader< UTF8< char >, UTF8< char >, CrtAllocator > Reader
Reader with UTF8 encoding and default allocator.
Definition: fwd.h:88
SourceEncoding::Ch Ch
SourceEncoding character type.
Definition: reader.h:454
The document root must not be followed by other values.
Definition: error.h:68
const Ch * src_
Current read position.
Definition: stream.h:124
#define RAPIDJSON_PARSE_ERROR_NORETURN(parseErrorCode, offset)
Macro to indicate a parse error.
Definition: reader.h:99
Allow trailing commas at the end of objects and arrays.
Definition: reader.h:154
No flags are set.
Definition: reader.h:146
#define RAPIDJSON_LIKELY(x)
Compiler branching hint for expression with high probability to be true.
Definition: rapidjson.h:455
Unspecific syntax error.
Definition: error.h:89
Parse number in full precision (but slower).
Definition: reader.h:151
Missing a closing quotation mark in string.
Definition: error.h:81
Invalid escape character in string.
Definition: error.h:80
Result of parsing (wraps ParseErrorCode)
Definition: error.h:106
Missing a name for object member.
Definition: error.h:72
Type
Type of JSON value.
Definition: rapidjson.h:603
After parsing a complete JSON root from stream, stop further processing the rest of stream...
Definition: reader.h:150
void SkipWhitespace(InputStream &is)
Skip the JSON white spaces in a stream.
Definition: reader.h:264
#define RAPIDJSON_UNLIKELY(x)
Compiler branching hint for expression with low probability to be true.
Definition: rapidjson.h:468
Default parse flags. Can be customized by defining RAPIDJSON_PARSE_DEFAULT_FLAGS. ...
Definition: reader.h:156
ParseFlag
Combination of parseFlags.
Definition: reader.h:145
Allow one-line (//) and multi-line (/**/) comments.
Definition: reader.h:152
Parsing was terminated.
Definition: error.h:88
Number too big to be stored in double.
Definition: error.h:84
Missing exponent in number.
Definition: error.h:86
void IterativeParseInit()
Initialize JSON text token-by-token parsing.
Definition: reader.h:519
In-situ (destructive) parsing.
Definition: reader.h:147
ParseResult Parse(InputStream &is, Handler &handler)
Parse JSON text (with kParseDefaultFlags)
Definition: reader.h:512
ParseErrorCode GetParseErrorCode() const
Get the ParseErrorCode of last parsing.
Definition: reader.h:597
bool IterativeParseNext(InputStream &is, Handler &handler)
Parse one token from JSON text.
Definition: reader.h:532
GenericStringStream< UTF8< char > > StringStream
String stream with UTF8 encoding.
Definition: fwd.h:47
The document is empty.
Definition: error.h:67
Missing a comma or '}' after an object member.
Definition: error.h:74
Invalid encoding in string.
Definition: error.h:82
#define RAPIDJSON_ASSERT(x)
Assertion.
Definition: rapidjson.h:402
#define RAPIDJSON_PARSE_DEFAULT_FLAGS
User-defined kParseDefaultFlags definition.
Definition: reader.h:139
A read-write string stream.
Definition: fwd.h:52