@@ -12,62 +12,50 @@ namespace stage2 {
12
12
13
13
#define SIMDJSON_TRY (EXPR ) { auto _err = (EXPR); if (_err) { return _err; } }
14
14
15
- template <typename T>
16
15
struct structural_parser : structural_iterator {
17
- /* * Receiver that actually parses the strings and builds the tape */
18
- T builder;
19
16
/* * Current depth (nested objects and arrays) */
20
17
uint32_t depth{0 };
21
18
22
- template <bool STREAMING>
23
- WARN_UNUSED static really_inline error_code parse (dom_parser_implementation &dom_parser, dom::document &doc ) noexcept ;
19
+ template <bool STREAMING, typename T >
20
+ WARN_UNUSED static really_inline error_code parse (dom_parser_implementation &dom_parser, T &builder ) noexcept ;
24
21
25
22
// For non-streaming, to pass an explicit 0 as next_structural, which enables optimizations
26
23
really_inline structural_parser (dom_parser_implementation &_parser, uint32_t start_structural_index)
27
- : structural_iterator(_parser, start_structural_index),
28
- builder{parser.doc ->tape .get (), parser.doc ->string_buf .get ()} {
24
+ : structural_iterator(_parser, start_structural_index) {
29
25
}
30
26
31
27
WARN_UNUSED really_inline error_code start_document () {
32
- builder.start_document (*this );
33
28
parser.is_array [depth] = false ;
34
29
return SUCCESS;
35
30
}
36
- WARN_UNUSED really_inline error_code start_object () {
31
+ template <typename T>
32
+ WARN_UNUSED really_inline error_code start_object (T &builder) {
37
33
depth++;
38
34
if (depth >= parser.max_depth ()) { log_error (" Exceeded max depth!" ); return DEPTH_ERROR; }
39
35
builder.start_object (*this );
40
36
parser.is_array [depth] = false ;
41
37
return SUCCESS;
42
38
}
43
- WARN_UNUSED really_inline error_code start_array () {
39
+ template <typename T>
40
+ WARN_UNUSED really_inline error_code start_array (T &builder) {
44
41
depth++;
45
42
if (depth >= parser.max_depth ()) { log_error (" Exceeded max depth!" ); return DEPTH_ERROR; }
46
43
builder.start_array (*this );
47
44
parser.is_array [depth] = true ;
48
45
return SUCCESS;
49
46
}
50
- really_inline void end_object () {
51
- builder.end_object (*this );
52
- depth--;
53
- }
54
- really_inline void end_array () {
55
- builder.end_array (*this );
56
- depth--;
57
- }
58
- really_inline void end_document () {
59
- builder.end_document (*this );
60
- }
61
47
62
- WARN_UNUSED really_inline bool empty_object () {
48
+ template <typename T>
49
+ WARN_UNUSED really_inline bool empty_object (T &builder) {
63
50
if (peek_next_char () == ' }' ) {
64
51
advance_char ();
65
52
builder.empty_object (*this );
66
53
return true ;
67
54
}
68
55
return false ;
69
56
}
70
- WARN_UNUSED really_inline bool empty_array () {
57
+ template <typename T>
58
+ WARN_UNUSED really_inline bool empty_array (T &builder) {
71
59
if (peek_next_char () == ' ]' ) {
72
60
advance_char ();
73
61
builder.empty_array (*this );
@@ -76,60 +64,21 @@ struct structural_parser : structural_iterator {
76
64
return false ;
77
65
}
78
66
79
- really_inline void increment_count () {
80
- builder.increment_count (*this );
81
- }
82
-
83
- WARN_UNUSED really_inline error_code parse_key (const uint8_t *key) {
84
- return builder.parse_key (*this , key);
85
- }
86
- WARN_UNUSED really_inline error_code parse_string (const uint8_t *value) {
87
- return builder.parse_string (*this , value);
88
- }
89
- WARN_UNUSED really_inline error_code parse_number (const uint8_t *value) {
90
- return builder.parse_number (*this , value);
91
- }
92
- WARN_UNUSED really_inline error_code parse_root_number (const uint8_t *value) {
93
- return builder.parse_root_number (*this , value);
94
- }
95
- WARN_UNUSED really_inline error_code parse_true_atom (const uint8_t *value) {
96
- return builder.parse_true_atom (*this , value);
97
- }
98
- WARN_UNUSED really_inline error_code parse_root_true_atom (const uint8_t *value) {
99
- return builder.parse_root_true_atom (*this , value);
100
- }
101
- WARN_UNUSED really_inline error_code parse_false_atom (const uint8_t *value) {
102
- return builder.parse_false_atom (*this , value);
103
- }
104
- WARN_UNUSED really_inline error_code parse_root_false_atom (const uint8_t *value) {
105
- return builder.parse_root_false_atom (*this , value);
106
- }
107
- WARN_UNUSED really_inline error_code parse_null_atom (const uint8_t *value) {
108
- return builder.parse_null_atom (*this , value);
109
- }
110
- WARN_UNUSED really_inline error_code parse_root_null_atom (const uint8_t *value) {
111
- return builder.parse_root_null_atom (*this , value);
112
- }
113
-
114
- WARN_UNUSED really_inline error_code start () {
115
- logger::log_start ();
116
-
117
- // If there are no structurals left, return EMPTY
118
- if (at_end ()) { return EMPTY; }
119
-
120
- // Push the root scope (there is always at least one scope)
121
- return start_document ();
122
- }
123
-
67
+ template <bool STREAMING>
124
68
WARN_UNUSED really_inline error_code finish () {
125
- end_document ();
126
69
parser.next_structural_index = uint32_t (next_structural - &parser.structural_indexes [0 ]);
127
70
128
71
if (depth != 0 ) {
129
72
log_error (" Unclosed objects or arrays!" );
130
73
return TAPE_ERROR;
131
74
}
132
75
76
+ // If we didn't make it to the end, it's an error
77
+ if ( !STREAMING && parser.next_structural_index != parser.n_structural_indexes ) {
78
+ logger::log_string (" More than one JSON value at the root of the document, or extra characters at the end of the JSON!" );
79
+ return TAPE_ERROR;
80
+ }
81
+
133
82
return SUCCESS;
134
83
}
135
84
@@ -152,12 +101,17 @@ struct structural_parser : structural_iterator {
152
101
}
153
102
}; // struct structural_parser
154
103
155
- template <typename T>
156
- template <bool STREAMING>
157
- WARN_UNUSED really_inline error_code structural_parser<T>::parse(dom_parser_implementation &dom_parser, dom::document &doc) noexcept {
158
- dom_parser.doc = &doc;
159
- stage2::structural_parser<T> parser (dom_parser, STREAMING ? dom_parser.next_structural_index : 0 );
160
- SIMDJSON_TRY ( parser.start () );
104
+ template <bool STREAMING, typename T>
105
+ WARN_UNUSED really_inline error_code structural_parser::parse (dom_parser_implementation &dom_parser, T &builder) noexcept {
106
+ stage2::structural_parser parser (dom_parser, STREAMING ? dom_parser.next_structural_index : 0 );
107
+ logger::log_start ();
108
+
109
+ //
110
+ // Start the document
111
+ //
112
+ if (parser.at_end ()) { return EMPTY; }
113
+ SIMDJSON_TRY ( parser.start_document () );
114
+ builder.start_document (parser);
161
115
162
116
//
163
117
// Read first value
@@ -166,13 +120,13 @@ WARN_UNUSED really_inline error_code structural_parser<T>::parse(dom_parser_impl
166
120
const uint8_t *value = parser.advance ();
167
121
switch (*value) {
168
122
case ' {' : {
169
- if (parser.empty_object ()) { goto document_end; }
170
- SIMDJSON_TRY ( parser.start_object () );
123
+ if (parser.empty_object (builder )) { goto document_end; }
124
+ SIMDJSON_TRY ( parser.start_object (builder ) );
171
125
goto object_begin;
172
126
}
173
127
case ' [' : {
174
- if (parser.empty_array ()) { goto document_end; }
175
- SIMDJSON_TRY ( parser.start_array () );
128
+ if (parser.empty_array (builder )) { goto document_end; }
129
+ SIMDJSON_TRY ( parser.start_array (builder ) );
176
130
// Make sure the outer array is closed before continuing; otherwise, there are ways we could get
177
131
// into memory corruption. See https://github.com/simdjson/simdjson/issues/906
178
132
if (!STREAMING) {
@@ -182,14 +136,14 @@ WARN_UNUSED really_inline error_code structural_parser<T>::parse(dom_parser_impl
182
136
}
183
137
goto array_begin;
184
138
}
185
- case ' "' : SIMDJSON_TRY ( parser .parse_string (value) ); goto document_end;
186
- case ' t' : SIMDJSON_TRY ( parser .parse_root_true_atom (value) ); goto document_end;
187
- case ' f' : SIMDJSON_TRY ( parser .parse_root_false_atom (value) ); goto document_end;
188
- case ' n' : SIMDJSON_TRY ( parser .parse_root_null_atom (value) ); goto document_end;
139
+ case ' "' : SIMDJSON_TRY ( builder .parse_string (parser, value) ); goto document_end;
140
+ case ' t' : SIMDJSON_TRY ( builder .parse_root_true_atom (parser, value) ); goto document_end;
141
+ case ' f' : SIMDJSON_TRY ( builder .parse_root_false_atom (parser, value) ); goto document_end;
142
+ case ' n' : SIMDJSON_TRY ( builder .parse_root_null_atom (parser, value) ); goto document_end;
189
143
case ' -' :
190
144
case ' 0' : case ' 1' : case ' 2' : case ' 3' : case ' 4' :
191
145
case ' 5' : case ' 6' : case ' 7' : case ' 8' : case ' 9' :
192
- SIMDJSON_TRY ( parser .parse_root_number (value) ); goto document_end;
146
+ SIMDJSON_TRY ( builder .parse_root_number (parser, value) ); goto document_end;
193
147
default :
194
148
parser.log_error (" Document starts with a non-value character" );
195
149
return TAPE_ERROR;
@@ -205,8 +159,8 @@ object_begin: {
205
159
parser.log_error (" Object does not start with a key" );
206
160
return TAPE_ERROR;
207
161
}
208
- parser .increment_count ();
209
- SIMDJSON_TRY ( parser .parse_key (key) );
162
+ builder .increment_count (parser );
163
+ SIMDJSON_TRY ( builder .parse_key (parser, key) );
210
164
goto object_field;
211
165
} // object_begin:
212
166
@@ -215,23 +169,23 @@ object_field: {
215
169
const uint8_t *value = parser.advance ();
216
170
switch (*value) {
217
171
case ' {' : {
218
- if (parser.empty_object ()) { break ; };
219
- SIMDJSON_TRY ( parser.start_object () );
172
+ if (parser.empty_object (builder )) { break ; };
173
+ SIMDJSON_TRY ( parser.start_object (builder ) );
220
174
goto object_begin;
221
175
}
222
176
case ' [' : {
223
- if (parser.empty_array ()) { break ; };
224
- SIMDJSON_TRY ( parser.start_array () );
177
+ if (parser.empty_array (builder )) { break ; };
178
+ SIMDJSON_TRY ( parser.start_array (builder ) );
225
179
goto array_begin;
226
180
}
227
- case ' "' : SIMDJSON_TRY ( parser .parse_string (value) ); break ;
228
- case ' t' : SIMDJSON_TRY ( parser .parse_true_atom (value) ); break ;
229
- case ' f' : SIMDJSON_TRY ( parser .parse_false_atom (value) ); break ;
230
- case ' n' : SIMDJSON_TRY ( parser .parse_null_atom (value) ); break ;
181
+ case ' "' : SIMDJSON_TRY ( builder .parse_string (parser, value) ); break ;
182
+ case ' t' : SIMDJSON_TRY ( builder .parse_true_atom (parser, value) ); break ;
183
+ case ' f' : SIMDJSON_TRY ( builder .parse_false_atom (parser, value) ); break ;
184
+ case ' n' : SIMDJSON_TRY ( builder .parse_null_atom (parser, value) ); break ;
231
185
case ' -' :
232
186
case ' 0' : case ' 1' : case ' 2' : case ' 3' : case ' 4' :
233
187
case ' 5' : case ' 6' : case ' 7' : case ' 8' : case ' 9' :
234
- SIMDJSON_TRY ( parser .parse_number (value) ); break ;
188
+ SIMDJSON_TRY ( builder .parse_number (parser, value) ); break ;
235
189
default :
236
190
parser.log_error (" Non-value found when value was expected!" );
237
191
return TAPE_ERROR;
@@ -241,14 +195,15 @@ object_field: {
241
195
object_continue: {
242
196
switch (parser.advance_char ()) {
243
197
case ' ,' : {
244
- parser .increment_count ();
198
+ builder .increment_count (parser );
245
199
const uint8_t *key = parser.advance ();
246
200
if (unlikely ( *key != ' "' )) { parser.log_error (" Key string missing at beginning of field in object" ); return TAPE_ERROR; }
247
- SIMDJSON_TRY ( parser .parse_key (key) );
201
+ SIMDJSON_TRY ( builder .parse_key (parser, key) );
248
202
goto object_field;
249
203
}
250
204
case ' }' :
251
- parser.end_object ();
205
+ builder.end_object (parser);
206
+ parser.depth --;
252
207
goto scope_end;
253
208
default :
254
209
parser.log_error (" No comma between object fields" );
@@ -266,30 +221,30 @@ scope_end: {
266
221
// Array parser states
267
222
//
268
223
array_begin: {
269
- parser .increment_count ();
224
+ builder .increment_count (parser );
270
225
} // array_begin:
271
226
272
227
array_value: {
273
228
const uint8_t *value = parser.advance ();
274
229
switch (*value) {
275
230
case ' {' : {
276
- if (parser.empty_object ()) { break ; };
277
- SIMDJSON_TRY ( parser.start_object () );
231
+ if (parser.empty_object (builder )) { break ; };
232
+ SIMDJSON_TRY ( parser.start_object (builder ) );
278
233
goto object_begin;
279
234
}
280
235
case ' [' : {
281
- if (parser.empty_array ()) { break ; };
282
- SIMDJSON_TRY ( parser.start_array () );
236
+ if (parser.empty_array (builder )) { break ; };
237
+ SIMDJSON_TRY ( parser.start_array (builder ) );
283
238
goto array_begin;
284
239
}
285
- case ' "' : SIMDJSON_TRY ( parser .parse_string (value) ); break ;
286
- case ' t' : SIMDJSON_TRY ( parser .parse_true_atom (value) ); break ;
287
- case ' f' : SIMDJSON_TRY ( parser .parse_false_atom (value) ); break ;
288
- case ' n' : SIMDJSON_TRY ( parser .parse_null_atom (value) ); break ;
240
+ case ' "' : SIMDJSON_TRY ( builder .parse_string (parser, value) ); break ;
241
+ case ' t' : SIMDJSON_TRY ( builder .parse_true_atom (parser, value) ); break ;
242
+ case ' f' : SIMDJSON_TRY ( builder .parse_false_atom (parser, value) ); break ;
243
+ case ' n' : SIMDJSON_TRY ( builder .parse_null_atom (parser, value) ); break ;
289
244
case ' -' :
290
245
case ' 0' : case ' 1' : case ' 2' : case ' 3' : case ' 4' :
291
246
case ' 5' : case ' 6' : case ' 7' : case ' 8' : case ' 9' :
292
- SIMDJSON_TRY ( parser .parse_number (value) ); break ;
247
+ SIMDJSON_TRY ( builder .parse_number (parser, value) ); break ;
293
248
default :
294
249
parser.log_error (" Non-value found when value was expected!" );
295
250
return TAPE_ERROR;
@@ -299,10 +254,11 @@ array_value: {
299
254
array_continue: {
300
255
switch (parser.advance_char ()) {
301
256
case ' ,' :
302
- parser .increment_count ();
257
+ builder .increment_count (parser );
303
258
goto array_value;
304
259
case ' ]' :
305
- parser.end_array ();
260
+ builder.end_array (parser);
261
+ parser.depth --;
306
262
goto scope_end;
307
263
default :
308
264
parser.log_error (" Missing comma between array values" );
@@ -311,7 +267,8 @@ array_continue: {
311
267
} // array_continue:
312
268
313
269
document_end: {
314
- return parser.finish ();
270
+ builder.end_document (parser);
271
+ return parser.finish <STREAMING>();
315
272
} // document_end:
316
273
317
274
} // parse_structurals()
0 commit comments