Skip to content

Commit ec28acb

Browse files
committed
De-templatize stage2::structural_parser
1 parent ee6647c commit ec28acb

File tree

6 files changed

+121
-175
lines changed

6 files changed

+121
-175
lines changed

src/arm64/dom_parser_implementation.cpp

Lines changed: 6 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -145,19 +145,15 @@ WARN_UNUSED bool implementation::validate_utf8(const char *buf, size_t len) cons
145145
}
146146

147147
WARN_UNUSED error_code dom_parser_implementation::stage2(dom::document &_doc) noexcept {
148-
if (auto error = stage2::structural_parser<stage2::tape_builder>::parse<false>(*this, _doc)) { return error; }
149-
150-
// If we didn't make it to the end, it's an error
151-
if ( next_structural_index != n_structural_indexes ) {
152-
logger::log_string("More than one JSON value at the root of the document, or extra characters at the end of the JSON!");
153-
return TAPE_ERROR;
154-
}
155-
156-
return SUCCESS;
148+
doc = &_doc;
149+
stage2::tape_builder builder(*doc);
150+
return stage2::structural_parser::parse<false>(*this, builder);
157151
}
158152

159153
WARN_UNUSED error_code dom_parser_implementation::stage2_next(dom::document &_doc) noexcept {
160-
return stage2::structural_parser<stage2::tape_builder>::parse<true>(*this, _doc);
154+
doc = &_doc;
155+
stage2::tape_builder builder(_doc);
156+
return stage2::structural_parser::parse<true>(*this, builder);
161157
}
162158

163159
WARN_UNUSED error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept {

src/fallback/dom_parser_implementation.cpp

Lines changed: 6 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -322,19 +322,15 @@ namespace {
322322
namespace SIMDJSON_IMPLEMENTATION {
323323

324324
WARN_UNUSED error_code dom_parser_implementation::stage2(dom::document &_doc) noexcept {
325-
if (auto error = stage2::structural_parser<stage2::tape_builder>::parse<false>(*this, _doc)) { return error; }
326-
327-
// If we didn't make it to the end, it's an error
328-
if ( next_structural_index != n_structural_indexes ) {
329-
logger::log_string("More than one JSON value at the root of the document, or extra characters at the end of the JSON!");
330-
return TAPE_ERROR;
331-
}
332-
333-
return SUCCESS;
325+
doc = &_doc;
326+
stage2::tape_builder builder(*doc);
327+
return stage2::structural_parser::parse<false>(*this, builder);
334328
}
335329

336330
WARN_UNUSED error_code dom_parser_implementation::stage2_next(dom::document &_doc) noexcept {
337-
return stage2::structural_parser<stage2::tape_builder>::parse<true>(*this, _doc);
331+
doc = &_doc;
332+
stage2::tape_builder builder(_doc);
333+
return stage2::structural_parser::parse<true>(*this, builder);
338334
}
339335

340336
WARN_UNUSED error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept {

src/generic/stage2/structural_parser.h

Lines changed: 68 additions & 111 deletions
Original file line number | Diff line number | Diff line change
@@ -12,62 +12,50 @@ namespace stage2 {
1212

1313
#define SIMDJSON_TRY(EXPR) { auto _err = (EXPR); if (_err) { return _err; } }
1414

15-
template<typename T>
1615
struct structural_parser : structural_iterator {
17-
/** Receiver that actually parses the strings and builds the tape */
18-
T builder;
1916
/** Current depth (nested objects and arrays) */
2017
uint32_t depth{0};
2118

22-
template<bool STREAMING>
23-
WARN_UNUSED static really_inline error_code parse(dom_parser_implementation &dom_parser, dom::document &doc) noexcept;
19+
template<bool STREAMING, typename T>
20+
WARN_UNUSED static really_inline error_code parse(dom_parser_implementation &dom_parser, T &builder) noexcept;
2421

2522
// For non-streaming, to pass an explicit 0 as next_structural, which enables optimizations
2623
really_inline structural_parser(dom_parser_implementation &_parser, uint32_t start_structural_index)
27-
: structural_iterator(_parser, start_structural_index),
28-
builder{parser.doc->tape.get(), parser.doc->string_buf.get()} {
24+
: structural_iterator(_parser, start_structural_index) {
2925
}
3026

3127
WARN_UNUSED really_inline error_code start_document() {
32-
builder.start_document(*this);
3328
parser.is_array[depth] = false;
3429
return SUCCESS;
3530
}
36-
WARN_UNUSED really_inline error_code start_object() {
31+
template<typename T>
32+
WARN_UNUSED really_inline error_code start_object(T &builder) {
3733
depth++;
3834
if (depth >= parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; }
3935
builder.start_object(*this);
4036
parser.is_array[depth] = false;
4137
return SUCCESS;
4238
}
43-
WARN_UNUSED really_inline error_code start_array() {
39+
template<typename T>
40+
WARN_UNUSED really_inline error_code start_array(T &builder) {
4441
depth++;
4542
if (depth >= parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; }
4643
builder.start_array(*this);
4744
parser.is_array[depth] = true;
4845
return SUCCESS;
4946
}
50-
really_inline void end_object() {
51-
builder.end_object(*this);
52-
depth--;
53-
}
54-
really_inline void end_array() {
55-
builder.end_array(*this);
56-
depth--;
57-
}
58-
really_inline void end_document() {
59-
builder.end_document(*this);
60-
}
6147

62-
WARN_UNUSED really_inline bool empty_object() {
48+
template<typename T>
49+
WARN_UNUSED really_inline bool empty_object(T &builder) {
6350
if (peek_next_char() == '}') {
6451
advance_char();
6552
builder.empty_object(*this);
6653
return true;
6754
}
6855
return false;
6956
}
70-
WARN_UNUSED really_inline bool empty_array() {
57+
template<typename T>
58+
WARN_UNUSED really_inline bool empty_array(T &builder) {
7159
if (peek_next_char() == ']') {
7260
advance_char();
7361
builder.empty_array(*this);
@@ -76,60 +64,21 @@ struct structural_parser : structural_iterator {
7664
return false;
7765
}
7866

79-
really_inline void increment_count() {
80-
builder.increment_count(*this);
81-
}
82-
83-
WARN_UNUSED really_inline error_code parse_key(const uint8_t *key) {
84-
return builder.parse_key(*this, key);
85-
}
86-
WARN_UNUSED really_inline error_code parse_string(const uint8_t *value) {
87-
return builder.parse_string(*this, value);
88-
}
89-
WARN_UNUSED really_inline error_code parse_number(const uint8_t *value) {
90-
return builder.parse_number(*this, value);
91-
}
92-
WARN_UNUSED really_inline error_code parse_root_number(const uint8_t *value) {
93-
return builder.parse_root_number(*this, value);
94-
}
95-
WARN_UNUSED really_inline error_code parse_true_atom(const uint8_t *value) {
96-
return builder.parse_true_atom(*this, value);
97-
}
98-
WARN_UNUSED really_inline error_code parse_root_true_atom(const uint8_t *value) {
99-
return builder.parse_root_true_atom(*this, value);
100-
}
101-
WARN_UNUSED really_inline error_code parse_false_atom(const uint8_t *value) {
102-
return builder.parse_false_atom(*this, value);
103-
}
104-
WARN_UNUSED really_inline error_code parse_root_false_atom(const uint8_t *value) {
105-
return builder.parse_root_false_atom(*this, value);
106-
}
107-
WARN_UNUSED really_inline error_code parse_null_atom(const uint8_t *value) {
108-
return builder.parse_null_atom(*this, value);
109-
}
110-
WARN_UNUSED really_inline error_code parse_root_null_atom(const uint8_t *value) {
111-
return builder.parse_root_null_atom(*this, value);
112-
}
113-
114-
WARN_UNUSED really_inline error_code start() {
115-
logger::log_start();
116-
117-
// If there are no structurals left, return EMPTY
118-
if (at_end()) { return EMPTY; }
119-
120-
// Push the root scope (there is always at least one scope)
121-
return start_document();
122-
}
123-
67+
template<bool STREAMING>
12468
WARN_UNUSED really_inline error_code finish() {
125-
end_document();
12669
parser.next_structural_index = uint32_t(next_structural - &parser.structural_indexes[0]);
12770

12871
if (depth != 0) {
12972
log_error("Unclosed objects or arrays!");
13073
return TAPE_ERROR;
13174
}
13275

76+
// If we didn't make it to the end, it's an error
77+
if ( !STREAMING && parser.next_structural_index != parser.n_structural_indexes ) {
78+
logger::log_string("More than one JSON value at the root of the document, or extra characters at the end of the JSON!");
79+
return TAPE_ERROR;
80+
}
81+
13382
return SUCCESS;
13483
}
13584

@@ -152,12 +101,17 @@ struct structural_parser : structural_iterator {
152101
}
153102
}; // struct structural_parser
154103

155-
template<typename T>
156-
template<bool STREAMING>
157-
WARN_UNUSED really_inline error_code structural_parser<T>::parse(dom_parser_implementation &dom_parser, dom::document &doc) noexcept {
158-
dom_parser.doc = &doc;
159-
stage2::structural_parser<T> parser(dom_parser, STREAMING ? dom_parser.next_structural_index : 0);
160-
SIMDJSON_TRY( parser.start() );
104+
template<bool STREAMING, typename T>
105+
WARN_UNUSED really_inline error_code structural_parser::parse(dom_parser_implementation &dom_parser, T &builder) noexcept {
106+
stage2::structural_parser parser(dom_parser, STREAMING ? dom_parser.next_structural_index : 0);
107+
logger::log_start();
108+
109+
//
110+
// Start the document
111+
//
112+
if (parser.at_end()) { return EMPTY; }
113+
SIMDJSON_TRY( parser.start_document() );
114+
builder.start_document(parser);
161115

162116
//
163117
// Read first value
@@ -166,13 +120,13 @@ WARN_UNUSED really_inline error_code structural_parser<T>::parse(dom_parser_impl
166120
const uint8_t *value = parser.advance();
167121
switch (*value) {
168122
case '{': {
169-
if (parser.empty_object()) { goto document_end; }
170-
SIMDJSON_TRY( parser.start_object() );
123+
if (parser.empty_object(builder)) { goto document_end; }
124+
SIMDJSON_TRY( parser.start_object(builder) );
171125
goto object_begin;
172126
}
173127
case '[': {
174-
if (parser.empty_array()) { goto document_end; }
175-
SIMDJSON_TRY( parser.start_array() );
128+
if (parser.empty_array(builder)) { goto document_end; }
129+
SIMDJSON_TRY( parser.start_array(builder) );
176130
// Make sure the outer array is closed before continuing; otherwise, there are ways we could get
177131
// into memory corruption. See https://github.com/simdjson/simdjson/issues/906
178132
if (!STREAMING) {
@@ -182,14 +136,14 @@ WARN_UNUSED really_inline error_code structural_parser<T>::parse(dom_parser_impl
182136
}
183137
goto array_begin;
184138
}
185-
case '"': SIMDJSON_TRY( parser.parse_string(value) ); goto document_end;
186-
case 't': SIMDJSON_TRY( parser.parse_root_true_atom(value) ); goto document_end;
187-
case 'f': SIMDJSON_TRY( parser.parse_root_false_atom(value) ); goto document_end;
188-
case 'n': SIMDJSON_TRY( parser.parse_root_null_atom(value) ); goto document_end;
139+
case '"': SIMDJSON_TRY( builder.parse_string(parser, value) ); goto document_end;
140+
case 't': SIMDJSON_TRY( builder.parse_root_true_atom(parser, value) ); goto document_end;
141+
case 'f': SIMDJSON_TRY( builder.parse_root_false_atom(parser, value) ); goto document_end;
142+
case 'n': SIMDJSON_TRY( builder.parse_root_null_atom(parser, value) ); goto document_end;
189143
case '-':
190144
case '0': case '1': case '2': case '3': case '4':
191145
case '5': case '6': case '7': case '8': case '9':
192-
SIMDJSON_TRY( parser.parse_root_number(value) ); goto document_end;
146+
SIMDJSON_TRY( builder.parse_root_number(parser, value) ); goto document_end;
193147
default:
194148
parser.log_error("Document starts with a non-value character");
195149
return TAPE_ERROR;
@@ -205,8 +159,8 @@ object_begin: {
205159
parser.log_error("Object does not start with a key");
206160
return TAPE_ERROR;
207161
}
208-
parser.increment_count();
209-
SIMDJSON_TRY( parser.parse_key(key) );
162+
builder.increment_count(parser);
163+
SIMDJSON_TRY( builder.parse_key(parser, key) );
210164
goto object_field;
211165
} // object_begin:
212166

@@ -215,23 +169,23 @@ object_field: {
215169
const uint8_t *value = parser.advance();
216170
switch (*value) {
217171
case '{': {
218-
if (parser.empty_object()) { break; };
219-
SIMDJSON_TRY( parser.start_object() );
172+
if (parser.empty_object(builder)) { break; };
173+
SIMDJSON_TRY( parser.start_object(builder) );
220174
goto object_begin;
221175
}
222176
case '[': {
223-
if (parser.empty_array()) { break; };
224-
SIMDJSON_TRY( parser.start_array() );
177+
if (parser.empty_array(builder)) { break; };
178+
SIMDJSON_TRY( parser.start_array(builder) );
225179
goto array_begin;
226180
}
227-
case '"': SIMDJSON_TRY( parser.parse_string(value) ); break;
228-
case 't': SIMDJSON_TRY( parser.parse_true_atom(value) ); break;
229-
case 'f': SIMDJSON_TRY( parser.parse_false_atom(value) ); break;
230-
case 'n': SIMDJSON_TRY( parser.parse_null_atom(value) ); break;
181+
case '"': SIMDJSON_TRY( builder.parse_string(parser, value) ); break;
182+
case 't': SIMDJSON_TRY( builder.parse_true_atom(parser, value) ); break;
183+
case 'f': SIMDJSON_TRY( builder.parse_false_atom(parser, value) ); break;
184+
case 'n': SIMDJSON_TRY( builder.parse_null_atom(parser, value) ); break;
231185
case '-':
232186
case '0': case '1': case '2': case '3': case '4':
233187
case '5': case '6': case '7': case '8': case '9':
234-
SIMDJSON_TRY( parser.parse_number(value) ); break;
188+
SIMDJSON_TRY( builder.parse_number(parser, value) ); break;
235189
default:
236190
parser.log_error("Non-value found when value was expected!");
237191
return TAPE_ERROR;
@@ -241,14 +195,15 @@ object_field: {
241195
object_continue: {
242196
switch (parser.advance_char()) {
243197
case ',': {
244-
parser.increment_count();
198+
builder.increment_count(parser);
245199
const uint8_t *key = parser.advance();
246200
if (unlikely( *key != '"' )) { parser.log_error("Key string missing at beginning of field in object"); return TAPE_ERROR; }
247-
SIMDJSON_TRY( parser.parse_key(key) );
201+
SIMDJSON_TRY( builder.parse_key(parser, key) );
248202
goto object_field;
249203
}
250204
case '}':
251-
parser.end_object();
205+
builder.end_object(parser);
206+
parser.depth--;
252207
goto scope_end;
253208
default:
254209
parser.log_error("No comma between object fields");
@@ -266,30 +221,30 @@ scope_end: {
266221
// Array parser states
267222
//
268223
array_begin: {
269-
parser.increment_count();
224+
builder.increment_count(parser);
270225
} // array_begin:
271226

272227
array_value: {
273228
const uint8_t *value = parser.advance();
274229
switch (*value) {
275230
case '{': {
276-
if (parser.empty_object()) { break; };
277-
SIMDJSON_TRY( parser.start_object() );
231+
if (parser.empty_object(builder)) { break; };
232+
SIMDJSON_TRY( parser.start_object(builder) );
278233
goto object_begin;
279234
}
280235
case '[': {
281-
if (parser.empty_array()) { break; };
282-
SIMDJSON_TRY( parser.start_array() );
236+
if (parser.empty_array(builder)) { break; };
237+
SIMDJSON_TRY( parser.start_array(builder) );
283238
goto array_begin;
284239
}
285-
case '"': SIMDJSON_TRY( parser.parse_string(value) ); break;
286-
case 't': SIMDJSON_TRY( parser.parse_true_atom(value) ); break;
287-
case 'f': SIMDJSON_TRY( parser.parse_false_atom(value) ); break;
288-
case 'n': SIMDJSON_TRY( parser.parse_null_atom(value) ); break;
240+
case '"': SIMDJSON_TRY( builder.parse_string(parser, value) ); break;
241+
case 't': SIMDJSON_TRY( builder.parse_true_atom(parser, value) ); break;
242+
case 'f': SIMDJSON_TRY( builder.parse_false_atom(parser, value) ); break;
243+
case 'n': SIMDJSON_TRY( builder.parse_null_atom(parser, value) ); break;
289244
case '-':
290245
case '0': case '1': case '2': case '3': case '4':
291246
case '5': case '6': case '7': case '8': case '9':
292-
SIMDJSON_TRY( parser.parse_number(value) ); break;
247+
SIMDJSON_TRY( builder.parse_number(parser, value) ); break;
293248
default:
294249
parser.log_error("Non-value found when value was expected!");
295250
return TAPE_ERROR;
@@ -299,10 +254,11 @@ array_value: {
299254
array_continue: {
300255
switch (parser.advance_char()) {
301256
case ',':
302-
parser.increment_count();
257+
builder.increment_count(parser);
303258
goto array_value;
304259
case ']':
305-
parser.end_array();
260+
builder.end_array(parser);
261+
parser.depth--;
306262
goto scope_end;
307263
default:
308264
parser.log_error("Missing comma between array values");
@@ -311,7 +267,8 @@ array_continue: {
311267
} // array_continue:
312268

313269
document_end: {
314-
return parser.finish();
270+
builder.end_document(parser);
271+
return parser.finish<STREAMING>();
315272
} // document_end:
316273

317274
} // parse_structurals()

0 commit comments

Comments
 (0)