Skip to content

Commit 83615ff

Browse files
authored
Fixes issue 1088 (simdjson#1096)
1 parent 75c75ac commit 83615ff

File tree

3 files changed

+60
-5
lines changed

3 files changed

+60
-5
lines changed

include/simdjson/dom/document_stream.h

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,28 @@ class document_stream {
121121
* may change in future versions of simdjson: we find the API somewhat
122122
* awkward and we would like to offer something friendlier.
123123
*/
124-
really_inline size_t current_index() noexcept;
124+
really_inline size_t current_index() const noexcept;
125+
/**
126+
* @private
127+
*
128+
* Gives a view of the current document.
129+
*
130+
* document_stream stream = parser.parse_many(json,window);
131+
* for(auto i = stream.begin(); i != stream.end(); ++i) {
132+
* auto doc = *i;
133+
* std::string_view v = i.source();
134+
* }
135+
*
136+
* The returned string_view instance is simply a view into the (unparsed)
137+
* source string: it may thus include white-space characters and all manner
138+
* of padding.
139+
*
140+
* This function (source()) is experimental and the usage
141+
* may change in future versions of simdjson: we find the API somewhat
142+
* awkward and we would like to offer something friendlier.
143+
*/
144+
really_inline std::string_view source() const noexcept;
145+
125146
private:
126147
really_inline iterator(document_stream &s, bool finished) noexcept;
127148
/** The document_stream we're iterating through. */

include/simdjson/inline/document_stream.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -150,9 +150,16 @@ inline void document_stream::start() noexcept {
150150
next();
151151
}
152152

153-
really_inline size_t document_stream::iterator::current_index() noexcept {
153+
/** Reports the doc_index value currently held by the stream this iterator refers to. */
really_inline size_t document_stream::iterator::current_index() const noexcept {
154154
return stream.doc_index;
155155
}
156+
157+
/**
 * Builds a string_view over the raw input bytes of the current document.
 *
 * The view begins at stream.buf + current_index(). Its length is the offset
 * stored at the parser's next structural index (shifted by stream.batch_start)
 * minus current_index(), minus one.
 */
really_inline std::string_view document_stream::iterator::source() const noexcept {
  const size_t doc_begin = current_index();
  const auto &impl = stream.parser->implementation;
  const size_t following_doc = stream.batch_start + impl->structural_indexes[impl->next_structural_index];
  const char *const first_byte = reinterpret_cast<const char*>(stream.buf) + doc_begin;
  return std::string_view(first_byte, following_doc - doc_begin - 1);
}
161+
162+
156163
inline void document_stream::next() noexcept {
157164
if (error) { return; }
158165

tests/document_stream_tests.cpp

Lines changed: 30 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,19 @@
55
#include "simdjson.h"
66
#include "test_macros.h"
77

8+
9+
/**
 * Returns a copy of `s` with leading and trailing white-space removed.
 *
 * White-space is whatever std::isspace reports for the current C locale.
 * An empty or all-white-space input yields an empty string.
 *
 * @param s the string to trim (not modified)
 * @return the trimmed copy
 */
std::string trim(const std::string &s) {
  // std::isspace has undefined behavior for negative char values, so every
  // character is routed through unsigned char first.
  const auto is_space = [](char c) {
    return std::isspace(static_cast<unsigned char>(c)) != 0;
  };
  auto start = s.begin();
  auto end = s.end();
  while (start != end && is_space(*start)) {
    ++start;
  }
  // `end` stays one-past-the-last kept character; checking `end != start`
  // before looking at *(end - 1) means begin() is never decremented, even
  // when the input is empty.
  while (end != start && is_space(*(end - 1))) {
    --end;
  }
  return std::string(start, end);
}
20+
821
namespace document_stream_tests {
922
static simdjson::dom::document_stream parse_many_stream_return(simdjson::dom::parser &parser, simdjson::padded_string &str) {
1023
simdjson::dom::document_stream stream;
@@ -19,10 +32,18 @@ namespace document_stream_tests {
1932
}
2033
bool test_current_index() {
2134
std::cout << "Running " << __func__ << std::endl;
22-
std::string base("1 ");// one JSON!
35+
std::string base1("1 ");// one JSON!
36+
std::string base2("{\"k\":1} ");// one JSON!
37+
std::string base3("[1,2] ");// one JSON!
38+
assert(base1.size() == base2.size());
39+
assert(base2.size() == base3.size());
40+
std::vector<std::string> source_strings = {base1, base2, base3};
41+
2342
std::string json;
2443
for(size_t k = 0; k < 1000; k++) {
25-
json += base;
44+
json += base1;
45+
json += base2;
46+
json += base3;
2647
}
2748
simdjson::dom::parser parser;
2849
const size_t window = 32; // deliberately small
@@ -38,7 +59,13 @@ namespace document_stream_tests {
3859
std::cout << "expected index:" << count << std::endl;
3960
return false;
4061
}
41-
count += base.size();
62+
std::string answer = source_strings[(count / base1.size()) % source_strings.size()];
63+
if(trim(std::string(i.source())) != trim(answer)) {
64+
std::cout << "got: '" << i.source() << "'" << std::endl;
65+
std::cout << "expected : '" << answer << "'" << std::endl;
66+
return false;
67+
}
68+
count += base1.size();
4269
}
4370
return true;
4471
}

0 commit comments

Comments
 (0)