Skip to content

Commit 2a34d3e

Browse files
authored
Merge pull request #29 from b41sh/perf-add-bench
perf: Add benches for parser and get_path
2 parents 023feb4 + 94e0188 commit 2a34d3e

File tree

6 files changed

+66157
-1
lines changed

6 files changed

+66157
-1
lines changed

Cargo.toml

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,4 +36,18 @@ serde_json = { version = "1.0.95", default-features = false, features = [
3636
] }
3737

3838
[dev-dependencies]
39-
goldenfile = "1.4.5"
39+
goldenfile = "1.5.2"
40+
serde_json = "1.0.105"
41+
json-deserializer = "0.4.4"
42+
simd-json = {version = "0.10.6", features = ["allow-non-simd"]}
43+
mockalloc = "0.1.2"
44+
criterion = "0.5.1"
45+
46+
[[bench]]
47+
name = "parser"
48+
harness = false
49+
50+
[[bench]]
51+
name = "get_path"
52+
harness = false
53+

benches/get_path.rs

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
// Copyright 2023 Datafuse Labs.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
use std::fs;
16+
use std::io::Read;
17+
18+
use criterion::{criterion_group, criterion_main, Criterion};
19+
20+
fn jsonb_get(data: &[u8], paths: &[&str], expected: &str) {
21+
let paths = paths
22+
.iter()
23+
.map(|p| jsonb::jsonpath::Path::DotField(std::borrow::Cow::Borrowed(p)))
24+
.collect::<Vec<_>>();
25+
let json_path = jsonb::jsonpath::JsonPath { paths };
26+
27+
let mut result_data = vec![];
28+
let mut result_offsets = vec![];
29+
30+
jsonb::get_by_path(data, json_path, &mut result_data, &mut result_offsets);
31+
32+
let s = jsonb::as_str(&result_data).unwrap();
33+
assert_eq!(s, expected);
34+
}
35+
36+
fn serde_json_get(data: &[u8], paths: &Vec<&str>, expected: &str) {
37+
let mut v: serde_json::Value = serde_json::from_slice(data).unwrap();
38+
for path in paths {
39+
v = v.get(path).unwrap().clone();
40+
}
41+
let s = v.as_str().unwrap();
42+
assert_eq!(s, expected);
43+
}
44+
45+
/// Reads the whole file at `file` into memory and returns its bytes.
///
/// Panics, naming the offending path and the I/O error, if the file cannot
/// be read.
fn read(file: &str) -> Vec<u8> {
    // One standard-library call replaces the manual open + `read_to_end` sequence.
    fs::read(file).unwrap_or_else(|err| panic!("failed to read {}: {}", file, err))
}
51+
52+
/// One lookup benchmark case: which data file to load, which keys to follow,
/// and which string the lookup must produce.
struct TestSuite<'a> {
53+
/// Base name of the input file; it is loaded from `./data/<file>.json`.
file: &'a str,
54+
/// Object keys followed in order, starting at the document root.
paths: Vec<&'a str>,
55+
/// String value the lookup is asserted to return.
expected: &'a str,
56+
}
57+
58+
fn add_benchmark(c: &mut Criterion) {
59+
let test_suites = vec![
60+
TestSuite {
61+
file: "canada",
62+
paths: vec!["type"],
63+
expected: "FeatureCollection",
64+
},
65+
TestSuite {
66+
file: "citm_catalog",
67+
paths: vec!["areaNames", "205705994"],
68+
expected: "1er balcon central",
69+
},
70+
TestSuite {
71+
file: "citm_catalog",
72+
paths: vec!["topicNames", "324846100"],
73+
expected: "Formations musicales",
74+
},
75+
TestSuite {
76+
file: "twitter",
77+
paths: vec!["search_metadata", "max_id_str"],
78+
expected: "505874924095815681",
79+
},
80+
];
81+
82+
for test_suite in test_suites {
83+
let bytes = read(&format!("./data/{}.json", test_suite.file));
84+
85+
let val = jsonb::parse_value(&bytes).unwrap();
86+
let jsonb_bytes = val.to_vec();
87+
88+
c.bench_function(
89+
&format!(
90+
"jsonb get {}->{}",
91+
test_suite.file,
92+
test_suite.paths.join("->")
93+
),
94+
|b| b.iter(|| jsonb_get(&jsonb_bytes, &test_suite.paths, test_suite.expected)),
95+
);
96+
97+
c.bench_function(
98+
&format!(
99+
"serde_json get {}->{}",
100+
test_suite.file,
101+
test_suite.paths.join("->")
102+
),
103+
|b| b.iter(|| serde_json_get(&bytes, &test_suite.paths, test_suite.expected)),
104+
);
105+
}
106+
}
107+
108+
// Criterion entry point: put `add_benchmark` in the `benches` group and hand
// that group to `criterion_main!`.
criterion_group!(benches, add_benchmark);
109+
criterion_main!(benches);

benches/parser.rs

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
// Copyright 2023 Datafuse Labs.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
use std::fs;
16+
use std::io::Read;
17+
18+
use criterion::{criterion_group, criterion_main, BatchSize, Criterion};
19+
20+
/// Parses `data` with `jsonb::parse_value` and discards the result.
///
/// Panics if parsing fails.
fn parse_jsonb(data: &[u8]) {
    let parsed: jsonb::Value = jsonb::parse_value(data).unwrap();
    drop(parsed);
}
23+
24+
/// Parses `data` with `serde_json::from_slice` and discards the result.
///
/// Panics if parsing fails.
fn parse_serde_json(data: &[u8]) {
    let parsed: serde_json::Value = serde_json::from_slice(data).unwrap();
    drop(parsed);
}
27+
28+
/// Parses `data` with `json_deserializer::parse` and discards the result.
///
/// Panics if parsing fails.
fn parse_json_deserializer(data: &[u8]) {
    let parsed: json_deserializer::Value = json_deserializer::parse(data).unwrap();
    drop(parsed);
}
31+
32+
fn parse_simd_json(data: &mut [u8]) {
33+
let _v = simd_json::to_borrowed_value(data).unwrap();
34+
}
35+
36+
/// Reads the whole file at `file` into memory and returns its bytes.
///
/// Panics, naming the offending path and the I/O error, if the file cannot
/// be read.
fn read(file: &str) -> Vec<u8> {
    // One standard-library call replaces the manual open + `read_to_end` sequence.
    fs::read(file).unwrap_or_else(|err| panic!("failed to read {}: {}", file, err))
}
42+
43+
fn add_benchmark(c: &mut Criterion) {
44+
let paths = fs::read_dir("./data/").unwrap();
45+
for path in paths {
46+
let file = format!("{}", path.unwrap().path().display());
47+
let bytes = read(&file);
48+
49+
c.bench_function(&format!("jsonb parse {}", file), |b| {
50+
b.iter(|| parse_jsonb(&bytes))
51+
});
52+
53+
c.bench_function(&format!("serde_json parse {}", file), |b| {
54+
b.iter(|| parse_serde_json(&bytes))
55+
});
56+
57+
c.bench_function(&format!("json_deserializer parse {}", file), |b| {
58+
b.iter(|| parse_json_deserializer(&bytes))
59+
});
60+
61+
let bytes = bytes.clone();
62+
c.bench_function(&format!("simd_json parse {}", file), move |b| {
63+
b.iter_batched(
64+
|| bytes.clone(),
65+
|mut data| parse_simd_json(&mut data),
66+
BatchSize::SmallInput,
67+
)
68+
});
69+
}
70+
}
71+
72+
// Criterion entry point: put `add_benchmark` in the `benches` group and hand
// that group to `criterion_main!`.
criterion_group!(benches, add_benchmark);
73+
criterion_main!(benches);

data/canada.json

Lines changed: 9 additions & 0 deletions
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)