Skip to content

Commit

Permalink
Merge pull request #29 from b41sh/perf-add-bench
Browse files Browse the repository at this point in the history
perf: Add benches for parser and get_path
  • Loading branch information
sundy-li authored Aug 26, 2023
2 parents 023feb4 + 94e0188 commit 2a34d3e
Show file tree
Hide file tree
Showing 6 changed files with 66,157 additions and 1 deletion.
16 changes: 15 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,4 +36,18 @@ serde_json = { version = "1.0.95", default-features = false, features = [
] }

[dev-dependencies]
goldenfile = "1.4.5"
goldenfile = "1.5.2"
serde_json = "1.0.105"
json-deserializer = "0.4.4"
simd-json = {version = "0.10.6", features = ["allow-non-simd"]}
mockalloc = "0.1.2"
criterion = "0.5.1"

[[bench]]
name = "parser"
harness = false

[[bench]]
name = "get_path"
harness = false

109 changes: 109 additions & 0 deletions benches/get_path.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
// Copyright 2023 Datafuse Labs.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::fs;
use std::io::Read;

use criterion::{criterion_group, criterion_main, Criterion};

/// Benchmark body for `jsonb`: turns every entry of `paths` into a dot-field path
/// segment, runs `jsonb::get_by_path` over `data`, and asserts that the bytes it
/// produced decode to the string `expected`.
///
/// Panics if the result cannot be read as a string or differs from `expected`.
fn jsonb_get(data: &[u8], paths: &[&str], expected: &str) {
    let mut segments = Vec::with_capacity(paths.len());
    for field in paths {
        segments.push(jsonb::jsonpath::Path::DotField(
            std::borrow::Cow::Borrowed(*field),
        ));
    }
    let json_path = jsonb::jsonpath::JsonPath { paths: segments };

    // Output buffers that `get_by_path` fills in.
    let (mut result_data, mut result_offsets) = (Vec::new(), Vec::new());
    jsonb::get_by_path(data, json_path, &mut result_data, &mut result_offsets);

    let found = jsonb::as_str(&result_data).unwrap();
    assert_eq!(found, expected);
}

/// Benchmark body for `serde_json`: parses `data` into a `serde_json::Value`, follows
/// `paths` one key at a time starting from the root, and asserts that the value
/// reached is the string `expected`.
///
/// Panics if `data` does not parse, if any key lookup returns `None`, if the final
/// value is not a string, or if it differs from `expected`.
fn serde_json_get(data: &[u8], paths: &[&str], expected: &str) {
    let root: serde_json::Value = serde_json::from_slice(data).unwrap();

    // Walk the tree by reference. Cloning each intermediate subtree would add
    // allocation work to the measurement that has nothing to do with the lookup.
    let mut current = &root;
    for path in paths {
        current = current.get(path).unwrap();
    }

    let s = current.as_str().unwrap();
    assert_eq!(s, expected);
}

/// Reads the whole file at `file` into memory and returns its bytes.
///
/// # Panics
///
/// Panics if the file cannot be opened or read. The panic message names the path,
/// so a benchmark started from the wrong working directory is easy to diagnose.
fn read(file: &str) -> Vec<u8> {
    let mut f = fs::File::open(file)
        .unwrap_or_else(|e| panic!("failed to open benchmark input `{}`: {}", file, e));
    let mut data = Vec::new();
    f.read_to_end(&mut data)
        .unwrap_or_else(|e| panic!("failed to read benchmark input `{}`: {}", file, e));
    data
}

/// One lookup scenario that `add_benchmark` runs against both jsonb and serde_json.
struct TestSuite<'a> {
/// Base name of the input file; `add_benchmark` expands it to `./data/{file}.json`.
file: &'a str,
/// Keys to follow, in order, starting from the document root.
paths: Vec<&'a str>,
/// String value the lookup is asserted to produce.
expected: &'a str,
}

/// Registers a `jsonb get …` and a `serde_json get …` benchmark for each lookup
/// scenario below.
///
/// Each scenario's JSON file is read from `./data/`; the jsonb benchmark runs on the
/// bytes produced by `jsonb::parse_value(..).to_vec()`, the serde_json benchmark on
/// the raw file contents.
fn add_benchmark(c: &mut Criterion) {
    let cases = [
        TestSuite {
            file: "canada",
            paths: vec!["type"],
            expected: "FeatureCollection",
        },
        TestSuite {
            file: "citm_catalog",
            paths: vec!["areaNames", "205705994"],
            expected: "1er balcon central",
        },
        TestSuite {
            file: "citm_catalog",
            paths: vec!["topicNames", "324846100"],
            expected: "Formations musicales",
        },
        TestSuite {
            file: "twitter",
            paths: vec!["search_metadata", "max_id_str"],
            expected: "505874924095815681",
        },
    ];

    for TestSuite {
        file,
        paths,
        expected,
    } in cases
    {
        let json_text = read(&format!("./data/{}.json", file));

        let parsed = jsonb::parse_value(&json_text).unwrap();
        let jsonb_bytes = parsed.to_vec();

        // Both benchmark ids share the same `file->key->key` suffix.
        let route = paths.join("->");

        let jsonb_id = format!("jsonb get {}->{}", file, route);
        c.bench_function(&jsonb_id, |b| {
            b.iter(|| jsonb_get(&jsonb_bytes, &paths, expected))
        });

        let serde_id = format!("serde_json get {}->{}", file, route);
        c.bench_function(&serde_id, |b| {
            b.iter(|| serde_json_get(&json_text, &paths, expected))
        });
    }
}

// Collect `add_benchmark` into the Criterion group `benches` and let Criterion
// generate the entry point (the `get_path` bench target sets `harness = false`).
criterion_group!(benches, add_benchmark);
criterion_main!(benches);
73 changes: 73 additions & 0 deletions benches/parser.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
// Copyright 2023 Datafuse Labs.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::fs;
use std::io::Read;

use criterion::{criterion_group, criterion_main, BatchSize, Criterion};

/// Parses `data` with `jsonb::parse_value` and discards the resulting value.
///
/// Panics if the parser returns an error.
fn parse_jsonb(data: &[u8]) {
    let outcome = jsonb::parse_value(data);
    let _value: jsonb::Value = outcome.unwrap();
}

/// Parses `data` into a `serde_json::Value` and discards the result.
///
/// Panics if the parser returns an error.
fn parse_serde_json(data: &[u8]) {
    let _value = serde_json::from_slice::<serde_json::Value>(data).unwrap();
}

/// Parses `data` with `json_deserializer::parse` and discards the resulting value.
///
/// Panics if the parser returns an error.
fn parse_json_deserializer(data: &[u8]) {
    let outcome = json_deserializer::parse(data);
    let _value: json_deserializer::Value = outcome.unwrap();
}

/// Parses `data` with `simd_json::to_borrowed_value` and discards the result.
///
/// The parser is handed the buffer mutably, so callers must pass a copy they do not
/// need to keep intact. Panics if the parser returns an error.
fn parse_simd_json(data: &mut [u8]) {
    let outcome = simd_json::to_borrowed_value(data);
    let _value = outcome.unwrap();
}

/// Reads the whole file at `file` into memory and returns its bytes.
///
/// # Panics
///
/// Panics if the file cannot be opened or read. The panic message names the path,
/// so a benchmark started from the wrong working directory is easy to diagnose.
fn read(file: &str) -> Vec<u8> {
    let mut f = fs::File::open(file)
        .unwrap_or_else(|e| panic!("failed to open benchmark input `{}`: {}", file, e));
    let mut data = Vec::new();
    f.read_to_end(&mut data)
        .unwrap_or_else(|e| panic!("failed to read benchmark input `{}`: {}", file, e));
    data
}

/// Registers four parse benchmarks (jsonb, serde_json, json_deserializer, simd_json)
/// for every entry found in `./data/`.
///
/// Each benchmark id is `"<parser> parse <path>"`, where `<path>` is the entry's path
/// as displayed by `Path::display`.
///
/// Panics if `./data/` cannot be listed or an entry cannot be read.
fn add_benchmark(c: &mut Criterion) {
    let entries = fs::read_dir("./data/").unwrap();
    for entry in entries {
        let file = entry.unwrap().path().display().to_string();
        let bytes = read(&file);

        c.bench_function(&format!("jsonb parse {}", file), |b| {
            b.iter(|| parse_jsonb(&bytes))
        });

        c.bench_function(&format!("serde_json parse {}", file), |b| {
            b.iter(|| parse_serde_json(&bytes))
        });

        c.bench_function(&format!("json_deserializer parse {}", file), |b| {
            b.iter(|| parse_json_deserializer(&bytes))
        });

        // `parse_simd_json` needs a mutable buffer, so every iteration gets its own
        // copy, built by the `iter_batched` setup closure. Borrowing `bytes` here is
        // enough; an extra up-front clone moved into the closure is unnecessary.
        c.bench_function(&format!("simd_json parse {}", file), |b| {
            b.iter_batched(
                || bytes.clone(),
                |mut data| parse_simd_json(&mut data),
                BatchSize::SmallInput,
            )
        });
    }
}

// Collect `add_benchmark` into the Criterion group `benches` and let Criterion
// generate the entry point (the `parser` bench target sets `harness = false`).
criterion_group!(benches, add_benchmark);
criterion_main!(benches);
9 changes: 9 additions & 0 deletions data/canada.json

Large diffs are not rendered by default.

Loading

0 comments on commit 2a34d3e

Please sign in to comment.