// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
1919
20- use criterion:: { criterion_group, criterion_main, Criterion , Throughput } ;
20+ use binggan:: plugins:: * ;
21+ use binggan:: { black_box, BenchRunner , PeakMemAlloc , INSTRUMENTED_SYSTEM } ;
2122use quickwit_doc_mapper:: DocMapper ;
2223use tantivy:: TantivyDocument ;
2324
24- const JSON_TEST_DATA : & str = include_str ! ( "data/simple-parse-bench.json" ) ;
25+ const SIMPLE_JSON_TEST_DATA : & str = include_str ! ( "data/simple-parse-bench.json" ) ;
26+ const ROUTING_TEST_DATA : & str = include_str ! ( "data/simple-routing-expression-bench.json" ) ;
2527
26- const DOC_MAPPER_CONF : & str = r#"{
28+ const DOC_MAPPER_CONF_SIMPLE_JSON : & str = r#"{
2729 "type": "default",
2830 "default_search_fields": [],
2931 "tag_fields": [],
@@ -35,28 +37,92 @@ const DOC_MAPPER_CONF: &str = r#"{
3537 ]
3638}"# ;
3739
38- pub fn simple_json_to_doc_benchmark ( c : & mut Criterion ) {
39- let doc_mapper: Box < DocMapper > = serde_json:: from_str ( DOC_MAPPER_CONF ) . unwrap ( ) ;
40- let lines: Vec < & str > = JSON_TEST_DATA . lines ( ) . map ( |line| line. trim ( ) ) . collect ( ) ;
/// Doc mapper configuration used with the routing-expression dataset; documents are partitioned
/// on `seller.id`.
///
/// Note: the mapping
/// `{"name": "date", "type": "datetime", "input_formats": ["%Y-%m-%d"], "output_format": "%Y-%m-%d"}`
/// is intentionally left out, since tantivy's own JSON parsing only supports RFC 3339 datetimes.
///
/// Every key appears at most once per JSON object: duplicate keys are ambiguous and, depending
/// on the deserializer, are either rejected or silently resolved to one of the values.
const ROUTING_DOC_MAPPER_CONF: &str = r#"{
    "type": "default",
    "default_search_fields": [],
    "tag_fields": [],
    "field_mappings": [
        {"name": "timestamp", "type": "datetime", "input_formats": ["unix_timestamp"], "output_format": "%Y-%m-%d %H:%M:%S", "fast": true },
        {"name": "source", "type": "text" },
        {"name": "vin", "type": "text" },
        {"name": "vid", "type": "text" },
        {"name": "domain", "type": "text" },
        {"name": "seller", "type": "object", "field_mappings": [
            {"name": "id", "type": "text" },
            {"name": "name", "type": "text" },
            {"name": "address", "type": "text" },
            {"name": "zip", "type": "text" }
        ]}
    ],
    "partition_key": "seller.id"
}"#;
61+
62+ #[ global_allocator]
63+ pub static GLOBAL : & PeakMemAlloc < std:: alloc:: System > = & INSTRUMENTED_SYSTEM ;
64+
65+ fn get_test_data (
66+ name : & ' static str ,
67+ raw : & ' static str ,
68+ doc_mapper : & ' static str ,
69+ ) -> ( & ' static str , usize , Vec < & ' static str > , Box < DocMapper > ) {
70+ let lines: Vec < & str > = raw. lines ( ) . map ( |line| line. trim ( ) ) . collect ( ) ;
71+ (
72+ name,
73+ raw. len ( ) ,
74+ lines,
75+ serde_json:: from_str ( doc_mapper) . unwrap ( ) ,
76+ )
77+ }
4178
42- let mut group = c. benchmark_group ( "simple-json-to-doc" ) ;
43- group. throughput ( Throughput :: Bytes ( JSON_TEST_DATA . len ( ) as u64 ) ) ;
44- group. bench_function ( "simple-json-to-doc" , |b| {
45- b. iter ( || {
46- for line in & lines {
47- doc_mapper. doc_from_json_str ( line) . unwrap ( ) ;
79+ fn run_bench ( ) {
80+ let inputs: Vec < ( & str , usize , Vec < & str > , Box < DocMapper > ) > = vec ! [
81+ ( get_test_data(
82+ "flat_json" ,
83+ SIMPLE_JSON_TEST_DATA ,
84+ DOC_MAPPER_CONF_SIMPLE_JSON ,
85+ ) ) ,
86+ ( get_test_data( "routing_json" , ROUTING_TEST_DATA , ROUTING_DOC_MAPPER_CONF ) ) ,
87+ ] ;
88+
89+ let mut runner: BenchRunner = BenchRunner :: new ( ) ;
90+
91+ runner. config ( ) . set_num_iter_for_bench ( 1 ) ;
92+ runner. config ( ) . set_num_iter_for_group ( 100 ) ;
93+ runner
94+ . add_plugin ( CacheTrasher :: default ( ) )
95+ . add_plugin ( BPUTrasher :: default ( ) )
96+ . add_plugin ( PeakMemAllocPlugin :: new ( GLOBAL ) ) ;
97+
98+ for ( input_name, size, data, doc_mapper) in inputs. iter ( ) {
99+ let dynamic_doc_mapper: DocMapper =
100+ serde_json:: from_str ( r#"{ "mode": "dynamic" }"# ) . unwrap ( ) ;
101+ let mut group = runner. new_group ( ) ;
102+ group. set_name ( input_name) ;
103+ group. set_input_size ( * size) ;
104+ group. register_with_input ( "doc_mapper" , data, |lines| {
105+ for line in lines {
106+ black_box ( doc_mapper. doc_from_json_str ( line) . unwrap ( ) ) ;
48107 }
49- } )
50- } ) ;
51- group. bench_function ( "simple-json-to-doc-tantivy" , |b| {
52- b. iter ( || {
108+ } ) ;
109+
110+ group. register_with_input ( "doc_mapper_dynamic" , data, |lines| {
111+ for line in lines {
112+ black_box ( dynamic_doc_mapper. doc_from_json_str ( line) . unwrap ( ) ) ;
113+ }
114+ } ) ;
115+
116+ group. register_with_input ( "tantivy parse json" , data, |lines| {
53117 let schema = doc_mapper. schema ( ) ;
54- for line in & lines {
55- let _doc = TantivyDocument :: parse_json ( & schema, line) . unwrap ( ) ;
118+ for line in lines {
119+ let _doc = black_box ( TantivyDocument :: parse_json ( & schema, line) . unwrap ( ) ) ;
56120 }
57- } )
58- } ) ;
121+ } ) ;
122+ group. run ( ) ;
123+ }
59124}
60125
61- criterion_group ! ( benches, simple_json_to_doc_benchmark) ;
62- criterion_main ! ( benches) ;
126+ fn main ( ) {
127+ run_bench ( ) ;
128+ }
0 commit comments