Skip to content

Commit 62fd392

Browse files
author
Kazantsev Maksim
committed
Add space benches
1 parent 3c0b045 commit 62fd392

File tree

3 files changed

+89
-25
lines changed

3 files changed

+89
-25
lines changed

datafusion/spark/Cargo.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,3 +61,7 @@ criterion = { workspace = true }
6161
[[bench]]
6262
harness = false
6363
name = "char"
64+
65+
[[bench]]
66+
harness = false
67+
name = "space"

datafusion/spark/benches/space.rs

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
extern crate criterion;
19+
20+
use arrow::array::PrimitiveArray;
21+
use arrow::datatypes::{DataType, Field, Int32Type};
22+
use criterion::{Criterion, criterion_group, criterion_main};
23+
use datafusion_common::config::ConfigOptions;
24+
use datafusion_expr::{ColumnarValue, ScalarFunctionArgs};
25+
use datafusion_spark::function::string::space;
26+
use rand::prelude::StdRng;
27+
use rand::{Rng, SeedableRng};
28+
use std::hint::black_box;
29+
use std::sync::Arc;
30+
31+
fn criterion_benchmark(c: &mut Criterion) {
32+
let space_func = space();
33+
let size = 1024;
34+
let input: PrimitiveArray<Int32Type> = {
35+
let null_density = 0.2;
36+
let mut rng = StdRng::seed_from_u64(42);
37+
(0..size)
38+
.map(|_| {
39+
if rng.random::<f32>() < null_density {
40+
None
41+
} else {
42+
Some(rng.random_range::<i32, _>(1i32..10))
43+
}
44+
})
45+
.collect()
46+
};
47+
let input = Arc::new(input);
48+
let args = vec![ColumnarValue::Array(input)];
49+
let arg_fields = args
50+
.iter()
51+
.enumerate()
52+
.map(|(idx, arg)| Field::new(format!("arg_{idx}"), arg.data_type(), true).into())
53+
.collect::<Vec<_>>();
54+
let config_options = Arc::new(ConfigOptions::default());
55+
c.bench_function("space", |b| {
56+
b.iter(|| {
57+
black_box(
58+
space_func
59+
.invoke_with_args(ScalarFunctionArgs {
60+
args: args.clone(),
61+
arg_fields: arg_fields.clone(),
62+
number_rows: size,
63+
return_field: Arc::new(Field::new("f", DataType::Utf8, true)),
64+
config_options: Arc::clone(&config_options),
65+
})
66+
.unwrap(),
67+
)
68+
})
69+
});
70+
}
71+
72+
criterion_group!(benches, criterion_benchmark);
73+
criterion_main!(benches);

datafusion/spark/src/function/string/space.rs

Lines changed: 12 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -140,32 +140,19 @@ fn spark_space_scalar(scalar: &ScalarValue) -> Result<ScalarValue> {
140140
}
141141

142142
fn spark_space_array_inner(array: &Int32Array) -> StringArray {
143-
let values = array.values();
144-
let data_capacity = values
145-
.iter()
146-
.map(|l| if *l < 0 { 0 } else { *l as usize })
147-
.sum();
148-
149-
let max_length = values
150-
.iter()
151-
.filter(|&&l| l > 0)
152-
.max()
153-
.copied()
154-
.unwrap_or(0) as usize;
155-
156-
let space_buffer = " ".repeat(max_length);
157-
let mut builder = StringBuilder::with_capacity(array.len(), data_capacity);
158-
159-
for i in 0..array.len() {
160-
if array.is_null(i) {
161-
builder.append_null();
162-
} else {
163-
let len = array.value(i);
164-
if len <= 0 {
165-
builder.append_value("");
166-
} else {
167-
builder.append_value(&space_buffer[..len as usize]);
143+
let mut builder = StringBuilder::with_capacity(array.len(), array.len() * 16);
144+
let mut space_buf = String::new();
145+
for value in array.iter() {
146+
match value {
147+
None => builder.append_null(),
148+
Some(l) if l > 0 => {
149+
let l = l as usize;
150+
if space_buf.len() < l {
151+
space_buf = " ".repeat(l);
152+
}
153+
builder.append_value(&space_buf[..l]);
168154
}
155+
Some(_) => builder.append_value(""),
169156
}
170157
}
171158
builder.finish()

0 commit comments

Comments
 (0)