Skip to content

Commit 1862d45

Browse files
Merge branch 'develop' into claude/cool-bardeen-l8jlsy-2-mask-eq
2 parents 897128e + a289c23 commit 1862d45

30 files changed

Lines changed: 2326 additions & 225 deletions

File tree

Cargo.lock

Lines changed: 55 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

vortex-array/benches/slice_dict_primitive.rs

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,9 @@ use divan::Bencher;
88
use vortex_array::ArrayRef;
99
use vortex_array::IntoArray;
1010
use vortex_array::arrays::DictArray;
11+
use vortex_array::arrays::Primitive;
1112
use vortex_array::arrays::PrimitiveArray;
13+
use vortex_array::arrays::slice::SliceReduce;
1214

1315
fn main() {
1416
divan::main();
@@ -46,6 +48,29 @@ fn slice_primitive_tight_loop(bencher: Bencher, len: usize) {
4648
});
4749
}
4850

51+
#[divan::bench(args = ARRAY_LENGTHS)]
52+
fn slice_primitive_reduce_tight_loop(bencher: Bencher, len: usize) {
53+
let arr = build_primitive(len);
54+
let slice_len = 64;
55+
56+
let num_slices = len / slice_len;
57+
58+
bencher
59+
.with_inputs(|| (&arr, Vec::<ArrayRef>::with_capacity(num_slices)))
60+
.bench_refs(|(arr, out)| {
61+
out.clear();
62+
let mut offset = 0;
63+
while offset + slice_len <= len {
64+
out.push(
65+
<Primitive as SliceReduce>::slice(arr.as_view(), offset..offset + slice_len)
66+
.unwrap()
67+
.unwrap(),
68+
);
69+
offset += slice_len;
70+
}
71+
});
72+
}
73+
4974
#[divan::bench(args = ARRAY_LENGTHS)]
5075
fn slice_dict_tight_loop(bencher: Bencher, len: usize) {
5176
let dict = build_dict(len).into_array();

vortex-array/src/array/erased.rs

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -47,13 +47,13 @@ use crate::arrays::VarBinView;
4747
use crate::buffer::BufferHandle;
4848
use crate::builders::ArrayBuilder;
4949
use crate::dtype::DType;
50-
use crate::dtype::Nullability;
5150
use crate::expr::stats::Precision;
5251
use crate::expr::stats::Stat;
5352
use crate::expr::stats::StatsProviderExt;
5453
use crate::matcher::Matcher;
5554
use crate::optimizer::ArrayOptimizer;
5655
use crate::scalar::Scalar;
56+
use crate::scalar::ScalarValue;
5757
use crate::stats::StatsSetRef;
5858
use crate::validity::Validity;
5959

@@ -239,13 +239,10 @@ impl ArrayRef {
239239
matches!(
240240
stat,
241241
Stat::IsConstant | Stat::IsSorted | Stat::IsStrictSorted
242-
) && value.as_ref().as_exact().is_some_and(|v| {
243-
Scalar::try_new(DType::Bool(Nullability::NonNullable), Some(v.clone()))
244-
.vortex_expect("A stat that was expected to be a boolean stat was not")
245-
.as_bool()
246-
.value()
247-
.unwrap_or_default()
248-
})
242+
) && value
243+
.as_ref()
244+
.as_exact()
245+
.is_some_and(|v| matches!(v, ScalarValue::Bool(true)))
249246
}));
250247
});
251248
}

vortex-array/src/arrays/bool/compute/rules.rs

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -45,14 +45,15 @@ impl ArrayParentReduceRule<Bool> for BoolMaskedValidityRule {
4545
return Ok(None);
4646
}
4747

48+
let bit_buffer = array.to_bit_buffer();
4849
// Merge the parent's validity mask into the child's validity
4950
// TODO(joe): make this lazy
50-
Ok(Some(
51-
BoolArray::new(
52-
array.to_bit_buffer(),
53-
array.validity()?.and(parent.validity()?)?,
54-
)
55-
.into_array(),
56-
))
51+
let validity = array.validity()?.and(parent.validity()?)?;
52+
53+
// SAFETY:
54+
// the bit buffer is taken unchanged from an existing BoolArray, and the validity AND above fails if the two validities are mismatched.
55+
let array = unsafe { BoolArray::new_unchecked(bit_buffer, validity).into_array() };
56+
57+
Ok(Some(array))
5758
}
5859
}

vortex-array/src/arrays/bool/compute/slice.rs

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,13 @@ use crate::arrays::slice::SliceReduce;
1515

1616
impl SliceReduce for Bool {
1717
fn slice(array: ArrayView<'_, Bool>, range: Range<usize>) -> VortexResult<Option<ArrayRef>> {
18-
Ok(Some(
19-
BoolArray::new(
20-
array.to_bit_buffer().slice(range.clone()),
21-
array.validity()?.slice(range)?,
22-
)
23-
.into_array(),
24-
))
18+
let bit_buffer = array.to_bit_buffer().slice(range.clone());
19+
let validity = array.validity()?.slice(range)?;
20+
21+
// SAFETY:
22+
// range is verified in the callers and is the same for both bits and validity.
23+
let array = unsafe { BoolArray::new_unchecked(bit_buffer, validity).into_array() };
24+
25+
Ok(Some(array))
2526
}
2627
}

vortex-array/src/arrays/dict/compute/slice.rs

Lines changed: 58 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
use std::ops::Range;
55

6+
use vortex_error::VortexExpect;
67
use vortex_error::VortexResult;
78

89
use crate::ArrayRef;
@@ -12,35 +13,74 @@ use crate::arrays::Constant;
1213
use crate::arrays::ConstantArray;
1314
use crate::arrays::Dict;
1415
use crate::arrays::DictArray;
16+
use crate::arrays::Primitive;
1517
use crate::arrays::dict::DictArraySlotsExt;
1618
use crate::arrays::slice::SliceReduce;
19+
use crate::expr::stats::Precision;
20+
use crate::expr::stats::Stat;
1721
use crate::scalar::Scalar;
22+
use crate::scalar::ScalarValue;
1823

1924
impl SliceReduce for Dict {
2025
fn slice(array: ArrayView<'_, Self>, range: Range<usize>) -> VortexResult<Option<ArrayRef>> {
21-
let sliced_code = array.codes().slice(range)?;
26+
if let Some(code) = array.codes().as_opt::<Constant>() {
27+
return slice_constant_code(array, code.scalar(), range.len());
28+
}
29+
30+
let sliced_code = if let Some(codes) = array.codes().as_typed::<Primitive>() {
31+
let sliced_code = <Primitive as SliceReduce>::slice(codes, range)?
32+
.vortex_expect("Primitive SliceReduce should always return Some");
33+
// Because we specialize the primitive branch here, we have to make sure to handle the stat inheritance
34+
inherit_slice_stats(array.codes(), &sliced_code);
35+
sliced_code
36+
} else {
37+
array.codes().slice(range)?
38+
};
39+
2240
// TODO(joe): if the range is size 1 replace with a constant array
2341
if let Some(code) = sliced_code.as_opt::<Constant>() {
24-
let code = code.scalar().as_primitive().as_::<usize>();
25-
return if let Some(code) = code {
26-
let values = array.values().slice(code..code + 1)?;
27-
Ok(Some(
28-
DictArray::new(
29-
ConstantArray::new(0u8, sliced_code.len()).into_array(),
30-
values,
31-
)
32-
.into_array(),
33-
))
34-
} else {
35-
Ok(Some(
36-
ConstantArray::new(Scalar::null(array.dtype().clone()), sliced_code.len())
37-
.into_array(),
38-
))
39-
};
42+
return slice_constant_code(array, code.scalar(), sliced_code.len());
4043
}
4144
// SAFETY: slicing the codes preserves invariants.
45+
let array =
46+
unsafe { DictArray::new_unchecked(sliced_code, array.values().clone()).into_array() };
47+
48+
Ok(Some(array))
49+
}
50+
}
51+
52+
fn inherit_slice_stats(source: &ArrayRef, sliced: &ArrayRef) {
53+
source.statistics().with_iter(|iter| {
54+
sliced
55+
.statistics()
56+
.inherit(iter.filter(|(stat, value)| is_inheritable_true_slice_stat(*stat, value)));
57+
});
58+
}
59+
60+
fn is_inheritable_true_slice_stat(stat: Stat, value: &Precision<ScalarValue>) -> bool {
61+
matches!(
62+
stat,
63+
Stat::IsConstant | Stat::IsSorted | Stat::IsStrictSorted
64+
) && value
65+
.as_ref()
66+
.as_exact()
67+
.is_some_and(|value| matches!(value, ScalarValue::Bool(true)))
68+
}
69+
70+
fn slice_constant_code(
71+
array: ArrayView<'_, Dict>,
72+
code: &Scalar,
73+
len: usize,
74+
) -> VortexResult<Option<ArrayRef>> {
75+
let code = code.as_primitive().as_::<usize>();
76+
if let Some(code) = code {
77+
let values = array.values().slice(code..code + 1)?;
78+
Ok(Some(
79+
DictArray::new(ConstantArray::new(0u8, len).into_array(), values).into_array(),
80+
))
81+
} else {
4282
Ok(Some(
43-
unsafe { DictArray::new_unchecked(sliced_code, array.values().clone()) }.into_array(),
83+
ConstantArray::new(Scalar::null(array.dtype().clone()), len).into_array(),
4484
))
4585
}
4686
}

vortex-array/src/arrays/primitive/compute/slice.rs

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -11,19 +11,21 @@ use crate::array::ArrayView;
1111
use crate::arrays::Primitive;
1212
use crate::arrays::PrimitiveArray;
1313
use crate::arrays::slice::SliceReduce;
14-
use crate::dtype::NativePType;
15-
use crate::match_each_native_ptype;
1614

1715
impl SliceReduce for Primitive {
1816
fn slice(array: ArrayView<'_, Self>, range: Range<usize>) -> VortexResult<Option<ArrayRef>> {
19-
let result = match_each_native_ptype!(array.ptype(), |T| {
20-
PrimitiveArray::from_buffer_handle(
21-
array.buffer_handle().slice_typed::<T>(range.clone()),
22-
T::PTYPE,
23-
array.validity()?.slice(range)?,
24-
)
25-
.into_array()
26-
});
27-
Ok(Some(result))
17+
let byte_width = array.ptype().byte_width();
18+
let byte_range = range.start * byte_width..range.end * byte_width;
19+
let values = array.buffer_handle().slice(byte_range);
20+
let validity = array.validity()?.slice(range)?;
21+
22+
// SAFETY:
23+
// Slicing an existing PrimitiveArray on element boundaries preserves the buffer
24+
// alignment, ptype, length, and validity invariants.
25+
let array = unsafe {
26+
PrimitiveArray::new_unchecked_from_handle(values, array.ptype(), validity).into_array()
27+
};
28+
29+
Ok(Some(array))
2830
}
2931
}

vortex-array/src/arrays/varbinview/compute/slice.rs

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -16,16 +16,19 @@ use crate::arrays::varbinview::BinaryView;
1616

1717
impl SliceReduce for VarBinView {
1818
fn slice(array: ArrayView<'_, Self>, range: Range<usize>) -> VortexResult<Option<ArrayRef>> {
19-
Ok(Some(
20-
VarBinViewArray::new_handle(
21-
array
22-
.views_handle()
23-
.slice_typed::<BinaryView>(range.clone()),
24-
Arc::clone(array.data_buffers()),
25-
array.dtype().clone(),
26-
array.validity()?.slice(range)?,
27-
)
28-
.into_array(),
29-
))
19+
let views = array
20+
.views_handle()
21+
.slice_typed::<BinaryView>(range.clone());
22+
let data_buffers = Arc::clone(array.data_buffers());
23+
let dtype = array.dtype().clone();
24+
let validity = array.validity()?.slice(range)?;
25+
26+
// SAFETY:
27+
// range is validated within bounds, and is shared between all children.
28+
let array = unsafe {
29+
VarBinViewArray::new_handle_unchecked(views, data_buffers, dtype, validity).into_array()
30+
};
31+
32+
Ok(Some(array))
3033
}
3134
}

vortex-buffer/benches/vortex_bitbuffer.rs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,17 @@ fn bitwise_and_arrow_buffer(bencher: Bencher, length: usize) {
210210
.bench_refs(|(a, b)| &a.0 & &b.0);
211211
}
212212

213+
/// Owned-LHS AND: the left operand is a fresh, uniquely-owned `BitBuffer` each iteration, so
214+
/// `bitwise_binary_op_lhs_owned` takes the in-place (zero-allocation) fast path. Compare against
215+
/// `bitwise_and_vortex_buffer` (reference-LHS, which always allocates a result buffer).
216+
#[divan::bench(args = INPUT_SIZE)]
217+
fn bitand_owned_lhs_vortex_buffer(bencher: Bencher, length: usize) {
218+
let b = BitBuffer::from_iter((0..length).map(|i| i % 3 == 0));
219+
bencher
220+
.with_inputs(|| BitBuffer::from_iter((0..length).map(|i| i % 2 == 0)))
221+
.bench_values(|a| a & &b);
222+
}
223+
213224
#[divan::bench(args = INPUT_SIZE)]
214225
fn bitwise_or_vortex_buffer(bencher: Bencher, length: usize) {
215226
let a = BitBuffer::from_iter((0..length).map(|i| i % 2 == 0));

0 commit comments

Comments
 (0)