From 714f0f45eb43190626c0eef75537ef5792d8c54f Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 5 Jun 2026 15:35:50 +0000 Subject: [PATCH 1/7] Replace Validity::is_valid with execute_is_valid taking an ExecutionCtx `Validity::is_valid`/`is_null` previously created an internal `LEGACY_SESSION` execution context to evaluate the validity array. Replace them with `execute_is_valid`/`execute_is_null` that accept an `&mut ExecutionCtx` from the caller, threading the context through where one is available and falling back to a `LEGACY_SESSION` context only at boundaries that do not yet have one. Signed-off-by: Joe Isaacs --- encodings/alp/src/alp/array.rs | 8 ++- .../fastlanes/src/rle/array/rle_compress.rs | 2 +- encodings/parquet-variant/src/operations.rs | 2 +- .../src/arrays/constant/vtable/canonical.rs | 13 ++--- .../src/arrays/fixed_size_list/array.rs | 15 ++++-- vortex-array/src/arrays/listview/rebuild.rs | 15 ++++-- vortex-array/src/arrays/varbin/array.rs | 3 +- .../src/arrays/varbin/compute/filter.rs | 4 +- vortex-array/src/arrays/varbinview/array.rs | 5 +- vortex-array/src/builders/fixed_size_list.rs | 50 +++++++++---------- vortex-array/src/builders/list.rs | 6 +-- vortex-array/src/builders/listview.rs | 12 +++-- vortex-array/src/builders/primitive.rs | 6 +-- vortex-array/src/builders/tests.rs | 7 ++- vortex-array/src/validity.rs | 16 +++--- vortex-duckdb/src/duckdb/vector.rs | 12 ++++- .../encodings/turboquant/tests/nullable.rs | 6 +-- vortex-tensor/src/scalar_fns/l2_denorm.rs | 4 +- .../src/scalar_fns/sorf_transform/tests.rs | 4 +- 19 files changed, 108 insertions(+), 82 deletions(-) diff --git a/encodings/alp/src/alp/array.rs b/encodings/alp/src/alp/array.rs index cac93d1d27d..c667cb3e199 100644 --- a/encodings/alp/src/alp/array.rs +++ b/encodings/alp/src/alp/array.rs @@ -707,10 +707,8 @@ mod tests { let slice_len = slice_end - slice_start; let sliced_encoded = encoded.slice(slice_start..slice_end).unwrap(); - let result_canonical = { - let mut ctx = SESSION.create_execution_ctx(); - sliced_encoded.execute::(&mut ctx).unwrap() - }; + let mut ctx = SESSION.create_execution_ctx(); + let result_canonical = sliced_encoded.execute::(&mut ctx).unwrap(); let result_primitive = result_canonical.into_primitive(); for idx in 0..slice_len { @@ -719,7 +717,7 @@ mod tests { let result_valid = result_primitive .validity() .vortex_expect("result validity should be derivable") - .is_valid(idx) + .execute_is_valid(idx, &mut ctx) .unwrap(); assert_eq!( result_valid, diff --git a/encodings/fastlanes/src/rle/array/rle_compress.rs b/encodings/fastlanes/src/rle/array/rle_compress.rs index e48f31f51da..107c912c23f 100644 --- a/encodings/fastlanes/src/rle/array/rle_compress.rs +++ b/encodings/fastlanes/src/rle/array/rle_compress.rs @@ -440,7 +440,7 @@ mod tests { let mut rng_state: u32 = 0xDEAD_BEEF; let validity = indices_prim.validity()?; for (i, idx) in indices_data.iter_mut().enumerate() { - if !validity.is_valid(i).unwrap_or(true) { + if !validity.execute_is_valid(i, ctx).unwrap_or(true) { // xorshift32 rng_state ^= rng_state << 13; rng_state ^= rng_state >> 17; diff --git a/encodings/parquet-variant/src/operations.rs b/encodings/parquet-variant/src/operations.rs index 897c7fca3bb..10f11e2bf2c 100644 --- a/encodings/parquet-variant/src/operations.rs +++ b/encodings/parquet-variant/src/operations.rs @@ -41,7 +41,7 @@ impl OperationsVTable for ParquetVariant { index: usize, ctx: &mut ExecutionCtx, ) -> VortexResult { - if array.validity()?.is_null(index)? { + if array.validity()?.execute_is_null(index, ctx)? { return Ok(Scalar::null(DType::Variant(Nullability::Nullable))); } diff --git a/vortex-array/src/arrays/constant/vtable/canonical.rs b/vortex-array/src/arrays/constant/vtable/canonical.rs index 53dcd3d262d..0db0411c03f 100644 --- a/vortex-array/src/arrays/constant/vtable/canonical.rs +++ b/vortex-array/src/arrays/constant/vtable/canonical.rs @@ -754,14 +754,15 @@ mod tests { let element_validity = elements .validity() .vortex_expect("constant canonical element validity should be derivable"); - assert!(element_validity.is_valid(0).unwrap()); - assert!(!element_validity.is_valid(1).unwrap()); - assert!(element_validity.is_valid(2).unwrap()); + let mut ctx = LEGACY_SESSION.create_execution_ctx(); + assert!(element_validity.execute_is_valid(0, &mut ctx).unwrap()); + assert!(!element_validity.execute_is_valid(1, &mut ctx).unwrap()); + assert!(element_validity.execute_is_valid(2, &mut ctx).unwrap()); // Pattern should repeat. - assert!(element_validity.is_valid(3).unwrap()); - assert!(!element_validity.is_valid(4).unwrap()); - assert!(element_validity.is_valid(5).unwrap()); + assert!(element_validity.execute_is_valid(3, &mut ctx).unwrap()); + assert!(!element_validity.execute_is_valid(4, &mut ctx).unwrap()); + assert!(element_validity.execute_is_valid(5, &mut ctx).unwrap()); } #[test] diff --git a/vortex-array/src/arrays/fixed_size_list/array.rs b/vortex-array/src/arrays/fixed_size_list/array.rs index cc82551ab82..f24001c4e78 100644 --- a/vortex-array/src/arrays/fixed_size_list/array.rs +++ b/vortex-array/src/arrays/fixed_size_list/array.rs @@ -12,6 +12,8 @@ use vortex_error::vortex_ensure; use crate::ArrayRef; use crate::ArraySlots; +use crate::LEGACY_SESSION; +use crate::VortexSessionExecute; use crate::array::Array; use crate::array::ArrayParts; use crate::array::TypedArrayRef; @@ -236,11 +238,14 @@ pub trait FixedSizeListArrayExt: TypedArrayRef { index, self.as_ref().len(), ); - debug_assert!( - self.fixed_size_list_validity() - .is_valid(index) - .unwrap_or(false) - ); + #[expect(clippy::debug_assert_with_mut_call)] + { + debug_assert!( + self.fixed_size_list_validity() + .execute_is_valid(index, &mut LEGACY_SESSION.create_execution_ctx()) + .unwrap_or(false) + ); + } let start = self.list_size() as usize * index; let end = self.list_size() as usize * (index + 1); diff --git a/vortex-array/src/arrays/listview/rebuild.rs b/vortex-array/src/arrays/listview/rebuild.rs index 2bcb4ef0c1f..077aa22b228 100644 --- a/vortex-array/src/arrays/listview/rebuild.rs +++ b/vortex-array/src/arrays/listview/rebuild.rs @@ -209,9 +209,11 @@ impl ListViewArray { let mut new_sizes = BufferMut::::with_capacity(len); let mut take_indices = BufferMut::::with_capacity(self.elements().len()); + let mut ctx = LEGACY_SESSION.create_execution_ctx(); + let validity = self.validity()?; let mut n_elements = NewOffset::zero(); for index in 0..len { - if !self.validity()?.is_valid(index)? { + if !validity.execute_is_valid(index, &mut ctx)? { new_offsets.push(n_elements); new_sizes.push(S::zero()); continue; @@ -292,9 +294,11 @@ impl ListViewArray { let mut new_elements_builder = builder_with_capacity(element_dtype.as_ref(), self.elements().len()); + let mut ctx = LEGACY_SESSION.create_execution_ctx(); + let validity = self.validity()?; let mut n_elements = NewOffset::zero(); for index in 0..len { - if !self.validity()?.is_valid(index)? { + if !validity.execute_is_valid(index, &mut ctx)? { // For NULL lists, place them after the previous item's data to maintain the // no-overlap invariant for zero-copy to `ListArray` arrays. new_offsets.push(n_elements); @@ -482,9 +486,10 @@ mod tests { // Verify nullability is preserved assert_eq!(flattened.dtype().nullability(), Nullability::Nullable); - assert!(flattened.validity()?.is_valid(0)?); - assert!(!flattened.validity()?.is_valid(1)?); - assert!(flattened.validity()?.is_valid(2)?); + let mut ctx = LEGACY_SESSION.create_execution_ctx(); + assert!(flattened.validity()?.execute_is_valid(0, &mut ctx)?); + assert!(!flattened.validity()?.execute_is_valid(1, &mut ctx)?); + assert!(flattened.validity()?.execute_is_valid(2, &mut ctx)?); // Verify valid lists contain correct data assert_arrays_eq!( diff --git a/vortex-array/src/arrays/varbin/array.rs b/vortex-array/src/arrays/varbin/array.rs index deaef952d5e..c357abd86e3 100644 --- a/vortex-array/src/arrays/varbin/array.rs +++ b/vortex-array/src/arrays/varbin/array.rs @@ -248,12 +248,13 @@ impl VarBinData { let primitive_offsets = offsets.to_primitive(); match_each_integer_ptype!(primitive_offsets.dtype().as_ptype(), |O| { let offsets_slice = primitive_offsets.as_slice::(); + let mut ctx = LEGACY_SESSION.create_execution_ctx(); for (i, (start, end)) in offsets_slice .windows(2) .map(|o| (o[0].as_(), o[1].as_())) .enumerate() { - if validity.is_null(i)? { + if validity.execute_is_null(i, &mut ctx)? { continue; } diff --git a/vortex-array/src/arrays/varbin/compute/filter.rs b/vortex-array/src/arrays/varbin/compute/filter.rs index b3e1aa87a4f..69e0125037c 100644 --- a/vortex-array/src/arrays/varbin/compute/filter.rs +++ b/vortex-array/src/arrays/varbin/compute/filter.rs @@ -172,6 +172,7 @@ fn filter_select_var_bin_by_index( mask_indices, values.validity()?, selection_count, + ctx, ) }) } @@ -184,10 +185,11 @@ fn filter_select_var_bin_by_index_primitive_offset( // TODO(ngates): pass LogicalValidity instead validity: Validity, selection_count: usize, + ctx: &mut ExecutionCtx, ) -> VortexResult { let mut builder = VarBinBuilder::::with_capacity(selection_count); for idx in mask_indices.iter().copied() { - if validity.is_valid(idx)? { + if validity.execute_is_valid(idx, ctx)? { let (start, end) = ( offsets[idx].to_usize().ok_or_else(|| { vortex_err!("Failed to convert offset to usize: {}", offsets[idx]) diff --git a/vortex-array/src/arrays/varbinview/array.rs b/vortex-array/src/arrays/varbinview/array.rs index e302c9341d2..713be8b3ab6 100644 --- a/vortex-array/src/arrays/varbinview/array.rs +++ b/vortex-array/src/arrays/varbinview/array.rs @@ -18,6 +18,8 @@ use vortex_error::vortex_err; use vortex_error::vortex_panic; use crate::ArraySlots; +use crate::LEGACY_SESSION; +use crate::VortexSessionExecute; use crate::array::Array; use crate::array::ArrayParts; use crate::array::TypedArrayRef; @@ -316,8 +318,9 @@ impl VarBinViewData { where F: Fn(&[u8]) -> bool, { + let mut ctx = LEGACY_SESSION.create_execution_ctx(); for (idx, &view) in views.iter().enumerate() { - if validity.is_null(idx)? { + if validity.execute_is_null(idx, &mut ctx)? { continue; } diff --git a/vortex-array/src/builders/fixed_size_list.rs b/vortex-array/src/builders/fixed_size_list.rs index aece8c21465..dc3416ccfb2 100644 --- a/vortex-array/src/builders/fixed_size_list.rs +++ b/vortex-array/src/builders/fixed_size_list.rs @@ -474,21 +474,21 @@ mod tests { fsl_array .validity() .vortex_expect("fixed-size-list validity should be derivable") - .is_valid(0) + .execute_is_valid(0, &mut LEGACY_SESSION.create_execution_ctx()) .unwrap() ); assert!( !fsl_array .validity() .vortex_expect("fixed-size-list validity should be derivable") - .is_valid(1) + .execute_is_valid(1, &mut LEGACY_SESSION.create_execution_ctx()) .unwrap() ); assert!( fsl_array .validity() .vortex_expect("fixed-size-list validity should be derivable") - .is_valid(2) + .execute_is_valid(2, &mut LEGACY_SESSION.create_execution_ctx()) .unwrap() ); } @@ -582,7 +582,7 @@ mod tests { !fsl_array .validity() .vortex_expect("fixed-size-list validity should be derivable") - .is_valid(i) + .execute_is_valid(i, &mut LEGACY_SESSION.create_execution_ctx()) .unwrap() ); } @@ -612,7 +612,7 @@ mod tests { !fsl_array .validity() .vortex_expect("fixed-size-list validity should be derivable") - .is_valid(0) + .execute_is_valid(0, &mut LEGACY_SESSION.create_execution_ctx()) .unwrap() ); } @@ -690,42 +690,42 @@ mod tests { fsl_array .validity() .vortex_expect("fixed-size-list validity should be derivable") - .is_valid(0) + .execute_is_valid(0, &mut LEGACY_SESSION.create_execution_ctx()) .unwrap() ); assert!( !fsl_array .validity() .vortex_expect("fixed-size-list validity should be derivable") - .is_valid(1) + .execute_is_valid(1, &mut LEGACY_SESSION.create_execution_ctx()) .unwrap() ); assert!( fsl_array .validity() .vortex_expect("fixed-size-list validity should be derivable") - .is_valid(2) + .execute_is_valid(2, &mut LEGACY_SESSION.create_execution_ctx()) .unwrap() ); assert!( fsl_array .validity() .vortex_expect("fixed-size-list validity should be derivable") - .is_valid(3) + .execute_is_valid(3, &mut LEGACY_SESSION.create_execution_ctx()) .unwrap() ); assert!( !fsl_array .validity() .vortex_expect("fixed-size-list validity should be derivable") - .is_valid(4) + .execute_is_valid(4, &mut LEGACY_SESSION.create_execution_ctx()) .unwrap() ); assert!( fsl_array .validity() .vortex_expect("fixed-size-list validity should be derivable") - .is_valid(5) + .execute_is_valid(5, &mut LEGACY_SESSION.create_execution_ctx()) .unwrap() ); } @@ -767,35 +767,35 @@ mod tests { fsl_array .validity() .vortex_expect("fixed-size-list validity should be derivable") - .is_valid(0) + .execute_is_valid(0, &mut LEGACY_SESSION.create_execution_ctx()) .unwrap() ); assert!( !fsl_array .validity() .vortex_expect("fixed-size-list validity should be derivable") - .is_valid(1) + .execute_is_valid(1, &mut LEGACY_SESSION.create_execution_ctx()) .unwrap() ); assert!( fsl_array .validity() .vortex_expect("fixed-size-list validity should be derivable") - .is_valid(2) + .execute_is_valid(2, &mut LEGACY_SESSION.create_execution_ctx()) .unwrap() ); assert!( !fsl_array .validity() .vortex_expect("fixed-size-list validity should be derivable") - .is_valid(3) + .execute_is_valid(3, &mut LEGACY_SESSION.create_execution_ctx()) .unwrap() ); assert!( fsl_array .validity() .vortex_expect("fixed-size-list validity should be derivable") - .is_valid(4) + .execute_is_valid(4, &mut LEGACY_SESSION.create_execution_ctx()) .unwrap() ); } @@ -879,42 +879,42 @@ mod tests { fsl_array .validity() .vortex_expect("fixed-size-list validity should be derivable") - .is_valid(0) + .execute_is_valid(0, &mut LEGACY_SESSION.create_execution_ctx()) .unwrap() ); // append_value assert!( !fsl_array .validity() .vortex_expect("fixed-size-list validity should be derivable") - .is_valid(1) + .execute_is_valid(1, &mut LEGACY_SESSION.create_execution_ctx()) .unwrap() ); // append_null assert!( fsl_array .validity() .vortex_expect("fixed-size-list validity should be derivable") - .is_valid(2) + .execute_is_valid(2, &mut LEGACY_SESSION.create_execution_ctx()) .unwrap() ); // append_zeros assert!( fsl_array .validity() .vortex_expect("fixed-size-list validity should be derivable") - .is_valid(3) + .execute_is_valid(3, &mut LEGACY_SESSION.create_execution_ctx()) .unwrap() ); // append_zeros assert!( !fsl_array .validity() .vortex_expect("fixed-size-list validity should be derivable") - .is_valid(4) + .execute_is_valid(4, &mut LEGACY_SESSION.create_execution_ctx()) .unwrap() ); // append_nulls assert!( fsl_array .validity() .vortex_expect("fixed-size-list validity should be derivable") - .is_valid(5) + .execute_is_valid(5, &mut LEGACY_SESSION.create_execution_ctx()) .unwrap() ); // extend_from_array } @@ -967,21 +967,21 @@ mod tests { array .validity() .vortex_expect("fixed-size-list validity should be derivable") - .is_valid(0) + .execute_is_valid(0, &mut LEGACY_SESSION.create_execution_ctx()) .unwrap() ); assert!( array .validity() .vortex_expect("fixed-size-list validity should be derivable") - .is_valid(1) + .execute_is_valid(1, &mut LEGACY_SESSION.create_execution_ctx()) .unwrap() ); assert!( !array .validity() .vortex_expect("fixed-size-list validity should be derivable") - .is_valid(2) + .execute_is_valid(2, &mut LEGACY_SESSION.create_execution_ctx()) .unwrap() ); diff --git a/vortex-array/src/builders/list.rs b/vortex-array/src/builders/list.rs index a0c1fdd581b..ad45e6b237b 100644 --- a/vortex-array/src/builders/list.rs +++ b/vortex-array/src/builders/list.rs @@ -613,21 +613,21 @@ mod tests { array .validity() .vortex_expect("list validity should be derivable") - .is_valid(0) + .execute_is_valid(0, &mut LEGACY_SESSION.create_execution_ctx()) .unwrap() ); assert!( array .validity() .vortex_expect("list validity should be derivable") - .is_valid(1) + .execute_is_valid(1, &mut LEGACY_SESSION.create_execution_ctx()) .unwrap() ); assert!( !array .validity() .vortex_expect("list validity should be derivable") - .is_valid(2) + .execute_is_valid(2, &mut LEGACY_SESSION.create_execution_ctx()) .unwrap() ); diff --git a/vortex-array/src/builders/listview.rs b/vortex-array/src/builders/listview.rs index d4f8f908dcb..4f18e379451 100644 --- a/vortex-array/src/builders/listview.rs +++ b/vortex-array/src/builders/listview.rs @@ -425,6 +425,8 @@ mod tests { use super::ListViewBuilder; use crate::IntoArray; + use crate::LEGACY_SESSION; + use crate::VortexSessionExecute; use crate::arrays::ListArray; use crate::arrays::ListViewArray; use crate::arrays::listview::ListViewArrayExt; @@ -497,7 +499,7 @@ mod tests { !listview .validity() .vortex_expect("listview validity should be derivable") - .is_valid(2) + .execute_is_valid(2, &mut LEGACY_SESSION.create_execution_ctx()) .unwrap() ); @@ -610,14 +612,14 @@ mod tests { !listview .validity() .vortex_expect("listview validity should be derivable") - .is_valid(2) + .execute_is_valid(2, &mut LEGACY_SESSION.create_execution_ctx()) .unwrap() ); assert!( !listview .validity() .vortex_expect("listview validity should be derivable") - .is_valid(3) + .execute_is_valid(3, &mut LEGACY_SESSION.create_execution_ctx()) .unwrap() ); @@ -679,7 +681,7 @@ mod tests { !listview .validity() .vortex_expect("listview validity should be derivable") - .is_valid(2) + .execute_is_valid(2, &mut LEGACY_SESSION.create_execution_ctx()) .unwrap() ); @@ -724,7 +726,7 @@ mod tests { !listview .validity() .vortex_expect("listview validity should be derivable") - .is_valid(1) + .execute_is_valid(1, &mut LEGACY_SESSION.create_execution_ctx()) .unwrap() ); assert_eq!(listview.list_elements_at(1).unwrap().len(), 0); diff --git a/vortex-array/src/builders/primitive.rs b/vortex-array/src/builders/primitive.rs index 657f4b1a603..268c27ab4d6 100644 --- a/vortex-array/src/builders/primitive.rs +++ b/vortex-array/src/builders/primitive.rs @@ -674,21 +674,21 @@ mod tests { array .validity() .vortex_expect("primitive validity should be derivable") - .is_valid(0) + .execute_is_valid(0, &mut LEGACY_SESSION.create_execution_ctx()) .unwrap() ); assert!( array .validity() .vortex_expect("primitive validity should be derivable") - .is_valid(1) + .execute_is_valid(1, &mut LEGACY_SESSION.create_execution_ctx()) .unwrap() ); assert!( !array .validity() .vortex_expect("primitive validity should be derivable") - .is_valid(2) + .execute_is_valid(2, &mut LEGACY_SESSION.create_execution_ctx()) .unwrap() ); diff --git a/vortex-array/src/builders/tests.rs b/vortex-array/src/builders/tests.rs index 69adfd5b893..4d59fe51957 100644 --- a/vortex-array/src/builders/tests.rs +++ b/vortex-array/src/builders/tests.rs @@ -857,7 +857,7 @@ fn test_set_validity_overrides_validity( let validity = builder.finish().validity()?; for (i, &valid) in expected.iter().enumerate() { assert_eq!( - validity.is_valid(i)?, + validity.execute_is_valid(i, &mut LEGACY_SESSION.create_execution_ctx())?, valid, "validity mismatch at index {i}" ); @@ -877,7 +877,10 @@ fn test_set_validity_noop_when_non_nullable() -> VortexResult<()> { let validity = builder.finish().validity()?; for i in 0..4 { - assert!(validity.is_valid(i)?, "index {i} should remain valid"); + assert!( + validity.execute_is_valid(i, &mut LEGACY_SESSION.create_execution_ctx())?, + "index {i} should remain valid" + ); } Ok(()) } diff --git a/vortex-array/src/validity.rs b/vortex-array/src/validity.rs index 204205d1f51..28c82e267b6 100644 --- a/vortex-array/src/validity.rs +++ b/vortex-array/src/validity.rs @@ -19,8 +19,6 @@ use crate::ArrayRef; use crate::Canonical; use crate::ExecutionCtx; use crate::IntoArray; -use crate::LEGACY_SESSION; -use crate::VortexSessionExecute; use crate::arrays::BoolArray; use crate::arrays::ChunkedArray; use crate::arrays::ConstantArray; @@ -128,24 +126,24 @@ impl Validity { } } - /// Returns whether the `index` item is valid. + /// Returns whether the `index` item is valid, using `ctx` to execute the validity array. #[inline] - pub fn is_valid(&self, index: usize) -> VortexResult { + pub fn execute_is_valid(&self, index: usize, ctx: &mut ExecutionCtx) -> VortexResult { Ok(match self { Self::NonNullable | Self::AllValid => true, Self::AllInvalid => false, Self::Array(a) => a - .execute_scalar(index, &mut LEGACY_SESSION.create_execution_ctx()) - .vortex_expect("Validity array must support execute_scalar") + .execute_scalar(index, ctx)? .as_bool() .value() - .vortex_expect("Validity must be non-nullable"), + .ok_or_else(|| vortex_err!("validity value at index {index} is null"))?, }) } + /// Returns whether the `index` item is null, using `ctx` to execute the validity array. #[inline] - pub fn is_null(&self, index: usize) -> VortexResult { - Ok(!self.is_valid(index)?) + pub fn execute_is_null(&self, index: usize, ctx: &mut ExecutionCtx) -> VortexResult { + Ok(!self.execute_is_valid(index, ctx)?) } #[inline] diff --git a/vortex-duckdb/src/duckdb/vector.rs b/vortex-duckdb/src/duckdb/vector.rs index 9a160f1668e..889f8f9fbaf 100644 --- a/vortex-duckdb/src/duckdb/vector.rs +++ b/vortex-duckdb/src/duckdb/vector.rs @@ -455,7 +455,11 @@ mod tests { let validity = vector.validity_ref(len); let validity = validity.to_validity(); - assert!(validity.is_null(0).unwrap()); + assert!( + validity + .execute_is_null(0, &mut LEGACY_SESSION.create_execution_ctx()) + .unwrap() + ); } #[test] @@ -468,7 +472,11 @@ mod tests { let validity = vector.validity_ref(len); let validity = validity.to_validity(); - assert!(validity.is_valid(0).unwrap()); + assert!( + validity + .execute_is_valid(0, &mut LEGACY_SESSION.create_execution_ctx()) + .unwrap() + ); } #[test] diff --git a/vortex-tensor/src/encodings/turboquant/tests/nullable.rs b/vortex-tensor/src/encodings/turboquant/tests/nullable.rs index e3febbaab7b..92eb0e7b152 100644 --- a/vortex-tensor/src/encodings/turboquant/tests/nullable.rs +++ b/vortex-tensor/src/encodings/turboquant/tests/nullable.rs @@ -36,7 +36,7 @@ fn nullable_vectors_roundtrip() -> VortexResult<()> { for i in 0..10 { let expected = ![2, 5, 7].contains(&i); assert_eq!( - encoded_validity.is_valid(i)?, + encoded_validity.execute_is_valid(i, &mut ctx)?, expected, "validity mismatch at row {i}" ); @@ -95,7 +95,7 @@ fn nullable_norms_match_validity() -> VortexResult<()> { for i in 0..5 { let expected = i % 2 == 0; assert_eq!( - norms_validity.is_valid(i)?, + norms_validity.execute_is_valid(i, &mut ctx)?, expected, "norms validity mismatch at row {i}" ); @@ -169,7 +169,7 @@ fn nullable_slice_preserves_validity() -> VortexResult<()> { let expected = [true, false, true, true, false]; for (i, &exp) in expected.iter().enumerate() { assert_eq!( - sliced_validity.is_valid(i)?, + sliced_validity.execute_is_valid(i, &mut ctx)?, exp, "sliced validity mismatch at index {i}" ); diff --git a/vortex-tensor/src/scalar_fns/l2_denorm.rs b/vortex-tensor/src/scalar_fns/l2_denorm.rs index c91a614ddff..94e397a276d 100644 --- a/vortex-tensor/src/scalar_fns/l2_denorm.rs +++ b/vortex-tensor/src/scalar_fns/l2_denorm.rs @@ -448,7 +448,7 @@ pub fn normalize_as_l2_denorm( let total_elements = row_count * tensor_flat_size; let mut elements = BufferMut::::with_capacity(total_elements); for i in 0..row_count { - let is_valid = norms_validity.is_valid(i)?; + let is_valid = norms_validity.execute_is_valid(i, ctx)?; let norm = norm_values[i]; // SAFETY: We allocated `row_count * tensor_flat_size` capacity and push exactly @@ -642,7 +642,7 @@ pub fn validate_l2_normalized_rows_against_norms( let stored_norms = norms.as_ref().map(|norms| norms.as_slice::()); for i in 0..row_count { - if !combined_validity.is_valid(i)? { + if !combined_validity.execute_is_valid(i, ctx)? { continue; } diff --git a/vortex-tensor/src/scalar_fns/sorf_transform/tests.rs b/vortex-tensor/src/scalar_fns/sorf_transform/tests.rs index 3dc281a29ea..5efe1436ed6 100644 --- a/vortex-tensor/src/scalar_fns/sorf_transform/tests.rs +++ b/vortex-tensor/src/scalar_fns/sorf_transform/tests.rs @@ -250,8 +250,8 @@ fn nullable_validity_propagation() -> VortexResult<()> { let output_validity = result_fsl.validity()?; for row in 0..num_rows { assert_eq!( - output_validity.is_valid(row)?, - validity.is_valid(row)?, + output_validity.execute_is_valid(row, &mut ctx)?, + validity.execute_is_valid(row, &mut ctx)?, "validity mismatch at row {row}" ); } From 4624ef6334a8d76e79009812d2c110ed854ae6c5 Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 5 Jun 2026 15:53:34 +0000 Subject: [PATCH 2/7] Retain Validity::is_valid/is_null as deprecated wrappers Keep the original ctx-free `is_valid`/`is_null` methods for backward compatibility, marked `#[deprecated]`, delegating to the new `execute_is_valid`/`execute_is_null` via a `LEGACY_SESSION` context. Signed-off-by: Joe Isaacs --- vortex-array/src/validity.rs | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/vortex-array/src/validity.rs b/vortex-array/src/validity.rs index 28c82e267b6..31fe82cf357 100644 --- a/vortex-array/src/validity.rs +++ b/vortex-array/src/validity.rs @@ -19,6 +19,8 @@ use crate::ArrayRef; use crate::Canonical; use crate::ExecutionCtx; use crate::IntoArray; +use crate::LEGACY_SESSION; +use crate::VortexSessionExecute; use crate::arrays::BoolArray; use crate::arrays::ChunkedArray; use crate::arrays::ConstantArray; @@ -146,6 +148,20 @@ impl Validity { Ok(!self.execute_is_valid(index, ctx)?) } + /// Returns whether the `index` item is valid. + #[deprecated(note = "use `execute_is_valid` with an explicit `ExecutionCtx`")] + #[inline] + pub fn is_valid(&self, index: usize) -> VortexResult { + self.execute_is_valid(index, &mut LEGACY_SESSION.create_execution_ctx()) + } + + /// Returns whether the `index` item is null. + #[deprecated(note = "use `execute_is_null` with an explicit `ExecutionCtx`")] + #[inline] + pub fn is_null(&self, index: usize) -> VortexResult { + self.execute_is_null(index, &mut LEGACY_SESSION.create_execution_ctx()) + } + #[inline] pub fn slice(&self, range: Range) -> VortexResult { match self { From 15cd35a95ebdc88b375dc4e5734a10dbf8b9b2eb Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 5 Jun 2026 16:20:21 +0000 Subject: [PATCH 3/7] Use a single session and ExecutionCtx per touched test Consolidate the validity/scalar checks in the tests updated by the `execute_is_valid` migration so each test creates one execution context and reuses it, instead of constructing a fresh `LEGACY_SESSION` context per call (some inside loops). Also unify the ALP sliced-vector test on a single `SESSION` context for both encode and execute, rather than mixing `SESSION` and `LEGACY_SESSION`. Signed-off-by: Joe Isaacs --- encodings/alp/src/alp/array.rs | 9 +-- vortex-array/src/builders/fixed_size_list.rs | 65 ++++++++++---------- vortex-array/src/builders/list.rs | 20 +++--- vortex-array/src/builders/listview.rs | 14 +++-- vortex-array/src/builders/primitive.rs | 7 ++- vortex-array/src/builders/tests.rs | 6 +- 6 files changed, 61 insertions(+), 60 deletions(-) diff --git a/encodings/alp/src/alp/array.rs b/encodings/alp/src/alp/array.rs index c667cb3e199..c430f587e63 100644 --- a/encodings/alp/src/alp/array.rs +++ b/encodings/alp/src/alp/array.rs @@ -695,19 +695,14 @@ mod tests { }) .collect(); + let mut ctx = SESSION.create_execution_ctx(); let array = PrimitiveArray::from_option_iter(values.clone()); - let encoded = alp_encode( - array.as_view(), - None, - &mut LEGACY_SESSION.create_execution_ctx(), - ) - .unwrap(); + let encoded = alp_encode(array.as_view(), None, &mut ctx).unwrap(); let slice_end = size - slice_start; let slice_len = slice_end - slice_start; let sliced_encoded = encoded.slice(slice_start..slice_end).unwrap(); - let mut ctx = SESSION.create_execution_ctx(); let result_canonical = sliced_encoded.execute::(&mut ctx).unwrap(); let result_primitive = result_canonical.into_primitive(); diff --git a/vortex-array/src/builders/fixed_size_list.rs b/vortex-array/src/builders/fixed_size_list.rs index dc3416ccfb2..7a3c3bb068a 100644 --- a/vortex-array/src/builders/fixed_size_list.rs +++ b/vortex-array/src/builders/fixed_size_list.rs @@ -443,6 +443,7 @@ mod tests { #[test] fn test_nullable_lists_non_nullable_elements() { + let mut ctx = LEGACY_SESSION.create_execution_ctx(); let dtype: Arc = Arc::new(DType::Primitive(I32, NonNullable)); let mut builder = FixedSizeListBuilder::with_capacity(Arc::clone(&dtype), 2, Nullable, 0); @@ -474,21 +475,21 @@ mod tests { fsl_array .validity() .vortex_expect("fixed-size-list validity should be derivable") - .execute_is_valid(0, &mut LEGACY_SESSION.create_execution_ctx()) + .execute_is_valid(0, &mut ctx) .unwrap() ); assert!( !fsl_array .validity() .vortex_expect("fixed-size-list validity should be derivable") - .execute_is_valid(1, &mut LEGACY_SESSION.create_execution_ctx()) + .execute_is_valid(1, &mut ctx) .unwrap() ); assert!( fsl_array .validity() .vortex_expect("fixed-size-list validity should be derivable") - .execute_is_valid(2, &mut LEGACY_SESSION.create_execution_ctx()) + .execute_is_valid(2, &mut ctx) .unwrap() ); } @@ -561,6 +562,7 @@ mod tests { #[test] fn test_append_nulls() { + let mut ctx = LEGACY_SESSION.create_execution_ctx(); // Elements must be nullable if we're going to append null lists let dtype: Arc = Arc::new(DType::Primitive(I32, Nullable)); let mut builder = FixedSizeListBuilder::with_capacity(dtype, 2, Nullable, 0); @@ -582,7 +584,7 @@ mod tests { !fsl_array .validity() .vortex_expect("fixed-size-list validity should be derivable") - .execute_is_valid(i, &mut LEGACY_SESSION.create_execution_ctx()) + .execute_is_valid(i, &mut ctx) .unwrap() ); } @@ -590,6 +592,7 @@ mod tests { #[test] fn test_append_scalar_nulls() { + let mut ctx = LEGACY_SESSION.create_execution_ctx(); // Elements must be nullable if we're going to append null lists let dtype: Arc = Arc::new(DType::Primitive(I32, Nullable)); let mut builder = FixedSizeListBuilder::with_capacity(dtype, 2, Nullable, 0); @@ -612,7 +615,7 @@ mod tests { !fsl_array .validity() .vortex_expect("fixed-size-list validity should be derivable") - .execute_is_valid(0, &mut LEGACY_SESSION.create_execution_ctx()) + .execute_is_valid(0, &mut ctx) .unwrap() ); } @@ -662,6 +665,7 @@ mod tests { #[test] fn test_extend_from_array() { + let mut ctx = LEGACY_SESSION.create_execution_ctx(); let dtype: Arc = Arc::new(I32.into()); // Create a source array. @@ -690,48 +694,49 @@ mod tests { fsl_array .validity() .vortex_expect("fixed-size-list validity should be derivable") - .execute_is_valid(0, &mut LEGACY_SESSION.create_execution_ctx()) + .execute_is_valid(0, &mut ctx) .unwrap() ); assert!( !fsl_array .validity() .vortex_expect("fixed-size-list validity should be derivable") - .execute_is_valid(1, &mut LEGACY_SESSION.create_execution_ctx()) + .execute_is_valid(1, &mut ctx) .unwrap() ); assert!( fsl_array .validity() .vortex_expect("fixed-size-list validity should be derivable") - .execute_is_valid(2, &mut LEGACY_SESSION.create_execution_ctx()) + .execute_is_valid(2, &mut ctx) .unwrap() ); assert!( fsl_array .validity() .vortex_expect("fixed-size-list validity should be derivable") - .execute_is_valid(3, &mut LEGACY_SESSION.create_execution_ctx()) + .execute_is_valid(3, &mut ctx) .unwrap() ); assert!( !fsl_array .validity() .vortex_expect("fixed-size-list validity should be derivable") - .execute_is_valid(4, &mut LEGACY_SESSION.create_execution_ctx()) + .execute_is_valid(4, &mut ctx) .unwrap() ); assert!( fsl_array .validity() .vortex_expect("fixed-size-list validity should be derivable") - .execute_is_valid(5, &mut LEGACY_SESSION.create_execution_ctx()) + .execute_is_valid(5, &mut ctx) .unwrap() ); } #[test] fn test_extend_degenerate_arrays() { + let mut ctx = LEGACY_SESSION.create_execution_ctx(); let dtype: Arc = Arc::new(I32.into()); // Create degenerate source arrays (size = 0). @@ -767,35 +772,35 @@ mod tests { fsl_array .validity() .vortex_expect("fixed-size-list validity should be derivable") - .execute_is_valid(0, &mut LEGACY_SESSION.create_execution_ctx()) + .execute_is_valid(0, &mut ctx) .unwrap() ); assert!( !fsl_array .validity() .vortex_expect("fixed-size-list validity should be derivable") - .execute_is_valid(1, &mut LEGACY_SESSION.create_execution_ctx()) + .execute_is_valid(1, &mut ctx) .unwrap() ); assert!( fsl_array .validity() .vortex_expect("fixed-size-list validity should be derivable") - .execute_is_valid(2, &mut LEGACY_SESSION.create_execution_ctx()) + .execute_is_valid(2, &mut ctx) .unwrap() ); assert!( !fsl_array .validity() .vortex_expect("fixed-size-list validity should be derivable") - .execute_is_valid(3, &mut LEGACY_SESSION.create_execution_ctx()) + .execute_is_valid(3, &mut ctx) .unwrap() ); assert!( fsl_array .validity() .vortex_expect("fixed-size-list validity should be derivable") - .execute_is_valid(4, &mut LEGACY_SESSION.create_execution_ctx()) + .execute_is_valid(4, &mut ctx) .unwrap() ); } @@ -836,6 +841,7 @@ mod tests { #[test] fn test_mixed_operations() { + let mut ctx = LEGACY_SESSION.create_execution_ctx(); // Use nullable elements since we'll be appending nulls let dtype: Arc = Arc::new(DType::Primitive(I32, Nullable)); let mut builder = FixedSizeListBuilder::with_capacity(Arc::clone(&dtype), 2, Nullable, 0); @@ -879,48 +885,49 @@ mod tests { fsl_array .validity() .vortex_expect("fixed-size-list validity should be derivable") - .execute_is_valid(0, &mut LEGACY_SESSION.create_execution_ctx()) + .execute_is_valid(0, &mut ctx) .unwrap() ); // append_value assert!( !fsl_array .validity() .vortex_expect("fixed-size-list validity should be derivable") - .execute_is_valid(1, &mut LEGACY_SESSION.create_execution_ctx()) + .execute_is_valid(1, &mut ctx) .unwrap() ); // append_null assert!( fsl_array .validity() .vortex_expect("fixed-size-list validity should be derivable") - .execute_is_valid(2, &mut LEGACY_SESSION.create_execution_ctx()) + .execute_is_valid(2, &mut ctx) .unwrap() ); // append_zeros assert!( fsl_array .validity() .vortex_expect("fixed-size-list validity should be derivable") - .execute_is_valid(3, &mut LEGACY_SESSION.create_execution_ctx()) + .execute_is_valid(3, &mut ctx) .unwrap() ); // append_zeros assert!( !fsl_array .validity() .vortex_expect("fixed-size-list validity should be derivable") - .execute_is_valid(4, &mut LEGACY_SESSION.create_execution_ctx()) + .execute_is_valid(4, &mut ctx) .unwrap() ); // append_nulls assert!( fsl_array .validity() .vortex_expect("fixed-size-list validity should be derivable") - .execute_is_valid(5, &mut LEGACY_SESSION.create_execution_ctx()) + .execute_is_valid(5, &mut ctx) .unwrap() ); // extend_from_array } #[test] fn test_append_scalar() { + let mut ctx = LEGACY_SESSION.create_execution_ctx(); let dtype: Arc = Arc::new(I32.into()); let mut builder = FixedSizeListBuilder::with_capacity(Arc::clone(&dtype), 2, Nullable, 10); @@ -942,9 +949,7 @@ mod tests { // Check actual values using scalar_at. - let scalar0 = array - .execute_scalar(0, &mut LEGACY_SESSION.create_execution_ctx()) - .unwrap(); + let scalar0 = array.execute_scalar(0, &mut ctx).unwrap(); let list0 = scalar0.as_list(); assert_eq!(list0.len(), 2); if let Some(list0_items) = list0.elements() { @@ -952,9 +957,7 @@ mod tests { assert_eq!(list0_items[1].as_primitive().typed_value::(), Some(2)); } - let scalar1 = array - .execute_scalar(1, &mut LEGACY_SESSION.create_execution_ctx()) - .unwrap(); + let scalar1 = array.execute_scalar(1, &mut ctx).unwrap(); let list1 = scalar1.as_list(); assert_eq!(list1.len(), 2); if let Some(list1_items) = list1.elements() { @@ -967,21 +970,21 @@ mod tests { array .validity() .vortex_expect("fixed-size-list validity should be derivable") - .execute_is_valid(0, &mut LEGACY_SESSION.create_execution_ctx()) + .execute_is_valid(0, &mut ctx) .unwrap() ); assert!( array .validity() .vortex_expect("fixed-size-list validity should be derivable") - .execute_is_valid(1, &mut LEGACY_SESSION.create_execution_ctx()) + .execute_is_valid(1, &mut ctx) .unwrap() ); assert!( !array .validity() .vortex_expect("fixed-size-list validity should be derivable") - .execute_is_valid(2, &mut LEGACY_SESSION.create_execution_ctx()) + .execute_is_valid(2, &mut ctx) .unwrap() ); diff --git a/vortex-array/src/builders/list.rs b/vortex-array/src/builders/list.rs index ad45e6b237b..c7b506f228a 100644 --- a/vortex-array/src/builders/list.rs +++ b/vortex-array/src/builders/list.rs @@ -579,11 +579,11 @@ mod tests { let array = builder.finish_into_list(); assert_eq!(array.len(), 3); + let mut ctx = LEGACY_SESSION.create_execution_ctx(); + // Check actual values using scalar_at. - let scalar0 = array - .execute_scalar(0, &mut LEGACY_SESSION.create_execution_ctx()) - .unwrap(); + let scalar0 = array.execute_scalar(0, &mut ctx).unwrap(); let list0 = scalar0.as_list(); assert_eq!(list0.len(), 2); if let Some(list0_items) = list0.elements() { @@ -591,9 +591,7 @@ mod tests { assert_eq!(list0_items[1].as_primitive().typed_value::(), Some(2)); } - let scalar1 = array - .execute_scalar(1, &mut LEGACY_SESSION.create_execution_ctx()) - .unwrap(); + let scalar1 = array.execute_scalar(1, &mut ctx).unwrap(); let list1 = scalar1.as_list(); assert_eq!(list1.len(), 3); if let Some(list1_items) = list1.elements() { @@ -602,9 +600,7 @@ mod tests { assert_eq!(list1_items[2].as_primitive().typed_value::(), Some(5)); } - let scalar2 = array - .execute_scalar(2, &mut LEGACY_SESSION.create_execution_ctx()) - .unwrap(); + let scalar2 = array.execute_scalar(2, &mut ctx).unwrap(); let list2 = scalar2.as_list(); assert!(list2.is_null()); // This should be null. @@ -613,21 +609,21 @@ mod tests { array .validity() .vortex_expect("list validity should be derivable") - .execute_is_valid(0, &mut LEGACY_SESSION.create_execution_ctx()) + .execute_is_valid(0, &mut ctx) .unwrap() ); assert!( array .validity() .vortex_expect("list validity should be derivable") - .execute_is_valid(1, &mut LEGACY_SESSION.create_execution_ctx()) + .execute_is_valid(1, &mut ctx) .unwrap() ); assert!( !array .validity() .vortex_expect("list validity should be derivable") - .execute_is_valid(2, &mut LEGACY_SESSION.create_execution_ctx()) + .execute_is_valid(2, &mut ctx) .unwrap() ); diff --git a/vortex-array/src/builders/listview.rs b/vortex-array/src/builders/listview.rs index 4f18e379451..945fc48d630 100644 --- a/vortex-array/src/builders/listview.rs +++ b/vortex-array/src/builders/listview.rs @@ -451,6 +451,7 @@ mod tests { #[test] fn test_basic_append_and_nulls() { + let mut ctx = LEGACY_SESSION.create_execution_ctx(); let dtype: Arc = Arc::new(I32.into()); let mut builder = ListViewBuilder::::with_capacity(Arc::clone(&dtype), Nullable, 0, 0); @@ -499,7 +500,7 @@ mod tests { !listview .validity() .vortex_expect("listview validity should be derivable") - .execute_is_valid(2, &mut LEGACY_SESSION.create_execution_ctx()) + .execute_is_valid(2, &mut ctx) .unwrap() ); @@ -581,6 +582,7 @@ mod tests { #[test] fn test_builder_trait_methods() { + let mut ctx = LEGACY_SESSION.create_execution_ctx(); let dtype: Arc = Arc::new(I32.into()); let mut builder = ListViewBuilder::::with_capacity(Arc::clone(&dtype), Nullable, 0, 0); @@ -612,14 +614,14 @@ mod tests { !listview .validity() .vortex_expect("listview validity should be derivable") - .execute_is_valid(2, &mut LEGACY_SESSION.create_execution_ctx()) + .execute_is_valid(2, &mut ctx) .unwrap() ); assert!( !listview .validity() .vortex_expect("listview validity should be derivable") - .execute_is_valid(3, &mut LEGACY_SESSION.create_execution_ctx()) + .execute_is_valid(3, &mut ctx) .unwrap() ); @@ -632,6 +634,7 @@ mod tests { #[test] fn test_extend_from_array() { + let mut ctx = LEGACY_SESSION.create_execution_ctx(); let dtype: Arc = Arc::new(I32.into()); // Create a source ListArray. @@ -681,7 +684,7 @@ mod tests { !listview .validity() .vortex_expect("listview validity should be derivable") - .execute_is_valid(2, &mut LEGACY_SESSION.create_execution_ctx()) + .execute_is_valid(2, &mut ctx) .unwrap() ); @@ -694,6 +697,7 @@ mod tests { #[test] fn test_extend_from_array_overlapping_listview() { + let mut ctx = LEGACY_SESSION.create_execution_ctx(); let dtype: Arc = Arc::new(I32.into()); // Non-ZCTL source: @@ -726,7 +730,7 @@ mod tests { !listview .validity() .vortex_expect("listview validity should be derivable") - .execute_is_valid(1, &mut LEGACY_SESSION.create_execution_ctx()) + .execute_is_valid(1, &mut ctx) .unwrap() ); assert_eq!(listview.list_elements_at(1).unwrap().len(), 0); diff --git a/vortex-array/src/builders/primitive.rs b/vortex-array/src/builders/primitive.rs index 268c27ab4d6..c4b5abfb083 100644 --- a/vortex-array/src/builders/primitive.rs +++ b/vortex-array/src/builders/primitive.rs @@ -670,25 +670,26 @@ mod tests { // values[2] might be any value since it's null. // Check validity - first two should be valid, third should be null. + let mut ctx = LEGACY_SESSION.create_execution_ctx(); assert!( array .validity() .vortex_expect("primitive validity should be derivable") - .execute_is_valid(0, &mut LEGACY_SESSION.create_execution_ctx()) + .execute_is_valid(0, &mut ctx) .unwrap() ); assert!( array .validity() .vortex_expect("primitive validity should be derivable") - .execute_is_valid(1, &mut LEGACY_SESSION.create_execution_ctx()) + .execute_is_valid(1, &mut ctx) .unwrap() ); assert!( !array .validity() .vortex_expect("primitive validity should be derivable") - .execute_is_valid(2, &mut LEGACY_SESSION.create_execution_ctx()) + .execute_is_valid(2, &mut ctx) .unwrap() ); diff --git a/vortex-array/src/builders/tests.rs b/vortex-array/src/builders/tests.rs index 4d59fe51957..b725a4636b5 100644 --- a/vortex-array/src/builders/tests.rs +++ b/vortex-array/src/builders/tests.rs @@ -855,9 +855,10 @@ fn test_set_validity_overrides_validity( builder.set_validity(mask); let validity = builder.finish().validity()?; + let mut ctx = LEGACY_SESSION.create_execution_ctx(); for (i, &valid) in expected.iter().enumerate() { assert_eq!( - validity.execute_is_valid(i, &mut LEGACY_SESSION.create_execution_ctx())?, + validity.execute_is_valid(i, &mut ctx)?, valid, "validity mismatch at index {i}" ); @@ -876,9 +877,10 @@ fn test_set_validity_noop_when_non_nullable() -> VortexResult<()> { builder.set_validity(Mask::new_false(4)); let validity = builder.finish().validity()?; + let mut ctx = LEGACY_SESSION.create_execution_ctx(); for i in 0..4 { assert!( - validity.execute_is_valid(i, &mut LEGACY_SESSION.create_execution_ctx())?, + validity.execute_is_valid(i, &mut ctx)?, "index {i} should remain valid" ); } From dbb16c12ceeebe262a0b677cf2bbf59c8a4dd2c5 Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 5 Jun 2026 16:30:04 +0000 Subject: [PATCH 4/7] Avoid constructing an ExecutionCtx for non-null validity during validation `VarBinViewArray`/`VarBinArray` validation only needs an execution context when validity is array-backed. Previously `is_null` short-circuited for the `NonNullable`/`AllValid` variants without ever creating a context, but the ctx-threaded version constructed a `LEGACY_SESSION` context unconditionally, adding overhead to the common non-null construction path (observed as a regression in the `varbinview_zip` benchmarks). Construct the context lazily, only for `Validity::Array`, and resolve the constant variants without one. The UTF-8 validation loop is extracted into `VarBinArray::validate_utf8` with an `is_null_at` helper to keep the function within the cognitive-complexity budget. Signed-off-by: Joe Isaacs --- vortex-array/src/arrays/varbin/array.rs | 68 ++++++++++++++------- vortex-array/src/arrays/varbinview/array.rs | 11 +++- 2 files changed, 54 insertions(+), 25 deletions(-) diff --git a/vortex-array/src/arrays/varbin/array.rs b/vortex-array/src/arrays/varbin/array.rs index c357abd86e3..c68829804a6 100644 --- a/vortex-array/src/arrays/varbin/array.rs +++ b/vortex-array/src/arrays/varbin/array.rs @@ -14,6 +14,7 @@ use vortex_error::vortex_err; use crate::ArrayRef; use crate::ArraySlots; +use crate::ExecutionCtx; use crate::LEGACY_SESSION; #[expect(deprecated)] use crate::ToCanonical as _; @@ -244,34 +245,55 @@ impl VarBinData { && matches!(dtype, DType::Utf8(_)) && let Some(bytes) = bytes.as_host_opt() { - #[expect(deprecated)] - let primitive_offsets = offsets.to_primitive(); - match_each_integer_ptype!(primitive_offsets.dtype().as_ptype(), |O| { - let offsets_slice = primitive_offsets.as_slice::(); - let mut ctx = LEGACY_SESSION.create_execution_ctx(); - for (i, (start, end)) in offsets_slice - .windows(2) - .map(|o| (o[0].as_(), o[1].as_())) - .enumerate() - { - if validity.execute_is_null(i, &mut ctx)? { - continue; - } - - let string_bytes = &bytes.as_ref()[start..end]; - simdutf8::basic::from_utf8(string_bytes).map_err(|_| { - #[expect(clippy::unwrap_used)] - // run validation using `compat` package to get more detailed error message - let err = simdutf8::compat::from_utf8(string_bytes).unwrap_err(); - vortex_err!("invalid utf-8: {err} at index {i}") - })?; - } - }); + Self::validate_utf8(offsets, bytes.as_ref(), validity)?; } Ok(()) } + /// Validates that every non-null value is valid UTF-8. + fn validate_utf8(offsets: &ArrayRef, bytes: &[u8], validity: &Validity) -> VortexResult<()> { + #[expect(deprecated)] + let primitive_offsets = offsets.to_primitive(); + // Only array-backed validity needs an execution context; the constant variants resolve + // null-ness without one, so avoid constructing a context otherwise. + let mut ctx = + matches!(validity, Validity::Array(_)).then(|| LEGACY_SESSION.create_execution_ctx()); + match_each_integer_ptype!(primitive_offsets.dtype().as_ptype(), |O| { + let offsets_slice = primitive_offsets.as_slice::(); + for (i, (start, end)) in offsets_slice + .windows(2) + .map(|o| (o[0].as_(), o[1].as_())) + .enumerate() + { + if Self::is_null_at(validity, i, ctx.as_mut())? { + continue; + } + + let string_bytes = &bytes[start..end]; + simdutf8::basic::from_utf8(string_bytes).map_err(|_| { + #[expect(clippy::unwrap_used)] + // run validation using `compat` package to get more detailed error message + let err = simdutf8::compat::from_utf8(string_bytes).unwrap_err(); + vortex_err!("invalid utf-8: {err} at index {i}") + })?; + } + }); + Ok(()) + } + + /// Resolves null-ness at `index`, using `ctx` only for array-backed validity. + fn is_null_at( + validity: &Validity, + index: usize, + ctx: Option<&mut ExecutionCtx>, + ) -> VortexResult { + match ctx { + Some(ctx) => validity.execute_is_null(index, ctx), + None => Ok(matches!(validity, Validity::AllInvalid)), + } + } + /// Access the value bytes child buffer /// /// # Note diff --git a/vortex-array/src/arrays/varbinview/array.rs b/vortex-array/src/arrays/varbinview/array.rs index 713be8b3ab6..8df89984dc8 100644 --- a/vortex-array/src/arrays/varbinview/array.rs +++ b/vortex-array/src/arrays/varbinview/array.rs @@ -318,9 +318,16 @@ impl VarBinViewData { where F: Fn(&[u8]) -> bool, { - let mut ctx = LEGACY_SESSION.create_execution_ctx(); + // Only array-backed validity needs an execution context; the constant variants resolve + // null-ness without one, so avoid constructing a context in the common (non-null) case. + let mut ctx = + matches!(validity, Validity::Array(_)).then(|| LEGACY_SESSION.create_execution_ctx()); for (idx, &view) in views.iter().enumerate() { - if validity.execute_is_null(idx, &mut ctx)? { + let is_null = match ctx.as_mut() { + Some(ctx) => validity.execute_is_null(idx, ctx)?, + None => matches!(validity, Validity::AllInvalid), + }; + if is_null { continue; } From 9f8e794d545e7cd73ca25a433e7f9647e9b006cc Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Mon, 8 Jun 2026 14:08:22 +0100 Subject: [PATCH 5/7] wip Signed-off-by: Joe Isaacs --- vortex-array/src/arrays/varbinview/array.rs | 38 ++++++++++++++------- 1 file changed, 25 insertions(+), 13 deletions(-) diff --git a/vortex-array/src/arrays/varbinview/array.rs b/vortex-array/src/arrays/varbinview/array.rs index 8df89984dc8..3ba90409939 100644 --- a/vortex-array/src/arrays/varbinview/array.rs +++ b/vortex-array/src/arrays/varbinview/array.rs @@ -318,19 +318,7 @@ impl VarBinViewData { where F: Fn(&[u8]) -> bool, { - // Only array-backed validity needs an execution context; the constant variants resolve - // null-ness without one, so avoid constructing a context in the common (non-null) case. - let mut ctx = - matches!(validity, Validity::Array(_)).then(|| LEGACY_SESSION.create_execution_ctx()); - for (idx, &view) in views.iter().enumerate() { - let is_null = match ctx.as_mut() { - Some(ctx) => validity.execute_is_null(idx, ctx)?, - None => matches!(validity, Validity::AllInvalid), - }; - if is_null { - continue; - } - + let validate_view = |idx: usize, view: &BinaryView| -> VortexResult<()> { if view.is_inlined() { // Validate the inline bytestring let bytes = &view.as_inlined().data[..view.len() as usize]; @@ -374,6 +362,30 @@ impl VarBinViewData { InvalidArgument: "view at index {idx}: outlined bytes fails utf-8 validation" ); } + Ok(()) + }; + + match validity { + // Array-backed validity is the only variant that needs an execution context: execute it + // into a mask once and zip it with the views, validating only the valid (non-null) + // entries. + Validity::Array(_) => { + let mut ctx = LEGACY_SESSION.create_execution_ctx(); + let mask = validity.execute_mask(views.len(), &mut ctx)?; + for ((idx, view), valid) in views.iter().enumerate().zip(mask.iter()) { + if valid { + validate_view(idx, view)?; + } + } + } + // Every entry is null, so there is nothing to validate. + Validity::AllInvalid => {} + // No nulls: validate every view. + Validity::NonNullable | Validity::AllValid => { + for (idx, view) in views.iter().enumerate() { + validate_view(idx, view)?; + } + } } Ok(()) From 71aedb31dd119d6eb19b5de0c72da715d540a0cc Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Mon, 8 Jun 2026 14:25:59 +0100 Subject: [PATCH 6/7] wip Signed-off-by: Joe Isaacs --- vortex-array/src/arrays/varbin/array.rs | 103 +++++++++++------------- 1 file changed, 47 insertions(+), 56 deletions(-) diff --git a/vortex-array/src/arrays/varbin/array.rs b/vortex-array/src/arrays/varbin/array.rs index c68829804a6..4e21cbe934e 100644 --- a/vortex-array/src/arrays/varbin/array.rs +++ b/vortex-array/src/arrays/varbin/array.rs @@ -6,6 +6,7 @@ use std::fmt::Formatter; use num_traits::AsPrimitive; use smallvec::smallvec; +use vortex_array::arrays::PrimitiveArray; use vortex_buffer::ByteBuffer; use vortex_error::VortexExpect; use vortex_error::VortexResult; @@ -14,10 +15,7 @@ use vortex_error::vortex_err; use crate::ArrayRef; use crate::ArraySlots; -use crate::ExecutionCtx; use crate::LEGACY_SESSION; -#[expect(deprecated)] -use crate::ToCanonical as _; use crate::VortexSessionExecute; use crate::array::Array; use crate::array::ArrayParts; @@ -209,26 +207,6 @@ impl VarBinData { InvalidArgument: "Offsets must have at least one element" ); - // Skip host-only validation when offsets/bytes are not host-resident. - if offsets.is_host() && bytes.is_on_host() { - let last_offset = offsets - .execute_scalar( - offsets.len() - 1, - &mut LEGACY_SESSION.create_execution_ctx(), - )? - .as_primitive() - .as_::() - .ok_or_else( - || vortex_err!(InvalidArgument: "Last offset must be convertible to usize"), - )?; - vortex_ensure!( - last_offset <= bytes.len(), - InvalidArgument: "Last offset {} exceeds bytes length {}", - last_offset, - bytes.len() - ); - } - // Check validity length if let Some(validity_len) = validity.maybe_len() { vortex_ensure!( @@ -253,47 +231,60 @@ impl VarBinData { /// Validates that every non-null value is valid UTF-8. fn validate_utf8(offsets: &ArrayRef, bytes: &[u8], validity: &Validity) -> VortexResult<()> { - #[expect(deprecated)] - let primitive_offsets = offsets.to_primitive(); - // Only array-backed validity needs an execution context; the constant variants resolve - // null-ness without one, so avoid constructing a context otherwise. - let mut ctx = - matches!(validity, Validity::Array(_)).then(|| LEGACY_SESSION.create_execution_ctx()); + let validate_at = |i: usize, start: usize, end: usize| -> VortexResult<()> { + let string_bytes = &bytes[start..end]; + simdutf8::basic::from_utf8(string_bytes).map_err(|_| { + #[expect(clippy::unwrap_used)] + // run validation using `compat` package to get more detailed error message + let err = simdutf8::compat::from_utf8(string_bytes).unwrap_err(); + vortex_err!("invalid utf-8: {err} at index {i}") + })?; + Ok(()) + }; + + let mut ctx = LEGACY_SESSION.create_execution_ctx(); + // TODO(joe): update the created VarBin with this decompressed Array. + let primitive_offsets = offsets.clone().execute::(&mut ctx)?; + match_each_integer_ptype!(primitive_offsets.dtype().as_ptype(), |O| { let offsets_slice = primitive_offsets.as_slice::(); - for (i, (start, end)) in offsets_slice - .windows(2) - .map(|o| (o[0].as_(), o[1].as_())) - .enumerate() - { - if Self::is_null_at(validity, i, ctx.as_mut())? { - continue; - } + let windows = offsets_slice.windows(2).map(|o| (o[0].as_(), o[1].as_())); - let string_bytes = &bytes[start..end]; - simdutf8::basic::from_utf8(string_bytes).map_err(|_| { - #[expect(clippy::unwrap_used)] - // run validation using `compat` package to get more detailed error message - let err = simdutf8::compat::from_utf8(string_bytes).unwrap_err(); - vortex_err!("invalid utf-8: {err} at index {i}") - })?; + let last_offset = usize::try_from(offsets_slice[offsets.len() - 1]) + .vortex_expect("must fit into usize"); + vortex_ensure!( + last_offset <= bytes.len(), + InvalidArgument: "Last offset {} exceeds bytes length {}", + last_offset, + bytes.len() + ); + + match validity { + // Array-backed validity is the only variant that needs an execution context: + // execute it into a mask once and zip it with the offset windows, validating only + // the valid (non-null) entries. + Validity::Array(_) => { + let mask = + validity.execute_mask(offsets_slice.len().saturating_sub(1), &mut ctx)?; + for (i, ((start, end), valid)) in windows.zip(mask.iter()).enumerate() { + if valid { + validate_at(i, start, end)?; + } + } + } + // Every entry is null, so there is nothing to validate. + Validity::AllInvalid => {} + // No nulls: validate every entry. + Validity::NonNullable | Validity::AllValid => { + for (i, (start, end)) in windows.enumerate() { + validate_at(i, start, end)?; + } + } } }); Ok(()) } - /// Resolves null-ness at `index`, using `ctx` only for array-backed validity. - fn is_null_at( - validity: &Validity, - index: usize, - ctx: Option<&mut ExecutionCtx>, - ) -> VortexResult { - match ctx { - Some(ctx) => validity.execute_is_null(index, ctx), - None => Ok(matches!(validity, Validity::AllInvalid)), - } - } - /// Access the value bytes child buffer /// /// # Note From 803380cdc3accc6da0da5bfe8535ca5bf19a4849 Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 8 Jun 2026 17:56:01 +0000 Subject: [PATCH 7/7] Fix clippy lints in VarBin UTF-8 validation The reworked `validate_utf8` tripped two clippy lints: - `cognitive_complexity`: the per-validity-variant `match` lived inside the `match_each_integer_ptype!` macro, multiplying its complexity across every integer arm. Resolve the validity mask once before the dtype dispatch and keep a single loop inside the macro. - `unnecessary_fallible_conversions`: replace `usize::try_from(..)` on the offset with the infallible `AsPrimitive::as_()` already used for the window bounds. Signed-off-by: Joe Isaacs --- vortex-array/src/arrays/varbin/array.rs | 43 ++++++++++++------------- 1 file changed, 20 insertions(+), 23 deletions(-) diff --git a/vortex-array/src/arrays/varbin/array.rs b/vortex-array/src/arrays/varbin/array.rs index 4e21cbe934e..4d435471ce0 100644 --- a/vortex-array/src/arrays/varbin/array.rs +++ b/vortex-array/src/arrays/varbin/array.rs @@ -246,12 +246,21 @@ impl VarBinData { // TODO(joe): update the created VarBin with this decompressed Array. let primitive_offsets = offsets.clone().execute::(&mut ctx)?; + // Array-backed validity is the only variant that needs an execution context: execute it into + // a mask once. The constant variants resolve null-ness without one. Resolving this before + // the per-type dispatch keeps the dtype loop simple. + let mask = match validity { + Validity::Array(_) => { + Some(validity.execute_mask(primitive_offsets.len().saturating_sub(1), &mut ctx)?) + } + _ => None, + }; + let all_invalid = matches!(validity, Validity::AllInvalid); + match_each_integer_ptype!(primitive_offsets.dtype().as_ptype(), |O| { let offsets_slice = primitive_offsets.as_slice::(); - let windows = offsets_slice.windows(2).map(|o| (o[0].as_(), o[1].as_())); - let last_offset = usize::try_from(offsets_slice[offsets.len() - 1]) - .vortex_expect("must fit into usize"); + let last_offset: usize = offsets_slice[offsets_slice.len() - 1].as_(); vortex_ensure!( last_offset <= bytes.len(), InvalidArgument: "Last offset {} exceeds bytes length {}", @@ -259,26 +268,14 @@ impl VarBinData { bytes.len() ); - match validity { - // Array-backed validity is the only variant that needs an execution context: - // execute it into a mask once and zip it with the offset windows, validating only - // the valid (non-null) entries. - Validity::Array(_) => { - let mask = - validity.execute_mask(offsets_slice.len().saturating_sub(1), &mut ctx)?; - for (i, ((start, end), valid)) in windows.zip(mask.iter()).enumerate() { - if valid { - validate_at(i, start, end)?; - } - } - } - // Every entry is null, so there is nothing to validate. - Validity::AllInvalid => {} - // No nulls: validate every entry. - Validity::NonNullable | Validity::AllValid => { - for (i, (start, end)) in windows.enumerate() { - validate_at(i, start, end)?; - } + for (i, (start, end)) in offsets_slice + .windows(2) + .map(|o| (o[0].as_(), o[1].as_())) + .enumerate() + { + let valid = mask.as_ref().map_or(!all_invalid, |mask| mask.value(i)); + if valid { + validate_at(i, start, end)?; } } });