diff --git a/vortex-array/src/arrays/fixed_size_list/vtable/operations.rs b/vortex-array/src/arrays/fixed_size_list/vtable/operations.rs index 9f4cf02fbf8..fda30f708f3 100644 --- a/vortex-array/src/arrays/fixed_size_list/vtable/operations.rs +++ b/vortex-array/src/arrays/fixed_size_list/vtable/operations.rs @@ -3,7 +3,9 @@ use vortex_error::VortexResult; +use crate::Canonical; use crate::ExecutionCtx; +use crate::IntoArray; use crate::array::ArrayView; use crate::array::OperationsVTable; use crate::arrays::FixedSizeList; @@ -17,7 +19,12 @@ impl OperationsVTable for FixedSizeList { ctx: &mut ExecutionCtx, ) -> VortexResult { // By the preconditions we know that the list scalar is not null. - let list = array.fixed_size_list_elements_at(index)?; + // Canonicalize the element slice once so the per-element `execute_scalar` calls below do + // not re-decode the underlying encoding for every element. + let list = array + .fixed_size_list_elements_at(index)? + .execute::(ctx)? + .into_array(); let children_elements: Vec = (0..list.len()) .map(|i| list.execute_scalar(i, ctx)) .collect::>()?; diff --git a/vortex-array/src/arrays/list/vtable/operations.rs b/vortex-array/src/arrays/list/vtable/operations.rs index 02c686cd1f1..99e9b972124 100644 --- a/vortex-array/src/arrays/list/vtable/operations.rs +++ b/vortex-array/src/arrays/list/vtable/operations.rs @@ -5,7 +5,9 @@ use std::sync::Arc; use vortex_error::VortexResult; +use crate::Canonical; use crate::ExecutionCtx; +use crate::IntoArray; use crate::array::ArrayView; use crate::array::OperationsVTable; use crate::arrays::List; @@ -19,7 +21,12 @@ impl OperationsVTable for List { ctx: &mut ExecutionCtx, ) -> VortexResult { // By the preconditions we know that the list scalar is not null. - let elems = array.list_elements_at(index)?; + // Canonicalize the element slice once so the per-element `execute_scalar` calls below do + // not re-decode the underlying encoding for every element. + let elems = array + .list_elements_at(index)? + .execute::(ctx)? + .into_array(); let scalars: Vec = (0..elems.len()) .map(|i| elems.execute_scalar(i, ctx)) .collect::>()?; diff --git a/vortex-array/src/arrays/listview/rebuild.rs b/vortex-array/src/arrays/listview/rebuild.rs index 2bcb4ef0c1f..27949746819 100644 --- a/vortex-array/src/arrays/listview/rebuild.rs +++ b/vortex-array/src/arrays/listview/rebuild.rs @@ -209,9 +209,16 @@ impl ListViewArray { let mut new_sizes = BufferMut::::with_capacity(len); let mut take_indices = BufferMut::::with_capacity(self.elements().len()); + // Resolve validity to a mask once instead of probing it per row: `Validity::is_valid` + // executes a scalar (and spins up an execution context) on every call for array-backed + // validity, which is O(len) work repeated `len` times. + let validity = self + .validity()? + .execute_mask(len, &mut LEGACY_SESSION.create_execution_ctx())?; + let mut n_elements = NewOffset::zero(); for index in 0..len { - if !self.validity()?.is_valid(index)? { + if !validity.value(index) { new_offsets.push(n_elements); new_sizes.push(S::zero()); continue; @@ -292,9 +299,14 @@ impl ListViewArray { let mut new_elements_builder = builder_with_capacity(element_dtype.as_ref(), self.elements().len()); + // Resolve validity to a mask once instead of probing it per row (see `rebuild_with_take`). + let validity = self + .validity()? + .execute_mask(len, &mut LEGACY_SESSION.create_execution_ctx())?; + let mut n_elements = NewOffset::zero(); for index in 0..len { - if !self.validity()?.is_valid(index)? { + if !validity.value(index) { // For NULL lists, place them after the previous item's data to maintain the // no-overlap invariant for zero-copy to `ListArray` arrays. new_offsets.push(n_elements); diff --git a/vortex-array/src/arrays/listview/vtable/operations.rs b/vortex-array/src/arrays/listview/vtable/operations.rs index f0cb9539cc3..74dfbfca111 100644 --- a/vortex-array/src/arrays/listview/vtable/operations.rs +++ b/vortex-array/src/arrays/listview/vtable/operations.rs @@ -5,7 +5,9 @@ use std::sync::Arc; use vortex_error::VortexResult; +use crate::Canonical; use crate::ExecutionCtx; +use crate::IntoArray; use crate::array::ArrayView; use crate::array::OperationsVTable; use crate::arrays::ListView; @@ -19,7 +21,12 @@ impl OperationsVTable for ListView { ctx: &mut ExecutionCtx, ) -> VortexResult { // By the preconditions we know that the list scalar is not null. - let list = array.list_elements_at(index)?; + // Canonicalize the element slice once so the per-element `execute_scalar` calls below do + // not re-decode the underlying encoding for every element. + let list = array + .list_elements_at(index)? + .execute::(ctx)? + .into_array(); let children: Vec = (0..list.len()) .map(|i| list.execute_scalar(i, ctx)) .collect::>()?; diff --git a/vortex-array/src/arrays/varbin/compute/filter.rs b/vortex-array/src/arrays/varbin/compute/filter.rs index b3e1aa87a4f..301ece43e45 100644 --- a/vortex-array/src/arrays/varbin/compute/filter.rs +++ b/vortex-array/src/arrays/varbin/compute/filter.rs @@ -24,7 +24,6 @@ use crate::arrays::varbin::builder::VarBinBuilder; use crate::dtype::DType; use crate::dtype::IntegerPType; use crate::match_each_integer_ptype; -use crate::validity::Validity; impl FilterKernel for VarBin { fn filter( @@ -170,7 +169,10 @@ fn filter_select_var_bin_by_index( offsets.as_slice::(), values.bytes().as_slice(), mask_indices, - values.validity()?, + values + .varbin_validity() + .execute_mask(values.as_ref().len(), ctx) + .vortex_expect("Failed to compute validity mask"), selection_count, ) }) @@ -181,24 +183,37 @@ fn filter_select_var_bin_by_index_primitive_offset( offsets: &[O], data: &[u8], mask_indices: &[usize], - // TODO(ngates): pass LogicalValidity instead - validity: Validity, + logical_validity: Mask, selection_count: usize, ) -> VortexResult { + let value_at = |idx: usize| -> VortexResult<&[u8]> { + let start = offsets[idx] + .to_usize() + .ok_or_else(|| vortex_err!("Failed to convert offset to usize: {}", offsets[idx]))?; + let end = offsets[idx + 1].to_usize().ok_or_else(|| { + vortex_err!("Failed to convert offset to usize: {}", offsets[idx + 1]) + })?; + Ok(&data[start..end]) + }; + let mut builder = VarBinBuilder::::with_capacity(selection_count); - for idx in mask_indices.iter().copied() { - if validity.is_valid(idx)? { - let (start, end) = ( - offsets[idx].to_usize().ok_or_else(|| { - vortex_err!("Failed to convert offset to usize: {}", offsets[idx]) - })?, - offsets[idx + 1].to_usize().ok_or_else(|| { - vortex_err!("Failed to convert offset to usize: {}", offsets[idx + 1]) - })?, - ); - builder.append_value(&data[start..end]) - } else { - builder.append_null() + match logical_validity.bit_buffer() { + AllOr::All => { + for idx in mask_indices.iter().copied() { + builder.append_value(value_at(idx)?) + } + } + AllOr::None => { + builder.append_n_nulls(selection_count); + } + AllOr::Some(validity) => { + for idx in mask_indices.iter().copied() { + if validity.value(idx) { + builder.append_value(value_at(idx)?) + } else { + builder.append_null() + } + } } } Ok(builder.finish(dtype)) diff --git a/vortex-array/src/arrays/variant/vtable/kernel.rs b/vortex-array/src/arrays/variant/vtable/kernel.rs index a24eb81fd5d..f4870e9498f 100644 --- a/vortex-array/src/arrays/variant/vtable/kernel.rs +++ b/vortex-array/src/arrays/variant/vtable/kernel.rs @@ -6,6 +6,7 @@ use vortex_error::VortexResult; use super::merge_typed_scalar_as_variant; use crate::ArrayRef; +use crate::Canonical; use crate::ExecutionCtx; use crate::IntoArray; use crate::array::Array; @@ -180,6 +181,13 @@ fn merge_typed_as_variant( let dtype = DType::Variant(Nullability::Nullable); // TODO(variant): replace this with a Variant builder once one exists. // Chunked canonicalizes to VariantArray, so this row-wise fallback is safe. + // Canonicalize the inputs once up front so the per-row `execute_scalar` calls below do not + // re-decode the underlying encodings on every row. + let typed = typed.execute::(ctx)?.into_array(); + let fallback = match fallback { + Some(fallback) => Some(fallback.execute::(ctx)?.into_array()), + None => None, + }; let mut chunks = Vec::with_capacity(typed.len()); for idx in 0..typed.len() { diff --git a/vortex-array/src/display/mod.rs b/vortex-array/src/display/mod.rs index 3d121cf80b9..103a3202aae 100644 --- a/vortex-array/src/display/mod.rs +++ b/vortex-array/src/display/mod.rs @@ -579,19 +579,23 @@ impl ArrayRef { } #[cfg(feature = "table-display")] DisplayOptions::TableDisplay => { + use vortex_mask::Mask; + use crate::arrays::struct_::StructArrayExt; #[expect(deprecated)] use crate::canonical::ToCanonical as _; use crate::dtype::DType; let mut builder = tabled::builder::Builder::default(); + // Reuse a single execution context across all per-row accesses below. + let mut ctx = LEGACY_SESSION.create_execution_ctx(); // Special logic for struct arrays. let DType::Struct(sf, _) = self.dtype() else { // For non-struct arrays, simply display a single column table without header. for row_idx in 0..self.len() { let value = self - .execute_scalar(row_idx, &mut LEGACY_SESSION.create_execution_ctx()) + .execute_scalar(row_idx, &mut ctx) .map_or_else(|e| format!(""), |s| s.to_string()); builder.push_record([value]); } @@ -606,18 +610,21 @@ impl ArrayRef { let struct_ = self.to_struct(); builder.push_record(sf.names().iter().map(|name| name.to_string())); + // Resolve validity to a mask once instead of probing it per row. + let validity = self + .validity() + .and_then(|v| v.execute_mask(self.len(), &mut ctx)) + .unwrap_or_else(|_| Mask::new_false(self.len())); + for row_idx in 0..self.len() { - if !self - .is_valid(row_idx, &mut LEGACY_SESSION.create_execution_ctx()) - .unwrap_or(false) - { + if !validity.value(row_idx) { let null_row = vec!["null".to_string(); sf.names().len()]; builder.push_record(null_row); } else { let mut row = Vec::new(); for field_array in StructArrayExt::iter_unmasked_fields(&struct_) { let value = field_array - .execute_scalar(row_idx, &mut LEGACY_SESSION.create_execution_ctx()) + .execute_scalar(row_idx, &mut ctx) .map_or_else(|e| format!(""), |s| s.to_string()); row.push(value); } @@ -634,10 +641,7 @@ impl ArrayRef { } for row_idx in 0..self.len() { - if !self - .is_valid(row_idx, &mut LEGACY_SESSION.create_execution_ctx()) - .unwrap_or(false) - { + if !validity.value(row_idx) { table.modify( (1 + row_idx, 0), tabled::settings::Span::column(sf.names().len() as isize), diff --git a/vortex-tensor/src/scalar_fns/l2_denorm.rs b/vortex-tensor/src/scalar_fns/l2_denorm.rs index c91a614ddff..a814af33f33 100644 --- a/vortex-tensor/src/scalar_fns/l2_denorm.rs +++ b/vortex-tensor/src/scalar_fns/l2_denorm.rs @@ -441,6 +441,10 @@ pub fn normalize_as_l2_denorm( let normalized_dtype = input.dtype().as_nonnullable(); let flat = extract_flat_elements(input.storage_array(), tensor_flat_size, ctx)?; + // Resolve validity to a mask once rather than probing it per row (each `Validity::is_valid` + // executes a scalar for array-backed validity). + let norms_valid = norms_validity.execute_mask(row_count, ctx)?; + // Normalize all of the vectors. let normalized = match_each_float_ptype!(flat.ptype(), |T| { let norm_values = primitive_norms.as_slice::(); @@ -448,7 +452,7 @@ pub fn normalize_as_l2_denorm( let total_elements = row_count * tensor_flat_size; let mut elements = BufferMut::::with_capacity(total_elements); for i in 0..row_count { - let is_valid = norms_validity.is_valid(i)?; + let is_valid = norms_valid.value(i); let norm = norm_values[i]; // SAFETY: We allocated `row_count * tensor_flat_size` capacity and push exactly @@ -637,12 +641,14 @@ pub fn validate_l2_normalized_rows_against_norms( Some(norms) => normalized_validity.and(norms.validity()?)?, None => normalized_validity, }; + // Resolve validity to a mask once rather than probing it per row. + let combined_valid = combined_validity.execute_mask(row_count, ctx)?; match_each_float_ptype!(element_ptype, |T| { let stored_norms = norms.as_ref().map(|norms| norms.as_slice::()); for i in 0..row_count { - if !combined_validity.is_valid(i)? { + if !combined_valid.value(i) { continue; } diff --git a/vortex-tui/src/browse/ui/layouts.rs b/vortex-tui/src/browse/ui/layouts.rs index c3245446197..14c9531645c 100644 --- a/vortex-tui/src/browse/ui/layouts.rs +++ b/vortex-tui/src/browse/ui/layouts.rs @@ -27,6 +27,8 @@ use ratatui::widgets::Table; use ratatui::widgets::Widget; use ratatui::widgets::Wrap; use vortex::array::ArrayRef; +use vortex::array::Canonical; +use vortex::array::IntoArray; use vortex::array::VortexSessionExecute; use vortex::array::arrays::StructArray; use vortex::array::arrays::struct_::StructArrayExt; @@ -161,7 +163,19 @@ fn render_array(app: &AppState, area: Rect, buf: &mut Buffer, is_stats_table: bo assert_eq!(app.cursor.dtype(), array.dtype()); - let field_arrays: Vec = struct_array.unmasked_fields().to_vec(); + // Canonicalize each field once so the per-row `execute_scalar` calls below do not + // re-decode the field encodings for every row. + let field_arrays: Vec = struct_array + .unmasked_fields() + .iter() + .map(|field| { + field + .clone() + .execute::(&mut ctx) + .vortex_expect("failed to canonicalize field array") + .into_array() + }) + .collect(); // TODO: trim the number of displayed rows and allow paging through column stats. let mut rows = Vec::with_capacity(array.len());