Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion vortex-array/src/arrays/fixed_size_list/vtable/operations.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@

use vortex_error::VortexResult;

use crate::Canonical;
use crate::ExecutionCtx;
use crate::IntoArray;
use crate::array::ArrayView;
use crate::array::OperationsVTable;
use crate::arrays::FixedSizeList;
Expand All @@ -17,7 +19,12 @@ impl OperationsVTable<FixedSizeList> for FixedSizeList {
ctx: &mut ExecutionCtx,
) -> VortexResult<Scalar> {
// By the preconditions we know that the list scalar is not null.
let list = array.fixed_size_list_elements_at(index)?;
// Canonicalize the element slice once so the per-element `execute_scalar` calls below do
// not re-decode the underlying encoding for every element.
let list = array
.fixed_size_list_elements_at(index)?
.execute::<Canonical>(ctx)?
.into_array();
let children_elements: Vec<Scalar> = (0..list.len())
.map(|i| list.execute_scalar(i, ctx))
.collect::<VortexResult<_>>()?;
Expand Down
9 changes: 8 additions & 1 deletion vortex-array/src/arrays/list/vtable/operations.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@ use std::sync::Arc;

use vortex_error::VortexResult;

use crate::Canonical;
use crate::ExecutionCtx;
use crate::IntoArray;
use crate::array::ArrayView;
use crate::array::OperationsVTable;
use crate::arrays::List;
Expand All @@ -19,7 +21,12 @@ impl OperationsVTable<List> for List {
ctx: &mut ExecutionCtx,
) -> VortexResult<Scalar> {
// By the preconditions we know that the list scalar is not null.
let elems = array.list_elements_at(index)?;
// Canonicalize the element slice once so the per-element `execute_scalar` calls below do
// not re-decode the underlying encoding for every element.
let elems = array
.list_elements_at(index)?
.execute::<Canonical>(ctx)?
.into_array();
let scalars: Vec<Scalar> = (0..elems.len())
.map(|i| elems.execute_scalar(i, ctx))
.collect::<VortexResult<_>>()?;
Expand Down
16 changes: 14 additions & 2 deletions vortex-array/src/arrays/listview/rebuild.rs
Original file line number Diff line number Diff line change
Expand Up @@ -209,9 +209,16 @@ impl ListViewArray {
let mut new_sizes = BufferMut::<S>::with_capacity(len);
let mut take_indices = BufferMut::<u64>::with_capacity(self.elements().len());

// Resolve validity to a mask once instead of probing it per row: `Validity::is_valid`
// executes a scalar (and spins up an execution context) on every call for array-backed
// validity, which is O(len) work repeated `len` times.
let validity = self
.validity()?
.execute_mask(len, &mut LEGACY_SESSION.create_execution_ctx())?;

let mut n_elements = NewOffset::zero();
for index in 0..len {
if !self.validity()?.is_valid(index)? {
if !validity.value(index) {
new_offsets.push(n_elements);
new_sizes.push(S::zero());
continue;
Expand Down Expand Up @@ -292,9 +299,14 @@ impl ListViewArray {
let mut new_elements_builder =
builder_with_capacity(element_dtype.as_ref(), self.elements().len());

// Resolve validity to a mask once instead of probing it per row (see `rebuild_with_take`).
let validity = self
.validity()?
.execute_mask(len, &mut LEGACY_SESSION.create_execution_ctx())?;

let mut n_elements = NewOffset::zero();
for index in 0..len {
if !self.validity()?.is_valid(index)? {
if !validity.value(index) {
// For NULL lists, place them after the previous item's data to maintain the
// no-overlap invariant for zero-copy to `ListArray` arrays.
new_offsets.push(n_elements);
Expand Down
9 changes: 8 additions & 1 deletion vortex-array/src/arrays/listview/vtable/operations.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@ use std::sync::Arc;

use vortex_error::VortexResult;

use crate::Canonical;
use crate::ExecutionCtx;
use crate::IntoArray;
use crate::array::ArrayView;
use crate::array::OperationsVTable;
use crate::arrays::ListView;
Expand All @@ -19,7 +21,12 @@ impl OperationsVTable<ListView> for ListView {
ctx: &mut ExecutionCtx,
) -> VortexResult<Scalar> {
// By the preconditions we know that the list scalar is not null.
let list = array.list_elements_at(index)?;
// Canonicalize the element slice once so the per-element `execute_scalar` calls below do
// not re-decode the underlying encoding for every element.
let list = array
.list_elements_at(index)?
.execute::<Canonical>(ctx)?
.into_array();
let children: Vec<Scalar> = (0..list.len())
.map(|i| list.execute_scalar(i, ctx))
.collect::<VortexResult<_>>()?;
Expand Down
49 changes: 32 additions & 17 deletions vortex-array/src/arrays/varbin/compute/filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ use crate::arrays::varbin::builder::VarBinBuilder;
use crate::dtype::DType;
use crate::dtype::IntegerPType;
use crate::match_each_integer_ptype;
use crate::validity::Validity;

impl FilterKernel for VarBin {
fn filter(
Expand Down Expand Up @@ -170,7 +169,10 @@ fn filter_select_var_bin_by_index(
offsets.as_slice::<O>(),
values.bytes().as_slice(),
mask_indices,
values.validity()?,
values
.varbin_validity()
.execute_mask(values.as_ref().len(), ctx)
.vortex_expect("Failed to compute validity mask"),
selection_count,
)
})
Expand All @@ -181,24 +183,37 @@ fn filter_select_var_bin_by_index_primitive_offset<O: IntegerPType>(
offsets: &[O],
data: &[u8],
mask_indices: &[usize],
// TODO(ngates): pass LogicalValidity instead
validity: Validity,
logical_validity: Mask,
selection_count: usize,
) -> VortexResult<VarBinArray> {
let value_at = |idx: usize| -> VortexResult<&[u8]> {
let start = offsets[idx]
.to_usize()
.ok_or_else(|| vortex_err!("Failed to convert offset to usize: {}", offsets[idx]))?;
let end = offsets[idx + 1].to_usize().ok_or_else(|| {
vortex_err!("Failed to convert offset to usize: {}", offsets[idx + 1])
})?;
Ok(&data[start..end])
};

let mut builder = VarBinBuilder::<O>::with_capacity(selection_count);
for idx in mask_indices.iter().copied() {
if validity.is_valid(idx)? {
let (start, end) = (
offsets[idx].to_usize().ok_or_else(|| {
vortex_err!("Failed to convert offset to usize: {}", offsets[idx])
})?,
offsets[idx + 1].to_usize().ok_or_else(|| {
vortex_err!("Failed to convert offset to usize: {}", offsets[idx + 1])
})?,
);
builder.append_value(&data[start..end])
} else {
builder.append_null()
match logical_validity.bit_buffer() {
AllOr::All => {
for idx in mask_indices.iter().copied() {
builder.append_value(value_at(idx)?)
}
}
AllOr::None => {
builder.append_n_nulls(selection_count);
}
AllOr::Some(validity) => {
for idx in mask_indices.iter().copied() {
if validity.value(idx) {
builder.append_value(value_at(idx)?)
} else {
builder.append_null()
}
}
}
}
Ok(builder.finish(dtype))
Expand Down
8 changes: 8 additions & 0 deletions vortex-array/src/arrays/variant/vtable/kernel.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ use vortex_error::VortexResult;

use super::merge_typed_scalar_as_variant;
use crate::ArrayRef;
use crate::Canonical;
use crate::ExecutionCtx;
use crate::IntoArray;
use crate::array::Array;
Expand Down Expand Up @@ -180,6 +181,13 @@ fn merge_typed_as_variant(
let dtype = DType::Variant(Nullability::Nullable);
// TODO(variant): replace this with a Variant builder once one exists.
// Chunked<Variant> canonicalizes to VariantArray, so this row-wise fallback is safe.
// Canonicalize the inputs once up front so the per-row `execute_scalar` calls below do not
// re-decode the underlying encodings on every row.
let typed = typed.execute::<Canonical>(ctx)?.into_array();
let fallback = match fallback {
Some(fallback) => Some(fallback.execute::<Canonical>(ctx)?.into_array()),
None => None,
};
let mut chunks = Vec::with_capacity(typed.len());

for idx in 0..typed.len() {
Expand Down
24 changes: 14 additions & 10 deletions vortex-array/src/display/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -579,19 +579,23 @@ impl ArrayRef {
}
#[cfg(feature = "table-display")]
DisplayOptions::TableDisplay => {
use vortex_mask::Mask;

use crate::arrays::struct_::StructArrayExt;
#[expect(deprecated)]
use crate::canonical::ToCanonical as _;
use crate::dtype::DType;

let mut builder = tabled::builder::Builder::default();
// Reuse a single execution context across all per-row accesses below.
let mut ctx = LEGACY_SESSION.create_execution_ctx();

// Special logic for struct arrays.
let DType::Struct(sf, _) = self.dtype() else {
// For non-struct arrays, simply display a single column table without header.
for row_idx in 0..self.len() {
let value = self
.execute_scalar(row_idx, &mut LEGACY_SESSION.create_execution_ctx())
.execute_scalar(row_idx, &mut ctx)
.map_or_else(|e| format!("<error: {e}>"), |s| s.to_string());
builder.push_record([value]);
}
Expand All @@ -606,18 +610,21 @@ impl ArrayRef {
let struct_ = self.to_struct();
builder.push_record(sf.names().iter().map(|name| name.to_string()));

// Resolve validity to a mask once instead of probing it per row.
let validity = self
.validity()
.and_then(|v| v.execute_mask(self.len(), &mut ctx))
.unwrap_or_else(|_| Mask::new_false(self.len()));

for row_idx in 0..self.len() {
if !self
.is_valid(row_idx, &mut LEGACY_SESSION.create_execution_ctx())
.unwrap_or(false)
{
if !validity.value(row_idx) {
let null_row = vec!["null".to_string(); sf.names().len()];
builder.push_record(null_row);
} else {
let mut row = Vec::new();
for field_array in StructArrayExt::iter_unmasked_fields(&struct_) {
let value = field_array
.execute_scalar(row_idx, &mut LEGACY_SESSION.create_execution_ctx())
.execute_scalar(row_idx, &mut ctx)
.map_or_else(|e| format!("<error: {e}>"), |s| s.to_string());
row.push(value);
}
Expand All @@ -634,10 +641,7 @@ impl ArrayRef {
}

for row_idx in 0..self.len() {
if !self
.is_valid(row_idx, &mut LEGACY_SESSION.create_execution_ctx())
.unwrap_or(false)
{
if !validity.value(row_idx) {
table.modify(
(1 + row_idx, 0),
tabled::settings::Span::column(sf.names().len() as isize),
Expand Down
10 changes: 8 additions & 2 deletions vortex-tensor/src/scalar_fns/l2_denorm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -441,14 +441,18 @@ pub fn normalize_as_l2_denorm(
let normalized_dtype = input.dtype().as_nonnullable();
let flat = extract_flat_elements(input.storage_array(), tensor_flat_size, ctx)?;

// Resolve validity to a mask once rather than probing it per row (each `Validity::is_valid`
// executes a scalar for array-backed validity).
let norms_valid = norms_validity.execute_mask(row_count, ctx)?;

// Normalize all of the vectors.
let normalized = match_each_float_ptype!(flat.ptype(), |T| {
let norm_values = primitive_norms.as_slice::<T>();

let total_elements = row_count * tensor_flat_size;
let mut elements = BufferMut::<T>::with_capacity(total_elements);
for i in 0..row_count {
let is_valid = norms_validity.is_valid(i)?;
let is_valid = norms_valid.value(i);
let norm = norm_values[i];

// SAFETY: We allocated `row_count * tensor_flat_size` capacity and push exactly
Expand Down Expand Up @@ -637,12 +641,14 @@ pub fn validate_l2_normalized_rows_against_norms(
Some(norms) => normalized_validity.and(norms.validity()?)?,
None => normalized_validity,
};
// Resolve validity to a mask once rather than probing it per row.
let combined_valid = combined_validity.execute_mask(row_count, ctx)?;

match_each_float_ptype!(element_ptype, |T| {
let stored_norms = norms.as_ref().map(|norms| norms.as_slice::<T>());

for i in 0..row_count {
if !combined_validity.is_valid(i)? {
if !combined_valid.value(i) {
continue;
}

Expand Down
16 changes: 15 additions & 1 deletion vortex-tui/src/browse/ui/layouts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ use ratatui::widgets::Table;
use ratatui::widgets::Widget;
use ratatui::widgets::Wrap;
use vortex::array::ArrayRef;
use vortex::array::Canonical;
use vortex::array::IntoArray;
use vortex::array::VortexSessionExecute;
use vortex::array::arrays::StructArray;
use vortex::array::arrays::struct_::StructArrayExt;
Expand Down Expand Up @@ -161,7 +163,19 @@ fn render_array(app: &AppState, area: Rect, buf: &mut Buffer, is_stats_table: bo

assert_eq!(app.cursor.dtype(), array.dtype());

let field_arrays: Vec<ArrayRef> = struct_array.unmasked_fields().to_vec();
// Canonicalize each field once so the per-row `execute_scalar` calls below do not
// re-decode the field encodings for every row.
let field_arrays: Vec<ArrayRef> = struct_array
.unmasked_fields()
.iter()
.map(|field| {
field
.clone()
.execute::<Canonical>(&mut ctx)
.vortex_expect("failed to canonicalize field array")
.into_array()
})
.collect();

// TODO: trim the number of displayed rows and allow paging through column stats.
let mut rows = Vec::with_capacity(array.len());
Expand Down
Loading