Skip to content
Draft
3 changes: 3 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 5 additions & 7 deletions encodings/datetime-parts/src/canonical.rs
Original file line number Diff line number Diff line change
Expand Up @@ -143,12 +143,11 @@ mod test {
&mut ctx,
)?;

assert!(
date_times
.as_array()
.validity()?
.mask_eq(&validity, &mut ctx)?
);
assert!(date_times.as_array().validity()?.mask_eq(
&validity,
milliseconds.len(),
&mut ctx
)?);

let dtype = date_times.dtype().clone();
let parts = DateTimePartsParts {
Expand All @@ -163,7 +162,6 @@ mod test {
.execute::<PrimitiveArray>(&mut ctx)?;

assert_arrays_eq!(primitive_values, milliseconds);
assert!(primitive_values.validity()?.mask_eq(&validity, &mut ctx)?);
Ok(())
}
}
2 changes: 1 addition & 1 deletion encodings/datetime-parts/src/compress.rs
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ mod tests {
days_prim
.validity()
.vortex_expect("days validity should be derivable")
.mask_eq(&validity, &mut ctx)
.mask_eq(&validity, days_prim.len(), &mut ctx)
.unwrap()
);
let seconds_prim = seconds.execute::<PrimitiveArray>(&mut ctx).unwrap();
Expand Down
1 change: 1 addition & 0 deletions encodings/pco/src/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,7 @@ fn test_validity_and_multiple_chunks_and_pages() {
.unwrap()
.mask_eq(
&Validity::Array(BoolArray::from_iter(vec![true, false, true]).into_array()),
primitive.len(),
&mut ctx,
)
.unwrap()
Expand Down
3 changes: 2 additions & 1 deletion encodings/zstd/src/test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ fn test_zstd_with_validity_and_multi_frame() {
decompressed
.validity()
.unwrap()
.mask_eq(&array.validity().unwrap(), &mut ctx)
.mask_eq(&array.validity().unwrap(), decompressed.len(), &mut ctx)
.unwrap()
);

Expand All @@ -106,6 +106,7 @@ fn test_zstd_with_validity_and_multi_frame() {
.unwrap()
.mask_eq(
&Validity::Array(BoolArray::from_iter(vec![false, true, false]).into_array()),
primitive.len(),
&mut ctx
)
.unwrap()
Expand Down
3 changes: 1 addition & 2 deletions vortex-array/src/arrays/dict/vtable/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@ use crate::executor::ExecutionResult;
use crate::require_child;
use crate::scalar::Scalar;
use crate::serde::ArrayChildren;
use crate::validity::Validity;

mod kernel;
mod operations;
Expand Down Expand Up @@ -179,7 +178,7 @@ impl VTable for Dict {

let array = require_child!(array, array.codes(), DictSlots::CODES => Primitive);

if matches!(array.codes().validity()?, Validity::AllInvalid) {
if array.codes().validity()?.definitely_all_invalid() {
return Ok(ExecutionResult::done(ConstantArray::new(
Scalar::null(array.dtype().as_nullable()),
array.codes().len(),
Expand Down
2 changes: 1 addition & 1 deletion vortex-array/src/arrays/masked/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ fn test_masked_child_preserves_length(#[case] validity: Validity) {
array
.validity()
.vortex_expect("masked validity should be derivable")
.mask_eq(&validity, &mut ctx)
.mask_eq(&validity, array.len(), &mut ctx)
.unwrap(),
);
}
2 changes: 1 addition & 1 deletion vortex-array/src/arrays/masked/vtable/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ impl VTable for Masked {
let validity = array.masked_validity();

// Fast path: all masked means result is all nulls.
if matches!(validity, Validity::AllInvalid) {
if validity.definitely_all_invalid() {
return Ok(ExecutionResult::done(
ConstantArray::new(Scalar::null(array.dtype().as_nullable()), array.len())
.into_array(),
Expand Down
3 changes: 1 addition & 2 deletions vortex-array/src/arrays/primitive/array/top_value.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ use crate::arrays::primitive::NativeValue;
use crate::dtype::NativePType;
use crate::match_each_native_ptype;
use crate::scalar::PValue;
use crate::validity::Validity;

impl PrimitiveArray {
/// Compute most common present value of this array
Expand All @@ -26,7 +25,7 @@ impl PrimitiveArray {
return Ok(None);
}

if matches!(self.validity()?, Validity::AllInvalid) {
if self.validity()?.definitely_all_invalid() {
return Ok(None);
}

Expand Down
62 changes: 57 additions & 5 deletions vortex-array/src/arrays/scalar_fn/vtable/validity.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,22 @@
// SPDX-FileCopyrightText: Copyright the Vortex contributors

use vortex_error::VortexResult;
use vortex_error::vortex_bail;

use crate::ArrayRef;
use crate::IntoArray;
use crate::LEGACY_SESSION;
use crate::VortexSessionExecute;
use crate::array::Array;
use crate::array::ArrayView;
use crate::array::ValidityVTable;
use crate::array::child_to_validity;
use crate::arrays::ConstantArray;
use crate::arrays::scalar_fn::ScalarFnArrayExt;
use crate::arrays::scalar_fn::vtable::ArrayExpr;
use crate::arrays::scalar_fn::vtable::FakeEq;
use crate::arrays::scalar_fn::vtable::ScalarFn;
use crate::dtype::Nullability;
use crate::expr::Expression;
use crate::expr::lit;
use crate::scalar_fn::TypedScalarFnInstance;
Expand All @@ -21,6 +26,36 @@ use crate::scalar_fn::fns::literal::Literal;
use crate::scalar_fn::fns::root::Root;
use crate::validity::Validity;

/// Convert an expression tree into a lazy array DAG without executing it.
///
/// This assumes all leaf expressions are either ArrayExpr (wrapping actual arrays) or Literals.
fn expr_to_lazy_array(expr: &Expression, row_count: usize) -> VortexResult<ArrayRef> {
// Handle Root expression - this should not happen in validity expressions
if expr.is::<Root>() {
vortex_bail!("Root expression cannot be converted in validity context");
}

// Handle Literal expression - create a constant array
if expr.is::<Literal>() {
let scalar = expr.as_::<Literal>();
return Ok(ConstantArray::new(scalar.clone(), row_count).into_array());
}

// Handle ArrayExpr leaves - unwrap the array they hold
if expr.is::<ArrayExpr>() {
return Ok(expr.as_::<ArrayExpr>().0.clone());
}

// Recursively convert child expressions into lazy input arrays
let children: Vec<ArrayRef> = expr
.children()
.iter()
.map(|child| expr_to_lazy_array(child, row_count))
.collect::<VortexResult<_>>()?;

Ok(Array::<ScalarFn>::try_new(expr.scalar_fn().clone(), children, row_count)?.into_array())
}

/// Execute an expression tree recursively.
///
/// This assumes all leaf expressions are either ArrayExpr (wrapping actual arrays) or Literals.
Expand All @@ -29,13 +64,13 @@ fn execute_expr(expr: &Expression, row_count: usize) -> VortexResult<ArrayRef> {

// Handle Root expression - this should not happen in validity expressions
if expr.is::<Root>() {
vortex_error::vortex_bail!("Root expression cannot be executed in validity context");
vortex_bail!("Root expression cannot be executed in validity context");
}

// Handle Literal expression - create a constant array
if expr.is::<Literal>() {
let scalar = expr.as_::<Literal>();
return Ok(crate::arrays::ConstantArray::new(scalar.clone(), row_count).into_array());
return Ok(ConstantArray::new(scalar.clone(), row_count).into_array());
}

// Recursively execute child expressions to get input arrays
Expand Down Expand Up @@ -66,9 +101,26 @@ impl ValidityVTable<ScalarFn> for ScalarFn {
.collect::<VortexResult<_>>()?;

let expr = Expression::try_new(array.scalar_fn().clone(), inputs)?;
let validity_expr = array.scalar_fn().validity(&expr)?;

// Execute the validity expression. All leaves are ArrayExpr nodes.
Ok(Validity::Array(execute_expr(&validity_expr, array.len())?))
match array.scalar_fn().validity_opt(&expr)? {
Some(validity_expr) => {
// The function defines its validity as an expression over its inputs, so we can
// represent it as a lazy array DAG without executing anything. If the expression
// is already a constant it is folded back into AllValid/AllInvalid.
let validity_array = expr_to_lazy_array(&validity_expr, array.len())?;
Ok(child_to_validity(
Some(&validity_array),
Nullability::Nullable,
))
}
None => {
// The function's validity can only be determined by executing the function
// itself (e.g. Kleene logic and/or). Representing that lazily would create a
// self-referential array (is_not_null over this very expression), so execute it
// eagerly instead.
let validity_expr = array.scalar_fn().validity(&expr)?;
Ok(Validity::Array(execute_expr(&validity_expr, array.len())?))
}
}
}
}
2 changes: 1 addition & 1 deletion vortex-array/src/arrays/varbin/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,7 @@ impl VarBinData {
}
_ => None,
};
let all_invalid = matches!(validity, Validity::AllInvalid);
let all_invalid = validity.definitely_all_invalid();

match_each_integer_ptype!(primitive_offsets.dtype().as_ptype(), |O| {
let offsets_slice = primitive_offsets.as_slice::<O>();
Expand Down
10 changes: 5 additions & 5 deletions vortex-array/src/builders/bool.rs
Original file line number Diff line number Diff line change
Expand Up @@ -209,11 +209,11 @@ mod tests {
#[expect(deprecated)]
let into_canon = chunk.to_bool();

assert!(
canon_into
.validity()?
.mask_eq(&into_canon.validity()?, &mut ctx)?
);
assert!(canon_into.validity()?.mask_eq(
&into_canon.validity()?,
canon_into.len(),
&mut ctx
)?);
assert_eq!(canon_into.to_bit_buffer(), into_canon.to_bit_buffer());
Ok(())
}
Expand Down
1 change: 1 addition & 0 deletions vortex-array/src/builders/list.rs
Original file line number Diff line number Diff line change
Expand Up @@ -490,6 +490,7 @@ mod tests {
&expected
.validity()
.vortex_expect("list validity should be derivable"),
actual.len(),
&mut ctx,
)
.unwrap(),
Expand Down
12 changes: 11 additions & 1 deletion vortex-array/src/scalar_fn/erased.rs
Original file line number Diff line number Diff line change
Expand Up @@ -134,12 +134,22 @@ impl ScalarFnRef {

/// Transforms the expression into one representing the validity of this expression.
pub fn validity(&self, expr: &Expression) -> VortexResult<Expression> {
Ok(self.0.validity(expr)?.unwrap_or_else(|| {
Ok(self.validity_opt(expr)?.unwrap_or_else(|| {

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do you want to remove the TODO?

// TODO(ngates): make validity a mandatory method on VTable to avoid this fallback.
IsNotNull.new_expr(EmptyOptions, [expr.clone()])
}))
}

/// Transforms the expression into one representing the validity of this expression,
/// returning `None` if the function does not define a validity expression.
///
/// When `None` is returned, the validity can only be determined by executing the
/// expression itself (e.g. Kleene logic `and`/`or`), and [`Self::validity`] falls back to
/// `is_not_null` over the expression.
pub fn validity_opt(&self, expr: &Expression) -> VortexResult<Option<Expression>> {
self.0.validity(expr)
}

/// Execute the expression given the input arguments.
pub fn execute(
&self,
Expand Down
2 changes: 1 addition & 1 deletion vortex-array/src/scalar_fn/fns/fill_null/kernel.rs
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ pub(super) fn precondition(
}

// If all values are null, replace the entire array with the fill value.
if matches!(array.validity()?, Validity::AllInvalid) {
if array.validity()?.definitely_all_invalid() {
return Ok(Some(
ConstantArray::new(fill_value.clone(), array.len()).into_array(),
));
Expand Down
2 changes: 1 addition & 1 deletion vortex-array/src/scalar_fn/fns/list_contains/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -415,7 +415,7 @@ fn list_is_not_empty(
ctx: &mut ExecutionCtx,
) -> VortexResult<ArrayRef> {
// Short-circuit for all invalid.
if matches!(list_array.validity()?, Validity::AllInvalid) {
if list_array.validity()?.definitely_all_invalid() {
return Ok(ConstantArray::new(
Scalar::null(DType::Bool(Nullability::Nullable)),
list_array.len(),
Expand Down
43 changes: 34 additions & 9 deletions vortex-array/src/stats/rewrite.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,18 +23,34 @@ pub(crate) use builtins::register_builtins;
/// Shared reference to a stats rewrite rule.
pub(crate) type StatsRewriteRuleRef = Arc<dyn StatsRewriteRule>;

/// A plugin-provided rule that rewrites predicates into stats-backed proof expressions.
/// A plugin-provided rule for predicates whose root scalar function matches this rule.
///
/// A falsifier evaluates to `true` only when the original predicate is definitely false for the
/// current stats scope. A satisfier evaluates to `true` only when the original predicate is
/// definitely true for the current stats scope. Returning `None` means the rule cannot prove
/// anything for the expression.
#[allow(dead_code)]
/// Rules do not produce expressions equivalent to `expr`. They produce optional sufficient
/// conditions over stats for the current scope:
///
/// - a falsifier evaluating to `true` proves that `expr` is false for every row in the scope;
/// - a satisfier evaluating to `true` proves that `expr` is true for every row in the scope.
///
/// Returning `None` means this rule cannot prove anything for the expression. A returned proof
/// expression that evaluates to `false` or `null` is also inconclusive.
///
/// Multiple rules may be registered for the same scalar function. Their proofs are combined with
/// `OR`, so every proof returned by an individual rule must be sound on its own.
///
/// `expr` is the full predicate expression whose root scalar function id is
/// [`Self::scalar_fn_id`]. Use [`StatsRewriteCtx`] to resolve dtypes and recursively rewrite child
/// predicates.
pub(crate) trait StatsRewriteRule: Debug + Send + Sync + 'static {
/// The scalar function ID this rule applies to.
/// Returns the scalar function id handled by this rule.
fn scalar_fn_id(&self) -> ScalarFnId;

/// Rewrite an expression into a stats-backed falsifier.
/// Returns a stats-backed proof that `expr` is false for the current scope.
///
/// If the returned expression evaluates to `true` against the scope's stats, then `expr` is
/// guaranteed to be false for every row in that scope. A returned proof expression that
/// evaluates to `false` or `null` is inconclusive.
///
/// Returns `Ok(None)` when this rule cannot construct a sound falsity proof for `expr`.
fn falsify(
&self,
expr: &Expression,
Expand All @@ -45,7 +61,16 @@ pub(crate) trait StatsRewriteRule: Debug + Send + Sync + 'static {
Ok(None)
}

/// Rewrite an expression into a stats-backed satisfier.
/// Returns a stats-backed proof that `expr` is true for the current scope.
///
/// If the returned expression evaluates to `true` against the scope's stats, then `expr` is
/// guaranteed to be true for every row in that scope. A returned proof expression that
/// evaluates to `false` or `null` is inconclusive.
///
/// This is not the complement of [`Self::falsify`]; both methods are one-way proofs and may be
/// implemented independently.
///
/// Returns `Ok(None)` when this rule cannot construct a sound truth proof for `expr`.
fn satisfy(
&self,
expr: &Expression,
Expand Down
Loading
Loading