From 83ab0e8f52d78628f23652ce7ae5b9e4741d6e5e Mon Sep 17 00:00:00 2001 From: Adam Gutglick Date: Thu, 2 Apr 2026 09:43:04 +0100 Subject: [PATCH 1/6] variant get Signed-off-by: Adam Gutglick --- encodings/parquet-variant/src/kernel.rs | 2 + encodings/parquet-variant/src/lib.rs | 3 + encodings/parquet-variant/src/variant_get.rs | 76 ++++++++ .../parquet-variant/src/variant_get_tests.rs | 163 ++++++++++++++++++ vortex-array/public-api.lock | 86 +++++++++ vortex-array/src/expr/exprs.rs | 15 ++ vortex-array/src/scalar_fn/fns/mod.rs | 1 + vortex-array/src/scalar_fn/fns/variant_get.rs | 145 ++++++++++++++++ vortex-array/src/scalar_fn/session.rs | 2 + vortex-proto/proto/expr.proto | 5 + vortex-proto/public-api.lock | 34 ++++ vortex-proto/src/generated/vortex.expr.rs | 6 + 12 files changed, 538 insertions(+) create mode 100644 encodings/parquet-variant/src/variant_get.rs create mode 100644 encodings/parquet-variant/src/variant_get_tests.rs create mode 100644 vortex-array/src/scalar_fn/fns/variant_get.rs diff --git a/encodings/parquet-variant/src/kernel.rs b/encodings/parquet-variant/src/kernel.rs index d47d4e31267..cf395c0a345 100644 --- a/encodings/parquet-variant/src/kernel.rs +++ b/encodings/parquet-variant/src/kernel.rs @@ -19,11 +19,13 @@ use vortex_mask::Mask; use crate::ParquetVariant; use crate::ParquetVariantArrayExt; +use crate::variant_get::VariantGetExecuteParent; pub(crate) static PARENT_KERNELS: ParentKernelSet = ParentKernelSet::new(&[ ParentKernelSet::lift(&FilterExecuteAdaptor(ParquetVariant)), ParentKernelSet::lift(&SliceExecuteAdaptor(ParquetVariant)), ParentKernelSet::lift(&TakeExecuteAdaptor(ParquetVariant)), + ParentKernelSet::lift(&VariantGetExecuteParent), ]); impl SliceKernel for ParquetVariant { diff --git a/encodings/parquet-variant/src/lib.rs b/encodings/parquet-variant/src/lib.rs index 9ecc9216daa..ce56151ffef 100644 --- a/encodings/parquet-variant/src/lib.rs +++ b/encodings/parquet-variant/src/lib.rs @@ -28,6 +28,9 @@ mod array; mod kernel; mod operations; mod validity; +mod variant_get; +#[cfg(test)] +mod variant_get_tests; mod vtable; pub use array::ParquetVariantArrayExt; diff --git a/encodings/parquet-variant/src/variant_get.rs b/encodings/parquet-variant/src/variant_get.rs new file mode 100644 index 00000000000..f03af191adb --- /dev/null +++ b/encodings/parquet-variant/src/variant_get.rs @@ -0,0 +1,76 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Execute-parent kernel for `variant_get` on `ParquetVariantArray`. +//! +//! Delegates to `parquet_variant_compute::variant_get` after converting to Arrow. + +use std::sync::Arc; + +use parquet_variant::VariantPathElement; +use parquet_variant_compute::GetOptions; +use parquet_variant_compute::VariantArray as ArrowVariantArray; +use vortex_array::ArrayRef; +use vortex_array::ArrayView; +use vortex_array::ExecutionCtx; +use vortex_array::arrays::scalar_fn::ExactScalarFn; +use vortex_array::arrays::scalar_fn::ScalarFnArrayView; +use vortex_array::dtype::FieldName; +use vortex_array::kernel::ExecuteParentKernel; +use vortex_array::scalar_fn::fns::variant_get::VariantGet; +use vortex_error::VortexResult; +use vortex_error::vortex_err; + +use crate::ParquetVariant; +use crate::ParquetVariantArrayExt; +use crate::ParquetVariantData; + +#[derive(Debug)] +pub(crate) struct VariantGetExecuteParent; + +impl ExecuteParentKernel for VariantGetExecuteParent { + type Parent = ExactScalarFn; + + fn execute_parent( + &self, + array: ArrayView<'_, ParquetVariant>, + parent: ScalarFnArrayView<'_, VariantGet>, + _child_idx: usize, + ctx: &mut ExecutionCtx, + ) -> VortexResult> { + let field_name: &FieldName = parent.options; + variant_get_impl(array, field_name, ctx).map(Some) + } +} + +fn variant_get_impl( + array: ArrayView<'_, ParquetVariant>, + field_name: &FieldName, + ctx: &mut ExecutionCtx, +) -> VortexResult { + // Convert to Arrow VariantArray + let arrow_variant = array.to_arrow(ctx)?; + + // Build path for a single field access + let path_element = VariantPathElement::Field { + name: field_name.as_ref().into(), + }; + let options = GetOptions::new_with_path(vec![path_element].into()); + + // Delegate to the parquet-variant-compute kernel. + // With as_type = None, the result is itself a VariantArray. + let inner: Arc = Arc::new(arrow_variant.into_inner()); + let arrow_result = parquet_variant_compute::variant_get(&inner, options) + .map_err(|e| vortex_err!("variant_get failed: {e}"))?; + + // Convert back to Vortex + let result_variant = ArrowVariantArray::try_new( + arrow_result + .as_any() + .downcast_ref::() + .ok_or_else(|| vortex_err!("variant_get did not return a StructArray"))?, + ) + .map_err(|e| vortex_err!("failed to create VariantArray from result: {e}"))?; + + ParquetVariantData::from_arrow_variant(&result_variant) +} diff --git a/encodings/parquet-variant/src/variant_get_tests.rs b/encodings/parquet-variant/src/variant_get_tests.rs new file mode 100644 index 00000000000..f4ade4a1003 --- /dev/null +++ b/encodings/parquet-variant/src/variant_get_tests.rs @@ -0,0 +1,163 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +#[cfg(test)] +mod tests { + use arrow_array::StructArray; + use arrow_buffer::NullBuffer; + use parquet_variant::Variant as PqVariant; + use parquet_variant::VariantBuilderExt; + use parquet_variant_compute::VariantArrayBuilder; + use vortex_array::ArrayRef; + use vortex_array::LEGACY_SESSION; + use vortex_array::VortexSessionExecute; + use vortex_array::expr::root; + use vortex_array::expr::variant_get; + use vortex_error::VortexResult; + + use crate::ParquetVariantData; + + /// Apply variant_get and execute through the full pipeline (including execute_parent). + fn apply_variant_get(arr: &ArrayRef, field: &str) -> VortexResult { + let expr = variant_get(field, root()); + let lazy = arr.clone().apply(&expr)?; + let mut ctx = LEGACY_SESSION.create_execution_ctx(); + lazy.execute::(&mut ctx) + } + + /// Build a VariantArray of objects: [{"a": 1, "b": "x"}, {"a": 2, "c": true}, {"b": "y"}] + fn make_object_array() -> VortexResult { + let mut builder = VariantArrayBuilder::new(3); + + builder + .new_object() + .with_field("a", 1i32) + .with_field("b", "x") + .finish(); + + builder + .new_object() + .with_field("a", 2i32) + .with_field("c", true) + .finish(); + + builder.new_object().with_field("b", "y").finish(); + + ParquetVariantData::from_arrow_variant(&builder.build()) + } + + /// Build a nullable VariantArray: [{"a": 10}, NULL, {"a": 30}] + fn make_nullable_object_array() -> VortexResult { + let mut builder = VariantArrayBuilder::new(3); + + builder.new_object().with_field("a", 10i32).finish(); + + builder.new_object().with_field("a", 20i32).finish(); + + builder.new_object().with_field("a", 30i32).finish(); + + let inner = builder.build().into_inner(); + let null_struct = StructArray::try_new( + inner.fields().clone(), + inner.columns().to_vec(), + Some(NullBuffer::from(vec![true, false, true])), + ) + .unwrap(); + let arrow_variant = parquet_variant_compute::VariantArray::try_new(&null_struct).unwrap(); + ParquetVariantData::from_arrow_variant(&arrow_variant) + } + + #[test] + fn test_variant_get_basic() -> VortexResult<()> { + let arr = make_object_array()?; + let result = apply_variant_get(&arr, "a")?; + + assert_eq!(result.len(), 3); + + // Row 0: {"a": 1, ...} → variant(1) + let s0 = result.scalar_at(0)?; + assert!(!s0.is_null()); + let inner0 = s0.as_variant().value().unwrap(); + assert_eq!(*inner0, 1i32.into()); + + // Row 1: {"a": 2, ...} → variant(2) + let s1 = result.scalar_at(1)?; + assert!(!s1.is_null()); + let inner1 = s1.as_variant().value().unwrap(); + assert_eq!(*inner1, 2i32.into()); + + // Row 2: {"b": "y"} → null (field "a" missing) + let s2 = result.scalar_at(2)?; + assert!(s2.is_null()); + + Ok(()) + } + + #[test] + fn test_variant_get_missing_field() -> VortexResult<()> { + let arr = make_object_array()?; + let result = apply_variant_get(&arr, "nonexistent")?; + + assert_eq!(result.len(), 3); + for i in 0..3 { + assert!(result.scalar_at(i)?.is_null(), "row {i} should be null"); + } + + Ok(()) + } + + #[test] + fn test_variant_get_null_input() -> VortexResult<()> { + let arr = make_nullable_object_array()?; + let result = apply_variant_get(&arr, "a")?; + + assert_eq!(result.len(), 3); + + // Row 0: {"a": 10} → variant(10) + assert!(!result.scalar_at(0)?.is_null()); + + // Row 1: NULL → null + assert!(result.scalar_at(1)?.is_null()); + + // Row 2: {"a": 30} → variant(30) + assert!(!result.scalar_at(2)?.is_null()); + + Ok(()) + } + + #[test] + fn test_variant_get_non_object() -> VortexResult<()> { + // Array of primitive variants (not objects) + let mut builder = VariantArrayBuilder::new(2); + builder.append_variant(PqVariant::from(42i32)); + builder.append_variant(PqVariant::from("hello")); + let arr = ParquetVariantData::from_arrow_variant(&builder.build())?; + + let result = apply_variant_get(&arr, "a")?; + + assert_eq!(result.len(), 2); + assert!(result.scalar_at(0)?.is_null()); + assert!(result.scalar_at(1)?.is_null()); + + Ok(()) + } + + #[test] + fn test_variant_get_different_field() -> VortexResult<()> { + let arr = make_object_array()?; + let result = apply_variant_get(&arr, "b")?; + + assert_eq!(result.len(), 3); + + // Row 0: {"a": 1, "b": "x"} → variant("x") + assert!(!result.scalar_at(0)?.is_null()); + + // Row 1: {"a": 2, "c": true} → null (no "b") + assert!(result.scalar_at(1)?.is_null()); + + // Row 2: {"b": "y"} → variant("y") + assert!(!result.scalar_at(2)?.is_null()); + + Ok(()) + } +} diff --git a/vortex-array/public-api.lock b/vortex-array/public-api.lock index 25736c6df84..5535608d920 100644 --- a/vortex-array/public-api.lock +++ b/vortex-array/public-api.lock @@ -12430,6 +12430,8 @@ pub fn vortex_array::expr::select_exclude(fields: impl core::convert::Into alloc::vec::Vec +pub fn vortex_array::expr::variant_get(field: impl core::convert::Into, child: vortex_array::expr::Expression) -> vortex_array::expr::Expression + pub fn vortex_array::expr::zip_expr(mask: vortex_array::expr::Expression, if_true: vortex_array::expr::Expression, if_false: vortex_array::expr::Expression) -> vortex_array::expr::Expression pub type vortex_array::expr::Annotations<'a, A> = vortex_utils::aliases::hash_map::HashMap<&'a vortex_array::expr::Expression, vortex_utils::aliases::hash_set::HashSet> @@ -17198,6 +17200,52 @@ pub fn vortex_array::scalar_fn::fns::select::Select::stat_falsification(&self, o pub fn vortex_array::scalar_fn::fns::select::Select::validity(&self, options: &Self::Options, expression: &vortex_array::expr::Expression) -> vortex_error::VortexResult> +pub mod vortex_array::scalar_fn::fns::variant_get + +pub struct vortex_array::scalar_fn::fns::variant_get::VariantGet + +impl core::clone::Clone for vortex_array::scalar_fn::fns::variant_get::VariantGet + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::clone(&self) -> vortex_array::scalar_fn::fns::variant_get::VariantGet + +impl vortex_array::scalar_fn::ScalarFnVTable for vortex_array::scalar_fn::fns::variant_get::VariantGet + +pub type vortex_array::scalar_fn::fns::variant_get::VariantGet::Options = vortex_array::dtype::FieldName + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::arity(&self, _field_name: &vortex_array::dtype::FieldName) -> vortex_array::scalar_fn::Arity + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::child_name(&self, _instance: &Self::Options, child_idx: usize) -> vortex_array::scalar_fn::ChildName + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::coerce_args(&self, options: &Self::Options, args: &[vortex_array::dtype::DType]) -> vortex_error::VortexResult> + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::deserialize(&self, metadata: &[u8], _session: &vortex_session::VortexSession) -> vortex_error::VortexResult + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::execute(&self, _field_name: &vortex_array::dtype::FieldName, _args: &dyn vortex_array::scalar_fn::ExecutionArgs, _ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::fmt_sql(&self, field_name: &vortex_array::dtype::FieldName, expr: &vortex_array::expr::Expression, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::id(&self) -> vortex_array::scalar_fn::ScalarFnId + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::is_fallible(&self, _field_name: &vortex_array::dtype::FieldName) -> bool + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::is_null_sensitive(&self, _field_name: &vortex_array::dtype::FieldName) -> bool + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::reduce(&self, field_name: &vortex_array::dtype::FieldName, node: &dyn vortex_array::scalar_fn::ReduceNode, ctx: &dyn vortex_array::scalar_fn::ReduceCtx) -> vortex_error::VortexResult> + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::return_dtype(&self, _field_name: &vortex_array::dtype::FieldName, arg_dtypes: &[vortex_array::dtype::DType]) -> vortex_error::VortexResult + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::serialize(&self, instance: &Self::Options) -> vortex_error::VortexResult>> + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::simplify(&self, options: &Self::Options, expr: &vortex_array::expr::Expression, ctx: &dyn vortex_array::scalar_fn::SimplifyCtx) -> vortex_error::VortexResult> + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::simplify_untyped(&self, options: &Self::Options, expr: &vortex_array::expr::Expression) -> vortex_error::VortexResult> + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::stat_expression(&self, options: &Self::Options, expr: &vortex_array::expr::Expression, stat: vortex_array::expr::stats::Stat, catalog: &dyn vortex_array::expr::pruning::StatsCatalog) -> core::option::Option + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::stat_falsification(&self, options: &Self::Options, expr: &vortex_array::expr::Expression, catalog: &dyn vortex_array::expr::pruning::StatsCatalog) -> core::option::Option + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::validity(&self, options: &Self::Options, expression: &vortex_array::expr::Expression) -> vortex_error::VortexResult> + pub mod vortex_array::scalar_fn::fns::zip pub struct vortex_array::scalar_fn::fns::zip::Zip @@ -18406,6 +18454,44 @@ pub fn vortex_array::scalar_fn::fns::select::Select::stat_falsification(&self, o pub fn vortex_array::scalar_fn::fns::select::Select::validity(&self, options: &Self::Options, expression: &vortex_array::expr::Expression) -> vortex_error::VortexResult> +impl vortex_array::scalar_fn::ScalarFnVTable for vortex_array::scalar_fn::fns::variant_get::VariantGet + +pub type vortex_array::scalar_fn::fns::variant_get::VariantGet::Options = vortex_array::dtype::FieldName + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::arity(&self, _field_name: &vortex_array::dtype::FieldName) -> vortex_array::scalar_fn::Arity + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::child_name(&self, _instance: &Self::Options, child_idx: usize) -> vortex_array::scalar_fn::ChildName + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::coerce_args(&self, options: &Self::Options, args: &[vortex_array::dtype::DType]) -> vortex_error::VortexResult> + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::deserialize(&self, metadata: &[u8], _session: &vortex_session::VortexSession) -> vortex_error::VortexResult + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::execute(&self, _field_name: &vortex_array::dtype::FieldName, _args: &dyn vortex_array::scalar_fn::ExecutionArgs, _ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::fmt_sql(&self, field_name: &vortex_array::dtype::FieldName, expr: &vortex_array::expr::Expression, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::id(&self) -> vortex_array::scalar_fn::ScalarFnId + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::is_fallible(&self, _field_name: &vortex_array::dtype::FieldName) -> bool + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::is_null_sensitive(&self, _field_name: &vortex_array::dtype::FieldName) -> bool + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::reduce(&self, field_name: &vortex_array::dtype::FieldName, node: &dyn vortex_array::scalar_fn::ReduceNode, ctx: &dyn vortex_array::scalar_fn::ReduceCtx) -> vortex_error::VortexResult> + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::return_dtype(&self, _field_name: &vortex_array::dtype::FieldName, arg_dtypes: &[vortex_array::dtype::DType]) -> vortex_error::VortexResult + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::serialize(&self, instance: &Self::Options) -> vortex_error::VortexResult>> + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::simplify(&self, options: &Self::Options, expr: &vortex_array::expr::Expression, ctx: &dyn vortex_array::scalar_fn::SimplifyCtx) -> vortex_error::VortexResult> + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::simplify_untyped(&self, options: &Self::Options, expr: &vortex_array::expr::Expression) -> vortex_error::VortexResult> + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::stat_expression(&self, options: &Self::Options, expr: &vortex_array::expr::Expression, stat: vortex_array::expr::stats::Stat, catalog: &dyn vortex_array::expr::pruning::StatsCatalog) -> core::option::Option + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::stat_falsification(&self, options: &Self::Options, expr: &vortex_array::expr::Expression, catalog: &dyn vortex_array::expr::pruning::StatsCatalog) -> core::option::Option + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::validity(&self, options: &Self::Options, expression: &vortex_array::expr::Expression) -> vortex_error::VortexResult> + impl vortex_array::scalar_fn::ScalarFnVTable for vortex_array::scalar_fn::fns::zip::Zip pub type vortex_array::scalar_fn::fns::zip::Zip::Options = vortex_array::scalar_fn::EmptyOptions diff --git a/vortex-array/src/expr/exprs.rs b/vortex-array/src/expr/exprs.rs index fb8d2f0cb77..9aced319454 100644 --- a/vortex-array/src/expr/exprs.rs +++ b/vortex-array/src/expr/exprs.rs @@ -46,6 +46,7 @@ use crate::scalar_fn::fns::pack::PackOptions; use crate::scalar_fn::fns::root::Root; use crate::scalar_fn::fns::select::FieldSelection; use crate::scalar_fn::fns::select::Select; +use crate::scalar_fn::fns::variant_get::VariantGet; use crate::scalar_fn::fns::zip::Zip; // ---- Root ---- @@ -699,3 +700,17 @@ pub fn dynamic( pub fn list_contains(list: Expression, value: Expression) -> Expression { ListContains.new_expr(EmptyOptions, [list, value]) } + +// ---- VariantGet ---- + +/// Creates an expression that extracts a field from a variant object by name. +/// +/// Returns a new variant containing the field's value, or null if the field does not exist. +/// +/// ```rust +/// # use vortex_array::expr::{variant_get, root}; +/// let expr = variant_get("field_name", root()); +/// ``` +pub fn variant_get(field: impl Into, child: Expression) -> Expression { + VariantGet.new_expr(field.into(), vec![child]) +} diff --git a/vortex-array/src/scalar_fn/fns/mod.rs b/vortex-array/src/scalar_fn/fns/mod.rs index 8fa1b66532d..0b0b624e57b 100644 --- a/vortex-array/src/scalar_fn/fns/mod.rs +++ b/vortex-array/src/scalar_fn/fns/mod.rs @@ -20,4 +20,5 @@ pub mod operators; pub mod pack; pub mod root; pub mod select; +pub mod variant_get; pub mod zip; diff --git a/vortex-array/src/scalar_fn/fns/variant_get.rs b/vortex-array/src/scalar_fn/fns/variant_get.rs new file mode 100644 index 00000000000..304a7197cb7 --- /dev/null +++ b/vortex-array/src/scalar_fn/fns/variant_get.rs @@ -0,0 +1,145 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use std::fmt::Formatter; +use std::sync::Arc; + +use prost::Message; +use vortex_error::VortexResult; +use vortex_error::vortex_bail; +use vortex_proto::expr as pb; +use vortex_session::VortexSession; + +use crate::ArrayRef; +use crate::ExecutionCtx; +use crate::arrays::Variant; +use crate::arrays::variant::VariantArrayExt; +use crate::dtype::DType; +use crate::dtype::FieldName; +use crate::dtype::Nullability; +use crate::expr::Expression; +use crate::scalar_fn::Arity; +use crate::scalar_fn::ChildName; +use crate::scalar_fn::ExecutionArgs; +use crate::scalar_fn::ReduceCtx; +use crate::scalar_fn::ReduceNode; +use crate::scalar_fn::ReduceNodeRef; +use crate::scalar_fn::ScalarFnId; +use crate::scalar_fn::ScalarFnVTable; +use crate::scalar_fn::ScalarFnVTableExt; + +/// Extracts a field from a variant object by name, returning a new variant. +/// +/// This is analogous to [`GetItem`](super::get_item::GetItem) for structs, but operates on +/// semi-structured variant data. The result is always `DType::Variant(Nullable)` since the +/// requested field may not exist in every row. +/// +/// Execution is handled by variant encodings (e.g. `ParquetVariantArray`) via `execute_parent`. +/// The canonical `VariantArray` does not support direct execution; a `reduce` rule unwraps +/// the `VariantArray` wrapper to expose the underlying encoding. +#[derive(Clone)] +pub struct VariantGet; + +impl ScalarFnVTable for VariantGet { + type Options = FieldName; + + fn id(&self) -> ScalarFnId { + ScalarFnId::from("vortex.variant_get") + } + + fn serialize(&self, instance: &Self::Options) -> VortexResult>> { + Ok(Some( + pb::VariantGetOpts { + path: instance.to_string(), + } + .encode_to_vec(), + )) + } + + fn deserialize( + &self, + metadata: &[u8], + _session: &VortexSession, + ) -> VortexResult { + let opts = pb::VariantGetOpts::decode(metadata)?; + Ok(FieldName::from(opts.path)) + } + + fn arity(&self, _field_name: &FieldName) -> Arity { + Arity::Exact(1) + } + + fn child_name(&self, _instance: &Self::Options, child_idx: usize) -> ChildName { + match child_idx { + 0 => ChildName::from("input"), + _ => unreachable!( + "Invalid child index {} for VariantGet expression", + child_idx + ), + } + } + + fn fmt_sql( + &self, + field_name: &FieldName, + expr: &Expression, + f: &mut Formatter<'_>, + ) -> std::fmt::Result { + write!(f, "variant_get(")?; + expr.children()[0].fmt_sql(f)?; + write!(f, ", '{}')", field_name) + } + + fn return_dtype(&self, _field_name: &FieldName, arg_dtypes: &[DType]) -> VortexResult { + if !matches!(arg_dtypes[0], DType::Variant(_)) { + vortex_bail!( + "variant_get requires a Variant input, got {:?}", + arg_dtypes[0] + ); + } + // Always nullable: the field may not exist in every variant value. + Ok(DType::Variant(Nullability::Nullable)) + } + + fn execute( + &self, + _field_name: &FieldName, + _args: &dyn ExecutionArgs, + _ctx: &mut ExecutionCtx, + ) -> VortexResult { + vortex_bail!( + "variant_get cannot be executed directly; \ + it must be pushed down to a variant encoding via execute_parent" + ) + } + + fn reduce( + &self, + field_name: &FieldName, + node: &dyn ReduceNode, + ctx: &dyn ReduceCtx, + ) -> VortexResult> { + // If the child is a canonical VariantArray wrapper, unwrap it to expose the + // underlying encoding (e.g. ParquetVariantArray) so that execute_parent can + // handle the operation. + let child = node.child(0); + if let Some(child_array) = child.as_any().downcast_ref::() + && child_array.is::() + { + let inner = child_array.as_::().child().clone(); + return Ok(Some(ctx.new_node( + VariantGet.bind(field_name.clone()), + &[Arc::new(inner) as ReduceNodeRef], + )?)); + } + Ok(None) + } + + fn is_null_sensitive(&self, _field_name: &FieldName) -> bool { + true + } + + fn is_fallible(&self, _field_name: &FieldName) -> bool { + false + } +} diff --git a/vortex-array/src/scalar_fn/session.rs b/vortex-array/src/scalar_fn/session.rs index 3c78f56928d..9268ed1403a 100644 --- a/vortex-array/src/scalar_fn/session.rs +++ b/vortex-array/src/scalar_fn/session.rs @@ -24,6 +24,7 @@ use crate::scalar_fn::fns::not::Not; use crate::scalar_fn::fns::pack::Pack; use crate::scalar_fn::fns::root::Root; use crate::scalar_fn::fns::select::Select; +use crate::scalar_fn::fns::variant_get::VariantGet; /// Registry of scalar function vtables. /// Registry of scalar function vtables. @@ -69,6 +70,7 @@ impl Default for ScalarFnSession { this.register(Pack); this.register(Root); this.register(Select); + this.register(VariantGet); this } diff --git a/vortex-proto/proto/expr.proto b/vortex-proto/proto/expr.proto index 73ba7209a15..3c062dffb79 100644 --- a/vortex-proto/proto/expr.proto +++ b/vortex-proto/proto/expr.proto @@ -87,6 +87,11 @@ message SelectOpts { } } +// Options for `vortex.variant_get` +message VariantGetOpts { + string path = 1; +} + // Options for `vortex.case_when` // Encodes num_when_then_pairs and has_else into a single u32 (num_children). // num_children = num_when_then_pairs * 2 + (has_else ? 1 : 0) diff --git a/vortex-proto/public-api.lock b/vortex-proto/public-api.lock index 045b53d17eb..d7ea01cb7bb 100644 --- a/vortex-proto/public-api.lock +++ b/vortex-proto/public-api.lock @@ -1192,6 +1192,40 @@ pub fn vortex_proto::expr::SelectOpts::clear(&mut self) pub fn vortex_proto::expr::SelectOpts::encoded_len(&self) -> usize +pub struct vortex_proto::expr::VariantGetOpts + +pub vortex_proto::expr::VariantGetOpts::path: alloc::string::String + +impl core::clone::Clone for vortex_proto::expr::VariantGetOpts + +pub fn vortex_proto::expr::VariantGetOpts::clone(&self) -> vortex_proto::expr::VariantGetOpts + +impl core::cmp::Eq for vortex_proto::expr::VariantGetOpts + +impl core::cmp::PartialEq for vortex_proto::expr::VariantGetOpts + +pub fn vortex_proto::expr::VariantGetOpts::eq(&self, other: &vortex_proto::expr::VariantGetOpts) -> bool + +impl core::default::Default for vortex_proto::expr::VariantGetOpts + +pub fn vortex_proto::expr::VariantGetOpts::default() -> Self + +impl core::fmt::Debug for vortex_proto::expr::VariantGetOpts + +pub fn vortex_proto::expr::VariantGetOpts::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl core::hash::Hash for vortex_proto::expr::VariantGetOpts + +pub fn vortex_proto::expr::VariantGetOpts::hash<__H: core::hash::Hasher>(&self, state: &mut __H) + +impl core::marker::StructuralPartialEq for vortex_proto::expr::VariantGetOpts + +impl prost::message::Message for vortex_proto::expr::VariantGetOpts + +pub fn vortex_proto::expr::VariantGetOpts::clear(&mut self) + +pub fn vortex_proto::expr::VariantGetOpts::encoded_len(&self) -> usize + pub mod vortex_proto::scalar pub mod vortex_proto::scalar::scalar_value diff --git a/vortex-proto/src/generated/vortex.expr.rs b/vortex-proto/src/generated/vortex.expr.rs index 9c7ddb1d90c..607dc848d8a 100644 --- a/vortex-proto/src/generated/vortex.expr.rs +++ b/vortex-proto/src/generated/vortex.expr.rs @@ -153,6 +153,12 @@ pub mod select_opts { Exclude(super::FieldNames), } } +/// Options for `vortex.variant_get` +#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] +pub struct VariantGetOpts { + #[prost(string, tag = "1")] + pub path: ::prost::alloc::string::String, +} /// Options for `vortex.case_when` /// Encodes num_when_then_pairs and has_else into a single u32 (num_children). /// num_children = num_when_then_pairs * 2 + (has_else ? 1 : 0) From d019aad170b96e28f31f382791971d2ab3046ed5 Mon Sep 17 00:00:00 2001 From: Adam Gutglick Date: Thu, 2 Apr 2026 10:18:20 +0100 Subject: [PATCH 2/6] stuff Signed-off-by: Adam Gutglick --- encodings/parquet-variant/src/lib.rs | 2 - .../{variant_get.rs => variant_get/mod.rs} | 47 ++- .../parquet-variant/src/variant_get/tests.rs | 351 ++++++++++++++++++ 3 files changed, 395 insertions(+), 5 deletions(-) rename encodings/parquet-variant/src/{variant_get.rs => variant_get/mod.rs} (63%) create mode 100644 encodings/parquet-variant/src/variant_get/tests.rs diff --git a/encodings/parquet-variant/src/lib.rs b/encodings/parquet-variant/src/lib.rs index ce56151ffef..2ce35a44388 100644 --- a/encodings/parquet-variant/src/lib.rs +++ b/encodings/parquet-variant/src/lib.rs @@ -29,8 +29,6 @@ mod kernel; mod operations; mod validity; mod variant_get; -#[cfg(test)] -mod variant_get_tests; mod vtable; pub use array::ParquetVariantArrayExt; diff --git a/encodings/parquet-variant/src/variant_get.rs b/encodings/parquet-variant/src/variant_get/mod.rs similarity index 63% rename from encodings/parquet-variant/src/variant_get.rs rename to encodings/parquet-variant/src/variant_get/mod.rs index f03af191adb..b1133846a51 100644 --- a/encodings/parquet-variant/src/variant_get.rs +++ b/encodings/parquet-variant/src/variant_get/mod.rs @@ -13,17 +13,26 @@ use parquet_variant_compute::VariantArray as ArrowVariantArray; use vortex_array::ArrayRef; use vortex_array::ArrayView; use vortex_array::ExecutionCtx; +use vortex_array::IntoArray; +use vortex_array::arrays::VariantArray; use vortex_array::arrays::scalar_fn::ExactScalarFn; use vortex_array::arrays::scalar_fn::ScalarFnArrayView; +use vortex_array::arrow::FromArrowArray; +use vortex_array::dtype::DType; use vortex_array::dtype::FieldName; +use vortex_array::dtype::Nullability; use vortex_array::kernel::ExecuteParentKernel; use vortex_array::scalar_fn::fns::variant_get::VariantGet; +use vortex_array::validity::Validity; +use vortex_buffer::BitBuffer; use vortex_error::VortexResult; use vortex_error::vortex_err; use crate::ParquetVariant; use crate::ParquetVariantArrayExt; -use crate::ParquetVariantData; + +#[cfg(test)] +mod tests; #[derive(Debug)] pub(crate) struct VariantGetExecuteParent; @@ -63,7 +72,7 @@ fn variant_get_impl( let arrow_result = parquet_variant_compute::variant_get(&inner, options) .map_err(|e| vortex_err!("variant_get failed: {e}"))?; - // Convert back to Vortex + // Convert back to Vortex. let result_variant = ArrowVariantArray::try_new( arrow_result .as_any() @@ -72,5 +81,37 @@ fn variant_get_impl( ) .map_err(|e| vortex_err!("failed to create VariantArray from result: {e}"))?; - ParquetVariantData::from_arrow_variant(&result_variant) + // Ensure the result is always nullable (matching variant_get's return_dtype). + // Arrow may return a non-nullable result when no nulls are present. + let validity = result_variant + .nulls() + .map(|nulls| { + if nulls.null_count() == nulls.len() { + Validity::AllInvalid + } else { + Validity::from(BitBuffer::from(nulls.inner().clone())) + } + }) + .unwrap_or(Validity::AllValid); + + let metadata = ArrayRef::from_arrow( + result_variant.metadata_field() as &dyn arrow_array::Array, + false, + )?; + let value = result_variant + .value_field() + .map(|v| ArrayRef::from_arrow(v as &dyn arrow_array::Array, true)) + .transpose()?; + let typed_value = result_variant + .typed_value_field() + .map(|tv| ArrayRef::from_arrow(tv.as_ref(), true)) + .transpose()?; + + let pv = ParquetVariant::try_new(validity, metadata, value, typed_value)?; + debug_assert_eq!( + pv.dtype(), + &DType::Variant(Nullability::Nullable), + "variant_get result must be nullable" + ); + Ok(VariantArray::new(pv.into_array()).into_array()) } diff --git a/encodings/parquet-variant/src/variant_get/tests.rs b/encodings/parquet-variant/src/variant_get/tests.rs new file mode 100644 index 00000000000..56e4ac52704 --- /dev/null +++ b/encodings/parquet-variant/src/variant_get/tests.rs @@ -0,0 +1,351 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use std::sync::Arc; + +use arrow_array::Array as ArrowArray; +use arrow_array::ArrayRef as ArrowArrayRef; +use arrow_array::StringArray; +use arrow_array::StructArray; +use arrow_buffer::NullBuffer; +use parquet_variant::Variant as PqVariant; +use parquet_variant::VariantBuilderExt; +use parquet_variant::VariantPath; +use parquet_variant_compute::GetOptions; +use parquet_variant_compute::VariantArray as ArrowVariantArray; +use parquet_variant_compute::VariantArrayBuilder; +use parquet_variant_compute::json_to_variant; +use rstest::rstest; +use vortex_array::ArrayRef; +use vortex_array::LEGACY_SESSION; +use vortex_array::VortexSessionExecute; +use vortex_array::arrays::variant::VariantArrayExt; +use vortex_array::expr::root; +use vortex_array::expr::variant_get; +use vortex_error::VortexResult; + +use crate::ParquetVariant; +use crate::ParquetVariantArrayExt; +use crate::ParquetVariantData; + +/// Apply variant_get and execute through the full pipeline (including execute_parent). +fn apply_variant_get(arr: &ArrayRef, field: &str) -> VortexResult { + let expr = variant_get(field, root()); + let array = arr.clone().apply(&expr)?; + let mut ctx = LEGACY_SESSION.create_execution_ctx(); + array.execute::(&mut ctx) +} + +/// Convert a Vortex result back to an Arrow VariantArray for comparison. +fn vortex_to_arrow_variant(arr: &ArrayRef) -> ArrowVariantArray { + let variant = arr.as_::(); + let pv = variant + .child() + .as_::(); + let mut ctx = LEGACY_SESSION.create_execution_ctx(); + pv.to_arrow(&mut ctx).unwrap() +} + +/// Run variant_get through both Arrow and Vortex on the same input, and assert +/// the per-row results (value + validity) are identical by comparing at the Arrow level. +fn assert_matches_arrow(json_rows: &[&str], field: &str) { + // --- Arrow side --- + let arrow_strings: ArrowArrayRef = Arc::new(StringArray::from( + json_rows.iter().map(|s| Some(*s)).collect::>(), + )); + let arrow_variant = json_to_variant(&arrow_strings).unwrap(); + let path = VariantPath::try_from(field).unwrap(); + let arrow_result = parquet_variant_compute::variant_get( + &arrow_variant.clone().into(), + GetOptions::new_with_path(path), + ) + .unwrap(); + let arrow_result_variant = + ArrowVariantArray::try_new(arrow_result.as_any().downcast_ref::().unwrap()) + .unwrap(); + + // --- Vortex side --- + let vortex_input = ParquetVariantData::from_arrow_variant(&arrow_variant).unwrap(); + let vortex_result = apply_variant_get(&vortex_input, field).unwrap(); + let vortex_as_arrow = vortex_to_arrow_variant(&vortex_result); + + // --- Compare row-by-row at Arrow Variant level --- + assert_eq!( + vortex_as_arrow.len(), + arrow_result_variant.len(), + "length mismatch" + ); + + for i in 0..arrow_result_variant.len() { + let arrow_is_null = arrow_result_variant.is_null(i); + let vortex_is_null = vortex_as_arrow.is_null(i); + + assert_eq!( + vortex_is_null, arrow_is_null, + "row {i}: null mismatch (vortex={vortex_is_null}, arrow={arrow_is_null})" + ); + + if !arrow_is_null { + let arrow_value = arrow_result_variant.value(i); + let vortex_value = vortex_as_arrow.value(i); + assert_eq!( + vortex_value, arrow_value, + "row {i}: value mismatch\n vortex: {vortex_value:?}\n arrow: {arrow_value:?}" + ); + } + } +} + +/// Run variant_get through both Arrow and Vortex on nullable input (with NullBuffer), +/// and assert the results match. +fn assert_matches_arrow_nullable(json_rows: &[&str], validity: &[bool], field: &str) { + // --- Arrow side --- + let arrow_strings: ArrowArrayRef = Arc::new(StringArray::from( + json_rows.iter().map(|s| Some(*s)).collect::>(), + )); + let base_variant = json_to_variant(&arrow_strings).unwrap(); + let inner = base_variant.into_inner(); + let null_struct = StructArray::try_new( + inner.fields().clone(), + inner.columns().to_vec(), + Some(NullBuffer::from(validity.to_vec())), + ) + .unwrap(); + let arrow_variant = ArrowVariantArray::try_new(&null_struct).unwrap(); + + let path = VariantPath::try_from(field).unwrap(); + let arrow_result = parquet_variant_compute::variant_get( + &ArrowArrayRef::from(arrow_variant.clone()), + GetOptions::new_with_path(path), + ) + .unwrap(); + let arrow_result_variant = + ArrowVariantArray::try_new(arrow_result.as_any().downcast_ref::().unwrap()) + .unwrap(); + + // --- Vortex side --- + let vortex_input = ParquetVariantData::from_arrow_variant(&arrow_variant).unwrap(); + let vortex_result = apply_variant_get(&vortex_input, field).unwrap(); + let vortex_as_arrow = vortex_to_arrow_variant(&vortex_result); + + // --- Compare --- + assert_eq!( + vortex_as_arrow.len(), + arrow_result_variant.len(), + "length mismatch" + ); + + for i in 0..arrow_result_variant.len() { + let arrow_is_null = arrow_result_variant.is_null(i); + let vortex_is_null = vortex_as_arrow.is_null(i); + + assert_eq!( + vortex_is_null, arrow_is_null, + "row {i}: null mismatch (vortex={vortex_is_null}, arrow={arrow_is_null})" + ); + + if !arrow_is_null { + let arrow_value = arrow_result_variant.value(i); + let vortex_value = vortex_as_arrow.value(i); + assert_eq!( + vortex_value, arrow_value, + "row {i}: value mismatch\n vortex: {vortex_value:?}\n arrow: {arrow_value:?}" + ); + } + } +} + +// --------------------------------------------------------------------------- +// Tests that compare Vortex vs Arrow variant_get +// --------------------------------------------------------------------------- + +#[rstest] +#[case("some_field", &[r#"{"some_field": 1234}"#])] +#[case("a", &[r#"{"a": 1, "b": 2}"#, r#"{"a": "hello"}"#, r#"{"b": 99}"#])] +#[case("nested", &[r#"{"nested": {"x": 1, "y": 2}}"#])] +#[case("missing", &[r#"{"a": 1}"#, r#"{"b": 2}"#])] +#[case("x", &[r#"{"x": true}"#, r#"{"x": false}"#, r#"{"x": null}"#])] +#[case("arr", &[r#"{"arr": [1, 2, 3]}"#])] +#[case("s", &[r#"{"s": "hello world"}"#, r#"{"s": ""}"#])] +#[case("n", &[r#"{"n": 3.14}"#, r#"{"n": -0.0}"#])] +fn test_variant_get_matches_arrow(#[case] field: &str, #[case] json_rows: &[&str]) { + assert_matches_arrow(json_rows, field); +} + +#[test] +fn test_variant_get_matches_arrow_non_object() { + // Primitive variants (not objects) — accessing any field should give null + assert_matches_arrow(&["42", r#""hello""#, "true", "null"], "a"); +} + +#[test] +fn test_variant_get_matches_arrow_mixed_types() { + // Same field name, different value types across rows + assert_matches_arrow( + &[ + r#"{"v": 1}"#, + r#"{"v": "text"}"#, + r#"{"v": true}"#, + r#"{"v": [1,2]}"#, + r#"{"v": {"nested": 1}}"#, + ], + "v", + ); +} + +#[test] +fn test_variant_get_matches_arrow_nullable() { + assert_matches_arrow_nullable( + &[r#"{"a": 10}"#, r#"{"a": 20}"#, r#"{"a": 30}"#], + &[true, false, true], // row 1 is null + "a", + ); +} + +#[test] +fn test_variant_get_matches_arrow_all_null() { + assert_matches_arrow_nullable( + &[r#"{"a": 1}"#, r#"{"a": 2}"#, r#"{"a": 3}"#], + &[false, false, false], + "a", + ); +} + +#[test] +fn test_variant_get_matches_arrow_nested_object_result() { + // The result of variant_get is itself an object + assert_matches_arrow( + &[ + r#"{"outer": {"inner": 42}}"#, + r#"{"outer": {"a": 1, "b": 2}}"#, + ], + "outer", + ); +} + +// --------------------------------------------------------------------------- +// Original standalone tests +// --------------------------------------------------------------------------- + +#[test] +fn test_variant_get_basic() -> VortexResult<()> { + let arr = make_object_array()?; + let result = apply_variant_get(&arr, "a")?; + + assert_eq!(result.len(), 3); + + // Row 0: {"a": 1, ...} → variant(1) + let s0 = result.scalar_at(0)?; + assert!(!s0.is_null()); + let inner0 = s0.as_variant().value().unwrap(); + assert_eq!(*inner0, 1i32.into()); + + // Row 1: {"a": 2, ...} → variant(2) + let s1 = result.scalar_at(1)?; + assert!(!s1.is_null()); + let inner1 = s1.as_variant().value().unwrap(); + assert_eq!(*inner1, 2i32.into()); + + // Row 2: {"b": "y"} → null (field "a" missing) + let s2 = result.scalar_at(2)?; + assert!(s2.is_null()); + + Ok(()) +} + +#[test] +fn test_variant_get_missing_field() -> VortexResult<()> { + let arr = make_object_array()?; + let result = apply_variant_get(&arr, "nonexistent")?; + + assert_eq!(result.len(), 3); + for i in 0..3 { + assert!(result.scalar_at(i)?.is_null(), "row {i} should be null"); + } + + Ok(()) +} + +#[test] +fn test_variant_get_null_input() -> VortexResult<()> { + let arr = make_nullable_object_array()?; + let result = apply_variant_get(&arr, "a")?; + + assert_eq!(result.len(), 3); + assert!(!result.scalar_at(0)?.is_null()); + assert!(result.scalar_at(1)?.is_null()); + assert!(!result.scalar_at(2)?.is_null()); + + Ok(()) +} + +#[test] +fn test_variant_get_non_object() -> VortexResult<()> { + let mut builder = VariantArrayBuilder::new(2); + builder.append_variant(PqVariant::from(42i32)); + builder.append_variant(PqVariant::from("hello")); + let arr = ParquetVariantData::from_arrow_variant(&builder.build())?; + + let result = apply_variant_get(&arr, "a")?; + + assert_eq!(result.len(), 2); + assert!(result.scalar_at(0)?.is_null()); + assert!(result.scalar_at(1)?.is_null()); + + Ok(()) +} + +#[test] +fn test_variant_get_different_field() -> VortexResult<()> { + let arr = make_object_array()?; + let result = apply_variant_get(&arr, "b")?; + + assert_eq!(result.len(), 3); + assert!(!result.scalar_at(0)?.is_null()); + assert!(result.scalar_at(1)?.is_null()); + assert!(!result.scalar_at(2)?.is_null()); + + Ok(()) +} + +// --------------------------------------------------------------------------- +// Test data helpers +// --------------------------------------------------------------------------- + +fn make_object_array() -> VortexResult { + let mut builder = VariantArrayBuilder::new(3); + + builder + .new_object() + .with_field("a", 1i32) + .with_field("b", "x") + .finish(); + + builder + .new_object() + .with_field("a", 2i32) + .with_field("c", true) + .finish(); + + builder.new_object().with_field("b", "y").finish(); + + ParquetVariantData::from_arrow_variant(&builder.build()) +} + +fn make_nullable_object_array() -> VortexResult { + let mut builder = VariantArrayBuilder::new(3); + + builder.new_object().with_field("a", 10i32).finish(); + builder.new_object().with_field("a", 20i32).finish(); + builder.new_object().with_field("a", 30i32).finish(); + + let inner = builder.build().into_inner(); + let null_struct = StructArray::try_new( + inner.fields().clone(), + inner.columns().to_vec(), + Some(NullBuffer::from(vec![true, false, true])), + ) + .unwrap(); + let arrow_variant = ArrowVariantArray::try_new(&null_struct).unwrap(); + ParquetVariantData::from_arrow_variant(&arrow_variant) +} From 1f724a7d971d43473e9301244fb56947381bfba1 Mon Sep 17 00:00:00 2001 From: Adam Gutglick Date: Wed, 8 Apr 2026 15:27:07 +0100 Subject: [PATCH 3/6] fmt variant_get tests Signed-off-by: Adam Gutglick --- encodings/parquet-variant/src/variant_get/tests.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/encodings/parquet-variant/src/variant_get/tests.rs b/encodings/parquet-variant/src/variant_get/tests.rs index 56e4ac52704..a3bb09497f7 100644 --- a/encodings/parquet-variant/src/variant_get/tests.rs +++ b/encodings/parquet-variant/src/variant_get/tests.rs @@ -39,9 +39,7 @@ fn apply_variant_get(arr: &ArrayRef, field: &str) -> VortexResult { /// Convert a Vortex result back to an Arrow VariantArray for comparison. fn vortex_to_arrow_variant(arr: &ArrayRef) -> ArrowVariantArray { let variant = arr.as_::(); - let pv = variant - .child() - .as_::(); + let pv = variant.child().as_::(); let mut ctx = LEGACY_SESSION.create_execution_ctx(); pv.to_arrow(&mut ctx).unwrap() } From 1c06acda846c1ccb6e243530431ef3db5faee0d2 Mon Sep 17 00:00:00 2001 From: Adam Gutglick Date: Tue, 14 Apr 2026 15:06:14 +0100 Subject: [PATCH 4/6] Better expr Signed-off-by: Adam Gutglick --- .../parquet-variant/src/variant_get/mod.rs | 43 ++- .../parquet-variant/src/variant_get/tests.rs | 170 ++++++++++- .../parquet-variant/src/variant_get_tests.rs | 163 ---------- vortex-array/public-api.lock | 196 ++++++++++-- vortex-array/src/expr/exprs.rs | 33 +- vortex-array/src/scalar_fn/fns/variant_get.rs | 145 --------- .../src/scalar_fn/fns/variant_get/mod.rs | 287 ++++++++++++++++++ .../src/scalar_fn/fns/variant_get/path.rs | 122 ++++++++ vortex-proto/proto/expr.proto | 10 +- vortex-proto/public-api.lock | 80 ++++- vortex-proto/src/generated/vortex.expr.rs | 23 +- 11 files changed, 909 insertions(+), 363 deletions(-) delete mode 100644 encodings/parquet-variant/src/variant_get_tests.rs delete mode 100644 vortex-array/src/scalar_fn/fns/variant_get.rs create mode 100644 vortex-array/src/scalar_fn/fns/variant_get/mod.rs create mode 100644 vortex-array/src/scalar_fn/fns/variant_get/path.rs diff --git a/encodings/parquet-variant/src/variant_get/mod.rs b/encodings/parquet-variant/src/variant_get/mod.rs index b1133846a51..98278250894 100644 --- a/encodings/parquet-variant/src/variant_get/mod.rs +++ b/encodings/parquet-variant/src/variant_get/mod.rs @@ -7,7 +7,10 @@ use std::sync::Arc; -use parquet_variant::VariantPathElement; +use arrow_schema::Field; +use arrow_schema::FieldRef; +use parquet_variant::VariantPath; +use parquet_variant::VariantPathElement as ArrowVariantPathElement; use parquet_variant_compute::GetOptions; use parquet_variant_compute::VariantArray as ArrowVariantArray; use vortex_array::ArrayRef; @@ -19,10 +22,11 @@ use vortex_array::arrays::scalar_fn::ExactScalarFn; use vortex_array::arrays::scalar_fn::ScalarFnArrayView; use vortex_array::arrow::FromArrowArray; use vortex_array::dtype::DType; -use vortex_array::dtype::FieldName; use vortex_array::dtype::Nullability; use vortex_array::kernel::ExecuteParentKernel; use vortex_array::scalar_fn::fns::variant_get::VariantGet; +use vortex_array::scalar_fn::fns::variant_get::VariantGetOptions; +use vortex_array::scalar_fn::fns::variant_get::VariantPathElement as VortexVariantPathElement; use vortex_array::validity::Validity; use vortex_buffer::BitBuffer; use vortex_error::VortexResult; @@ -47,31 +51,48 @@ impl ExecuteParentKernel for VariantGetExecuteParent { _child_idx: usize, ctx: &mut ExecutionCtx, ) -> VortexResult> { - let field_name: &FieldName = parent.options; - variant_get_impl(array, field_name, ctx).map(Some) + variant_get_impl(array, parent.options, ctx).map(Some) } } fn variant_get_impl( array: ArrayView<'_, ParquetVariant>, - field_name: &FieldName, + options: &VariantGetOptions, ctx: &mut ExecutionCtx, ) -> VortexResult { // Convert to Arrow VariantArray let arrow_variant = array.to_arrow(ctx)?; - // Build path for a single field access - let path_element = VariantPathElement::Field { - name: field_name.as_ref().into(), - }; - let options = GetOptions::new_with_path(vec![path_element].into()); + let path = options + .path() + .iter() + .cloned() + .map(|element| match element { + VortexVariantPathElement::Field(name) => ArrowVariantPathElement::Field { + name: name.to_string().into(), + }, + VortexVariantPathElement::Index(index) => ArrowVariantPathElement::Index { index }, + }) + .collect::>(); + let mut arrow_options = GetOptions::new_with_path(VariantPath::new(path)); + if let Some(as_dtype) = options.effective_as_dtype() { + arrow_options = arrow_options.with_as_type(Some(FieldRef::new(Field::new( + "result", + as_dtype.to_arrow_dtype()?, + as_dtype.is_nullable(), + )))); + } // Delegate to the parquet-variant-compute kernel. // With as_type = None, the result is itself a VariantArray. let inner: Arc = Arc::new(arrow_variant.into_inner()); - let arrow_result = parquet_variant_compute::variant_get(&inner, options) + let arrow_result = parquet_variant_compute::variant_get(&inner, arrow_options) .map_err(|e| vortex_err!("variant_get failed: {e}"))?; + if options.effective_as_dtype().is_some() { + return ArrayRef::from_arrow(arrow_result.as_ref(), true); + } + // Convert back to Vortex. let result_variant = ArrowVariantArray::try_new( arrow_result diff --git a/encodings/parquet-variant/src/variant_get/tests.rs b/encodings/parquet-variant/src/variant_get/tests.rs index a3bb09497f7..b547a3b645f 100644 --- a/encodings/parquet-variant/src/variant_get/tests.rs +++ b/encodings/parquet-variant/src/variant_get/tests.rs @@ -8,9 +8,13 @@ use arrow_array::ArrayRef as ArrowArrayRef; use arrow_array::StringArray; use arrow_array::StructArray; use arrow_buffer::NullBuffer; +use arrow_schema::DataType as ArrowDataType; +use arrow_schema::Field as ArrowField; +use arrow_schema::FieldRef; use parquet_variant::Variant as PqVariant; use parquet_variant::VariantBuilderExt; use parquet_variant::VariantPath; +use parquet_variant::VariantPathElement; use parquet_variant_compute::GetOptions; use parquet_variant_compute::VariantArray as ArrowVariantArray; use parquet_variant_compute::VariantArrayBuilder; @@ -20,8 +24,15 @@ use vortex_array::ArrayRef; use vortex_array::LEGACY_SESSION; use vortex_array::VortexSessionExecute; use vortex_array::arrays::variant::VariantArrayExt; +use vortex_array::arrow::FromArrowArray; +use vortex_array::assert_arrays_eq; +use vortex_array::dtype::DType; +use vortex_array::dtype::Nullability; +use vortex_array::dtype::PType; use vortex_array::expr::root; use vortex_array::expr::variant_get; +use vortex_array::expr::variant_get_as; +use vortex_array::scalar_fn::fns::variant_get::VariantPath as VortexVariantPath; use vortex_error::VortexResult; use crate::ParquetVariant; @@ -29,8 +40,20 @@ use crate::ParquetVariantArrayExt; use crate::ParquetVariantData; /// Apply variant_get and execute through the full pipeline (including execute_parent). -fn apply_variant_get(arr: &ArrayRef, field: &str) -> VortexResult { - let expr = variant_get(field, root()); +fn apply_variant_get(arr: &ArrayRef, path: impl Into) -> VortexResult { + let expr = variant_get(path, root()); + let array = arr.clone().apply(&expr)?; + let mut ctx = LEGACY_SESSION.create_execution_ctx(); + array.execute::(&mut ctx) +} + +/// Apply typed variant_get and execute through the full pipeline. +fn apply_variant_get_as( + arr: &ArrayRef, + path: impl Into, + as_dtype: DType, +) -> VortexResult { + let expr = variant_get_as(path, as_dtype, root()); let array = arr.clone().apply(&expr)?; let mut ctx = LEGACY_SESSION.create_execution_ctx(); array.execute::(&mut ctx) @@ -47,7 +70,6 @@ fn vortex_to_arrow_variant(arr: &ArrayRef) -> ArrowVariantArray { /// Run variant_get through both Arrow and Vortex on the same input, and assert /// the per-row results (value + validity) are identical by comparing at the Arrow level. fn assert_matches_arrow(json_rows: &[&str], field: &str) { - // --- Arrow side --- let arrow_strings: ArrowArrayRef = Arc::new(StringArray::from( json_rows.iter().map(|s| Some(*s)).collect::>(), )); @@ -62,12 +84,10 @@ fn assert_matches_arrow(json_rows: &[&str], field: &str) { ArrowVariantArray::try_new(arrow_result.as_any().downcast_ref::().unwrap()) .unwrap(); - // --- Vortex side --- let vortex_input = ParquetVariantData::from_arrow_variant(&arrow_variant).unwrap(); let vortex_result = apply_variant_get(&vortex_input, field).unwrap(); let vortex_as_arrow = vortex_to_arrow_variant(&vortex_result); - // --- Compare row-by-row at Arrow Variant level --- assert_eq!( vortex_as_arrow.len(), arrow_result_variant.len(), @@ -94,10 +114,90 @@ fn assert_matches_arrow(json_rows: &[&str], field: &str) { } } +/// Run variant_get through both Arrow and Vortex for an explicit nested/index path, +/// and assert the per-row results match at the Arrow variant level. +fn assert_matches_arrow_with_path( + json_rows: &[&str], + path: VortexVariantPath, + arrow_path: VariantPath<'static>, +) { + let arrow_strings: ArrowArrayRef = Arc::new(StringArray::from( + json_rows.iter().map(|s| Some(*s)).collect::>(), + )); + let arrow_variant = json_to_variant(&arrow_strings).unwrap(); + let arrow_result = parquet_variant_compute::variant_get( + &arrow_variant.clone().into(), + GetOptions::new_with_path(arrow_path), + ) + .unwrap(); + let arrow_result_variant = + ArrowVariantArray::try_new(arrow_result.as_any().downcast_ref::().unwrap()) + .unwrap(); + + let vortex_input = ParquetVariantData::from_arrow_variant(&arrow_variant).unwrap(); + let vortex_result = apply_variant_get(&vortex_input, path).unwrap(); + let vortex_as_arrow = vortex_to_arrow_variant(&vortex_result); + + assert_eq!( + vortex_as_arrow.len(), + arrow_result_variant.len(), + "length mismatch" + ); + + for i in 0..arrow_result_variant.len() { + let arrow_is_null = arrow_result_variant.is_null(i); + let vortex_is_null = vortex_as_arrow.is_null(i); + + assert_eq!( + vortex_is_null, arrow_is_null, + "row {i}: null mismatch (vortex={vortex_is_null}, arrow={arrow_is_null})" + ); + + if !arrow_is_null { + let arrow_value = arrow_result_variant.value(i); + let vortex_value = vortex_as_arrow.value(i); + assert_eq!( + vortex_value, arrow_value, + "row {i}: value mismatch\n vortex: {vortex_value:?}\n arrow: {arrow_value:?}" + ); + } + } +} + +/// Run typed variant_get through both Arrow and Vortex for an explicit path, +/// and assert the typed nullable result matches. +fn assert_typed_matches_arrow_with_path( + json_rows: &[&str], + path: VortexVariantPath, + arrow_path: VariantPath<'static>, + as_dtype: DType, + arrow_dtype: ArrowDataType, +) -> VortexResult<()> { + let arrow_strings: ArrowArrayRef = Arc::new(StringArray::from( + json_rows.iter().map(|s| Some(*s)).collect::>(), + )); + let arrow_variant = json_to_variant(&arrow_strings).unwrap(); + let arrow_result = + parquet_variant_compute::variant_get( + &arrow_variant.clone().into(), + GetOptions::new_with_path(arrow_path).with_as_type(Some(FieldRef::new( + ArrowField::new("result", arrow_dtype, true), + ))), + ) + .unwrap(); + let expected = ArrayRef::from_arrow(arrow_result.as_ref(), true)?; + + let vortex_input = ParquetVariantData::from_arrow_variant(&arrow_variant).unwrap(); + let vortex_result = apply_variant_get_as(&vortex_input, path, as_dtype.clone())?; + + assert_eq!(vortex_result.dtype(), &as_dtype.as_nullable()); + assert_arrays_eq!(vortex_result, expected); + Ok(()) +} + /// Run variant_get through both Arrow and Vortex on nullable input (with NullBuffer), /// and assert the results match. fn assert_matches_arrow_nullable(json_rows: &[&str], validity: &[bool], field: &str) { - // --- Arrow side --- let arrow_strings: ArrowArrayRef = Arc::new(StringArray::from( json_rows.iter().map(|s| Some(*s)).collect::>(), )); @@ -121,12 +221,10 @@ fn assert_matches_arrow_nullable(json_rows: &[&str], validity: &[bool], field: & ArrowVariantArray::try_new(arrow_result.as_any().downcast_ref::().unwrap()) .unwrap(); - // --- Vortex side --- let vortex_input = ParquetVariantData::from_arrow_variant(&arrow_variant).unwrap(); let vortex_result = apply_variant_get(&vortex_input, field).unwrap(); let vortex_as_arrow = vortex_to_arrow_variant(&vortex_result); - // --- Compare --- assert_eq!( vortex_as_arrow.len(), arrow_result_variant.len(), @@ -221,6 +319,60 @@ fn test_variant_get_matches_arrow_nested_object_result() { ); } +#[test] +fn test_variant_get_matches_arrow_nested_path() { + let path = VortexVariantPath::from_name("outer").join("inner"); + let arrow_path = VariantPath::from_iter([ + VariantPathElement::field("outer".to_string()), + VariantPathElement::field("inner".to_string()), + ]); + assert_matches_arrow_with_path( + &[ + r#"{"outer": {"inner": 42}}"#, + r#"{"outer": {"inner": "x"}}"#, + r#"{"outer": {"other": true}}"#, + ], + path, + arrow_path, + ); +} + +#[test] +fn test_variant_get_matches_arrow_index_path() { + let path = VortexVariantPath::from_name("arr").join(1usize); + let arrow_path = VariantPath::from_iter([ + VariantPathElement::field("arr".to_string()), + VariantPathElement::index(1), + ]); + assert_matches_arrow_with_path( + &[ + r#"{"arr": [1, 2, 3]}"#, + r#"{"arr": ["a", "b"]}"#, + r#"{"arr": [true]}"#, + ], + path, + arrow_path, + ); +} + +#[test] +fn test_variant_get_matches_arrow_typed_path() -> VortexResult<()> { + assert_typed_matches_arrow_with_path( + &[ + r#"{"outer": {"inner": 42}}"#, + r#"{"outer": {"inner": "x"}}"#, + r#"{"outer": {"other": true}}"#, + ], + VortexVariantPath::from_name("outer").join("inner"), + VariantPath::from_iter([ + VariantPathElement::field("outer".to_string()), + VariantPathElement::field("inner".to_string()), + ]), + DType::Primitive(PType::I64, Nullability::NonNullable), + ArrowDataType::Int64, + ) +} + // --------------------------------------------------------------------------- // Original standalone tests // --------------------------------------------------------------------------- @@ -310,6 +462,7 @@ fn test_variant_get_different_field() -> VortexResult<()> { // Test data helpers // --------------------------------------------------------------------------- +/// Build a small non-null object variant array used by the standalone tests. fn make_object_array() -> VortexResult { let mut builder = VariantArrayBuilder::new(3); @@ -330,6 +483,7 @@ fn make_object_array() -> VortexResult { ParquetVariantData::from_arrow_variant(&builder.build()) } +/// Build the same object array shape with an explicit top-level validity bitmap. fn make_nullable_object_array() -> VortexResult { let mut builder = VariantArrayBuilder::new(3); diff --git a/encodings/parquet-variant/src/variant_get_tests.rs b/encodings/parquet-variant/src/variant_get_tests.rs deleted file mode 100644 index f4ade4a1003..00000000000 --- a/encodings/parquet-variant/src/variant_get_tests.rs +++ /dev/null @@ -1,163 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright the Vortex contributors - -#[cfg(test)] -mod tests { - use arrow_array::StructArray; - use arrow_buffer::NullBuffer; - use parquet_variant::Variant as PqVariant; - use parquet_variant::VariantBuilderExt; - use parquet_variant_compute::VariantArrayBuilder; - use vortex_array::ArrayRef; - use vortex_array::LEGACY_SESSION; - use vortex_array::VortexSessionExecute; - use vortex_array::expr::root; - use vortex_array::expr::variant_get; - use vortex_error::VortexResult; - - use crate::ParquetVariantData; - - /// Apply variant_get and execute through the full pipeline (including execute_parent). - fn apply_variant_get(arr: &ArrayRef, field: &str) -> VortexResult { - let expr = variant_get(field, root()); - let lazy = arr.clone().apply(&expr)?; - let mut ctx = LEGACY_SESSION.create_execution_ctx(); - lazy.execute::(&mut ctx) - } - - /// Build a VariantArray of objects: [{"a": 1, "b": "x"}, {"a": 2, "c": true}, {"b": "y"}] - fn make_object_array() -> VortexResult { - let mut builder = VariantArrayBuilder::new(3); - - builder - .new_object() - .with_field("a", 1i32) - .with_field("b", "x") - .finish(); - - builder - .new_object() - .with_field("a", 2i32) - .with_field("c", true) - .finish(); - - builder.new_object().with_field("b", "y").finish(); - - ParquetVariantData::from_arrow_variant(&builder.build()) - } - - /// Build a nullable VariantArray: [{"a": 10}, NULL, {"a": 30}] - fn make_nullable_object_array() -> VortexResult { - let mut builder = VariantArrayBuilder::new(3); - - builder.new_object().with_field("a", 10i32).finish(); - - builder.new_object().with_field("a", 20i32).finish(); - - builder.new_object().with_field("a", 30i32).finish(); - - let inner = builder.build().into_inner(); - let null_struct = StructArray::try_new( - inner.fields().clone(), - inner.columns().to_vec(), - Some(NullBuffer::from(vec![true, false, true])), - ) - .unwrap(); - let arrow_variant = parquet_variant_compute::VariantArray::try_new(&null_struct).unwrap(); - ParquetVariantData::from_arrow_variant(&arrow_variant) - } - - #[test] - fn test_variant_get_basic() -> VortexResult<()> { - let arr = make_object_array()?; - let result = apply_variant_get(&arr, "a")?; - - assert_eq!(result.len(), 3); - - // Row 0: {"a": 1, ...} → variant(1) - let s0 = result.scalar_at(0)?; - assert!(!s0.is_null()); - let inner0 = s0.as_variant().value().unwrap(); - assert_eq!(*inner0, 1i32.into()); - - // Row 1: {"a": 2, ...} → variant(2) - let s1 = result.scalar_at(1)?; - assert!(!s1.is_null()); - let inner1 = s1.as_variant().value().unwrap(); - assert_eq!(*inner1, 2i32.into()); - - // Row 2: {"b": "y"} → null (field "a" missing) - let s2 = result.scalar_at(2)?; - assert!(s2.is_null()); - - Ok(()) - } - - #[test] - fn test_variant_get_missing_field() -> VortexResult<()> { - let arr = make_object_array()?; - let result = apply_variant_get(&arr, "nonexistent")?; - - assert_eq!(result.len(), 3); - for i in 0..3 { - assert!(result.scalar_at(i)?.is_null(), "row {i} should be null"); - } - - Ok(()) - } - - #[test] - fn test_variant_get_null_input() -> VortexResult<()> { - let arr = make_nullable_object_array()?; - let result = apply_variant_get(&arr, "a")?; - - assert_eq!(result.len(), 3); - - // Row 0: {"a": 10} → variant(10) - assert!(!result.scalar_at(0)?.is_null()); - - // Row 1: NULL → null - assert!(result.scalar_at(1)?.is_null()); - - // Row 2: {"a": 30} → variant(30) - assert!(!result.scalar_at(2)?.is_null()); - - Ok(()) - } - - #[test] - fn test_variant_get_non_object() -> VortexResult<()> { - // Array of primitive variants (not objects) - let mut builder = VariantArrayBuilder::new(2); - builder.append_variant(PqVariant::from(42i32)); - builder.append_variant(PqVariant::from("hello")); - let arr = ParquetVariantData::from_arrow_variant(&builder.build())?; - - let result = apply_variant_get(&arr, "a")?; - - assert_eq!(result.len(), 2); - assert!(result.scalar_at(0)?.is_null()); - assert!(result.scalar_at(1)?.is_null()); - - Ok(()) - } - - #[test] - fn test_variant_get_different_field() -> VortexResult<()> { - let arr = make_object_array()?; - let result = apply_variant_get(&arr, "b")?; - - assert_eq!(result.len(), 3); - - // Row 0: {"a": 1, "b": "x"} → variant("x") - assert!(!result.scalar_at(0)?.is_null()); - - // Row 1: {"a": 2, "c": true} → null (no "b") - assert!(result.scalar_at(1)?.is_null()); - - // Row 2: {"b": "y"} → variant("y") - assert!(!result.scalar_at(2)?.is_null()); - - Ok(()) - } -} diff --git a/vortex-array/public-api.lock b/vortex-array/public-api.lock index 5535608d920..3f3b9d76033 100644 --- a/vortex-array/public-api.lock +++ b/vortex-array/public-api.lock @@ -12430,7 +12430,9 @@ pub fn vortex_array::expr::select_exclude(fields: impl core::convert::Into alloc::vec::Vec -pub fn vortex_array::expr::variant_get(field: impl core::convert::Into, child: vortex_array::expr::Expression) -> vortex_array::expr::Expression +pub fn vortex_array::expr::variant_get(options: impl core::convert::Into, child: vortex_array::expr::Expression) -> vortex_array::expr::Expression + +pub fn vortex_array::expr::variant_get_as(path: impl core::convert::Into, as_dtype: vortex_array::dtype::DType, child: vortex_array::expr::Expression) -> vortex_array::expr::Expression pub fn vortex_array::expr::zip_expr(mask: vortex_array::expr::Expression, if_true: vortex_array::expr::Expression, if_false: vortex_array::expr::Expression) -> vortex_array::expr::Expression @@ -17202,6 +17204,54 @@ pub fn vortex_array::scalar_fn::fns::select::Select::validity(&self, options: &S pub mod vortex_array::scalar_fn::fns::variant_get +pub enum vortex_array::scalar_fn::fns::variant_get::VariantPathElement + +pub vortex_array::scalar_fn::fns::variant_get::VariantPathElement::Field(vortex_array::dtype::FieldName) + +pub vortex_array::scalar_fn::fns::variant_get::VariantPathElement::Index(usize) + +impl vortex_array::scalar_fn::fns::variant_get::VariantPathElement + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantPathElement::field(name: impl core::convert::Into) -> Self + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantPathElement::index(index: usize) -> Self + +impl core::clone::Clone for vortex_array::scalar_fn::fns::variant_get::VariantPathElement + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantPathElement::clone(&self) -> vortex_array::scalar_fn::fns::variant_get::VariantPathElement + +impl core::cmp::Eq for vortex_array::scalar_fn::fns::variant_get::VariantPathElement + +impl core::cmp::PartialEq for vortex_array::scalar_fn::fns::variant_get::VariantPathElement + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantPathElement::eq(&self, other: &vortex_array::scalar_fn::fns::variant_get::VariantPathElement) -> bool + +impl core::convert::From for vortex_array::scalar_fn::fns::variant_get::VariantPathElement + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantPathElement::from(value: usize) -> Self + +impl core::fmt::Debug for vortex_array::scalar_fn::fns::variant_get::VariantPathElement + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantPathElement::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl core::fmt::Display for vortex_array::scalar_fn::fns::variant_get::VariantPathElement + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantPathElement::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl core::hash::Hash for vortex_array::scalar_fn::fns::variant_get::VariantPathElement + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantPathElement::hash<__H: core::hash::Hasher>(&self, state: &mut __H) + +impl core::iter::traits::collect::FromIterator for vortex_array::scalar_fn::fns::variant_get::VariantPath + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantPath::from_iter>(iter: T) -> Self + +impl core::marker::StructuralPartialEq for vortex_array::scalar_fn::fns::variant_get::VariantPathElement + +impl core::convert::From for vortex_array::scalar_fn::fns::variant_get::VariantPathElement where F: core::convert::Into + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantPathElement::from(value: F) -> Self + pub struct vortex_array::scalar_fn::fns::variant_get::VariantGet impl core::clone::Clone for vortex_array::scalar_fn::fns::variant_get::VariantGet @@ -17210,29 +17260,29 @@ pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::clone(&self) -> vo impl vortex_array::scalar_fn::ScalarFnVTable for vortex_array::scalar_fn::fns::variant_get::VariantGet -pub type vortex_array::scalar_fn::fns::variant_get::VariantGet::Options = vortex_array::dtype::FieldName +pub type vortex_array::scalar_fn::fns::variant_get::VariantGet::Options = vortex_array::scalar_fn::fns::variant_get::VariantGetOptions -pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::arity(&self, _field_name: &vortex_array::dtype::FieldName) -> vortex_array::scalar_fn::Arity +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::arity(&self, _options: &vortex_array::scalar_fn::fns::variant_get::VariantGetOptions) -> vortex_array::scalar_fn::Arity -pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::child_name(&self, _instance: &Self::Options, child_idx: usize) -> vortex_array::scalar_fn::ChildName +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::child_name(&self, _options: &Self::Options, child_idx: usize) -> vortex_array::scalar_fn::ChildName pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::coerce_args(&self, options: &Self::Options, args: &[vortex_array::dtype::DType]) -> vortex_error::VortexResult> -pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::deserialize(&self, metadata: &[u8], _session: &vortex_session::VortexSession) -> vortex_error::VortexResult +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::deserialize(&self, metadata: &[u8], session: &vortex_session::VortexSession) -> vortex_error::VortexResult -pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::execute(&self, _field_name: &vortex_array::dtype::FieldName, _args: &dyn vortex_array::scalar_fn::ExecutionArgs, _ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::execute(&self, _options: &vortex_array::scalar_fn::fns::variant_get::VariantGetOptions, _args: &dyn vortex_array::scalar_fn::ExecutionArgs, _ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult -pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::fmt_sql(&self, field_name: &vortex_array::dtype::FieldName, expr: &vortex_array::expr::Expression, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::fmt_sql(&self, options: &vortex_array::scalar_fn::fns::variant_get::VariantGetOptions, expr: &vortex_array::expr::Expression, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::id(&self) -> vortex_array::scalar_fn::ScalarFnId -pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::is_fallible(&self, _field_name: &vortex_array::dtype::FieldName) -> bool +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::is_fallible(&self, _options: &vortex_array::scalar_fn::fns::variant_get::VariantGetOptions) -> bool -pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::is_null_sensitive(&self, _field_name: &vortex_array::dtype::FieldName) -> bool +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::is_null_sensitive(&self, _options: &vortex_array::scalar_fn::fns::variant_get::VariantGetOptions) -> bool -pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::reduce(&self, field_name: &vortex_array::dtype::FieldName, node: &dyn vortex_array::scalar_fn::ReduceNode, ctx: &dyn vortex_array::scalar_fn::ReduceCtx) -> vortex_error::VortexResult> +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::reduce(&self, options: &vortex_array::scalar_fn::fns::variant_get::VariantGetOptions, node: &dyn vortex_array::scalar_fn::ReduceNode, ctx: &dyn vortex_array::scalar_fn::ReduceCtx) -> vortex_error::VortexResult> -pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::return_dtype(&self, _field_name: &vortex_array::dtype::FieldName, arg_dtypes: &[vortex_array::dtype::DType]) -> vortex_error::VortexResult +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::return_dtype(&self, options: &vortex_array::scalar_fn::fns::variant_get::VariantGetOptions, arg_dtypes: &[vortex_array::dtype::DType]) -> vortex_error::VortexResult pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::serialize(&self, instance: &Self::Options) -> vortex_error::VortexResult>> @@ -17246,6 +17296,110 @@ pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::stat_falsification pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::validity(&self, options: &Self::Options, expression: &vortex_array::expr::Expression) -> vortex_error::VortexResult> +pub struct vortex_array::scalar_fn::fns::variant_get::VariantGetOptions + +impl vortex_array::scalar_fn::fns::variant_get::VariantGetOptions + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGetOptions::as_dtype(&self) -> core::option::Option<&vortex_array::dtype::DType> + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGetOptions::effective_as_dtype(&self) -> core::option::Option<&vortex_array::dtype::DType> + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGetOptions::new(path: impl core::convert::Into) -> Self + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGetOptions::path(&self) -> &vortex_array::scalar_fn::fns::variant_get::VariantPath + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGetOptions::return_dtype(&self) -> vortex_array::dtype::DType + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGetOptions::with_as_dtype(self, as_dtype: core::option::Option) -> Self + +impl core::clone::Clone for vortex_array::scalar_fn::fns::variant_get::VariantGetOptions + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGetOptions::clone(&self) -> vortex_array::scalar_fn::fns::variant_get::VariantGetOptions + +impl core::cmp::Eq for vortex_array::scalar_fn::fns::variant_get::VariantGetOptions + +impl core::cmp::PartialEq for vortex_array::scalar_fn::fns::variant_get::VariantGetOptions + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGetOptions::eq(&self, other: &vortex_array::scalar_fn::fns::variant_get::VariantGetOptions) -> bool + +impl core::default::Default for vortex_array::scalar_fn::fns::variant_get::VariantGetOptions + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGetOptions::default() -> vortex_array::scalar_fn::fns::variant_get::VariantGetOptions + +impl core::fmt::Debug for vortex_array::scalar_fn::fns::variant_get::VariantGetOptions + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGetOptions::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl core::fmt::Display for vortex_array::scalar_fn::fns::variant_get::VariantGetOptions + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGetOptions::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl core::hash::Hash for vortex_array::scalar_fn::fns::variant_get::VariantGetOptions + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGetOptions::hash<__H: core::hash::Hasher>(&self, state: &mut __H) + +impl core::marker::StructuralPartialEq for vortex_array::scalar_fn::fns::variant_get::VariantGetOptions + +impl> core::convert::From for vortex_array::scalar_fn::fns::variant_get::VariantGetOptions + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGetOptions::from(value: T) -> Self + +pub struct vortex_array::scalar_fn::fns::variant_get::VariantPath(_) + +impl vortex_array::scalar_fn::fns::variant_get::VariantPath + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantPath::from_index(index: usize) -> Self + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantPath::from_name(name: impl core::convert::Into) -> Self + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantPath::iter(&self) -> impl core::iter::traits::iterator::Iterator + '_ + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantPath::join(self, element: impl core::convert::Into) -> Self + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantPath::new(path: alloc::vec::Vec) -> Self + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantPath::push(&mut self, element: impl core::convert::Into) + +impl core::clone::Clone for vortex_array::scalar_fn::fns::variant_get::VariantPath + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantPath::clone(&self) -> vortex_array::scalar_fn::fns::variant_get::VariantPath + +impl core::cmp::Eq for vortex_array::scalar_fn::fns::variant_get::VariantPath + +impl core::cmp::PartialEq for vortex_array::scalar_fn::fns::variant_get::VariantPath + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantPath::eq(&self, other: &vortex_array::scalar_fn::fns::variant_get::VariantPath) -> bool + +impl core::convert::From for vortex_array::scalar_fn::fns::variant_get::VariantPath + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantPath::from(value: usize) -> Self + +impl core::default::Default for vortex_array::scalar_fn::fns::variant_get::VariantPath + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantPath::default() -> vortex_array::scalar_fn::fns::variant_get::VariantPath + +impl core::fmt::Debug for vortex_array::scalar_fn::fns::variant_get::VariantPath + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantPath::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl core::fmt::Display for vortex_array::scalar_fn::fns::variant_get::VariantPath + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantPath::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl core::hash::Hash for vortex_array::scalar_fn::fns::variant_get::VariantPath + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantPath::hash<__H: core::hash::Hasher>(&self, state: &mut __H) + +impl core::iter::traits::collect::FromIterator for vortex_array::scalar_fn::fns::variant_get::VariantPath + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantPath::from_iter>(iter: T) -> Self + +impl core::marker::StructuralPartialEq for vortex_array::scalar_fn::fns::variant_get::VariantPath + +impl core::convert::From for vortex_array::scalar_fn::fns::variant_get::VariantPath where F: core::convert::Into + +pub fn vortex_array::scalar_fn::fns::variant_get::VariantPath::from(value: F) -> Self + pub mod vortex_array::scalar_fn::fns::zip pub struct vortex_array::scalar_fn::fns::zip::Zip @@ -18456,29 +18610,29 @@ pub fn vortex_array::scalar_fn::fns::select::Select::validity(&self, options: &S impl vortex_array::scalar_fn::ScalarFnVTable for vortex_array::scalar_fn::fns::variant_get::VariantGet -pub type vortex_array::scalar_fn::fns::variant_get::VariantGet::Options = vortex_array::dtype::FieldName +pub type vortex_array::scalar_fn::fns::variant_get::VariantGet::Options = vortex_array::scalar_fn::fns::variant_get::VariantGetOptions -pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::arity(&self, _field_name: &vortex_array::dtype::FieldName) -> vortex_array::scalar_fn::Arity +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::arity(&self, _options: &vortex_array::scalar_fn::fns::variant_get::VariantGetOptions) -> vortex_array::scalar_fn::Arity -pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::child_name(&self, _instance: &Self::Options, child_idx: usize) -> vortex_array::scalar_fn::ChildName +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::child_name(&self, _options: &Self::Options, child_idx: usize) -> vortex_array::scalar_fn::ChildName pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::coerce_args(&self, options: &Self::Options, args: &[vortex_array::dtype::DType]) -> vortex_error::VortexResult> -pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::deserialize(&self, metadata: &[u8], _session: &vortex_session::VortexSession) -> vortex_error::VortexResult +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::deserialize(&self, metadata: &[u8], session: &vortex_session::VortexSession) -> vortex_error::VortexResult -pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::execute(&self, _field_name: &vortex_array::dtype::FieldName, _args: &dyn vortex_array::scalar_fn::ExecutionArgs, _ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::execute(&self, _options: &vortex_array::scalar_fn::fns::variant_get::VariantGetOptions, _args: &dyn vortex_array::scalar_fn::ExecutionArgs, _ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult -pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::fmt_sql(&self, field_name: &vortex_array::dtype::FieldName, expr: &vortex_array::expr::Expression, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::fmt_sql(&self, options: &vortex_array::scalar_fn::fns::variant_get::VariantGetOptions, expr: &vortex_array::expr::Expression, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::id(&self) -> vortex_array::scalar_fn::ScalarFnId -pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::is_fallible(&self, _field_name: &vortex_array::dtype::FieldName) -> bool +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::is_fallible(&self, _options: &vortex_array::scalar_fn::fns::variant_get::VariantGetOptions) -> bool -pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::is_null_sensitive(&self, _field_name: &vortex_array::dtype::FieldName) -> bool +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::is_null_sensitive(&self, _options: &vortex_array::scalar_fn::fns::variant_get::VariantGetOptions) -> bool -pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::reduce(&self, field_name: &vortex_array::dtype::FieldName, node: &dyn vortex_array::scalar_fn::ReduceNode, ctx: &dyn vortex_array::scalar_fn::ReduceCtx) -> vortex_error::VortexResult> +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::reduce(&self, options: &vortex_array::scalar_fn::fns::variant_get::VariantGetOptions, node: &dyn vortex_array::scalar_fn::ReduceNode, ctx: &dyn vortex_array::scalar_fn::ReduceCtx) -> vortex_error::VortexResult> -pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::return_dtype(&self, _field_name: &vortex_array::dtype::FieldName, arg_dtypes: &[vortex_array::dtype::DType]) -> vortex_error::VortexResult +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::return_dtype(&self, options: &vortex_array::scalar_fn::fns::variant_get::VariantGetOptions, arg_dtypes: &[vortex_array::dtype::DType]) -> vortex_error::VortexResult pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::serialize(&self, instance: &Self::Options) -> vortex_error::VortexResult>> diff --git a/vortex-array/src/expr/exprs.rs b/vortex-array/src/expr/exprs.rs index 9aced319454..5f644bf3db6 100644 --- a/vortex-array/src/expr/exprs.rs +++ b/vortex-array/src/expr/exprs.rs @@ -47,6 +47,8 @@ use crate::scalar_fn::fns::root::Root; use crate::scalar_fn::fns::select::FieldSelection; use crate::scalar_fn::fns::select::Select; use crate::scalar_fn::fns::variant_get::VariantGet; +use crate::scalar_fn::fns::variant_get::VariantGetOptions; +use crate::scalar_fn::fns::variant_get::VariantPath; use crate::scalar_fn::fns::zip::Zip; // ---- Root ---- @@ -703,14 +705,37 @@ pub fn list_contains(list: Expression, value: Expression) -> Expression { // ---- VariantGet ---- -/// Creates an expression that extracts a field from a variant object by name. +/// Creates an expression that extracts a nested path from a variant value. /// -/// Returns a new variant containing the field's value, or null if the field does not exist. +/// Returns a new variant containing the nested value, or null if the path does not exist. /// /// ```rust /// # use vortex_array::expr::{variant_get, root}; /// let expr = variant_get("field_name", root()); /// ``` -pub fn variant_get(field: impl Into, child: Expression) -> Expression { - VariantGet.new_expr(field.into(), vec![child]) +pub fn variant_get(options: impl Into, child: Expression) -> Expression { + VariantGet.new_expr(options.into(), vec![child]) +} + +/// Creates an expression that extracts a nested path from a variant value and materializes it as a +/// specific type. +/// +/// ```rust +/// # use vortex_array::dtype::{DType, Nullability, PType}; +/// # use vortex_array::expr::{root, variant_get_as}; +/// let expr = variant_get_as( +/// "field_name", +/// DType::Primitive(PType::I64, Nullability::NonNullable), +/// root(), +/// ); +/// ``` +pub fn variant_get_as( + path: impl Into, + as_dtype: DType, + child: Expression, +) -> Expression { + VariantGet.new_expr( + VariantGetOptions::new(path).with_as_dtype(Some(as_dtype)), + vec![child], + ) } diff --git a/vortex-array/src/scalar_fn/fns/variant_get.rs b/vortex-array/src/scalar_fn/fns/variant_get.rs deleted file mode 100644 index 304a7197cb7..00000000000 --- a/vortex-array/src/scalar_fn/fns/variant_get.rs +++ /dev/null @@ -1,145 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright the Vortex contributors - -use std::fmt::Formatter; -use std::sync::Arc; - -use prost::Message; -use vortex_error::VortexResult; -use vortex_error::vortex_bail; -use vortex_proto::expr as pb; -use vortex_session::VortexSession; - -use crate::ArrayRef; -use crate::ExecutionCtx; -use crate::arrays::Variant; -use crate::arrays::variant::VariantArrayExt; -use crate::dtype::DType; -use crate::dtype::FieldName; -use crate::dtype::Nullability; -use crate::expr::Expression; -use crate::scalar_fn::Arity; -use crate::scalar_fn::ChildName; -use crate::scalar_fn::ExecutionArgs; -use crate::scalar_fn::ReduceCtx; -use crate::scalar_fn::ReduceNode; -use crate::scalar_fn::ReduceNodeRef; -use crate::scalar_fn::ScalarFnId; -use crate::scalar_fn::ScalarFnVTable; -use crate::scalar_fn::ScalarFnVTableExt; - -/// Extracts a field from a variant object by name, returning a new variant. -/// -/// This is analogous to [`GetItem`](super::get_item::GetItem) for structs, but operates on -/// semi-structured variant data. The result is always `DType::Variant(Nullable)` since the -/// requested field may not exist in every row. -/// -/// Execution is handled by variant encodings (e.g. `ParquetVariantArray`) via `execute_parent`. -/// The canonical `VariantArray` does not support direct execution; a `reduce` rule unwraps -/// the `VariantArray` wrapper to expose the underlying encoding. -#[derive(Clone)] -pub struct VariantGet; - -impl ScalarFnVTable for VariantGet { - type Options = FieldName; - - fn id(&self) -> ScalarFnId { - ScalarFnId::from("vortex.variant_get") - } - - fn serialize(&self, instance: &Self::Options) -> VortexResult>> { - Ok(Some( - pb::VariantGetOpts { - path: instance.to_string(), - } - .encode_to_vec(), - )) - } - - fn deserialize( - &self, - metadata: &[u8], - _session: &VortexSession, - ) -> VortexResult { - let opts = pb::VariantGetOpts::decode(metadata)?; - Ok(FieldName::from(opts.path)) - } - - fn arity(&self, _field_name: &FieldName) -> Arity { - Arity::Exact(1) - } - - fn child_name(&self, _instance: &Self::Options, child_idx: usize) -> ChildName { - match child_idx { - 0 => ChildName::from("input"), - _ => unreachable!( - "Invalid child index {} for VariantGet expression", - child_idx - ), - } - } - - fn fmt_sql( - &self, - field_name: &FieldName, - expr: &Expression, - f: &mut Formatter<'_>, - ) -> std::fmt::Result { - write!(f, "variant_get(")?; - expr.children()[0].fmt_sql(f)?; - write!(f, ", '{}')", field_name) - } - - fn return_dtype(&self, _field_name: &FieldName, arg_dtypes: &[DType]) -> VortexResult { - if !matches!(arg_dtypes[0], DType::Variant(_)) { - vortex_bail!( - "variant_get requires a Variant input, got {:?}", - arg_dtypes[0] - ); - } - // Always nullable: the field may not exist in every variant value. - Ok(DType::Variant(Nullability::Nullable)) - } - - fn execute( - &self, - _field_name: &FieldName, - _args: &dyn ExecutionArgs, - _ctx: &mut ExecutionCtx, - ) -> VortexResult { - vortex_bail!( - "variant_get cannot be executed directly; \ - it must be pushed down to a variant encoding via execute_parent" - ) - } - - fn reduce( - &self, - field_name: &FieldName, - node: &dyn ReduceNode, - ctx: &dyn ReduceCtx, - ) -> VortexResult> { - // If the child is a canonical VariantArray wrapper, unwrap it to expose the - // underlying encoding (e.g. ParquetVariantArray) so that execute_parent can - // handle the operation. - let child = node.child(0); - if let Some(child_array) = child.as_any().downcast_ref::() - && child_array.is::() - { - let inner = child_array.as_::().child().clone(); - return Ok(Some(ctx.new_node( - VariantGet.bind(field_name.clone()), - &[Arc::new(inner) as ReduceNodeRef], - )?)); - } - Ok(None) - } - - fn is_null_sensitive(&self, _field_name: &FieldName) -> bool { - true - } - - fn is_fallible(&self, _field_name: &FieldName) -> bool { - false - } -} diff --git a/vortex-array/src/scalar_fn/fns/variant_get/mod.rs b/vortex-array/src/scalar_fn/fns/variant_get/mod.rs new file mode 100644 index 00000000000..de4a6226ff8 --- /dev/null +++ b/vortex-array/src/scalar_fn/fns/variant_get/mod.rs @@ -0,0 +1,287 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use std::fmt::Display; +use std::fmt::Formatter; +use std::sync::Arc; + +use prost::Message; +use vortex_error::VortexResult; +use vortex_error::vortex_bail; +use vortex_error::vortex_err; +use vortex_proto::expr as pb; +use vortex_session::VortexSession; + +use crate::ArrayRef; +use crate::ExecutionCtx; +use crate::arrays::Variant; +use crate::arrays::variant::VariantArrayExt; +use crate::dtype::DType; +use crate::dtype::Nullability; +use crate::expr::Expression; +use crate::scalar_fn::Arity; +use crate::scalar_fn::ChildName; +use crate::scalar_fn::ExecutionArgs; +use crate::scalar_fn::ReduceCtx; +use crate::scalar_fn::ReduceNode; +use crate::scalar_fn::ReduceNodeRef; +use crate::scalar_fn::ScalarFnId; +use crate::scalar_fn::ScalarFnVTable; +use crate::scalar_fn::ScalarFnVTableExt; + +mod path; + +pub use path::VariantPath; +pub use path::VariantPathElement; + +/// Extracts a nested path from a variant value, returning a new variant. +/// +/// This is analogous to [`GetItem`](super::get_item::GetItem) for structs, but operates on +/// semi-structured variant data. The result is always `DType::Variant(Nullable)` since the +/// requested path may not exist in every row. +/// +/// Execution is handled by variant encodings (e.g. `ParquetVariantArray`) via `execute_parent`. +/// The canonical `VariantArray` does not support direct execution; a `reduce` rule unwraps +/// the `VariantArray` wrapper to expose the underlying encoding. +#[derive(Clone)] +pub struct VariantGet; + +/// Options for [`VariantGet`]. +/// +/// `path` selects the nested variant value to extract. `as_dtype`, when set, asks the encoding +/// to materialize the result directly as that logical type instead of another variant. +#[derive(Clone, Debug, Default, PartialEq, Eq, Hash)] +pub struct VariantGetOptions { + path: VariantPath, + as_dtype: Option, +} + +impl VariantGetOptions { + /// Creates options for extracting the given path as another variant value. + pub fn new(path: impl Into) -> Self { + Self { + path: path.into(), + as_dtype: None, + } + } + + /// Returns the requested path. + pub fn path(&self) -> &VariantPath { + &self.path + } + + /// Returns the requested output type, if any. + pub fn as_dtype(&self) -> Option<&DType> { + self.as_dtype.as_ref() + } + + /// Returns new options that request direct materialization as `as_dtype`. + pub fn with_as_dtype(mut self, as_dtype: Option) -> Self { + self.as_dtype = as_dtype; + self + } + + /// Returns the logical output dtype for this expression. + pub fn return_dtype(&self) -> DType { + match self.effective_as_dtype() { + Some(dtype) => dtype.as_nullable(), + None => DType::Variant(Nullability::Nullable), + } + } + + /// Returns the dtype to materialize directly, if it differs from the default variant output. + pub fn effective_as_dtype(&self) -> Option<&DType> { + self.as_dtype + .as_ref() + .filter(|dtype| !matches!(dtype, DType::Variant(_))) + } +} + +impl Display for VariantGetOptions { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.path)?; + if let Some(as_dtype) = &self.as_dtype { + write!(f, " as {as_dtype}")?; + } + Ok(()) + } +} + +impl> From for VariantGetOptions { + fn from(value: T) -> Self { + Self::new(value) + } +} + +impl ScalarFnVTable for VariantGet { + type Options = VariantGetOptions; + + fn id(&self) -> ScalarFnId { + ScalarFnId::from("vortex.variant_get") + } + + fn serialize(&self, instance: &Self::Options) -> VortexResult>> { + let path = instance + .path() + .iter() + .cloned() + .map(|element| match element { + VariantPathElement::Field(name) => Ok(pb::VariantPathElement { + path_element: Some(pb::variant_path_element::PathElement::Field( + name.to_string(), + )), + }), + VariantPathElement::Index(index) => Ok(pb::VariantPathElement { + path_element: Some(pb::variant_path_element::PathElement::Index( + index.try_into().map_err(|_| { + vortex_err!("variant path index {index} does not fit in u32") + })?, + )), + }), + }) + .collect::>>()?; + let as_dtype = instance.as_dtype().map(TryInto::try_into).transpose()?; + Ok(Some(pb::VariantGetOpts { path, as_dtype }.encode_to_vec())) + } + + fn deserialize(&self, metadata: &[u8], session: &VortexSession) -> VortexResult { + let opts = pb::VariantGetOpts::decode(metadata)?; + let path = opts + .path + .into_iter() + .map(|element| match element.path_element { + Some(pb::variant_path_element::PathElement::Field(name)) => { + Ok(VariantPathElement::Field(name.into())) + } + Some(pb::variant_path_element::PathElement::Index(index)) => { + Ok(VariantPathElement::Index(index as usize)) + } + None => vortex_bail!("variant_get path element must be set"), + }) + .collect::>()?; + let as_dtype = opts + .as_dtype + .as_ref() + .map(|dtype| DType::from_proto(dtype, session)) + .transpose()?; + + Ok(VariantGetOptions { path, as_dtype }) + } + + fn arity(&self, _options: &VariantGetOptions) -> Arity { + Arity::Exact(1) + } + + fn child_name(&self, _options: &Self::Options, child_idx: usize) -> ChildName { + match child_idx { + 0 => ChildName::from("input"), + _ => unreachable!( + "Invalid child index {} for VariantGet expression", + child_idx + ), + } + } + + fn fmt_sql( + &self, + options: &VariantGetOptions, + expr: &Expression, + f: &mut Formatter<'_>, + ) -> std::fmt::Result { + write!(f, "variant_get(")?; + expr.children()[0].fmt_sql(f)?; + write!(f, ", '{}'", options.path())?; + if let Some(as_dtype) = options.as_dtype() { + write!(f, ", {as_dtype}")?; + } + write!(f, ")") + } + + fn return_dtype( + &self, + options: &VariantGetOptions, + arg_dtypes: &[DType], + ) -> VortexResult { + if !matches!(arg_dtypes[0], DType::Variant(_)) { + vortex_bail!( + "variant_get requires a Variant input, got {:?}", + arg_dtypes[0] + ); + } + Ok(options.return_dtype()) + } + + fn execute( + &self, + _options: &VariantGetOptions, + _args: &dyn ExecutionArgs, + _ctx: &mut ExecutionCtx, + ) -> VortexResult { + vortex_bail!( + "variant_get cannot be executed directly; \ + it must be pushed down to a variant encoding via execute_parent" + ) + } + + fn reduce( + &self, + options: &VariantGetOptions, + node: &dyn ReduceNode, + ctx: &dyn ReduceCtx, + ) -> VortexResult> { + // If the child is a canonical VariantArray wrapper, unwrap it to expose the + // underlying encoding (e.g. ParquetVariantArray) so that execute_parent can + // handle the operation. + let child = node.child(0); + if let Some(child_array) = child.as_any().downcast_ref::() + && child_array.is::() + { + let inner = child_array.as_::().child().clone(); + return Ok(Some(ctx.new_node( + VariantGet.bind(options.clone()), + &[Arc::new(inner) as ReduceNodeRef], + )?)); + } + Ok(None) + } + + fn is_null_sensitive(&self, _options: &VariantGetOptions) -> bool { + true + } + + fn is_fallible(&self, _options: &VariantGetOptions) -> bool { + false + } +} + +#[cfg(test)] +mod tests { + use vortex_session::VortexSession; + + use super::VariantGet; + use super::VariantGetOptions; + use super::VariantPath; + use crate::dtype::DType; + use crate::dtype::Nullability; + use crate::dtype::PType; + use crate::scalar_fn::ScalarFnVTable; + + #[test] + fn variant_get_path_proto_round_trip() { + let options = + VariantGetOptions::new(VariantPath::from_name("outer").join(1usize).join("inner")) + .with_as_dtype(Some(DType::Primitive(PType::I64, Nullability::NonNullable))); + + let metadata = VariantGet.serialize(&options).unwrap().unwrap(); + let decoded = VariantGet + .deserialize(&metadata, &VortexSession::empty()) + .unwrap(); + + assert_eq!(decoded, options); + assert_eq!(decoded.to_string(), "outer[1].inner as i64"); + assert_eq!( + decoded.return_dtype(), + DType::Primitive(PType::I64, Nullability::Nullable) + ); + } +} diff --git a/vortex-array/src/scalar_fn/fns/variant_get/path.rs b/vortex-array/src/scalar_fn/fns/variant_get/path.rs new file mode 100644 index 00000000000..b9e476db1c2 --- /dev/null +++ b/vortex-array/src/scalar_fn/fns/variant_get/path.rs @@ -0,0 +1,122 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use crate::dtype::FieldName; + +/// A path within a variant value. +/// +/// Each path element addresses either an object field or a list index. +#[derive(Clone, Debug, Default, PartialEq, Eq, Hash)] +pub struct VariantPath(Vec); + +impl VariantPath { + /// Creates a path from a sequence of path elements. + pub fn new(path: Vec) -> Self { + Self(path) + } + + /// Creates a path that addresses a single object field. + pub fn from_name(name: impl Into) -> Self { + Self(vec![VariantPathElement::Field(name.into())]) + } + + /// Creates a path that addresses a single list index. + pub fn from_index(index: usize) -> Self { + Self(vec![VariantPathElement::Index(index)]) + } + + /// Returns a new path with an additional element appended. + pub fn join(mut self, element: impl Into) -> Self { + self.push(element); + self + } + + /// Appends an element to the end of the path. + pub fn push(&mut self, element: impl Into) { + self.0.push(element.into()); + } + + pub fn iter(&self) -> impl Iterator + '_ { + self.0.iter() + } +} + +impl From for VariantPath +where + F: Into, +{ + fn from(value: F) -> Self { + Self::from_name(value) + } +} + +impl From for VariantPath { + fn from(value: usize) -> Self { + Self::from_index(value) + } +} + +impl std::fmt::Display for VariantPath { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + for (idx, element) in self.0.iter().enumerate() { + match element { + VariantPathElement::Field(name) if idx == 0 => write!(f, "{name}")?, + VariantPathElement::Field(name) => write!(f, ".{name}")?, + VariantPathElement::Index(index) => write!(f, "[{index}]")?, + } + } + + Ok(()) + } +} + +impl FromIterator for VariantPath { + fn from_iter>(iter: T) -> Self { + Self::new(Vec::from_iter(iter)) + } +} + +/// A single step within a [`VariantPath`]. +#[derive(Clone, Debug, PartialEq, Eq, Hash)] +pub enum VariantPathElement { + /// Select a named field from an object-like variant value. + Field(FieldName), + /// Select an element from a list-like variant value. + Index(usize), +} + +impl From for VariantPathElement +where + F: Into, +{ + fn from(value: F) -> Self { + Self::Field(value.into()) + } +} + +impl From for VariantPathElement { + fn from(value: usize) -> Self { + Self::Index(value) + } +} + +impl VariantPathElement { + /// Creates a field path element. + pub fn field(name: impl Into) -> Self { + Self::Field(name.into()) + } + + /// Creates an index path element. + pub fn index(index: usize) -> Self { + Self::Index(index) + } +} + +impl std::fmt::Display for VariantPathElement { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Field(name) => write!(f, "{name}"), + Self::Index(index) => write!(f, "[{index}]"), + } + } +} diff --git a/vortex-proto/proto/expr.proto b/vortex-proto/proto/expr.proto index 3c062dffb79..cbf125d0337 100644 --- a/vortex-proto/proto/expr.proto +++ b/vortex-proto/proto/expr.proto @@ -89,7 +89,15 @@ message SelectOpts { // Options for `vortex.variant_get` message VariantGetOpts { - string path = 1; + repeated VariantPathElement path = 1; + optional vortex.dtype.DType as_dtype = 2; +} + +message VariantPathElement { + oneof path_element { + string field = 1; + uint32 index = 2; + } } // Options for `vortex.case_when` diff --git a/vortex-proto/public-api.lock b/vortex-proto/public-api.lock index d7ea01cb7bb..1deeb74ee64 100644 --- a/vortex-proto/public-api.lock +++ b/vortex-proto/public-api.lock @@ -768,6 +768,42 @@ pub fn vortex_proto::expr::select_opts::Opts::hash<__H: core::hash::Hasher>(&sel impl core::marker::StructuralPartialEq for vortex_proto::expr::select_opts::Opts +pub mod vortex_proto::expr::variant_path_element + +pub enum vortex_proto::expr::variant_path_element::PathElement + +pub vortex_proto::expr::variant_path_element::PathElement::Field(alloc::string::String) + +pub vortex_proto::expr::variant_path_element::PathElement::Index(u32) + +impl vortex_proto::expr::variant_path_element::PathElement + +pub fn vortex_proto::expr::variant_path_element::PathElement::encode(&self, buf: &mut impl bytes::buf::buf_mut::BufMut) + +pub fn vortex_proto::expr::variant_path_element::PathElement::encoded_len(&self) -> usize + +pub fn vortex_proto::expr::variant_path_element::PathElement::merge(field: &mut core::option::Option, tag: u32, wire_type: prost::encoding::wire_type::WireType, buf: &mut impl bytes::buf::buf_impl::Buf, ctx: prost::encoding::DecodeContext) -> core::result::Result<(), prost::error::DecodeError> + +impl core::clone::Clone for vortex_proto::expr::variant_path_element::PathElement + +pub fn vortex_proto::expr::variant_path_element::PathElement::clone(&self) -> vortex_proto::expr::variant_path_element::PathElement + +impl core::cmp::Eq for vortex_proto::expr::variant_path_element::PathElement + +impl core::cmp::PartialEq for vortex_proto::expr::variant_path_element::PathElement + +pub fn vortex_proto::expr::variant_path_element::PathElement::eq(&self, other: &vortex_proto::expr::variant_path_element::PathElement) -> bool + +impl core::fmt::Debug for vortex_proto::expr::variant_path_element::PathElement + +pub fn vortex_proto::expr::variant_path_element::PathElement::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl core::hash::Hash for vortex_proto::expr::variant_path_element::PathElement + +pub fn vortex_proto::expr::variant_path_element::PathElement::hash<__H: core::hash::Hasher>(&self, state: &mut __H) + +impl core::marker::StructuralPartialEq for vortex_proto::expr::variant_path_element::PathElement + pub struct vortex_proto::expr::AggregateFn pub vortex_proto::expr::AggregateFn::id: alloc::string::String @@ -1194,14 +1230,14 @@ pub fn vortex_proto::expr::SelectOpts::encoded_len(&self) -> usize pub struct vortex_proto::expr::VariantGetOpts -pub vortex_proto::expr::VariantGetOpts::path: alloc::string::String +pub vortex_proto::expr::VariantGetOpts::as_dtype: core::option::Option + +pub vortex_proto::expr::VariantGetOpts::path: alloc::vec::Vec impl core::clone::Clone for vortex_proto::expr::VariantGetOpts pub fn vortex_proto::expr::VariantGetOpts::clone(&self) -> vortex_proto::expr::VariantGetOpts -impl core::cmp::Eq for vortex_proto::expr::VariantGetOpts - impl core::cmp::PartialEq for vortex_proto::expr::VariantGetOpts pub fn vortex_proto::expr::VariantGetOpts::eq(&self, other: &vortex_proto::expr::VariantGetOpts) -> bool @@ -1214,10 +1250,6 @@ impl core::fmt::Debug for vortex_proto::expr::VariantGetOpts pub fn vortex_proto::expr::VariantGetOpts::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result -impl core::hash::Hash for vortex_proto::expr::VariantGetOpts - -pub fn vortex_proto::expr::VariantGetOpts::hash<__H: core::hash::Hasher>(&self, state: &mut __H) - impl core::marker::StructuralPartialEq for vortex_proto::expr::VariantGetOpts impl prost::message::Message for vortex_proto::expr::VariantGetOpts @@ -1226,6 +1258,40 @@ pub fn vortex_proto::expr::VariantGetOpts::clear(&mut self) pub fn vortex_proto::expr::VariantGetOpts::encoded_len(&self) -> usize +pub struct vortex_proto::expr::VariantPathElement + +pub vortex_proto::expr::VariantPathElement::path_element: core::option::Option + +impl core::clone::Clone for vortex_proto::expr::VariantPathElement + +pub fn vortex_proto::expr::VariantPathElement::clone(&self) -> vortex_proto::expr::VariantPathElement + +impl core::cmp::Eq for vortex_proto::expr::VariantPathElement + +impl core::cmp::PartialEq for vortex_proto::expr::VariantPathElement + +pub fn vortex_proto::expr::VariantPathElement::eq(&self, other: &vortex_proto::expr::VariantPathElement) -> bool + +impl core::default::Default for vortex_proto::expr::VariantPathElement + +pub fn vortex_proto::expr::VariantPathElement::default() -> Self + +impl core::fmt::Debug for vortex_proto::expr::VariantPathElement + +pub fn vortex_proto::expr::VariantPathElement::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl core::hash::Hash for vortex_proto::expr::VariantPathElement + +pub fn vortex_proto::expr::VariantPathElement::hash<__H: core::hash::Hasher>(&self, state: &mut __H) + +impl core::marker::StructuralPartialEq for vortex_proto::expr::VariantPathElement + +impl prost::message::Message for vortex_proto::expr::VariantPathElement + +pub fn vortex_proto::expr::VariantPathElement::clear(&mut self) + +pub fn vortex_proto::expr::VariantPathElement::encoded_len(&self) -> usize + pub mod vortex_proto::scalar pub mod vortex_proto::scalar::scalar_value diff --git a/vortex-proto/src/generated/vortex.expr.rs b/vortex-proto/src/generated/vortex.expr.rs index 607dc848d8a..f36d83201f9 100644 --- a/vortex-proto/src/generated/vortex.expr.rs +++ b/vortex-proto/src/generated/vortex.expr.rs @@ -154,10 +154,27 @@ pub mod select_opts { } } /// Options for `vortex.variant_get` -#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] +#[derive(Clone, PartialEq, ::prost::Message)] pub struct VariantGetOpts { - #[prost(string, tag = "1")] - pub path: ::prost::alloc::string::String, + #[prost(message, repeated, tag = "1")] + pub path: ::prost::alloc::vec::Vec, + #[prost(message, optional, tag = "2")] + pub as_dtype: ::core::option::Option, +} +#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] +pub struct VariantPathElement { + #[prost(oneof = "variant_path_element::PathElement", tags = "1, 2")] + pub path_element: ::core::option::Option, +} +/// Nested message and enum types in `VariantPathElement`. +pub mod variant_path_element { + #[derive(Clone, PartialEq, Eq, Hash, ::prost::Oneof)] + pub enum PathElement { + #[prost(string, tag = "1")] + Field(::prost::alloc::string::String), + #[prost(uint32, tag = "2")] + Index(u32), + } } /// Options for `vortex.case_when` /// Encodes num_when_then_pairs and has_else into a single u32 (num_children). From abaede8f92c3dbd1e3b0559985d5406f8d106178 Mon Sep 17 00:00:00 2001 From: Adam Gutglick Date: Tue, 14 Apr 2026 19:29:55 +0100 Subject: [PATCH 5/6] more Signed-off-by: Adam Gutglick --- .../parquet-variant/src/variant_get/mod.rs | 18 +- .../parquet-variant/src/variant_get/tests.rs | 172 +++++++++++------- vortex-array/public-api.lock | 4 +- vortex-array/src/arrays/filter/mod.rs | 1 + .../src/arrays/filter/parent_kernels.rs | 69 +++++++ vortex-array/src/arrays/filter/vtable.rs | 10 + vortex-array/src/arrays/slice/mod.rs | 1 + .../src/arrays/slice/parent_kernels.rs | 71 ++++++++ vortex-array/src/arrays/slice/vtable.rs | 10 + .../src/scalar_fn/fns/variant_get/mod.rs | 19 +- 10 files changed, 298 insertions(+), 77 deletions(-) create mode 100644 vortex-array/src/arrays/filter/parent_kernels.rs create mode 100644 vortex-array/src/arrays/slice/parent_kernels.rs diff --git a/encodings/parquet-variant/src/variant_get/mod.rs b/encodings/parquet-variant/src/variant_get/mod.rs index 98278250894..264fa0c803e 100644 --- a/encodings/parquet-variant/src/variant_get/mod.rs +++ b/encodings/parquet-variant/src/variant_get/mod.rs @@ -101,6 +101,20 @@ fn variant_get_impl( .ok_or_else(|| vortex_err!("variant_get did not return a StructArray"))?, ) .map_err(|e| vortex_err!("failed to create VariantArray from result: {e}"))?; + let value_nullable = result_variant + .inner() + .fields() + .iter() + .find(|field| field.name() == "value") + .map(|field| field.is_nullable()) + .unwrap_or(false); + let typed_value_nullable = result_variant + .inner() + .fields() + .iter() + .find(|field| field.name() == "typed_value") + .map(|field| field.is_nullable()) + .unwrap_or(false); // Ensure the result is always nullable (matching variant_get's return_dtype). // Arrow may return a non-nullable result when no nulls are present. @@ -121,11 +135,11 @@ fn variant_get_impl( )?; let value = result_variant .value_field() - .map(|v| ArrayRef::from_arrow(v as &dyn arrow_array::Array, true)) + .map(|v| ArrayRef::from_arrow(v as &dyn arrow_array::Array, value_nullable)) .transpose()?; let typed_value = result_variant .typed_value_field() - .map(|tv| ArrayRef::from_arrow(tv.as_ref(), true)) + .map(|tv| ArrayRef::from_arrow(tv.as_ref(), typed_value_nullable)) .transpose()?; let pv = ParquetVariant::try_new(validity, metadata, value, typed_value)?; diff --git a/encodings/parquet-variant/src/variant_get/tests.rs b/encodings/parquet-variant/src/variant_get/tests.rs index b547a3b645f..1a8fbf9ed45 100644 --- a/encodings/parquet-variant/src/variant_get/tests.rs +++ b/encodings/parquet-variant/src/variant_get/tests.rs @@ -19,6 +19,7 @@ use parquet_variant_compute::GetOptions; use parquet_variant_compute::VariantArray as ArrowVariantArray; use parquet_variant_compute::VariantArrayBuilder; use parquet_variant_compute::json_to_variant; +use rstest::fixture; use rstest::rstest; use vortex_array::ArrayRef; use vortex_array::LEGACY_SESSION; @@ -34,29 +35,29 @@ use vortex_array::expr::variant_get; use vortex_array::expr::variant_get_as; use vortex_array::scalar_fn::fns::variant_get::VariantPath as VortexVariantPath; use vortex_error::VortexResult; +use vortex_mask::Mask; use crate::ParquetVariant; use crate::ParquetVariantArrayExt; use crate::ParquetVariantData; -/// Apply variant_get and execute through the full pipeline (including execute_parent). -fn apply_variant_get(arr: &ArrayRef, path: impl Into) -> VortexResult { - let expr = variant_get(path, root()); - let array = arr.clone().apply(&expr)?; - let mut ctx = LEGACY_SESSION.create_execution_ctx(); - array.execute::(&mut ctx) -} - -/// Apply typed variant_get and execute through the full pipeline. -fn apply_variant_get_as( - arr: &ArrayRef, - path: impl Into, - as_dtype: DType, -) -> VortexResult { - let expr = variant_get_as(path, as_dtype, root()); - let array = arr.clone().apply(&expr)?; - let mut ctx = LEGACY_SESSION.create_execution_ctx(); - array.execute::(&mut ctx) +macro_rules! apply_variant_get { + ($arr:expr, $path:expr) => {{ + (|| -> VortexResult { + let expr = variant_get($path, root()); + let array = $arr.clone().apply(&expr)?; + let mut ctx = LEGACY_SESSION.create_execution_ctx(); + array.execute::(&mut ctx) + })() + }}; + ($arr:expr, $path:expr, $as_dtype:expr) => {{ + (|| -> VortexResult { + let expr = variant_get_as($path, $as_dtype, root()); + let array = $arr.clone().apply(&expr)?; + let mut ctx = LEGACY_SESSION.create_execution_ctx(); + array.execute::(&mut ctx) + })() + }}; } /// Convert a Vortex result back to an Arrow VariantArray for comparison. @@ -67,6 +68,31 @@ fn vortex_to_arrow_variant(arr: &ArrayRef) -> ArrowVariantArray { pv.to_arrow(&mut ctx).unwrap() } +fn assert_variant_storage_matches(expected: &ArrowVariantArray, actual: &ArrowVariantArray) { + assert_eq!(actual.len(), expected.len(), "length mismatch"); + assert_eq!( + actual.inner().column_names(), + expected.inner().column_names(), + "column mismatch" + ); + assert_eq!(actual.inner().nulls(), expected.inner().nulls()); + assert_eq!( + actual.inner().fields().len(), + expected.inner().fields().len() + ); + + for (expected, actual) in expected + .inner() + .fields() + .iter() + .zip(actual.inner().fields().iter()) + { + assert_eq!(actual.name(), expected.name()); + assert_eq!(actual.data_type(), expected.data_type()); + assert_eq!(actual.is_nullable(), expected.is_nullable()); + } +} + /// Run variant_get through both Arrow and Vortex on the same input, and assert /// the per-row results (value + validity) are identical by comparing at the Arrow level. fn assert_matches_arrow(json_rows: &[&str], field: &str) { @@ -85,14 +111,10 @@ fn assert_matches_arrow(json_rows: &[&str], field: &str) { .unwrap(); let vortex_input = ParquetVariantData::from_arrow_variant(&arrow_variant).unwrap(); - let vortex_result = apply_variant_get(&vortex_input, field).unwrap(); + let vortex_result = apply_variant_get!(&vortex_input, field).unwrap(); let vortex_as_arrow = vortex_to_arrow_variant(&vortex_result); - assert_eq!( - vortex_as_arrow.len(), - arrow_result_variant.len(), - "length mismatch" - ); + assert_variant_storage_matches(&arrow_result_variant, &vortex_as_arrow); for i in 0..arrow_result_variant.len() { let arrow_is_null = arrow_result_variant.is_null(i); @@ -135,14 +157,10 @@ fn assert_matches_arrow_with_path( .unwrap(); let vortex_input = ParquetVariantData::from_arrow_variant(&arrow_variant).unwrap(); - let vortex_result = apply_variant_get(&vortex_input, path).unwrap(); + let vortex_result = apply_variant_get!(&vortex_input, path).unwrap(); let vortex_as_arrow = vortex_to_arrow_variant(&vortex_result); - assert_eq!( - vortex_as_arrow.len(), - arrow_result_variant.len(), - "length mismatch" - ); + assert_variant_storage_matches(&arrow_result_variant, &vortex_as_arrow); for i in 0..arrow_result_variant.len() { let arrow_is_null = arrow_result_variant.is_null(i); @@ -188,7 +206,7 @@ fn assert_typed_matches_arrow_with_path( let expected = ArrayRef::from_arrow(arrow_result.as_ref(), true)?; let vortex_input = ParquetVariantData::from_arrow_variant(&arrow_variant).unwrap(); - let vortex_result = apply_variant_get_as(&vortex_input, path, as_dtype.clone())?; + let vortex_result = apply_variant_get!(&vortex_input, path, as_dtype.clone())?; assert_eq!(vortex_result.dtype(), &as_dtype.as_nullable()); assert_arrays_eq!(vortex_result, expected); @@ -222,14 +240,10 @@ fn assert_matches_arrow_nullable(json_rows: &[&str], validity: &[bool], field: & .unwrap(); let vortex_input = ParquetVariantData::from_arrow_variant(&arrow_variant).unwrap(); - let vortex_result = apply_variant_get(&vortex_input, field).unwrap(); + let vortex_result = apply_variant_get!(&vortex_input, field).unwrap(); let vortex_as_arrow = vortex_to_arrow_variant(&vortex_result); - assert_eq!( - vortex_as_arrow.len(), - arrow_result_variant.len(), - "length mismatch" - ); + assert_variant_storage_matches(&arrow_result_variant, &vortex_as_arrow); for i in 0..arrow_result_variant.len() { let arrow_is_null = arrow_result_variant.is_null(i); @@ -373,14 +387,9 @@ fn test_variant_get_matches_arrow_typed_path() -> VortexResult<()> { ) } -// --------------------------------------------------------------------------- -// Original standalone tests -// --------------------------------------------------------------------------- - -#[test] -fn test_variant_get_basic() -> VortexResult<()> { - let arr = make_object_array()?; - let result = apply_variant_get(&arr, "a")?; +#[rstest] +fn test_variant_get_basic(object_array: ArrayRef) -> VortexResult<()> { + let result = apply_variant_get!(&object_array, "a")?; assert_eq!(result.len(), 3); @@ -403,10 +412,9 @@ fn test_variant_get_basic() -> VortexResult<()> { Ok(()) } -#[test] -fn test_variant_get_missing_field() -> VortexResult<()> { - let arr = make_object_array()?; - let result = apply_variant_get(&arr, "nonexistent")?; +#[rstest] +fn test_variant_get_missing_field(object_array: ArrayRef) -> VortexResult<()> { + let result = apply_variant_get!(&object_array, "nonexistent")?; assert_eq!(result.len(), 3); for i in 0..3 { @@ -416,10 +424,9 @@ fn test_variant_get_missing_field() -> VortexResult<()> { Ok(()) } -#[test] -fn test_variant_get_null_input() -> VortexResult<()> { - let arr = make_nullable_object_array()?; - let result = apply_variant_get(&arr, "a")?; +#[rstest] +fn test_variant_get_null_input(nullable_object_array: ArrayRef) -> VortexResult<()> { + let result = apply_variant_get!(&nullable_object_array, "a")?; assert_eq!(result.len(), 3); assert!(!result.scalar_at(0)?.is_null()); @@ -436,7 +443,7 @@ fn test_variant_get_non_object() -> VortexResult<()> { builder.append_variant(PqVariant::from("hello")); let arr = ParquetVariantData::from_arrow_variant(&builder.build())?; - let result = apply_variant_get(&arr, "a")?; + let result = apply_variant_get!(&arr, "a")?; assert_eq!(result.len(), 2); assert!(result.scalar_at(0)?.is_null()); @@ -445,10 +452,9 @@ fn test_variant_get_non_object() -> VortexResult<()> { Ok(()) } -#[test] -fn test_variant_get_different_field() -> VortexResult<()> { - let arr = make_object_array()?; - let result = apply_variant_get(&arr, "b")?; +#[rstest] +fn test_variant_get_different_field(object_array: ArrayRef) -> VortexResult<()> { + let result = apply_variant_get!(&object_array, "b")?; assert_eq!(result.len(), 3); assert!(!result.scalar_at(0)?.is_null()); @@ -458,12 +464,49 @@ fn test_variant_get_different_field() -> VortexResult<()> { Ok(()) } +#[rstest] +fn test_variant_get_through_slice_wrapper(object_array: ArrayRef) -> VortexResult<()> { + let mut ctx = LEGACY_SESSION.create_execution_ctx(); + + let expr = variant_get("a", root()); + let actual = object_array + .slice(1..3)? + .apply(&expr)? + .execute::(&mut ctx)?; + + let expected = apply_variant_get!(&object_array, "a")?; + + assert_eq!(actual.len(), 2); + assert_eq!(actual.scalar_at(0)?, expected.scalar_at(1)?); + assert_eq!(actual.scalar_at(1)?, expected.scalar_at(2)?); + Ok(()) +} + +#[rstest] +fn test_variant_get_through_filter_wrapper(object_array: ArrayRef) -> VortexResult<()> { + let mask = Mask::from_iter([true, false, true]); + + let expr = variant_get("a", root()); + let mut ctx = LEGACY_SESSION.create_execution_ctx(); + + let array = object_array.filter(mask.clone())?.apply(&expr)?; + let actual = array.execute::(&mut ctx)?; + let expected = apply_variant_get!(&object_array, "a")?; + + assert_eq!(mask.true_count(), 2); + assert_eq!(actual.len(), 2); + assert_eq!(actual.scalar_at(0)?, expected.scalar_at(0)?); + assert_eq!(actual.scalar_at(1)?, expected.scalar_at(2)?); + Ok(()) +} + // --------------------------------------------------------------------------- // Test data helpers // --------------------------------------------------------------------------- -/// Build a small non-null object variant array used by the standalone tests. -fn make_object_array() -> VortexResult { +/// Small non-null object variant array used by the standalone tests. +#[fixture] +fn object_array() -> ArrayRef { let mut builder = VariantArrayBuilder::new(3); builder @@ -480,11 +523,12 @@ fn make_object_array() -> VortexResult { builder.new_object().with_field("b", "y").finish(); - ParquetVariantData::from_arrow_variant(&builder.build()) + ParquetVariantData::from_arrow_variant(&builder.build()).unwrap() } -/// Build the same object array shape with an explicit top-level validity bitmap. -fn make_nullable_object_array() -> VortexResult { +/// The same object array shape with an explicit top-level validity bitmap. +#[fixture] +fn nullable_object_array() -> ArrayRef { let mut builder = VariantArrayBuilder::new(3); builder.new_object().with_field("a", 10i32).finish(); @@ -499,5 +543,5 @@ fn make_nullable_object_array() -> VortexResult { ) .unwrap(); let arrow_variant = ArrowVariantArray::try_new(&null_struct).unwrap(); - ParquetVariantData::from_arrow_variant(&arrow_variant) + ParquetVariantData::from_arrow_variant(&arrow_variant).unwrap() } diff --git a/vortex-array/public-api.lock b/vortex-array/public-api.lock index 3f3b9d76033..51fc6e29d33 100644 --- a/vortex-array/public-api.lock +++ b/vortex-array/public-api.lock @@ -17270,7 +17270,7 @@ pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::coerce_args(&self, pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::deserialize(&self, metadata: &[u8], session: &vortex_session::VortexSession) -> vortex_error::VortexResult -pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::execute(&self, _options: &vortex_array::scalar_fn::fns::variant_get::VariantGetOptions, _args: &dyn vortex_array::scalar_fn::ExecutionArgs, _ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::execute(&self, options: &vortex_array::scalar_fn::fns::variant_get::VariantGetOptions, args: &dyn vortex_array::scalar_fn::ExecutionArgs, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::fmt_sql(&self, options: &vortex_array::scalar_fn::fns::variant_get::VariantGetOptions, expr: &vortex_array::expr::Expression, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result @@ -18620,7 +18620,7 @@ pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::coerce_args(&self, pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::deserialize(&self, metadata: &[u8], session: &vortex_session::VortexSession) -> vortex_error::VortexResult -pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::execute(&self, _options: &vortex_array::scalar_fn::fns::variant_get::VariantGetOptions, _args: &dyn vortex_array::scalar_fn::ExecutionArgs, _ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult +pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::execute(&self, options: &vortex_array::scalar_fn::fns::variant_get::VariantGetOptions, args: &dyn vortex_array::scalar_fn::ExecutionArgs, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::fmt_sql(&self, options: &vortex_array::scalar_fn::fns::variant_get::VariantGetOptions, expr: &vortex_array::expr::Expression, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result diff --git a/vortex-array/src/arrays/filter/mod.rs b/vortex-array/src/arrays/filter/mod.rs index 39859bc5b25..7a199b39c77 100644 --- a/vortex-array/src/arrays/filter/mod.rs +++ b/vortex-array/src/arrays/filter/mod.rs @@ -10,6 +10,7 @@ pub use vtable::FilterArray; mod execute; mod kernel; +mod parent_kernels; pub use kernel::FilterExecuteAdaptor; pub use kernel::FilterKernel; pub use kernel::FilterReduce; diff --git a/vortex-array/src/arrays/filter/parent_kernels.rs b/vortex-array/src/arrays/filter/parent_kernels.rs new file mode 100644 index 00000000000..507c8f51919 --- /dev/null +++ b/vortex-array/src/arrays/filter/parent_kernels.rs @@ -0,0 +1,69 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Parent kernels for wrapper-side execution over [`Filter`]. +//! +//! [`VariantGet`](crate::scalar_fn::fns::variant_get::VariantGet) relies on the underlying +//! variant encoding to perform the real extraction work, so it cannot execute directly once a +//! `Filter` wrapper sits between the expression and that encoding. +//! +//! This module handles that pass-through at the wrapper layer by filtering the wrapped variant +//! child first and then re-dispatching `VariantGet` on the filtered child. + +use vortex_error::VortexResult; + +use crate::ArrayRef; +use crate::ExecutionCtx; +use crate::array::ArrayView; +use crate::arrays::Filter; +use crate::arrays::Variant; +use crate::arrays::filter::FilterArrayExt; +use crate::arrays::scalar_fn::ExactScalarFn; +use crate::arrays::scalar_fn::ScalarFnArrayView; +use crate::arrays::scalar_fn::ScalarFnFactoryExt; +use crate::arrays::variant::VariantArrayExt; +use crate::dtype::DType; +use crate::kernel::ExecuteParentKernel; +use crate::kernel::ParentKernelSet; +use crate::scalar_fn::fns::variant_get::VariantGet; + +pub(super) static PARENT_KERNELS: ParentKernelSet = + ParentKernelSet::new(&[ParentKernelSet::lift(&FilterVariantGetExecuteParent)]); + +/// Pass `variant_get` through a `Filter` wrapper to the underlying variant child. +#[derive(Debug)] +struct FilterVariantGetExecuteParent; + +impl ExecuteParentKernel for FilterVariantGetExecuteParent { + type Parent = ExactScalarFn; + + fn execute_parent( + &self, + array: ArrayView<'_, Filter>, + parent: ScalarFnArrayView<'_, VariantGet>, + child_idx: usize, + ctx: &mut ExecutionCtx, + ) -> VortexResult> { + if child_idx != 0 { + return Ok(None); + } + + let child = array.child(); + let inner = if let Some(variant) = child.as_opt::() { + variant.child().clone() + } else if matches!(child.dtype(), DType::Variant(_)) { + child.clone() + } else { + return Ok(None); + }; + + let filtered = inner + .filter(array.filter_mask().clone())? + .execute::(ctx)?; + + VariantGet + .try_new_array(parent.len(), parent.options.clone(), [filtered])? + .execute::(ctx) + .map(Some) + } +} diff --git a/vortex-array/src/arrays/filter/vtable.rs b/vortex-array/src/arrays/filter/vtable.rs index 05f49b5c941..e53d8914d7f 100644 --- a/vortex-array/src/arrays/filter/vtable.rs +++ b/vortex-array/src/arrays/filter/vtable.rs @@ -32,6 +32,7 @@ use crate::arrays::filter::array::FilterData; use crate::arrays::filter::array::SLOT_NAMES; use crate::arrays::filter::execute::execute_filter; use crate::arrays::filter::execute::execute_filter_fast_paths; +use crate::arrays::filter::parent_kernels::PARENT_KERNELS; use crate::arrays::filter::rules::PARENT_RULES; use crate::arrays::filter::rules::RULES; use crate::buffer::BufferHandle; @@ -170,6 +171,15 @@ impl VTable for Filter { PARENT_RULES.evaluate(array, parent, child_idx) } + fn execute_parent( + array: ArrayView<'_, Self>, + parent: &ArrayRef, + child_idx: usize, + ctx: &mut ExecutionCtx, + ) -> VortexResult> { + PARENT_KERNELS.execute(array, parent, child_idx, ctx) + } + fn reduce(array: ArrayView<'_, Self>) -> VortexResult> { RULES.evaluate(array) } diff --git a/vortex-array/src/arrays/slice/mod.rs b/vortex-array/src/arrays/slice/mod.rs index 8eec9d85310..c6aa44aa094 100644 --- a/vortex-array/src/arrays/slice/mod.rs +++ b/vortex-array/src/arrays/slice/mod.rs @@ -9,6 +9,7 @@ //! [`ArrayParentReduceRule`] and [`ExecuteParentKernel`] respectively. mod array; +mod parent_kernels; mod rules; mod slice_; mod vtable; diff --git a/vortex-array/src/arrays/slice/parent_kernels.rs b/vortex-array/src/arrays/slice/parent_kernels.rs new file mode 100644 index 00000000000..676bd385fac --- /dev/null +++ b/vortex-array/src/arrays/slice/parent_kernels.rs @@ -0,0 +1,71 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Parent kernels for wrapper-side execution over [`Slice`]. +//! +//! Most scalar functions can commute with `Slice` via generic scalar-fn rules, but +//! [`VariantGet`](crate::scalar_fn::fns::variant_get::VariantGet) is unusual: it has to reach the +//! underlying variant encoding's `execute_parent` kernel rather than executing directly. +//! +//! This module keeps that wrapper-specific pass-through on the wrapper side. When a +//! `VariantGet` parent sits directly above a `Slice`, we slice the wrapped variant child first and +//! then re-dispatch `VariantGet` on the sliced child so execution can continue at the underlying +//! variant encoding. + +use vortex_error::VortexResult; + +use crate::ArrayRef; +use crate::ExecutionCtx; +use crate::array::ArrayView; +use crate::arrays::Slice; +use crate::arrays::Variant; +use crate::arrays::scalar_fn::ExactScalarFn; +use crate::arrays::scalar_fn::ScalarFnArrayView; +use crate::arrays::scalar_fn::ScalarFnFactoryExt; +use crate::arrays::slice::SliceArrayExt; +use crate::arrays::variant::VariantArrayExt; +use crate::dtype::DType; +use crate::kernel::ExecuteParentKernel; +use crate::kernel::ParentKernelSet; +use crate::scalar_fn::fns::variant_get::VariantGet; + +pub(super) static PARENT_KERNELS: ParentKernelSet = + ParentKernelSet::new(&[ParentKernelSet::lift(&SliceVariantGetExecuteParent)]); + +/// Pass `variant_get` through a `Slice` wrapper to the underlying variant child. +#[derive(Debug)] +struct SliceVariantGetExecuteParent; + +impl ExecuteParentKernel for SliceVariantGetExecuteParent { + type Parent = ExactScalarFn; + + fn execute_parent( + &self, + array: ArrayView<'_, Slice>, + parent: ScalarFnArrayView<'_, VariantGet>, + child_idx: usize, + ctx: &mut ExecutionCtx, + ) -> VortexResult> { + if child_idx != 0 { + return Ok(None); + } + + let child = array.child(); + let inner = if let Some(variant) = child.as_opt::() { + variant.child().clone() + } else if matches!(child.dtype(), DType::Variant(_)) { + child.clone() + } else { + return Ok(None); + }; + + let sliced = inner + .slice(array.slice_range().clone())? + .execute::(ctx)?; + + VariantGet + .try_new_array(parent.len(), parent.options.clone(), [sliced])? + .execute::(ctx) + .map(Some) + } +} diff --git a/vortex-array/src/arrays/slice/vtable.rs b/vortex-array/src/arrays/slice/vtable.rs index 911de401aa4..24809b797c7 100644 --- a/vortex-array/src/arrays/slice/vtable.rs +++ b/vortex-array/src/arrays/slice/vtable.rs @@ -30,6 +30,7 @@ use crate::arrays::slice::SliceArrayExt; use crate::arrays::slice::array::CHILD_SLOT; use crate::arrays::slice::array::SLOT_NAMES; use crate::arrays::slice::array::SliceData; +use crate::arrays::slice::parent_kernels::PARENT_KERNELS; use crate::arrays::slice::rules::PARENT_RULES; use crate::buffer::BufferHandle; use crate::dtype::DType; @@ -158,6 +159,15 @@ impl VTable for Slice { ) -> VortexResult> { PARENT_RULES.evaluate(array, parent, child_idx) } + + fn execute_parent( + array: ArrayView<'_, Self>, + parent: &ArrayRef, + child_idx: usize, + ctx: &mut ExecutionCtx, + ) -> VortexResult> { + PARENT_KERNELS.execute(array, parent, child_idx, ctx) + } } impl OperationsVTable for Slice { fn scalar_at( diff --git a/vortex-array/src/scalar_fn/fns/variant_get/mod.rs b/vortex-array/src/scalar_fn/fns/variant_get/mod.rs index de4a6226ff8..c7b69bb8eb6 100644 --- a/vortex-array/src/scalar_fn/fns/variant_get/mod.rs +++ b/vortex-array/src/scalar_fn/fns/variant_get/mod.rs @@ -41,8 +41,11 @@ pub use path::VariantPathElement; /// requested path may not exist in every row. /// /// Execution is handled by variant encodings (e.g. `ParquetVariantArray`) via `execute_parent`. -/// The canonical `VariantArray` does not support direct execution; a `reduce` rule unwraps -/// the `VariantArray` wrapper to expose the underlying encoding. +/// The canonical `VariantArray` does not support direct execution, so `VariantGet` keeps a small +/// `reduce` rule that unwraps a direct `VariantArray` child to expose the underlying encoding. +/// Wrapper arrays such as `Slice` and `Filter` forward `VariantGet` from their own +/// `execute_parent` hooks so the expression can still reach the underlying variant encoding +/// without teaching `VariantGet` about wrapper-specific array shapes. #[derive(Clone)] pub struct VariantGet; @@ -213,14 +216,12 @@ impl ScalarFnVTable for VariantGet { fn execute( &self, - _options: &VariantGetOptions, - _args: &dyn ExecutionArgs, - _ctx: &mut ExecutionCtx, + options: &VariantGetOptions, + args: &dyn ExecutionArgs, + ctx: &mut ExecutionCtx, ) -> VortexResult { - vortex_bail!( - "variant_get cannot be executed directly; \ - it must be pushed down to a variant encoding via execute_parent" - ) + let _ = (options, args, ctx); + vortex_bail!("variant_get cannot be executed directly") } fn reduce( From 68b59ab587834f64dbb1c1684e02d14e9b07375b Mon Sep 17 00:00:00 2001 From: Adam Gutglick Date: Wed, 15 Apr 2026 18:22:43 +0100 Subject: [PATCH 6/6] Move things around Signed-off-by: Adam Gutglick --- encodings/parquet-variant/src/compute/mod.rs | 1 + encodings/parquet-variant/src/{ => compute}/variant_get/mod.rs | 0 .../parquet-variant/src/{ => compute}/variant_get/tests.rs | 0 encodings/parquet-variant/src/kernel.rs | 2 +- encodings/parquet-variant/src/lib.rs | 2 +- 5 files changed, 3 insertions(+), 2 deletions(-) create mode 100644 encodings/parquet-variant/src/compute/mod.rs rename encodings/parquet-variant/src/{ => compute}/variant_get/mod.rs (100%) rename encodings/parquet-variant/src/{ => compute}/variant_get/tests.rs (100%) diff --git a/encodings/parquet-variant/src/compute/mod.rs b/encodings/parquet-variant/src/compute/mod.rs new file mode 100644 index 00000000000..9d96b646456 --- /dev/null +++ b/encodings/parquet-variant/src/compute/mod.rs @@ -0,0 +1 @@ +pub(crate) mod variant_get; diff --git a/encodings/parquet-variant/src/variant_get/mod.rs b/encodings/parquet-variant/src/compute/variant_get/mod.rs similarity index 100% rename from encodings/parquet-variant/src/variant_get/mod.rs rename to encodings/parquet-variant/src/compute/variant_get/mod.rs diff --git a/encodings/parquet-variant/src/variant_get/tests.rs b/encodings/parquet-variant/src/compute/variant_get/tests.rs similarity index 100% rename from encodings/parquet-variant/src/variant_get/tests.rs rename to encodings/parquet-variant/src/compute/variant_get/tests.rs diff --git a/encodings/parquet-variant/src/kernel.rs b/encodings/parquet-variant/src/kernel.rs index cf395c0a345..9d0e4874e01 100644 --- a/encodings/parquet-variant/src/kernel.rs +++ b/encodings/parquet-variant/src/kernel.rs @@ -19,7 +19,7 @@ use vortex_mask::Mask; use crate::ParquetVariant; use crate::ParquetVariantArrayExt; -use crate::variant_get::VariantGetExecuteParent; +use crate::compute::variant_get::VariantGetExecuteParent; pub(crate) static PARENT_KERNELS: ParentKernelSet = ParentKernelSet::new(&[ ParentKernelSet::lift(&FilterExecuteAdaptor(ParquetVariant)), diff --git a/encodings/parquet-variant/src/lib.rs b/encodings/parquet-variant/src/lib.rs index 2ce35a44388..038c49baf2c 100644 --- a/encodings/parquet-variant/src/lib.rs +++ b/encodings/parquet-variant/src/lib.rs @@ -25,10 +25,10 @@ //! [Arrow canonical extension type]: https://arrow.apache.org/docs/format/CanonicalExtensions.html#parquet-variant mod array; +mod compute; mod kernel; mod operations; mod validity; -mod variant_get; mod vtable; pub use array::ParquetVariantArrayExt;