From f86209264f3d4b7a4c6dabf52d03bda178167b43 Mon Sep 17 00:00:00 2001 From: Kristin Cowalcijk Date: Wed, 28 Jan 2026 23:59:54 +0800 Subject: [PATCH 1/2] Pass ConfigOptions into UDFs --- c/sedona-extension/src/scalar_kernel.rs | 3 +++ c/sedona-proj/src/st_transform.rs | 2 ++ python/sedonadb/src/udf.rs | 2 ++ rust/sedona-expr/src/item_crs.rs | 21 ++++++++++++++++++--- rust/sedona-expr/src/scalar_udf.rs | 10 +++++++++- rust/sedona-functions/src/st_setsrid.rs | 3 +++ 6 files changed, 37 insertions(+), 4 deletions(-) diff --git a/c/sedona-extension/src/scalar_kernel.rs b/c/sedona-extension/src/scalar_kernel.rs index 17972356e..04c9ef0b9 100644 --- a/c/sedona-extension/src/scalar_kernel.rs +++ b/c/sedona-extension/src/scalar_kernel.rs @@ -20,6 +20,7 @@ use arrow_array::{ make_array, ArrayRef, }; use arrow_schema::{ArrowError, Field}; +use datafusion_common::config::ConfigOptions; use datafusion_common::{plan_err, DataFusionError, Result, ScalarValue}; use datafusion_expr::ColumnarValue; use sedona_common::sedona_internal_err; @@ -101,6 +102,7 @@ impl SedonaScalarKernel for ImportedScalarKernel { args: &[ColumnarValue], return_type: &SedonaType, num_rows: usize, + _config_options: Option<&ConfigOptions>, ) -> Result { let arg_scalars = args .iter() @@ -560,6 +562,7 @@ impl ExportedScalarKernelImpl { &args, return_type, num_rows as usize, + None, )?; // Convert the result to an ArrayRef diff --git a/c/sedona-proj/src/st_transform.rs b/c/sedona-proj/src/st_transform.rs index b7fd90a6c..fcdf8e78f 100644 --- a/c/sedona-proj/src/st_transform.rs +++ b/c/sedona-proj/src/st_transform.rs @@ -19,6 +19,7 @@ use arrow_array::builder::{BinaryBuilder, StringViewBuilder}; use arrow_array::ArrayRef; use arrow_schema::DataType; use datafusion_common::cast::{as_string_view_array, as_struct_array}; +use datafusion_common::config::ConfigOptions; use datafusion_common::{exec_err, DataFusionError, Result, ScalarValue}; use datafusion_expr::ColumnarValue; use 
sedona_common::sedona_internal_err; @@ -99,6 +100,7 @@ impl SedonaScalarKernel for STTransform { args: &[ColumnarValue], _return_type: &SedonaType, _num_rows: usize, + _config_options: Option<&ConfigOptions>, ) -> Result { let inputs = zip(arg_types, args) .map(|(arg_type, arg)| ArgInput::from_arg(arg_type, arg)) diff --git a/python/sedonadb/src/udf.rs b/python/sedonadb/src/udf.rs index eeb7cdd9b..d731bdc29 100644 --- a/python/sedonadb/src/udf.rs +++ b/python/sedonadb/src/udf.rs @@ -22,6 +22,7 @@ use arrow_array::{ ArrayRef, }; use arrow_schema::Field; +use datafusion_common::config::ConfigOptions; use datafusion_common::{Result, ScalarValue}; use datafusion_expr::{ColumnarValue, ScalarUDF, Volatility}; use datafusion_ffi::udf::FFI_ScalarUDF; @@ -159,6 +160,7 @@ impl SedonaScalarKernel for PySedonaScalarKernel { args: &[ColumnarValue], return_type: &SedonaType, num_rows: usize, + _config_options: Option<&ConfigOptions>, ) -> Result { let result = Python::with_gil(|py| -> Result { let py_values = zip(arg_types, args) diff --git a/rust/sedona-expr/src/item_crs.rs b/rust/sedona-expr/src/item_crs.rs index 0889622b7..3bb73a160 100644 --- a/rust/sedona-expr/src/item_crs.rs +++ b/rust/sedona-expr/src/item_crs.rs @@ -20,6 +20,7 @@ use std::{fmt::Debug, iter::zip, sync::Arc}; use arrow_array::{Array, ArrayRef, StructArray}; use arrow_buffer::NullBuffer; use arrow_schema::{DataType, Field, FieldRef}; +use datafusion_common::config::ConfigOptions; use datafusion_common::{ cast::{as_string_view_array, as_struct_array}, exec_err, DataFusionError, Result, ScalarValue, @@ -102,8 +103,16 @@ impl SedonaScalarKernel for ItemCrsKernel { args: &[ColumnarValue], return_type: &SedonaType, num_rows: usize, + config_options: Option<&ConfigOptions>, ) -> Result { - invoke_handle_item_crs(self.inner.as_ref(), arg_types, args, return_type, num_rows) + invoke_handle_item_crs( + self.inner.as_ref(), + arg_types, + args, + return_type, + num_rows, + config_options, + ) } fn invoke_batch( @@ 
-444,6 +453,7 @@ fn invoke_handle_item_crs( args: &[ColumnarValue], return_type: &SedonaType, num_rows: usize, + config_options: Option<&ConfigOptions>, ) -> Result { // Separate the argument types into item and Option // Don't strip the CRSes because we need them to compare with @@ -485,8 +495,13 @@ fn invoke_handle_item_crs( None => return sedona_internal_err!("Expected inner kernel to match types {item_types:?}"), }; - let item_result = - kernel.invoke_batch_from_args(&item_types, &item_args, return_type, num_rows)?; + let item_result = kernel.invoke_batch_from_args( + &item_types, + &item_args, + return_type, + num_rows, + config_options, + )?; if ArgMatcher::is_geometry_or_geography().match_type(&out_item_type) { make_item_crs(&out_item_type, item_result, crs_result, None) diff --git a/rust/sedona-expr/src/scalar_udf.rs b/rust/sedona-expr/src/scalar_udf.rs index d5bc56ec8..fac30e60a 100644 --- a/rust/sedona-expr/src/scalar_udf.rs +++ b/rust/sedona-expr/src/scalar_udf.rs @@ -17,6 +17,7 @@ use std::{any::Any, fmt::Debug, sync::Arc}; use arrow_schema::{DataType, FieldRef}; +use datafusion_common::config::ConfigOptions; use datafusion_common::{not_impl_err, Result, ScalarValue}; use datafusion_expr::{ ColumnarValue, Documentation, ReturnFieldArgs, ScalarFunctionArgs, ScalarUDFImpl, Signature, @@ -136,6 +137,7 @@ pub trait SedonaScalarKernel: Debug + Send + Sync { args: &[ColumnarValue], _return_type: &SedonaType, _num_rows: usize, + _config_options: Option<&ConfigOptions>, ) -> Result { self.invoke_batch(arg_types, args) } @@ -323,7 +325,13 @@ impl ScalarUDFImpl for SedonaScalarUDF { .collect::>(); let (kernel, return_type) = self.return_type_impl(&arg_types, &arg_scalars)?; - kernel.invoke_batch_from_args(&arg_types, &args.args, &return_type, args.number_rows) + kernel.invoke_batch_from_args( + &arg_types, + &args.args, + &return_type, + args.number_rows, + Some(&*args.config_options), + ) } fn aliases(&self) -> &[String] { diff --git 
a/rust/sedona-functions/src/st_setsrid.rs b/rust/sedona-functions/src/st_setsrid.rs index 8af08beb7..32b5f6e18 100644 --- a/rust/sedona-functions/src/st_setsrid.rs +++ b/rust/sedona-functions/src/st_setsrid.rs @@ -25,6 +25,7 @@ use arrow_array::{ }; use arrow_buffer::NullBuffer; use arrow_schema::DataType; +use datafusion_common::config::ConfigOptions; use datafusion_common::{ cast::{as_int64_array, as_string_view_array}, error::Result, @@ -144,6 +145,7 @@ impl SedonaScalarKernel for STSetSRID { args: &[ColumnarValue], return_type: &SedonaType, _num_rows: usize, + _config_options: Option<&ConfigOptions>, ) -> Result { let item_crs_matcher = ArgMatcher::is_item_crs(); if item_crs_matcher.match_type(return_type) { @@ -200,6 +202,7 @@ impl SedonaScalarKernel for STSetCRS { args: &[ColumnarValue], return_type: &SedonaType, _num_rows: usize, + _config_options: Option<&ConfigOptions>, ) -> Result { let item_crs_matcher = ArgMatcher::is_item_crs(); if item_crs_matcher.match_type(return_type) { From d51057dd8a7e272b6a33455ea5597bf95428960f Mon Sep 17 00:00:00 2001 From: Kristin Cowalcijk Date: Thu, 29 Jan 2026 01:09:35 +0800 Subject: [PATCH 2/2] Fully resolve the issue --- rust/sedona-geoparquet/src/format.rs | 11 ++++- rust/sedona-geoparquet/src/writer.rs | 13 +++--- rust/sedona-spatial-join/src/optimizer.rs | 2 - rust/sedona-testing/src/testers.rs | 49 ++++++++++++++++++++--- rust/sedona/src/show.rs | 21 +++++++--- 5 files changed, 77 insertions(+), 19 deletions(-) diff --git a/rust/sedona-geoparquet/src/format.rs b/rust/sedona-geoparquet/src/format.rs index 0da807b3c..4fa966e40 100644 --- a/rust/sedona-geoparquet/src/format.rs +++ b/rust/sedona-geoparquet/src/format.rs @@ -302,11 +302,18 @@ impl FileFormat for GeoParquetFormat { async fn create_writer_physical_plan( &self, input: Arc, - _state: &dyn Session, + session: &dyn Session, conf: FileSinkConfig, order_requirements: Option, ) -> Result> { - create_geoparquet_writer_physical_plan(input, conf, order_requirements, 
&self.options) + let session_config_options = session.config().options(); + create_geoparquet_writer_physical_plan( + input, + conf, + order_requirements, + &self.options, + session_config_options, + ) } fn file_source(&self) -> Arc { diff --git a/rust/sedona-geoparquet/src/writer.rs b/rust/sedona-geoparquet/src/writer.rs index 8ea1a264e..42c9de4d5 100644 --- a/rust/sedona-geoparquet/src/writer.rs +++ b/rust/sedona-geoparquet/src/writer.rs @@ -69,6 +69,7 @@ pub fn create_geoparquet_writer_physical_plan( mut conf: FileSinkConfig, order_requirements: Option, options: &TableGeoParquetOptions, + session_config_options: &Arc, ) -> Result> { if conf.insert_op != InsertOp::Append { return not_impl_err!("Overwrites are not implemented yet for Parquet"); @@ -93,8 +94,11 @@ pub fn create_geoparquet_writer_physical_plan( } GeoParquetVersion::V1_1 => { metadata.version = "1.1.0".to_string(); - (bbox_projection, bbox_columns) = - project_bboxes(&input, options.overwrite_bbox_columns)?; + (bbox_projection, bbox_columns) = project_bboxes( + &input, + options.overwrite_bbox_columns, + session_config_options, + )?; parquet_output_schema = compute_final_schema(&bbox_projection, &input.schema())?; output_geometry_column_indices = conf.output_schema.geometry_column_indices()?; } @@ -291,6 +295,7 @@ type ProjectBboxesResult = ( fn project_bboxes( input: &Arc, overwrite_bbox_columns: bool, + session_config_options: &Arc, ) -> Result { let input_schema = input.schema(); let matcher = ArgMatcher::is_geometry(); @@ -310,14 +315,12 @@ fn project_bboxes( column.return_field(&input_schema)?.as_ref(), )?) 
{ let bbox_field_name = bbox_column_name(f.name()); - // TODO: Pipe actual ConfigOptions from session instead of using defaults - // See: https://github.com/apache/sedona-db/issues/248 let expr = Arc::new(ScalarFunctionExpr::new( bbox_udf_name, bbox_udf.clone(), vec![column], Arc::new(Field::new("", bbox_type(), true)), - Arc::new(ConfigOptions::default()), + Arc::clone(session_config_options), )); bbox_exprs.insert(i, (expr, bbox_field_name.clone())); diff --git a/rust/sedona-spatial-join/src/optimizer.rs b/rust/sedona-spatial-join/src/optimizer.rs index a8c281673..a5a8baefe 100644 --- a/rust/sedona-spatial-join/src/optimizer.rs +++ b/rust/sedona-spatial-join/src/optimizer.rs @@ -1176,8 +1176,6 @@ mod tests { ) -> Arc { let return_type = udf.return_type(&[]).unwrap(); let field = Arc::new(arrow::datatypes::Field::new("result", return_type, false)); - // TODO: Pipe actual ConfigOptions from session instead of using defaults - // See: https://github.com/apache/sedona-db/issues/248 Arc::new(ScalarFunctionExpr::new( udf.name(), Arc::clone(&udf), diff --git a/rust/sedona-testing/src/testers.rs b/rust/sedona-testing/src/testers.rs index 54b947a36..33476d49e 100644 --- a/rust/sedona-testing/src/testers.rs +++ b/rust/sedona-testing/src/testers.rs @@ -27,7 +27,7 @@ use datafusion_expr::{ ReturnFieldArgs, ScalarFunctionArgs, ScalarUDF, }; use datafusion_physical_expr::{expressions::Column, PhysicalExpr}; -use sedona_common::sedona_internal_err; +use sedona_common::{sedona_internal_err, SedonaOptions}; use sedona_schema::datatypes::SedonaType; use crate::{ @@ -240,12 +240,53 @@ impl AggregateUdfTester { pub struct ScalarUdfTester { udf: ScalarUDF, arg_types: Vec, + config_options: Arc, } impl ScalarUdfTester { /// Create a new tester pub fn new(udf: ScalarUDF, arg_types: Vec) -> Self { - Self { udf, arg_types } + let mut config_options = ConfigOptions::default(); + let sedona_options = SedonaOptions::default(); + config_options.extensions.insert(sedona_options); + Self { + 
udf, + arg_types, + config_options: Arc::new(config_options), + } + } + + /// Returns the [`ConfigOptions`] used when invoking the UDF. + /// + /// This is the same structure DataFusion threads through [`ScalarFunctionArgs`]. + /// Sedona-specific options are stored in `config_options.extensions`. + pub fn config_options(&self) -> &ConfigOptions { + &self.config_options + } + + /// Returns a mutable reference to the [`ConfigOptions`] used when invoking the UDF. + /// + /// Use this to tweak DataFusion options or to insert/update Sedona options via + /// `config_options.extensions` before calling the tester's `invoke_*` helpers. + pub fn config_options_mut(&mut self) -> &mut ConfigOptions { + // config_options can only be owned by this tester, so it's safe to get a mutable reference. + Arc::get_mut(&mut self.config_options).expect("ConfigOptions is shared") + } + + /// Returns the [`SedonaOptions`] stored in `config_options.extensions`; panics if none are registered. + pub fn sedona_options(&self) -> &SedonaOptions { + self.config_options + .extensions + .get::() + .expect("SedonaOptions does not exist") + } + + /// Returns a mutable reference to the [`SedonaOptions`] stored in `config_options.extensions`; panics if none are registered or if the [`ConfigOptions`] are currently shared. 
+ pub fn sedona_options_mut(&mut self) -> &mut SedonaOptions { + self.config_options_mut() + .extensions + .get_mut::() + .expect("SedonaOptions does not exist") } /// Assert the return type of the function for the argument types used @@ -610,9 +651,7 @@ impl ScalarUdfTester { arg_fields: self.arg_fields(), number_rows, return_field: return_type.to_storage_field("", true)?.into(), - // TODO: Consider piping actual ConfigOptions for more realistic testing - // See: https://github.com/apache/sedona-db/issues/248 - config_options: Arc::new(ConfigOptions::default()), + config_options: Arc::clone(&self.config_options), }; self.udf.invoke_with_args(args) diff --git a/rust/sedona/src/show.rs b/rust/sedona/src/show.rs index 4276a1198..0d3b946b1 100644 --- a/rust/sedona/src/show.rs +++ b/rust/sedona/src/show.rs @@ -50,7 +50,11 @@ pub fn show_batches<'a, W: std::io::Write>( ))? .clone(); - let mut table = DisplayTable::try_new(schema, batches, options)?.with_format_fn(format_fn); + let session_config = ctx.ctx.copied_config(); + let session_config_options = session_config.options(); + + let mut table = DisplayTable::try_new(schema, batches, options, session_config_options)? 
+ .with_format_fn(format_fn); table.negotiate_hidden_columns()?; table.write(writer) } @@ -141,6 +145,7 @@ impl<'a> DisplayTable<'a> { schema: &Schema, batches: Vec, options: DisplayTableOptions<'a>, + session_config_options: &Arc, ) -> Result { let num_rows = batches.iter().map(|batch| batch.num_rows()).sum(); @@ -155,6 +160,7 @@ impl<'a> DisplayTable<'a> { .iter() .map(|batch| batch.column(i).clone()) .collect(), + Arc::clone(session_config_options), ) }) .collect::>>()?; @@ -354,17 +360,23 @@ struct DisplayColumn { raw_values: Vec, format_fn: Option, hidden: bool, + session_config_options: Arc, } impl DisplayColumn { /// Create a new display column - pub fn try_new(field: &Field, raw_values: Vec) -> Result { + pub fn try_new( + field: &Field, + raw_values: Vec, + session_config_options: Arc, + ) -> Result { Ok(Self { name: field.name().to_string(), sedona_type: SedonaType::from_storage_field(field)?, raw_values, format_fn: None, hidden: false, + session_config_options, }) } @@ -382,6 +394,7 @@ impl DisplayColumn { raw_values: vec![Arc::new(raw_values)], format_fn: None, hidden: false, + session_config_options: Arc::new(ConfigOptions::default()), } } @@ -495,9 +508,7 @@ impl DisplayColumn { arg_fields, number_rows: array.len(), return_field, - // TODO: Pipe actual ConfigOptions from SedonaContext instead of using defaults - // See: https://github.com/apache/sedona-db/issues/248 - config_options: Arc::new(ConfigOptions::default()), + config_options: Arc::clone(&self.session_config_options), }; let format_proxy_value = format_udf.invoke_with_args(args)?;