diff --git a/Cargo.lock b/Cargo.lock index bc6799a91..c265c6593 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5003,6 +5003,7 @@ dependencies = [ "async-trait", "clap", "datafusion", + "datafusion-common", "env_logger 0.11.8", "futures", "libmimalloc-sys", @@ -5010,6 +5011,7 @@ dependencies = [ "regex", "rustyline", "sedona", + "sedona-common", "sedona-tg", "tokio", ] diff --git a/c/sedona-proj/src/st_transform.rs b/c/sedona-proj/src/st_transform.rs index fcdf8e78f..63c209f9a 100644 --- a/c/sedona-proj/src/st_transform.rs +++ b/c/sedona-proj/src/st_transform.rs @@ -22,7 +22,7 @@ use datafusion_common::cast::{as_string_view_array, as_struct_array}; use datafusion_common::config::ConfigOptions; use datafusion_common::{exec_err, DataFusionError, Result, ScalarValue}; use datafusion_expr::ColumnarValue; -use sedona_common::sedona_internal_err; +use sedona_common::{sedona_internal_datafusion_err, sedona_internal_err}; use sedona_expr::item_crs::make_item_crs; use sedona_expr::scalar_udf::{ScalarKernelRef, SedonaScalarKernel}; use sedona_functions::executor::WkbExecutor; @@ -401,8 +401,8 @@ pub(crate) fn with_global_proj_engine( // Otherwise, attempt to get the builder let maybe_builder = PROJ_ENGINE_BUILDER.read().map_err(|_| { // Highly unlikely (can only occur when a panic occurred during set) - DataFusionError::Internal( - "Failed to acquire read lock for global PROJ configuration".to_string(), + sedona_internal_datafusion_err!( + "Failed to acquire read lock for global PROJ configuration" ) })?; @@ -416,9 +416,7 @@ pub(crate) fn with_global_proj_engine( engine_cell .set(CachingCrsEngine::new(proj_engine)) - .map_err(|_| { - DataFusionError::Internal("Failed to set cached PROJ transform".to_string()) - })?; + .map_err(|_| sedona_internal_datafusion_err!("Failed to set cached PROJ transform"))?; func(engine_cell.get().unwrap())?; Ok(()) }) diff --git a/rust/sedona-expr/src/statistics.rs b/rust/sedona-expr/src/statistics.rs index 08c0dd9cf..4ae729e7a 100644 --- a/rust/sedona-expr/src/statistics.rs +++ b/rust/sedona-expr/src/statistics.rs @@ -17,6 +17,7 @@ use std::str::FromStr; use datafusion_common::{stats::Precision, ColumnStatistics, DataFusionError, Result, ScalarValue}; +use sedona_common::sedona_internal_datafusion_err; use sedona_geometry::interval::{Interval, IntervalTrait}; use sedona_geometry::{ bounding_box::BoundingBox, @@ -391,7 +392,7 @@ impl GeoStatistics { pub fn to_scalar_value(&self) -> Result { // Serialize to JSON let serialized = serde_json::to_vec(self).map_err(|e| { - DataFusionError::Internal(format!("Failed to serialize GeoStatistics: {e}")) + sedona_internal_datafusion_err!("Failed to serialize GeoStatistics: {e}") })?; Ok(ScalarValue::Binary(Some(serialized))) diff --git a/rust/sedona-functions/src/st_analyze_agg.rs b/rust/sedona-functions/src/st_analyze_agg.rs index 82647a6de..5377255a2 100644 --- a/rust/sedona-functions/src/st_analyze_agg.rs +++ b/rust/sedona-functions/src/st_analyze_agg.rs @@ -30,6 +30,7 @@ use datafusion_common::{ }; use datafusion_expr::{scalar_doc_sections::DOC_SECTION_OTHER, Documentation, Volatility}; use datafusion_expr::{Accumulator, ColumnarValue}; +use sedona_common::{sedona_internal_datafusion_err, sedona_internal_err}; use sedona_expr::aggregate_udf::SedonaAccumulatorRef; use sedona_expr::aggregate_udf::SedonaAggregateUDF; use sedona_expr::item_crs::ItemCrsSedonaAccumulator; @@ -383,9 +384,7 @@ impl AnalyzeAccumulator { impl Accumulator for AnalyzeAccumulator { fn update_batch(&mut self, values: &[ArrayRef]) -> Result<()> { if values.is_empty() { - return Err(DataFusionError::Internal( - "No input arrays provided to accumulator".to_string(), - )); + return sedona_internal_err!("No input arrays provided to accumulator"); } let arg_types = [self.input_type.clone()]; let arg_values = [ColumnarValue::Array(values[0].clone())]; @@ -441,9 +440,7 @@ impl Accumulator for AnalyzeAccumulator { fn merge_batch(&mut self, states: &[ArrayRef]) -> Result<()> { // Check input length (expecting 1 state field) if states.is_empty() { - return Err(DataFusionError::Internal( - "No input arrays provided to accumulator in merge_batch".to_string(), - )); + return sedona_internal_err!("No input arrays provided to accumulator in merge_batch"); } let array = &states[0]; @@ -455,9 +452,8 @@ impl Accumulator for AnalyzeAccumulator { } let serialized = binary_array.value(i); - let other_stats: GeoStatistics = serde_json::from_slice(serialized).map_err(|e| { - DataFusionError::Internal(format!("Failed to deserialize stats: {e}")) - })?; + let other_stats: GeoStatistics = serde_json::from_slice(serialized) + .map_err(|e| sedona_internal_datafusion_err!("Failed to deserialize stats: {e}"))?; // Use the merge method to combine statistics self.stats.merge(&other_stats); diff --git a/rust/sedona-functions/src/st_envelope.rs b/rust/sedona-functions/src/st_envelope.rs index c7dd22d8e..cdf0ccb09 100644 --- a/rust/sedona-functions/src/st_envelope.rs +++ b/rust/sedona-functions/src/st_envelope.rs @@ -23,6 +23,7 @@ use datafusion_expr::{ scalar_doc_sections::DOC_SECTION_OTHER, ColumnarValue, Documentation, Volatility, }; use geo_traits::GeometryTrait; +use sedona_common::sedona_internal_err; use sedona_expr::{ item_crs::ItemCrsKernel, scalar_udf::{SedonaScalarKernel, SedonaScalarUDF}, @@ -125,11 +126,7 @@ fn invoke_scalar(wkb: &Wkb, writer: &mut impl std::io::Write) -> Result<()> { geo_traits::GeometryType::GeometryCollection(_) => { write_wkb_geometrycollection_header(writer, wkb.dim(), 0) } - _ => { - return Err(DataFusionError::Internal( - "Unsupported geometry type".to_string(), - )) - } + _ => return sedona_internal_err!("Unsupported geometry type"), }; if let Err(e) = result { diff --git a/rust/sedona-functions/src/st_envelope_agg.rs b/rust/sedona-functions/src/st_envelope_agg.rs index 37ca99455..7966052c4 100644 --- a/rust/sedona-functions/src/st_envelope_agg.rs +++ b/rust/sedona-functions/src/st_envelope_agg.rs @@ -132,9 +132,7 @@ impl BoundsAccumulator2D { // Check the input length for update methods. fn check_update_input_len(input: &[ArrayRef], expected: usize, context: &str) -> Result<()> { if input.is_empty() { - return Err(DataFusionError::Internal(format!( - "No input arrays provided to accumulator in {context}" - ))); + return sedona_internal_err!("No input arrays provided to accumulator in {context}"); } if input.len() != expected { return sedona_internal_err!( diff --git a/rust/sedona-functions/src/st_geomfromwkt.rs b/rust/sedona-functions/src/st_geomfromwkt.rs index c06c94bee..01eef0fe5 100644 --- a/rust/sedona-functions/src/st_geomfromwkt.rs +++ b/rust/sedona-functions/src/st_geomfromwkt.rs @@ -19,11 +19,13 @@ use std::{str::FromStr, sync::Arc, vec}; use arrow_array::builder::{BinaryBuilder, StringViewBuilder}; use arrow_schema::DataType; use datafusion_common::cast::as_string_view_array; -use datafusion_common::error::{DataFusionError, Result}; +use datafusion_common::error::Result; +use datafusion_common::exec_datafusion_err; use datafusion_common::scalar::ScalarValue; use datafusion_expr::{ scalar_doc_sections::DOC_SECTION_OTHER, ColumnarValue, Documentation, Volatility, }; +use sedona_common::sedona_internal_datafusion_err; use sedona_expr::item_crs::make_item_crs; use sedona_expr::scalar_udf::{SedonaScalarKernel, SedonaScalarUDF}; use sedona_geometry::wkb_factory::WKB_MIN_PROBABLE_BYTES; @@ -136,8 +138,8 @@ impl SedonaScalarKernel for STGeoFromWKT { } fn invoke_scalar(wkt_bytes: &str, builder: &mut BinaryBuilder) -> Result<()> { - let geometry: Wkt = Wkt::from_str(wkt_bytes) - .map_err(|err| DataFusionError::Internal(format!("WKT parse error: {err}")))?; + let geometry: Wkt = + Wkt::from_str(wkt_bytes).map_err(|err| exec_datafusion_err!("WKT parse error: {err}"))?; write_geometry( builder, @@ -146,7 +148,7 @@ fn invoke_scalar(wkt_bytes: &str, builder: &mut BinaryBuilder) -> Result<()> { endianness: Endianness::LittleEndian, }, ) - .map_err(|err| DataFusionError::Internal(format!("WKB write error: {err}"))) + .map_err(|err| sedona_internal_datafusion_err!("WKB write error: {err}")) } /// ST_GeomFromEWKT() UDF implementation diff --git a/rust/sedona-functions/src/st_pointzm.rs b/rust/sedona-functions/src/st_pointzm.rs index a2eaf997a..59f9f43d3 100644 --- a/rust/sedona-functions/src/st_pointzm.rs +++ b/rust/sedona-functions/src/st_pointzm.rs @@ -30,6 +30,7 @@ use datafusion_expr::{ scalar_doc_sections::DOC_SECTION_OTHER, ColumnarValue, Documentation, Volatility, }; use geo_traits::Dimensions; +use sedona_common::{sedona_internal_datafusion_err, sedona_internal_err}; use sedona_expr::scalar_udf::{SedonaScalarKernel, SedonaScalarUDF}; use sedona_geometry::{ error::SedonaGeometryError, @@ -166,9 +167,7 @@ impl SedonaScalarKernel for STGeoFromPointZm { .iter() .map(|v| match v { ColumnarValue::Scalar(ScalarValue::Float64(val)) => Ok(*val), - _ => Err(datafusion_common::DataFusionError::Internal( - "Expected Float64 scalar".to_string(), - )), + _ => sedona_internal_err!("Expected Float64 scalar"), }) .collect(); let scalar_coords = scalar_coords?; @@ -216,9 +215,7 @@ impl SedonaScalarKernel for STGeoFromPointZm { let values = arrays.iter().map(|v| v.value(i)).collect::>(); if !any_null { write_wkb_pointzm(&mut builder, &values, self.dim).map_err(|_| { - datafusion_common::DataFusionError::Internal( - "Failed to write WKB point header".to_string(), - ) + sedona_internal_datafusion_err!("Failed to write WKB point header") })?; builder.append_value([]); } else { diff --git a/rust/sedona-functions/src/st_setsrid.rs b/rust/sedona-functions/src/st_setsrid.rs index 7c5b97139..59059cf14 100644 --- a/rust/sedona-functions/src/st_setsrid.rs +++ b/rust/sedona-functions/src/st_setsrid.rs @@ -747,9 +747,9 @@ mod test { to: ScalarValue, ) -> Result<(SedonaType, ColumnarValue)> { let SedonaType::Arrow(datatype) = &arg_type[1] else { - return Err(DataFusionError::Internal( - "Expected SedonaType::Arrow, but found a different variant".to_string(), - )); + return sedona_internal_err!( + "Expected SedonaType::Arrow, but found a different variant" + ); }; let arg_fields = vec![ Arc::new(arg_type[0].to_storage_field("", true)?), diff --git a/rust/sedona-functions/src/st_xyzm.rs b/rust/sedona-functions/src/st_xyzm.rs index 423c6c4e4..9fa7697bc 100644 --- a/rust/sedona-functions/src/st_xyzm.rs +++ b/rust/sedona-functions/src/st_xyzm.rs @@ -27,7 +27,7 @@ use geo_traits::{ CoordTrait, Dimensions, GeometryCollectionTrait, GeometryTrait, LineStringTrait, MultiLineStringTrait, MultiPointTrait, MultiPolygonTrait, PointTrait, PolygonTrait, }; -use sedona_common::sedona_internal_err; +use sedona_common::{sedona_internal_datafusion_err, sedona_internal_err}; use sedona_expr::{ item_crs::ItemCrsKernel, scalar_udf::{SedonaScalarKernel, SedonaScalarUDF}, @@ -173,7 +173,7 @@ fn invoke_scalar(item: &Wkb, dim_index: usize) -> Result> { 1 => { let coord_dim = multipoint.dim(); let point = MultiPointTrait::point(multipoint, 0) - .ok_or(DataFusionError::Internal("Missing point".to_string()))?; + .ok_or(sedona_internal_datafusion_err!("Missing point"))?; let coord = PointTrait::coord(&point); return get_coord(coord_dim, coord, dim_index); } diff --git a/rust/sedona-functions/src/st_xyzm_minmax.rs b/rust/sedona-functions/src/st_xyzm_minmax.rs index 9720d72d4..8617d6adc 100644 --- a/rust/sedona-functions/src/st_xyzm_minmax.rs +++ b/rust/sedona-functions/src/st_xyzm_minmax.rs @@ -19,12 +19,12 @@ use std::sync::Arc; use crate::executor::WkbExecutor; use arrow_array::builder::Float64Builder; use arrow_schema::DataType; -use datafusion_common::{error::Result, DataFusionError}; +use datafusion_common::error::Result; use datafusion_expr::{ scalar_doc_sections::DOC_SECTION_OTHER, ColumnarValue, Documentation, Volatility, }; use geo_traits::GeometryTrait; -use sedona_common::sedona_internal_err; +use sedona_common::{sedona_internal_datafusion_err, sedona_internal_err}; use sedona_expr::{ item_crs::ItemCrsKernel, scalar_udf::{SedonaScalarKernel, SedonaScalarUDF}, @@ -196,24 +196,23 @@ fn invoke_scalar( let interval: Interval = match dim { "x" => { let xy_bounds = geo_traits_bounds_xy(item) - .map_err(|e| DataFusionError::Internal(format!("Error updating bounds: {e}")))?; - Interval::try_from(*xy_bounds.x()).map_err(|e| { - DataFusionError::Internal(format!("Error converting to interval: {e}")) - })? + .map_err(|e| sedona_internal_datafusion_err!("Error updating bounds: {e}"))?; + Interval::try_from(*xy_bounds.x()) + .map_err(|e| sedona_internal_datafusion_err!("Error converting to interval: {e}"))? } "y" => { let xy_bounds = geo_traits_bounds_xy(item) - .map_err(|e| DataFusionError::Internal(format!("Error updating bounds: {e}")))?; + .map_err(|e| sedona_internal_datafusion_err!("Error updating bounds: {e}"))?; *xy_bounds.y() } "z" => { let z_bounds = geo_traits_bounds_z(item) - .map_err(|e| DataFusionError::Internal(format!("Error updating bounds: {e}")))?; + .map_err(|e| sedona_internal_datafusion_err!("Error updating bounds: {e}"))?; z_bounds } "m" => { let m_bounds = geo_traits_bounds_m(item) - .map_err(|e| DataFusionError::Internal(format!("Error updating bounds: {e}")))?; + .map_err(|e| sedona_internal_datafusion_err!("Error updating bounds: {e}"))?; m_bounds } _ => sedona_internal_err!("unexpected dim index")?, diff --git a/rust/sedona-geo/src/centroid.rs b/rust/sedona-geo/src/centroid.rs index ee74df135..0e927be95 100644 --- a/rust/sedona-geo/src/centroid.rs +++ b/rust/sedona-geo/src/centroid.rs @@ -16,7 +16,7 @@ // under the License. //! Centroid extraction functionality for WKB geometries -use datafusion_common::{error::DataFusionError, Result}; +use datafusion_common::{exec_err, Result}; use geo_traits::CoordTrait; use geo_traits::GeometryTrait; use geo_traits::PointTrait; @@ -56,9 +56,7 @@ pub fn extract_centroid_2d(geo: impl GeometryTrait) -> Result<(f64, f64 // Return POINT EMPTY as (NaN, NaN) Ok((f64::NAN, f64::NAN)) } else { - Err(DataFusionError::Internal( - "Centroid computation failed.".to_string(), - )) + exec_err!("Centroid computation failed.") } } } diff --git a/rust/sedona-geo/src/st_intersection_agg.rs b/rust/sedona-geo/src/st_intersection_agg.rs index 49d4b4484..1ef1958ad 100644 --- a/rust/sedona-geo/src/st_intersection_agg.rs +++ b/rust/sedona-geo/src/st_intersection_agg.rs @@ -18,13 +18,11 @@ use std::{sync::Arc, vec}; use arrow_array::ArrayRef; use arrow_schema::FieldRef; -use datafusion_common::{ - error::{DataFusionError, Result}, - ScalarValue, -}; +use datafusion_common::{error::Result, exec_err, ScalarValue}; use datafusion_expr::{Accumulator, ColumnarValue}; use geo::{BooleanOps, Intersects}; use geo_traits::to_geo::ToGeoGeometry; +use sedona_common::sedona_internal_err; use sedona_expr::{ aggregate_udf::{SedonaAccumulator, SedonaAccumulatorRef}, item_crs::ItemCrsSedonaAccumulator, @@ -104,9 +102,7 @@ impl IntersectionAccumulator { geo::Geometry::Polygon(poly) => geo::MultiPolygon(vec![poly]), geo::Geometry::MultiPolygon(multi) => multi.clone(), _ => { - return Err(DataFusionError::Internal( - "Unsupported geometry type for intersection operation".to_string(), - )); + return exec_err!("Unsupported geometry type for intersection operation"); } }; @@ -170,9 +166,7 @@ impl IntersectionAccumulator { impl Accumulator for IntersectionAccumulator { fn update_batch(&mut self, values: &[ArrayRef]) -> Result<()> { if values.is_empty() { - return Err(DataFusionError::Internal( - "No input arrays provided to accumulator in update_batch".to_string(), - )); + return sedona_internal_err!("No input arrays provided to accumulator in update_batch"); } let arg_types = [self.input_type.clone()]; let args = [ColumnarValue::Array(values[0].clone())]; @@ -213,9 +207,7 @@ impl Accumulator for IntersectionAccumulator { fn merge_batch(&mut self, states: &[ArrayRef]) -> Result<()> { // Check input length (expecting 1 state field) if states.is_empty() { - return Err(DataFusionError::Internal( - "No input arrays provided to accumulator in merge_batch".to_string(), - )); + return sedona_internal_err!("No input arrays provided to accumulator in merge_batch"); } let array = &states[0]; let args = [ColumnarValue::Array(array.clone())]; diff --git a/rust/sedona-geo/src/st_union_agg.rs b/rust/sedona-geo/src/st_union_agg.rs index cb2a8be36..e9d7e0034 100644 --- a/rust/sedona-geo/src/st_union_agg.rs +++ b/rust/sedona-geo/src/st_union_agg.rs @@ -18,10 +18,7 @@ use std::{sync::Arc, vec}; use arrow_array::ArrayRef; use arrow_schema::FieldRef; -use datafusion_common::{ - error::{DataFusionError, Result}, - ScalarValue, -}; +use datafusion_common::{error::Result, exec_err, ScalarValue}; use datafusion_expr::{Accumulator, ColumnarValue}; use geo::BooleanOps; use geo_traits::to_geo::ToGeoGeometry; @@ -100,9 +97,7 @@ impl UnionAccumulator { geo::Geometry::Polygon(poly) => geo::MultiPolygon(vec![poly]), geo::Geometry::MultiPolygon(multi) => multi.clone(), _ => { - return Err(DataFusionError::Internal( - "Unsupported geometry type for union operation".to_string(), - )); + return exec_err!("Unsupported geometry type for union operation"); } }; @@ -206,9 +201,7 @@ impl Accumulator for UnionAccumulator { fn merge_batch(&mut self, states: &[ArrayRef]) -> Result<()> { // Check input length (expecting 1 state field) if states.is_empty() { - return Err(DataFusionError::Internal( - "No input arrays provided to accumulator in merge_batch".to_string(), - )); + return sedona_internal_err!("No input arrays provided to accumulator in merge_batch"); } let array = &states[0]; let args = [ColumnarValue::Array(array.clone())]; diff --git a/rust/sedona-raster-functions/src/executor.rs b/rust/sedona-raster-functions/src/executor.rs index 917a11189..e7bbbe9dd 100644 --- a/rust/sedona-raster-functions/src/executor.rs +++ b/rust/sedona-raster-functions/src/executor.rs @@ -17,9 +17,9 @@ use arrow_array::{Array, ArrayRef, StructArray}; use datafusion_common::error::Result; -use datafusion_common::{DataFusionError, ScalarValue}; +use datafusion_common::ScalarValue; use datafusion_expr::ColumnarValue; -use sedona_common::sedona_internal_err; +use sedona_common::{sedona_internal_datafusion_err, sedona_internal_err}; use sedona_raster::array::{RasterRefImpl, RasterStructArray}; use sedona_schema::datatypes::SedonaType; use sedona_schema::datatypes::RASTER; @@ -73,9 +73,7 @@ impl<'a, 'b> RasterExecutor<'a, 'b> { .as_any() .downcast_ref::() .ok_or_else(|| { - DataFusionError::Internal( - "Expected StructArray for raster data".to_string(), - ) + sedona_internal_datafusion_err!("Expected StructArray for raster data") })?; let raster_array = RasterStructArray::new(raster_struct); @@ -103,9 +101,7 @@ impl<'a, 'b> RasterExecutor<'a, 'b> { } } ScalarValue::Null => func(0, None), - _ => Err(DataFusionError::Internal( - "Expected Struct scalar for raster".to_string(), - )), + _ => sedona_internal_err!("Expected Struct scalar for raster"), }, } } diff --git a/rust/sedona-schema/src/datatypes.rs b/rust/sedona-schema/src/datatypes.rs index 4674d34e2..9707302c9 100644 --- a/rust/sedona-schema/src/datatypes.rs +++ b/rust/sedona-schema/src/datatypes.rs @@ -15,8 +15,8 @@ // specific language governing permissions and limitations // under the License. use arrow_schema::{DataType, Field}; -use datafusion_common::error::{DataFusionError, Result}; -use sedona_common::sedona_internal_err; +use datafusion_common::error::Result; +use sedona_common::{sedona_internal_datafusion_err, sedona_internal_err}; use serde_json::Value; use std::fmt::{Debug, Display}; use std::sync::LazyLock; @@ -341,7 +341,7 @@ fn deserialize_edges_and_crs(value: &Option) -> Result<(Edges, Crs)> { } let json_value: Value = serde_json::from_str(val).map_err(|err| { - DataFusionError::Internal(format!("Error deserializing GeoArrow metadata: {err}")) + sedona_internal_datafusion_err!("Error deserializing GeoArrow metadata: {err}") })?; if !json_value.is_object() { return sedona_internal_err!( diff --git a/rust/sedona-spatial-join/src/evaluated_batch/spill.rs b/rust/sedona-spatial-join/src/evaluated_batch/spill.rs index 9d5dd8a84..eddd05746 100644 --- a/rust/sedona-spatial-join/src/evaluated_batch/spill.rs +++ b/rust/sedona-spatial-join/src/evaluated_batch/spill.rs @@ -21,11 +21,11 @@ use arrow::array::Float64Array; use arrow_array::{Array, RecordBatch, StructArray}; use arrow_schema::{DataType, Field, Fields, Schema, SchemaRef}; use datafusion::config::SpillCompression; -use datafusion_common::{DataFusionError, Result, ScalarValue}; +use datafusion_common::{Result, ScalarValue}; use datafusion_execution::{disk_manager::RefCountedTempFile, runtime_env::RuntimeEnv}; use datafusion_expr::ColumnarValue; use datafusion_physical_plan::metrics::SpillMetrics; -use sedona_common::sedona_internal_err; +use sedona_common::{sedona_internal_datafusion_err, sedona_internal_err}; use sedona_schema::datatypes::SedonaType; use crate::{ @@ -191,16 +191,12 @@ pub(crate) fn spilled_batch_to_evaluated_batch( .as_any() .downcast_ref::() .ok_or_else(|| { - DataFusionError::Internal("Expected data column to be a StructArray".to_string()) + sedona_internal_datafusion_err!("Expected data column to be a StructArray") })?; let data_schema = Arc::new(Schema::new(match data_array.data_type() { DataType::Struct(fields) => fields.clone(), - _ => { - return Err(DataFusionError::Internal( - "Expected data column to have Struct data type".to_string(), - )) - } + _ => return sedona_internal_err!("Expected data column to have Struct data type"), })); let data_columns = (0..data_array.num_columns()) @@ -223,7 +219,7 @@ pub(crate) fn spilled_batch_to_evaluated_batch( .as_any() .downcast_ref::() .ok_or_else(|| { - DataFusionError::Internal("Expected dist column to be Float64Array".to_string()) + sedona_internal_datafusion_err!("Expected dist column to be Float64Array") })?; let distance = if !dist_array.is_empty() { @@ -282,7 +278,7 @@ mod tests { use crate::utils::arrow_utils::get_record_batch_memory_size; use arrow_array::{ArrayRef, BinaryArray, Int32Array, StringArray}; use arrow_schema::{DataType, Field, Schema}; - use datafusion_common::Result; + use datafusion_common::{DataFusionError, Result}; use datafusion_execution::runtime_env::RuntimeEnv; use datafusion_physical_plan::metrics::ExecutionPlanMetricsSet; use sedona_schema::datatypes::WKB_GEOMETRY; diff --git a/rust/sedona-spatial-join/src/exec.rs b/rust/sedona-spatial-join/src/exec.rs index 495518ea0..2ed90d735 100644 --- a/rust/sedona-spatial-join/src/exec.rs +++ b/rust/sedona-spatial-join/src/exec.rs @@ -17,7 +17,7 @@ use std::{fmt::Formatter, sync::Arc}; use arrow_schema::SchemaRef; -use datafusion_common::{project_schema, DataFusionError, JoinSide, Result}; +use datafusion_common::{project_schema, JoinSide, Result}; use datafusion_execution::{SendableRecordBatchStream, TaskContext}; use datafusion_expr::{JoinType, Operator}; use datafusion_physical_expr::{ @@ -33,7 +33,7 @@ use datafusion_physical_plan::{ PlanProperties, }; use parking_lot::Mutex; -use sedona_common::SpatialJoinOptions; +use sedona_common::{sedona_internal_err, SpatialJoinOptions}; use crate::{ prepare::{SpatialJoinComponents, SpatialJoinComponentsBuilder}, @@ -93,9 +93,7 @@ fn determine_knn_build_probe_plans<'a>( match knn_pred.probe_side { JoinSide::Left => Ok((right_plan, left_plan)), JoinSide::Right => Ok((left_plan, right_plan)), - JoinSide::None => Err(DataFusionError::Internal( - "KNN join requires explicit probe_side designation".to_string(), - )), + JoinSide::None => sedona_internal_err!("KNN join requires explicit probe_side designation"), } } diff --git a/rust/sedona-spatial-join/src/operand_evaluator.rs b/rust/sedona-spatial-join/src/operand_evaluator.rs index a8b832643..792d1957a 100644 --- a/rust/sedona-spatial-join/src/operand_evaluator.rs +++ b/rust/sedona-spatial-join/src/operand_evaluator.rs @@ -19,9 +19,7 @@ use std::{mem::transmute, sync::Arc}; use arrow_array::{Array, ArrayRef, Float64Array, RecordBatch}; use arrow_schema::DataType; -use datafusion_common::{ - utils::proxy::VecAllocExt, DataFusionError, JoinSide, Result, ScalarValue, -}; +use datafusion_common::{utils::proxy::VecAllocExt, JoinSide, Result, ScalarValue}; use datafusion_expr::ColumnarValue; use datafusion_physical_expr::PhysicalExpr; use float_next_after::NextAfter; @@ -33,6 +31,7 @@ use sedona_schema::datatypes::SedonaType; use wkb::reader::Wkb; use sedona_common::option::SpatialJoinOptions; +use sedona_common::sedona_internal_err; use crate::{ spatial_predicate::{DistancePredicate, KNNPredicate, RelationPredicate, SpatialPredicate}, @@ -266,15 +265,11 @@ impl DistanceOperandEvaluator { } } } else { - return Err(DataFusionError::Internal( - "Distance columnar value is not a Float64Array".to_string(), - )); + return sedona_internal_err!("Distance columnar value is not a Float64Array"); } } _ => { - return Err(DataFusionError::Internal( - "Distance columnar value is not a Float64".to_string(), - )); + return sedona_internal_err!("Distance columnar value is not a Float64"); } } @@ -297,14 +292,10 @@ pub(crate) fn distance_value_at( Ok(Some(array.value(i))) } } else { - Err(DataFusionError::Internal( - "Distance columnar value is not a Float64Array".to_string(), - )) + sedona_internal_err!("Distance columnar value is not a Float64Array") } } - _ => Err(DataFusionError::Internal( - "Distance columnar value is not a Float64".to_string(), - )), + _ => sedona_internal_err!("Distance columnar value is not a Float64"), } } diff --git a/rust/sedona-spatial-join/src/refine/tg.rs b/rust/sedona-spatial-join/src/refine/tg.rs index 01e6a4117..82a781f95 100644 --- a/rust/sedona-spatial-join/src/refine/tg.rs +++ b/rust/sedona-spatial-join/src/refine/tg.rs @@ -22,7 +22,7 @@ use std::{ }, }; -use datafusion_common::{DataFusionError, Result}; +use datafusion_common::Result; use sedona_common::{sedona_internal_err, ExecutionMode, SpatialJoinOptions, TgIndexType}; use sedona_expr::statistics::GeoStatistics; use sedona_tg::tg::{self, BinaryPredicate}; @@ -322,14 +322,10 @@ impl TgPredicateEvaluator for TgPredicateEval fn create_evaluator(predicate: &SpatialPredicate) -> Result> { let evaluator: Box = match predicate { SpatialPredicate::Distance(_) => { - return Err(DataFusionError::Internal( - "Distance predicate is not supported for TG".to_string(), - )) + return sedona_internal_err!("Distance predicate is not supported for TG") } SpatialPredicate::KNearestNeighbors(_) => { - return Err(DataFusionError::Internal( - "KNN predicate is not supported for TG".to_string(), - )) + return sedona_internal_err!("KNN predicate is not supported for TG") } SpatialPredicate::Relation(predicate) => match predicate.relation_type { SpatialRelationType::Intersects => { @@ -347,11 +343,7 @@ fn create_evaluator(predicate: &SpatialPredicate) -> Result::new()) } SpatialRelationType::Equals => Box::new(TgPredicateEvaluatorImpl::::new()), - _ => { - return Err(DataFusionError::Internal( - "Unsupported spatial relation type for TG".to_string(), - )) - } + _ => return sedona_internal_err!("Unsupported spatial relation type for TG"), }, }; Ok(evaluator) diff --git a/rust/sedona/src/show.rs b/rust/sedona/src/show.rs index 0d3b946b1..08ac5c960 100644 --- a/rust/sedona/src/show.rs +++ b/rust/sedona/src/show.rs @@ -21,8 +21,9 @@ use comfy_table::{Cell, CellAlignment, ColumnConstraint, ContentArrangement, Row use datafusion::arrow::util::display::{ArrayFormatter, FormatOptions}; use datafusion::error::Result; use datafusion_common::format::DEFAULT_FORMAT_OPTIONS; -use datafusion_common::{config::ConfigOptions, DataFusionError, ScalarValue}; +use datafusion_common::{config::ConfigOptions, ScalarValue}; use datafusion_expr::{ColumnarValue, ReturnFieldArgs, ScalarFunctionArgs, ScalarUDF}; +use sedona_common::sedona_internal_datafusion_err; use sedona_expr::scalar_udf::SedonaScalarUDF; use sedona_schema::{datatypes::SedonaType, matchers::ArgMatcher}; use std::iter::zip; @@ -45,8 +46,8 @@ pub fn show_batches<'a, W: std::io::Write>( let format_fn = ctx .functions .scalar_udf("sd_format") - .ok_or(DataFusionError::Internal( - "sd_format UDF does not exist".to_string(), + .ok_or(sedona_internal_datafusion_err!( + "sd_format UDF does not exist" ))? .clone(); diff --git a/sedona-cli/Cargo.toml b/sedona-cli/Cargo.toml index 3c3d1dbb1..c856d481a 100644 --- a/sedona-cli/Cargo.toml +++ b/sedona-cli/Cargo.toml @@ -54,6 +54,7 @@ datafusion = { workspace = true, features = [ "unicode_expressions", "compression", ] } +datafusion-common = { workspace = true } env_logger = { workspace = true } futures = { workspace = true } mimalloc = { workspace = true, optional = true } @@ -61,5 +62,6 @@ libmimalloc-sys = { workspace = true, optional = true } regex = { workspace = true } rustyline = "15.0" sedona = { workspace = true, features = ["aws", "gcp", "http", "proj"] } +sedona-common = { workspace = true } sedona-tg = { workspace = true } tokio = { workspace = true, features = ["macros", "rt", "rt-multi-thread", "sync", "parking_lot", "signal"] } diff --git a/sedona-cli/src/print_format.rs b/sedona-cli/src/print_format.rs index 580ee433a..a5946d336 100644 --- a/sedona-cli/src/print_format.rs +++ b/sedona-cli/src/print_format.rs @@ -25,9 +25,10 @@ use arrow::csv::writer::WriterBuilder; use arrow::datatypes::SchemaRef; use arrow::json::{ArrayWriter, LineDelimitedWriter}; use arrow::record_batch::RecordBatch; -use datafusion::error::{DataFusionError, Result}; +use datafusion::error::Result; use sedona::context::SedonaContext; use sedona::show::{show_batches, DisplayMode, DisplayTableOptions}; +use sedona_common::sedona_internal_datafusion_err; /// Allow records to be printed in different formats #[derive(Debug, PartialEq, Eq, clap::ValueEnum, Clone, Copy)] @@ -144,7 +145,7 @@ fn format_batches_with_maxrows( options, )?; let mut formatted_str = String::from_utf8(formatted) - .map_err(|e| DataFusionError::Internal(format!("invalid utf-8 in table: {e}")))?; + .map_err(|e| sedona_internal_datafusion_err!("invalid utf-8 in table: {e}"))?; if over_limit { formatted_str = keep_only_maxrows(&formatted_str, maxrows);