diff --git a/rust/sedona-raster-functions/benches/native-raster-functions.rs b/rust/sedona-raster-functions/benches/native-raster-functions.rs
index eae081acf..37f32236f 100644
--- a/rust/sedona-raster-functions/benches/native-raster-functions.rs
+++ b/rust/sedona-raster-functions/benches/native-raster-functions.rs
@@ -20,6 +20,20 @@ use sedona_testing::benchmark_util::{benchmark, BenchmarkArgSpec::*, BenchmarkAr
 fn criterion_benchmark(c: &mut Criterion) {
     let f = sedona_raster_functions::register::default_function_set();
 
+    benchmark::scalar(
+        c,
+        &f,
+        "native-raster",
+        "rs_bandpath",
+        BenchmarkArgs::Array(Raster(64, 64)),
+    );
+    benchmark::scalar(
+        c,
+        &f,
+        "native-raster",
+        "rs_bandpath",
+        BenchmarkArgs::ArrayScalar(Raster(64, 64), Int32(1, 2)),
+    );
     benchmark::scalar(c, &f, "native-raster", "rs_convexhull", Raster(64, 64));
     benchmark::scalar(c, &f, "native-raster", "rs_crs", Raster(64, 64));
     benchmark::scalar(c, &f, "native-raster", "rs_envelope", Raster(64, 64));
diff --git a/rust/sedona-raster-functions/src/lib.rs b/rust/sedona-raster-functions/src/lib.rs
index 55325b1a7..79b14983f 100644
--- a/rust/sedona-raster-functions/src/lib.rs
+++ b/rust/sedona-raster-functions/src/lib.rs
@@ -16,7 +16,9 @@
 // under the License.
 
 mod executor;
+pub mod raster_utils;
 pub mod register;
+pub mod rs_bandpath;
 pub mod rs_convexhull;
 pub mod rs_envelope;
 pub mod rs_example;
diff --git a/rust/sedona-raster-functions/src/raster_utils.rs b/rust/sedona-raster-functions/src/raster_utils.rs
new file mode 100644
index 000000000..1a0b44bd6
--- /dev/null
+++ b/rust/sedona-raster-functions/src/raster_utils.rs
@@ -0,0 +1,58 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use datafusion_common::error::Result;
+use datafusion_common::exec_err;
+
+/// Validate that a 1-based band index is within `[1, num_bands]`.
+pub fn validate_band_index(band_index: i32, num_bands: usize) -> Result<()> {
+    if band_index < 1 || band_index as usize > num_bands {
+        return exec_err!(
+            "Provided band index {} is not in the range [1, {}]",
+            band_index,
+            num_bands
+        );
+    }
+    Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_validate_band_index_valid() {
+        assert!(validate_band_index(1, 3).is_ok());
+        assert!(validate_band_index(2, 3).is_ok());
+        assert!(validate_band_index(3, 3).is_ok());
+    }
+
+    #[test]
+    fn test_validate_band_index_zero() {
+        assert!(validate_band_index(0, 3).is_err());
+    }
+
+    #[test]
+    fn test_validate_band_index_negative() {
+        assert!(validate_band_index(-1, 3).is_err());
+    }
+
+    #[test]
+    fn test_validate_band_index_out_of_range() {
+        assert!(validate_band_index(4, 3).is_err());
+    }
+}
diff --git a/rust/sedona-raster-functions/src/register.rs b/rust/sedona-raster-functions/src/register.rs
index fc687e1b4..a4d38a26b 100644
--- a/rust/sedona-raster-functions/src/register.rs
+++ b/rust/sedona-raster-functions/src/register.rs
@@ -38,6 +38,7 @@ pub fn default_function_set() -> FunctionSet {
 
     register_scalar_udfs!(
         function_set,
+        crate::rs_bandpath::rs_bandpath_udf,
         crate::rs_convexhull::rs_convexhull_udf,
         crate::rs_envelope::rs_envelope_udf,
         crate::rs_example::rs_example_udf,
diff --git a/rust/sedona-raster-functions/src/rs_bandpath.rs b/rust/sedona-raster-functions/src/rs_bandpath.rs
new file mode 100644
index 000000000..16d9b79b7
--- /dev/null
+++ b/rust/sedona-raster-functions/src/rs_bandpath.rs
@@ -0,0 +1,395 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+use std::{sync::Arc, vec};
+
+use crate::executor::RasterExecutor;
+use arrow_array::builder::StringBuilder;
+use arrow_array::{cast::AsArray, types::Int32Type, Array};
+use arrow_schema::DataType;
+use datafusion_common::error::Result;
+use datafusion_expr::{ColumnarValue, Volatility};
+use sedona_expr::scalar_udf::{SedonaScalarKernel, SedonaScalarUDF};
+use sedona_raster::traits::RasterRef;
+use sedona_schema::raster::StorageType;
+use sedona_schema::{datatypes::SedonaType, matchers::ArgMatcher};
+
+/// RS_BandPath() scalar UDF implementation
+///
+/// Returns the path to the raster file referenced by the out-db band.
+/// If the band is an in-db band, this function returns null.
+/// Accepts an optional band_index parameter (1-based, default is 1).
+pub fn rs_bandpath_udf() -> SedonaScalarUDF {
+    SedonaScalarUDF::new(
+        "rs_bandpath",
+        vec![
+            Arc::new(RsBandPath {}),
+            Arc::new(RsBandPathWithBandIndex {}),
+        ],
+        Volatility::Immutable,
+    )
+}
+
+/// One-argument kernel: RS_BandPath(raster) - uses band 1 by default
+#[derive(Debug)]
+struct RsBandPath {}
+
+/// Bytes reserved per output row when pre-sizing the string builder.
+const PREALLOC_SIZE_PER_PATH: usize = 256;
+
+impl SedonaScalarKernel for RsBandPath {
+    fn return_type(&self, args: &[SedonaType]) -> Result<Option<SedonaType>> {
+        let matcher = ArgMatcher::new(
+            vec![ArgMatcher::is_raster()],
+            SedonaType::Arrow(DataType::Utf8),
+        );
+        matcher.match_args(args)
+    }
+
+    fn invoke_batch(
+        &self,
+        arg_types: &[SedonaType],
+        args: &[ColumnarValue],
+    ) -> Result<ColumnarValue> {
+        let executor = RasterExecutor::new(arg_types, args);
+
+        let preallocate_bytes = PREALLOC_SIZE_PER_PATH * executor.num_iterations();
+        let mut builder =
+            StringBuilder::with_capacity(executor.num_iterations(), preallocate_bytes);
+
+        executor
+            .execute_raster_void(|_i, raster_opt| get_band_path(raster_opt, 1, &mut builder))?;
+
+        executor.finish(Arc::new(builder.finish()))
+    }
+}
+
+/// Two-argument kernel: RS_BandPath(raster, band_index)
+#[derive(Debug)]
+struct RsBandPathWithBandIndex {}
+
+impl SedonaScalarKernel for RsBandPathWithBandIndex {
+    fn return_type(&self, args: &[SedonaType]) -> Result<Option<SedonaType>> {
+        let matcher = ArgMatcher::new(
+            vec![ArgMatcher::is_raster(), ArgMatcher::is_integer()],
+            SedonaType::Arrow(DataType::Utf8),
+        );
+        matcher.match_args(args)
+    }
+
+    fn invoke_batch(
+        &self,
+        arg_types: &[SedonaType],
+        args: &[ColumnarValue],
+    ) -> Result<ColumnarValue> {
+        let executor = RasterExecutor::new(arg_types, args);
+
+        // The matcher is `is_integer()`, not Int32-specific, so normalise to Int32 before the
+        // typed downcast below (`as_primitive` panics on a type mismatch), then expand to an array
+        let band_index_array = args[1]
+            .cast_to(&DataType::Int32, None)?
+            .into_array(executor.num_iterations())?;
+        let band_index_array = band_index_array.as_primitive::<Int32Type>();
+
+        let preallocate_bytes = PREALLOC_SIZE_PER_PATH * executor.num_iterations();
+        let mut builder =
+            StringBuilder::with_capacity(executor.num_iterations(), preallocate_bytes);
+
+        executor.execute_raster_void(|i, raster_opt| {
+            let band_index = if band_index_array.is_null(i) {
+                1 // Default to band 1 if null
+            } else {
+                band_index_array.value(i)
+            };
+            get_band_path(raster_opt, band_index, &mut builder)
+        })?;
+
+        executor.finish(Arc::new(builder.finish()))
+    }
+}
+
+/// Append the out-db path of band `band_index` (1-based) of `raster_opt` to `builder`.
+///
+/// Appends null when the raster is null, when `band_index` is outside
+/// `[1, number of bands]`, when the band is not stored as `OutDbRef`, or when
+/// an `OutDbRef` band carries no URL.
+fn get_band_path(
+    raster_opt: Option<&sedona_raster::array::RasterRefImpl<'_>>,
+    band_index: i32,
+    builder: &mut StringBuilder,
+) -> Result<()> {
+    match raster_opt {
+        None => builder.append_null(),
+        Some(raster) => {
+            let bands = raster.bands();
+            let num_bands = bands.len() as i32;
+            if band_index < 1 || band_index > num_bands {
+                builder.append_null();
+            } else {
+                let band = bands.band(band_index as usize)?;
+                let band_metadata = band.metadata();
+
+                if band_metadata.storage_type()? == StorageType::OutDbRef {
+                    match band_metadata.outdb_url() {
+                        Some(url) => builder.append_value(url),
+                        None => builder.append_null(),
+                    }
+                } else {
+                    builder.append_null()
+                }
+            }
+        }
+    }
+    Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use arrow_array::{Array, Int32Array, StringArray};
+    use datafusion_common::ScalarValue;
+    use datafusion_expr::ScalarUDF;
+    use sedona_schema::datatypes::RASTER;
+    use sedona_testing::rasters::generate_test_rasters;
+    use sedona_testing::testers::ScalarUdfTester;
+
+    #[test]
+    fn udf_metadata() {
+        let udf: ScalarUDF = rs_bandpath_udf().into();
+        assert_eq!(udf.name(), "rs_bandpath");
+    }
+
+    #[test]
+    fn udf_bandpath_indb_rasters_default_band() {
+        let udf: ScalarUDF = rs_bandpath_udf().into();
+        let tester = ScalarUdfTester::new(udf, vec![RASTER]);
+
+        tester.assert_return_type(DataType::Utf8);
+
+        // Test with in-db rasters - should all return null (default band_index = 1)
+        let rasters = generate_test_rasters(3, Some(1)).unwrap();
+        let result = tester.invoke_array(Arc::new(rasters)).unwrap();
+
+        let string_array = result
+            .as_any()
+            .downcast_ref::<StringArray>()
+            .expect("Expected StringArray");
+
+        // All in-db rasters should return null
+        assert!(string_array.is_null(0));
+        assert!(string_array.is_null(1));
+        assert!(string_array.is_null(2));
+    }
+
+    #[test]
+    fn udf_bandpath_indb_rasters_with_band_index() {
+        let udf: ScalarUDF = rs_bandpath_udf().into();
+        let tester = ScalarUdfTester::new(udf, vec![RASTER, SedonaType::Arrow(DataType::Int32)]);
+
+        tester.assert_return_type(DataType::Utf8);
+
+        // Test with in-db rasters and explicit band index
+        let rasters = generate_test_rasters(3, Some(3)).unwrap(); // 3 bands
+        let band_indices = Int32Array::from(vec![1, 2, 3]);
+        let result = tester
+            .invoke_arrays(vec![Arc::new(rasters), Arc::new(band_indices)])
+            .unwrap();
+
+        let string_array = result
+            .as_any()
+            .downcast_ref::<StringArray>()
+            .expect("Expected StringArray");
+
+        // All in-db bands should return null regardless of band index
+        assert!(string_array.is_null(0));
+        assert!(string_array.is_null(1));
+        assert!(string_array.is_null(2));
+    }
+
+    #[test]
+    fn udf_bandpath_invalid_band_index() {
+        let udf: ScalarUDF = rs_bandpath_udf().into();
+        let tester = ScalarUdfTester::new(udf, vec![RASTER, SedonaType::Arrow(DataType::Int32)]);
+
+        // Test with invalid band indices (out of range)
+        let rasters = generate_test_rasters(3, Some(2)).unwrap(); // 2 bands
+        let band_indices = Int32Array::from(vec![0, 3, -1]); // All invalid indices
+        let result = tester
+            .invoke_arrays(vec![Arc::new(rasters), Arc::new(band_indices)])
+            .unwrap();
+
+        let string_array = result
+            .as_any()
+            .downcast_ref::<StringArray>()
+            .expect("Expected StringArray");
+
+        // Invalid band indices should return null
+        assert!(string_array.is_null(0)); // band 0 is invalid (1-based)
+        assert!(string_array.is_null(1)); // band 3 is out of range
+        assert!(string_array.is_null(2)); // negative band index is invalid
+    }
+
+    /// Build a raster array with out-db bands for testing RS_BandPath.
+    /// Returns a StructArray with 3 rasters:
+    ///   [0] OutDbRef band with URL "s3://bucket/raster_0.tif"
+    ///   [1] null raster
+    ///   [2] Two bands: InDb band 1, OutDbRef band 2 with URL "s3://bucket/raster_2.tif"
+    fn build_outdb_rasters() -> arrow_array::StructArray {
+        use sedona_raster::builder::RasterBuilder;
+        use sedona_raster::traits::{BandMetadata, RasterMetadata};
+        use sedona_schema::raster::{BandDataType, StorageType};
+
+        let metadata = RasterMetadata {
+            width: 4,
+            height: 4,
+            upperleft_x: 0.0,
+            upperleft_y: 0.0,
+            scale_x: 1.0,
+            scale_y: -1.0,
+            skew_x: 0.0,
+            skew_y: 0.0,
+        };
+
+        let mut builder = RasterBuilder::new(3);
+
+        // Raster 0: single OutDbRef band
+        builder.start_raster(&metadata, Some("EPSG:4326")).unwrap();
+        builder
+            .start_band(BandMetadata {
+                nodata_value: None,
+                storage_type: StorageType::OutDbRef,
+                datatype: BandDataType::Float32,
+                outdb_url: Some("s3://bucket/raster_0.tif".to_string()),
+                outdb_band_id: Some(1),
+            })
+            .unwrap();
+        builder.band_data_writer().append_value([]);
+        builder.finish_band().unwrap();
+        builder.finish_raster().unwrap();
+
+        // Raster 1: null
+        builder.append_null().unwrap();
+
+        // Raster 2: two bands — InDb (band 1) + OutDbRef (band 2)
+        builder.start_raster(&metadata, Some("EPSG:4326")).unwrap();
+        builder
+            .start_band(BandMetadata {
+                nodata_value: None,
+                storage_type: StorageType::InDb,
+                datatype: BandDataType::UInt8,
+                outdb_url: None,
+                outdb_band_id: None,
+            })
+            .unwrap();
+        builder.band_data_writer().append_value([0u8; 16]);
+        builder.finish_band().unwrap();
+        builder
+            .start_band(BandMetadata {
+                nodata_value: None,
+                storage_type: StorageType::OutDbRef,
+                datatype: BandDataType::Float32,
+                outdb_url: Some("s3://bucket/raster_2.tif".to_string()),
+                outdb_band_id: Some(3),
+            })
+            .unwrap();
+        builder.band_data_writer().append_value([]);
+        builder.finish_band().unwrap();
+        builder.finish_raster().unwrap();
+
+        builder.finish().unwrap()
+    }
+
+    #[test]
+    fn udf_bandpath_outdb_rasters_default_band() {
+        let udf: ScalarUDF = rs_bandpath_udf().into();
+        let tester = ScalarUdfTester::new(udf, vec![RASTER]);
+
+        let rasters = build_outdb_rasters();
+        let result = tester.invoke_array(Arc::new(rasters)).unwrap();
+
+        let string_array = result
+            .as_any()
+            .downcast_ref::<StringArray>()
+            .expect("Expected StringArray");
+
+        // Raster 0: OutDbRef band 1 → returns URL
+        assert!(!string_array.is_null(0));
+        assert_eq!(string_array.value(0), "s3://bucket/raster_0.tif");
+        // Raster 1: null raster → null
+        assert!(string_array.is_null(1));
+        // Raster 2: band 1 is InDb → null
+        assert!(string_array.is_null(2));
+    }
+
+    #[test]
+    fn udf_bandpath_outdb_rasters_with_band_index() {
+        let udf: ScalarUDF = rs_bandpath_udf().into();
+        let tester = ScalarUdfTester::new(udf, vec![RASTER, SedonaType::Arrow(DataType::Int32)]);
+
+        let rasters = build_outdb_rasters();
+        // Ask for band 1, band 1, band 2 respectively
+        let band_indices = Int32Array::from(vec![1, 1, 2]);
+        let result = tester
+            .invoke_arrays(vec![Arc::new(rasters), Arc::new(band_indices)])
+            .unwrap();
+
+        let string_array = result
+            .as_any()
+            .downcast_ref::<StringArray>()
+            .expect("Expected StringArray");
+
+        // Raster 0, band 1: OutDbRef → URL
+        assert_eq!(string_array.value(0), "s3://bucket/raster_0.tif");
+        // Raster 1: null raster → null
+        assert!(string_array.is_null(1));
+        // Raster 2, band 2: OutDbRef → URL
+        assert_eq!(string_array.value(2), "s3://bucket/raster_2.tif");
+    }
+
+    #[test]
+    fn udf_bandpath_int64_band_index() {
+        use arrow_array::Int64Array;
+
+        let udf: ScalarUDF = rs_bandpath_udf().into();
+        let tester = ScalarUdfTester::new(udf, vec![RASTER, SedonaType::Arrow(DataType::Int64)]);
+
+        // A non-Int32 integer band index must be cast, not panic in the downcast
+        let rasters = build_outdb_rasters();
+        let band_indices = Int64Array::from(vec![1_i64, 1, 2]);
+        let result = tester
+            .invoke_arrays(vec![Arc::new(rasters), Arc::new(band_indices)])
+            .unwrap();
+
+        let string_array = result
+            .as_any()
+            .downcast_ref::<StringArray>()
+            .expect("Expected StringArray");
+
+        assert_eq!(string_array.value(0), "s3://bucket/raster_0.tif");
+        assert!(string_array.is_null(1));
+        assert_eq!(string_array.value(2), "s3://bucket/raster_2.tif");
+    }
+
+    #[test]
+    fn udf_bandpath_null_scalar() {
+        let udf: ScalarUDF = rs_bandpath_udf().into();
+        let tester = ScalarUdfTester::new(udf, vec![RASTER]);
+
+        // Test with null scalar
+        let result = tester.invoke_scalar(ScalarValue::Null).unwrap();
+        tester.assert_scalar_result_equals(result, ScalarValue::Utf8(None));
+    }
+}
diff --git a/rust/sedona-raster/src/traits.rs b/rust/sedona-raster/src/traits.rs
index 98010d92d..f8541ff33 100644
--- a/rust/sedona-raster/src/traits.rs
+++ b/rust/sedona-raster/src/traits.rs
@@ -108,6 +108,57 @@ pub trait BandMetadataRef {
     fn outdb_url(&self) -> Option<&str>;
     /// OutDb band ID (only used when storage_type == OutDbRef)
     fn outdb_band_id(&self) -> Option<u32>;
+
+    /// No-data value interpreted as f64.
+    ///
+    /// Returns `Ok(None)` when no nodata value is defined, `Ok(Some(f64))` on
+    /// success, or an error when the raw bytes have an unexpected length for
+    /// the band's data type.
+    fn nodata_value_as_f64(&self) -> Result<Option<f64>, ArrowError> {
+        let bytes = match self.nodata_value() {
+            Some(b) => b,
+            None => return Ok(None),
+        };
+        let dt = self.data_type()?;
+        nodata_bytes_to_f64(bytes, &dt).map(Some)
+    }
+}
+
+/// Convert raw nodata bytes to f64 given a [`BandDataType`].
+///
+/// The bytes are expected to be in little-endian order and exactly match the
+/// byte size of the data type; any other length yields
+/// [`ArrowError::InvalidArgumentError`].
+///
+/// `UInt64`/`Int64` values are converted with `as f64`, so magnitudes above
+/// 2^53 may be rounded.
+fn nodata_bytes_to_f64(bytes: &[u8], dt: &BandDataType) -> Result<f64, ArrowError> {
+    macro_rules! read_le {
+        ($t:ty, $n:expr) => {{
+            let arr: [u8; $n] = bytes.try_into().map_err(|_| {
+                ArrowError::InvalidArgumentError(format!(
+                    "Invalid nodata byte length for {:?}: expected {}, got {}",
+                    dt,
+                    $n,
+                    bytes.len()
+                ))
+            })?;
+            Ok(<$t>::from_le_bytes(arr) as f64)
+        }};
+    }
+
+    match dt {
+        BandDataType::UInt8 => read_le!(u8, 1),
+        BandDataType::Int8 => read_le!(i8, 1),
+        BandDataType::UInt16 => read_le!(u16, 2),
+        BandDataType::Int16 => read_le!(i16, 2),
+        BandDataType::UInt32 => read_le!(u32, 4),
+        BandDataType::Int32 => read_le!(i32, 4),
+        BandDataType::UInt64 => read_le!(u64, 8),
+        BandDataType::Int64 => read_le!(i64, 8),
+        BandDataType::Float32 => read_le!(f32, 4),
+        BandDataType::Float64 => read_le!(f64, 8),
+    }
 }
 
 /// Trait for iterating over bands within a raster
@@ -118,3 +169,46 @@ pub trait BandIterator<'a>: Iterator<Item = Box<dyn BandRef + 'a>> {
         self.len() == 0
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_nodata_bytes_to_f64_uint8() {
+        let val = nodata_bytes_to_f64(&[42], &BandDataType::UInt8).unwrap();
+        assert_eq!(val, 42.0);
+    }
+
+    #[test]
+    fn test_nodata_bytes_to_f64_int8() {
+        let val = nodata_bytes_to_f64(&[0xFE], &BandDataType::Int8).unwrap();
+        assert_eq!(val, -2.0);
+    }
+
+    #[test]
+    fn test_nodata_bytes_to_f64_float64() {
+        let bytes = (-9999.0_f64).to_le_bytes();
+        let val = nodata_bytes_to_f64(&bytes, &BandDataType::Float64).unwrap();
+        assert_eq!(val, -9999.0);
+    }
+
+    #[test]
+    fn test_nodata_bytes_to_f64_int32() {
+        let bytes = (-1_i32).to_le_bytes();
+        let val = nodata_bytes_to_f64(&bytes, &BandDataType::Int32).unwrap();
+        assert_eq!(val, -1.0);
+    }
+
+    #[test]
+    fn test_nodata_bytes_to_f64_wrong_length() {
+        let result = nodata_bytes_to_f64(&[1, 2, 3], &BandDataType::Float64);
+        assert!(result.is_err());
+    }
+
+    #[test]
+    fn test_nodata_bytes_to_f64_single_byte_wrong_length() {
+        assert!(nodata_bytes_to_f64(&[], &BandDataType::UInt8).is_err());
+        assert!(nodata_bytes_to_f64(&[1, 2], &BandDataType::Int8).is_err());
+    }
+}
diff --git a/rust/sedona-schema/src/raster.rs b/rust/sedona-schema/src/raster.rs
index f1bc0283a..b5b8745c4 100644
--- a/rust/sedona-schema/src/raster.rs
+++ b/rust/sedona-schema/src/raster.rs
@@ -105,6 +105,35 @@ pub enum BandDataType {
     Int8 = 10,
 }
 
+impl BandDataType {
+    /// Byte size of a single pixel for this data type.
+    pub fn byte_size(&self) -> usize {
+        match self {
+            BandDataType::UInt8 | BandDataType::Int8 => 1,
+            BandDataType::UInt16 | BandDataType::Int16 => 2,
+            BandDataType::UInt32 | BandDataType::Int32 | BandDataType::Float32 => 4,
+            BandDataType::UInt64 | BandDataType::Int64 | BandDataType::Float64 => 8,
+        }
+    }
+
+    /// Java/Sedona-compatible pixel type name (e.g. `"UNSIGNED_8BITS"`).
+    pub fn pixel_type_name(&self) -> &'static str {
+        match self {
+            BandDataType::UInt8 => "UNSIGNED_8BITS",
+            BandDataType::UInt16 => "UNSIGNED_16BITS",
+            BandDataType::Int16 => "SIGNED_16BITS",
+            BandDataType::Int32 => "SIGNED_32BITS",
+            BandDataType::Float32 => "REAL_32BITS",
+            BandDataType::Float64 => "REAL_64BITS",
+            // Extra types present in Rust but not in Java Sedona
+            BandDataType::UInt32 => "UNSIGNED_32BITS",
+            BandDataType::UInt64 => "UNSIGNED_64BITS",
+            BandDataType::Int64 => "SIGNED_64BITS",
+            BandDataType::Int8 => "SIGNED_8BITS",
+        }
+    }
+}
+
 /// Storage strategy for raster band data within Apache Arrow arrays.
 ///
 /// This enum defines how raster data is physically stored and accessed:
@@ -333,4 +362,32 @@ mod tests {
             panic!("Expected Struct type for band");
         }
     }
+
+    #[test]
+    fn test_band_data_type_byte_size() {
+        assert_eq!(BandDataType::UInt8.byte_size(), 1);
+        assert_eq!(BandDataType::Int8.byte_size(), 1);
+        assert_eq!(BandDataType::UInt16.byte_size(), 2);
+        assert_eq!(BandDataType::Int16.byte_size(), 2);
+        assert_eq!(BandDataType::UInt32.byte_size(), 4);
+        assert_eq!(BandDataType::Int32.byte_size(), 4);
+        assert_eq!(BandDataType::Float32.byte_size(), 4);
+        assert_eq!(BandDataType::UInt64.byte_size(), 8);
+        assert_eq!(BandDataType::Int64.byte_size(), 8);
+        assert_eq!(BandDataType::Float64.byte_size(), 8);
+    }
+
+    #[test]
+    fn test_band_data_type_pixel_type_name() {
+        assert_eq!(BandDataType::UInt8.pixel_type_name(), "UNSIGNED_8BITS");
+        assert_eq!(BandDataType::Int8.pixel_type_name(), "SIGNED_8BITS");
+        assert_eq!(BandDataType::UInt16.pixel_type_name(), "UNSIGNED_16BITS");
+        assert_eq!(BandDataType::Int16.pixel_type_name(), "SIGNED_16BITS");
+        assert_eq!(BandDataType::UInt32.pixel_type_name(), "UNSIGNED_32BITS");
+        assert_eq!(BandDataType::Int32.pixel_type_name(), "SIGNED_32BITS");
+        assert_eq!(BandDataType::Float32.pixel_type_name(), "REAL_32BITS");
+        assert_eq!(BandDataType::UInt64.pixel_type_name(), "UNSIGNED_64BITS");
+        assert_eq!(BandDataType::Int64.pixel_type_name(), "SIGNED_64BITS");
+        assert_eq!(BandDataType::Float64.pixel_type_name(), "REAL_64BITS");
+    }
 }