Skip to content

Commit f4b72ef

Browse files
committed
add a flag in splitter to skip projection
1 parent 2cd60b0 commit f4b72ef

File tree

3 files changed

+230
-46
lines changed

3 files changed

+230
-46
lines changed

crates/iceberg/src/arrow/record_batch_partition_splitter.rs

Lines changed: 225 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,9 @@ use crate::spec::{Literal, PartitionKey, PartitionSpecRef, SchemaRef, Struct, St
3131
use crate::transform::{BoxedTransformFunction, create_transform_function};
3232
use crate::{Error, ErrorKind, Result};
3333

34+
/// Name of the struct column that carries pre-computed partition values.
/// `RecordBatchPartitionSplitter` looks this column up in the input batch
/// when it is constructed with `has_partition_column == true`, instead of
/// projecting and transforming the source columns itself.
pub const PROJECTED_PARTITION_VALUE_COLUMN: &str = "_partition";
36+
3437
/// The splitter used to split the record batch into multiple record batches by the partition spec.
3538
/// 1. It will project and transform the input record batch based on the partition spec, get the partitioned record batch.
3639
/// 2. Split the input record batch into multiple record batches based on the partitioned record batch.
@@ -40,11 +43,12 @@ use crate::{Error, ErrorKind, Result};
4043
pub struct RecordBatchPartitionSplitter {
    /// Iceberg schema of the incoming record batches.
    schema: SchemaRef,
    /// Partition spec used to derive partition values from the batch.
    partition_spec: PartitionSpecRef,
    /// Projects the partition source columns out of the input batch.
    /// `None` when `has_partition_column` is true, since the partition
    /// values are then read from the pre-computed `_partition` column
    /// instead of being derived from source columns.
    projector: Option<RecordBatchProjector>,
    /// One transform function per partition field, applied to the projected
    /// source columns; empty when `has_partition_column` is true.
    transform_functions: Vec<BoxedTransformFunction>,

    /// Iceberg struct type describing the partition tuple.
    partition_type: StructType,
    /// Arrow equivalent of `partition_type`.
    partition_arrow_type: DataType,
    /// If true, input batches are expected to carry a pre-computed
    /// partition struct column named `PROJECTED_PARTITION_VALUE_COLUMN`.
    has_partition_column: bool,
}
4953

5054
// # TODO
@@ -58,6 +62,7 @@ impl RecordBatchPartitionSplitter {
5862
/// * `input_schema` - The Arrow schema of the input record batches
5963
/// * `iceberg_schema` - The Iceberg schema reference
6064
/// * `partition_spec` - The partition specification reference
65+
/// * `has_partition_column` - If true, expects a pre-computed partition column in the input batch
6166
///
6267
/// # Returns
6368
///
@@ -66,47 +71,55 @@ impl RecordBatchPartitionSplitter {
6671
input_schema: ArrowSchemaRef,
6772
iceberg_schema: SchemaRef,
6873
partition_spec: PartitionSpecRef,
74+
has_partition_column: bool,
6975
) -> Result<Self> {
70-
let projector = RecordBatchProjector::new(
71-
input_schema,
72-
&partition_spec
73-
.fields()
74-
.iter()
75-
.map(|field| field.source_id)
76-
.collect::<Vec<_>>(),
77-
// The source columns, selected by ids, must be a primitive type and cannot be contained in a map or list, but may be nested in a struct.
78-
// ref: https://iceberg.apache.org/spec/#partitioning
79-
|field| {
80-
if !field.data_type().is_primitive() {
81-
return Ok(None);
82-
}
83-
field
84-
.metadata()
85-
.get(PARQUET_FIELD_ID_META_KEY)
86-
.map(|s| {
87-
s.parse::<i64>()
88-
.map_err(|e| Error::new(ErrorKind::Unexpected, e.to_string()))
89-
})
90-
.transpose()
91-
},
92-
|_| true,
93-
)?;
94-
let transform_functions = partition_spec
95-
.fields()
96-
.iter()
97-
.map(|field| create_transform_function(&field.transform))
98-
.collect::<Result<Vec<_>>>()?;
99-
10076
let partition_type = partition_spec.partition_type(&iceberg_schema)?;
10177
let partition_arrow_type = type_to_arrow_type(&Type::Struct(partition_type.clone()))?;
10278

79+
let (projector, transform_functions) = if has_partition_column {
80+
// Skip projector and transform initialization when partition column is pre-computed
81+
(None, Vec::new())
82+
} else {
83+
let projector = RecordBatchProjector::new(
84+
input_schema,
85+
&partition_spec
86+
.fields()
87+
.iter()
88+
.map(|field| field.source_id)
89+
.collect::<Vec<_>>(),
90+
// The source columns, selected by ids, must be a primitive type and cannot be contained in a map or list, but may be nested in a struct.
91+
// ref: https://iceberg.apache.org/spec/#partitioning
92+
|field| {
93+
if !field.data_type().is_primitive() {
94+
return Ok(None);
95+
}
96+
field
97+
.metadata()
98+
.get(PARQUET_FIELD_ID_META_KEY)
99+
.map(|s| {
100+
s.parse::<i64>()
101+
.map_err(|e| Error::new(ErrorKind::Unexpected, e.to_string()))
102+
})
103+
.transpose()
104+
},
105+
|_| true,
106+
)?;
107+
let transform_functions = partition_spec
108+
.fields()
109+
.iter()
110+
.map(|field| create_transform_function(&field.transform))
111+
.collect::<Result<Vec<_>>>()?;
112+
(Some(projector), transform_functions)
113+
};
114+
103115
Ok(Self {
104116
schema: iceberg_schema,
105117
partition_spec,
106118
projector,
107119
transform_functions,
108120
partition_type,
109121
partition_arrow_type,
122+
has_partition_column,
110123
})
111124
}
112125

@@ -153,14 +166,66 @@ impl RecordBatchPartitionSplitter {
153166

154167
/// Split the record batch into multiple record batches based on the partition spec.
155168
pub fn split(&self, batch: &RecordBatch) -> Result<Vec<(PartitionKey, RecordBatch)>> {
156-
let source_columns = self.projector.project_column(batch.columns())?;
157-
let partition_columns = source_columns
158-
.into_iter()
159-
.zip_eq(self.transform_functions.iter())
160-
.map(|(source_column, transform_function)| transform_function.transform(source_column))
161-
.collect::<Result<Vec<_>>>()?;
169+
let partition_structs = if self.has_partition_column {
170+
// Extract partition values from pre-computed partition column
171+
let partition_column = batch
172+
.column_by_name(PROJECTED_PARTITION_VALUE_COLUMN)
173+
.ok_or_else(|| {
174+
Error::new(
175+
ErrorKind::DataInvalid,
176+
format!(
177+
"Partition column '{}' not found in batch",
178+
PROJECTED_PARTITION_VALUE_COLUMN
179+
),
180+
)
181+
})?;
182+
183+
let partition_struct_array = partition_column
184+
.as_any()
185+
.downcast_ref::<StructArray>()
186+
.ok_or_else(|| {
187+
Error::new(
188+
ErrorKind::DataInvalid,
189+
"Partition column is not a StructArray",
190+
)
191+
})?;
192+
193+
let arrow_struct_array = Arc::new(partition_struct_array.clone()) as ArrayRef;
194+
let struct_array = arrow_struct_to_literal(&arrow_struct_array, &self.partition_type)?;
195+
196+
struct_array
197+
.into_iter()
198+
.map(|s| {
199+
if let Some(Literal::Struct(s)) = s {
200+
Ok(s)
201+
} else {
202+
Err(Error::new(
203+
ErrorKind::DataInvalid,
204+
"Partition value is not a struct literal or is null",
205+
))
206+
}
207+
})
208+
.collect::<Result<Vec<_>>>()?
209+
} else {
210+
// Compute partition values from source columns
211+
let projector = self.projector.as_ref().ok_or_else(|| {
212+
Error::new(
213+
ErrorKind::DataInvalid,
214+
"Projector not initialized for non-partition-column mode",
215+
)
216+
})?;
217+
218+
let source_columns = projector.project_column(batch.columns())?;
219+
let partition_columns = source_columns
220+
.into_iter()
221+
.zip_eq(self.transform_functions.iter())
222+
.map(|(source_column, transform_function)| {
223+
transform_function.transform(source_column)
224+
})
225+
.collect::<Result<Vec<_>>>()?;
162226

163-
let partition_structs = self.partition_columns_to_struct(partition_columns)?;
227+
self.partition_columns_to_struct(partition_columns)?
228+
};
164229

165230
// Group the batch by row value.
166231
let mut group_ids = HashMap::new();
@@ -246,9 +311,13 @@ mod tests {
246311
.unwrap(),
247312
);
248313
let input_schema = Arc::new(schema_to_arrow_schema(&schema).unwrap());
249-
let partition_splitter =
250-
RecordBatchPartitionSplitter::new(input_schema.clone(), schema.clone(), partition_spec)
251-
.expect("Failed to create splitter");
314+
let partition_splitter = RecordBatchPartitionSplitter::new(
315+
input_schema.clone(),
316+
schema.clone(),
317+
partition_spec,
318+
false,
319+
)
320+
.expect("Failed to create splitter");
252321

253322
let id_array = Int32Array::from(vec![1, 2, 1, 3, 2, 3, 1]);
254323
let data_array = StringArray::from(vec!["a", "b", "c", "d", "e", "f", "g"]);
@@ -319,4 +388,119 @@ mod tests {
319388
Struct::from_iter(vec![Some(Literal::int(3))]),
320389
]);
321390
}
391+
392+
// Verifies the `has_partition_column == true` path: the splitter must read
// partition values from the pre-computed `_partition` struct column rather
// than projecting/transforming source columns, and split the batch into one
// batch per distinct partition value.
#[test]
fn test_record_batch_partition_split_with_partition_column() {
    use arrow_array::StructArray;
    use arrow_schema::{Field, Schema as ArrowSchema};

    // Iceberg schema: (id: int, name: string).
    let schema = Arc::new(
        Schema::builder()
            .with_fields(vec![
                NestedField::required(
                    1,
                    "id",
                    Type::Primitive(crate::spec::PrimitiveType::Int),
                )
                .into(),
                NestedField::required(
                    2,
                    "name",
                    Type::Primitive(crate::spec::PrimitiveType::String),
                )
                .into(),
            ])
            .build()
            .unwrap(),
    );
    // NOTE(review): the field is named "id_bucket" but the transform is
    // Identity, not Bucket — the name mirrors the sibling test above; consider
    // renaming to "id_identity" (or similar) for clarity.
    let partition_spec = Arc::new(
        PartitionSpecBuilder::new(schema.clone())
            .with_spec_id(1)
            .add_unbound_field(UnboundPartitionField {
                source_id: 1,
                field_id: None,
                name: "id_bucket".to_string(),
                transform: Transform::Identity,
            })
            .unwrap()
            .build()
            .unwrap(),
    );

    // Create input schema with _partition column
    // Note: partition field IDs start from 1000 by default
    let partition_field = Field::new("id_bucket", DataType::Int32, false).with_metadata(
        HashMap::from([(PARQUET_FIELD_ID_META_KEY.to_string(), "1000".to_string())]),
    );
    let partition_struct_field = Field::new(
        PROJECTED_PARTITION_VALUE_COLUMN,
        DataType::Struct(vec![partition_field.clone()].into()),
        false,
    );

    // Arrow input schema: the data columns plus the pre-computed partition
    // struct column appended last.
    let input_schema = Arc::new(ArrowSchema::new(vec![
        Field::new("id", DataType::Int32, false),
        Field::new("name", DataType::Utf8, false),
        partition_struct_field,
    ]));

    // Create splitter with has_partition_column=true
    let partition_splitter = RecordBatchPartitionSplitter::new(
        input_schema.clone(),
        schema.clone(),
        partition_spec,
        true,
    )
    .expect("Failed to create splitter");

    // Create test data with pre-computed partition column
    let id_array = Int32Array::from(vec![1, 2, 1, 3, 2, 3, 1]);
    let data_array = StringArray::from(vec!["a", "b", "c", "d", "e", "f", "g"]);

    // Create partition column (same values as id for Identity transform)
    let partition_values = Int32Array::from(vec![1, 2, 1, 3, 2, 3, 1]);
    let partition_struct = StructArray::from(vec![(
        Arc::new(partition_field),
        Arc::new(partition_values) as ArrayRef,
    )]);

    let batch = RecordBatch::try_new(input_schema.clone(), vec![
        Arc::new(id_array),
        Arc::new(data_array),
        Arc::new(partition_struct),
    ])
    .expect("Failed to create RecordBatch");

    // Split using the pre-computed partition column
    let mut partitioned_batches = partition_splitter
        .split(&batch)
        .expect("Failed to split RecordBatch");

    // Sort by the (single, int) partition value so assertions below are
    // independent of split ordering.
    partitioned_batches.sort_by_key(|(partition_key, _)| {
        if let PrimitiveLiteral::Int(i) = partition_key.data().fields()[0]
            .as_ref()
            .unwrap()
            .as_primitive_literal()
            .unwrap()
        {
            i
        } else {
            panic!("The partition value is not a int");
        }
    });

    // Input had ids {1, 2, 3}, so three partitions are expected.
    assert_eq!(partitioned_batches.len(), 3);

    // Verify partition values
    // (shadows the earlier `partition_values` Int32Array — intentional reuse
    // of the name for the collected partition structs)
    let partition_values = partitioned_batches
        .iter()
        .map(|(partition_key, _)| partition_key.data().clone())
        .collect::<Vec<_>>();

    assert_eq!(partition_values, vec![
        Struct::from_iter(vec![Some(Literal::int(1))]),
        Struct::from_iter(vec![Some(Literal::int(2))]),
        Struct::from_iter(vec![Some(Literal::int(3))]),
    ]);
}
322506
}

crates/iceberg/src/writer/task/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -353,6 +353,7 @@ mod tests {
353353
arrow_schema.clone(),
354354
schema.clone(),
355355
partition_spec.clone(),
356+
false,
356357
)?;
357358

358359
// Create DefaultTaskWriter with FanoutWriter and splitter
@@ -451,6 +452,7 @@ mod tests {
451452
arrow_schema.clone(),
452453
schema.clone(),
453454
partition_spec.clone(),
455+
false,
454456
)?;
455457

456458
// Create DefaultTaskWriter with ClusteredWriter and splitter

crates/integrations/datafusion/src/physical_plan/project.rs

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,16 +27,14 @@ use datafusion::physical_expr::PhysicalExpr;
2727
use datafusion::physical_expr::expressions::Column;
2828
use datafusion::physical_plan::projection::ProjectionExec;
2929
use datafusion::physical_plan::{ColumnarValue, ExecutionPlan};
30+
use iceberg::arrow::PROJECTED_PARTITION_VALUE_COLUMN;
3031
use iceberg::arrow::record_batch_projector::RecordBatchProjector;
3132
use iceberg::spec::{PartitionSpec, Schema};
3233
use iceberg::table::Table;
3334
use iceberg::transform::BoxedTransformFunction;
3435

3536
use crate::to_datafusion_error;
3637

37-
/// Column name for the combined partition values struct
38-
const PARTITION_VALUES_COLUMN: &str = "_partition";
39-
4038
/// Extends an ExecutionPlan with partition value calculations for Iceberg tables.
4139
///
4240
/// This function takes an input ExecutionPlan and extends it with an additional column
@@ -81,7 +79,7 @@ pub fn project_with_partition(
8179
}
8280

8381
let partition_expr = Arc::new(PartitionExpr::new(calculator));
84-
projection_exprs.push((partition_expr, PARTITION_VALUES_COLUMN.to_string()));
82+
projection_exprs.push((partition_expr, PROJECTED_PARTITION_VALUE_COLUMN.to_string()));
8583

8684
let projection = ProjectionExec::try_new(projection_exprs, input)?;
8785
Ok(Arc::new(projection))
@@ -343,7 +341,7 @@ mod tests {
343341
}
344342

345343
let partition_expr = Arc::new(PartitionExpr::new(calculator));
346-
projection_exprs.push((partition_expr, PARTITION_VALUES_COLUMN.to_string()));
344+
projection_exprs.push((partition_expr, PROJECTED_PARTITION_VALUE_COLUMN.to_string()));
347345

348346
let projection = ProjectionExec::try_new(projection_exprs, input).unwrap();
349347
let result = Arc::new(projection);

0 commit comments

Comments
 (0)