Skip to content

Commit bf7e75a

Browse files
committed
Enhance test to verify meaningful statistics
1 parent bbdff7d commit bf7e75a

File tree

1 file changed

+22
-2
lines changed
  • datafusion/physical-plan/src/joins/sort_merge_join

1 file changed

+22
-2
lines changed

datafusion/physical-plan/src/joins/sort_merge_join/tests.rs

Lines changed: 22 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3033,6 +3033,7 @@ async fn test_anti_join_filtered_mask() -> Result<()> {
30333033
#[test]
30343034
fn test_partition_statistics() -> Result<()> {
30353035
use crate::ExecutionPlan;
3036+
use datafusion_common::stats::Precision;
30363037

30373038
let left = build_table(
30383039
("a1", &vec![1, 2, 3]),
@@ -3067,25 +3068,44 @@ fn test_partition_statistics() -> Result<()> {
30673068
join(Arc::clone(&left), Arc::clone(&right), on.clone(), join_type)?;
30683069

30693070
// Test aggregate statistics (partition = None)
3071+
// Should return meaningful statistics computed from both inputs
30703072
let stats = join_exec.partition_statistics(None)?;
30713073
assert_eq!(
30723074
stats.column_statistics.len(),
30733075
expected_cols,
3074-
"Aggregate stats failed for {join_type:?}"
3076+
"Aggregate stats column count failed for {join_type:?}"
3077+
);
3078+
// Verify that aggregate statistics have a meaningful num_rows (not Absent)
3079+
assert!(
3080+
!matches!(stats.num_rows, Precision::Absent),
3081+
"Aggregate stats should have meaningful num_rows for {join_type:?}, got {:?}",
3082+
stats.num_rows
30753083
);
30763084

30773085
// Test partition-specific statistics (partition = Some(0))
3086+
// The implementation correctly passes `partition` to children.
3087+
// Since the child TestMemoryExec returns unknown stats for specific partitions,
3088+
// the join output will also have Absent num_rows. This is expected behavior
3089+
// as the statistics depend on what the children can provide.
30783090
let partition_stats = join_exec.partition_statistics(Some(0))?;
30793091
assert_eq!(
30803092
partition_stats.column_statistics.len(),
30813093
expected_cols,
3082-
"Partition stats failed for {join_type:?}"
3094+
"Partition stats column count failed for {join_type:?}"
3095+
);
3096+
// When children return unknown stats, the join's partition stats will be Absent
3097+
assert!(
3098+
matches!(partition_stats.num_rows, Precision::Absent),
3099+
"Partition stats should have Absent num_rows when children return unknown for {join_type:?}, got {:?}",
3100+
partition_stats.num_rows
30833101
);
30843102
}
30853103

30863104
Ok(())
30873105
}
30883106

3107+
3108+
30893109
/// Returns the column names on the schema
30903110
fn columns(schema: &Schema) -> Vec<String> {
30913111
schema.fields().iter().map(|f| f.name().clone()).collect()

0 commit comments

Comments
 (0)