diff --git a/datafusion/expr-common/src/type_coercion/binary.rs b/datafusion/expr-common/src/type_coercion/binary.rs index de16e9e01073e..c9b39eacefc6a 100644 --- a/datafusion/expr-common/src/type_coercion/binary.rs +++ b/datafusion/expr-common/src/type_coercion/binary.rs @@ -260,8 +260,16 @@ impl<'a> BinaryTypeCoercer<'a> { ) }) } + Minus if is_date_minus_date(lhs, rhs) => { + return Ok(Signature { + lhs: lhs.clone(), + rhs: rhs.clone(), + ret: Int64, + }); + } Plus | Minus | Multiply | Divide | Modulo => { if let Ok(ret) = self.get_result(lhs, rhs) { + // Temporal arithmetic, e.g. Date32 + Interval Ok(Signature{ lhs: lhs.clone(), @@ -281,6 +289,7 @@ impl<'a> BinaryTypeCoercer<'a> { ret, }) } else if let Some(coerced) = temporal_coercion_strict_timezone(lhs, rhs) { + // Temporal arithmetic by first coercing to a common time representation // e.g. Date32 - Timestamp let ret = self.get_result(&coerced, &coerced).map_err(|e| { @@ -351,6 +360,15 @@ fn is_decimal(data_type: &DataType) -> bool { ) } +/// Returns true if both operands are Date types (Date32 or Date64) +/// Used to detect Date - Date operations which should return Int64 (days difference) +fn is_date_minus_date(lhs: &DataType, rhs: &DataType) -> bool { + matches!( + (lhs, rhs), + (DataType::Date32, DataType::Date32) | (DataType::Date64, DataType::Date64) + ) +} + /// Coercion rules for mathematics operators between decimal and non-decimal types. fn math_decimal_coercion( lhs_type: &DataType, diff --git a/datafusion/physical-expr/src/expressions/binary.rs b/datafusion/physical-expr/src/expressions/binary.rs index 8df09c22bbd8d..72eae396e68a6 100644 --- a/datafusion/physical-expr/src/expressions/binary.rs +++ b/datafusion/physical-expr/src/expressions/binary.rs @@ -30,6 +30,7 @@ use arrow::datatypes::*; use arrow::error::ArrowError; use datafusion_common::cast::as_boolean_array; use datafusion_common::{Result, ScalarValue, internal_err, not_impl_err}; + use datafusion_expr::binary::BinaryTypeCoercer; use datafusion_expr::interval_arithmetic::{Interval, apply_operator}; use datafusion_expr::sort_properties::ExprProperties; @@ -162,6 +163,94 @@ fn boolean_op( op(ll, rr).map(|t| Arc::new(t) as _) } +/// Returns true if both operands are Date types (Date32 or Date64) +/// Used to detect Date - Date operations which should return Int64 (days difference) +fn is_date_minus_date(lhs: &DataType, rhs: &DataType) -> bool { + matches!( + (lhs, rhs), + (DataType::Date32, DataType::Date32) | (DataType::Date64, DataType::Date64) + ) +} + +/// Computes the difference between two dates and returns the result as Int64 (days) +/// This aligns with PostgreSQL, DuckDB, and MySQL behavior where date - date returns an integer +/// +/// Implementation: Uses Arrow's sub_wrapping to get Duration, then converts to Int64 days +fn apply_date_subtraction( + lhs: &ColumnarValue, + rhs: &ColumnarValue, +) -> Result { + use arrow::compute::kernels::numeric::sub_wrapping; + + // Use Arrow's sub_wrapping to compute the Duration result + let duration_result = apply(lhs, rhs, sub_wrapping)?; + + // Convert Duration to Int64 (days) + match duration_result { + ColumnarValue::Array(array) => { + let int64_array = duration_to_days(&array)?; + Ok(ColumnarValue::Array(int64_array)) + } + ColumnarValue::Scalar(scalar) => { + // Convert scalar Duration to Int64 days + let array = scalar.to_array_of_size(1)?; + let int64_array = duration_to_days(&array)?; + let int64_scalar = ScalarValue::try_from_array(int64_array.as_ref(), 0)?; + Ok(ColumnarValue::Scalar(int64_scalar)) + } + } +} + +/// Converts a Duration array to Int64 days +/// Handles different Duration time units (Second, Millisecond, Microsecond, Nanosecond) +fn duration_to_days(array: &ArrayRef) -> Result { + use datafusion_common::cast::{ + as_duration_microsecond_array, as_duration_millisecond_array, + as_duration_nanosecond_array, as_duration_second_array, + }; + + const SECONDS_PER_DAY: i64 = 86_400; + const MILLIS_PER_DAY: i64 = 86_400_000; + const MICROS_PER_DAY: i64 = 86_400_000_000; + const NANOS_PER_DAY: i64 = 86_400_000_000_000; + + match array.data_type() { + DataType::Duration(TimeUnit::Second) => { + let duration_array = as_duration_second_array(array)?; + let result: Int64Array = duration_array + .iter() + .map(|v| v.map(|val| val / SECONDS_PER_DAY)) + .collect(); + Ok(Arc::new(result)) + } + DataType::Duration(TimeUnit::Millisecond) => { + let duration_array = as_duration_millisecond_array(array)?; + let result: Int64Array = duration_array + .iter() + .map(|v| v.map(|val| val / MILLIS_PER_DAY)) + .collect(); + Ok(Arc::new(result)) + } + DataType::Duration(TimeUnit::Microsecond) => { + let duration_array = as_duration_microsecond_array(array)?; + let result: Int64Array = duration_array + .iter() + .map(|v| v.map(|val| val / MICROS_PER_DAY)) + .collect(); + Ok(Arc::new(result)) + } + DataType::Duration(TimeUnit::Nanosecond) => { + let duration_array = as_duration_nanosecond_array(array)?; + let result: Int64Array = duration_array + .iter() + .map(|v| v.map(|val| val / NANOS_PER_DAY)) + .collect(); + Ok(Arc::new(result)) + } + other => internal_err!("duration_to_days expected Duration type, got: {}", other), + } +} + impl PhysicalExpr for BinaryExpr { /// Return a reference to Any that can be used for downcasting fn as_any(&self) -> &dyn Any { @@ -251,6 +340,11 @@ impl PhysicalExpr for BinaryExpr { match self.op { Operator::Plus if self.fail_on_overflow => return apply(&lhs, &rhs, add), Operator::Plus => return apply(&lhs, &rhs, add_wrapping), + // Special case: Date - Date returns Int64 (days difference) + // This aligns with PostgreSQL, DuckDB, and MySQL behavior + Operator::Minus if is_date_minus_date(&left_data_type, &right_data_type) => { + return apply_date_subtraction(&lhs, &rhs); + } Operator::Minus if self.fail_on_overflow => return apply(&lhs, &rhs, sub), Operator::Minus => return apply(&lhs, &rhs, sub_wrapping), Operator::Multiply if self.fail_on_overflow => return apply(&lhs, &rhs, mul), diff --git a/datafusion/sqllogictest/test_files/datetime/arith_date_date.slt b/datafusion/sqllogictest/test_files/datetime/arith_date_date.slt index f6e4aad78b27c..8eb5cc176f365 100644 --- a/datafusion/sqllogictest/test_files/datetime/arith_date_date.slt +++ b/datafusion/sqllogictest/test_files/datetime/arith_date_date.slt @@ -1,16 +1,15 @@ # date - date → integer # Subtract dates, producing the number of days elapsed # date '2001-10-01' - date '2001-09-28' → 3 +# This aligns with PostgreSQL, DuckDB, and MySQL behavior +# Resolved by: https://github.com/apache/datafusion/issues/19528 -# note that datafusion returns Duration whereas postgres returns an int -# Tracking issue: https://github.com/apache/datafusion/issues/19528 - -query ? +query I SELECT '2001-10-01'::date - '2001-09-28'::date ---- -3 days 0 hours 0 mins 0 secs +3 query T SELECT arrow_typeof('2001-10-01'::date - '2001-09-28'::date) ---- -Duration(s) +Int64 diff --git a/datafusion/sqllogictest/test_files/datetime/dates.slt b/datafusion/sqllogictest/test_files/datetime/dates.slt index 6ba34cfcac03f..d2a7360b120c6 100644 --- a/datafusion/sqllogictest/test_files/datetime/dates.slt +++ b/datafusion/sqllogictest/test_files/datetime/dates.slt @@ -94,13 +94,6 @@ caused by Error during planning: Cannot coerce arithmetic expression Timestamp(ns) + Utf8 to valid types -# DATE minus DATE -# https://github.com/apache/arrow-rs/issues/4383 -query ? -SELECT DATE '2023-04-09' - DATE '2023-04-02'; ----- -7 days 0 hours 0 mins 0 secs - # DATE minus Timestamp query ? SELECT DATE '2023-04-09' - '2000-01-01T00:00:00'::timestamp; @@ -113,17 +106,18 @@ SELECT '2023-01-01T00:00:00'::timestamp - DATE '2021-01-01'; ---- 730 days 0 hours 0 mins 0.000000000 secs -# NULL with DATE arithmetic should yield NULL -query ? +# NULL with DATE arithmetic should yield NULL (but Int64 type) +query I SELECT NULL - DATE '1984-02-28'; ---- NULL -query ? +query I SELECT DATE '1984-02-28' - NULL ---- NULL + # to_date_test statement ok create table to_date_t1(ts bigint) as VALUES