Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions datafusion/expr-common/src/type_coercion/binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -260,8 +260,16 @@ impl<'a> BinaryTypeCoercer<'a> {
)
})
}
Minus if is_date_minus_date(lhs, rhs) => {
return Ok(Signature {
lhs: lhs.clone(),
rhs: rhs.clone(),
ret: Int64,
});
}
Plus | Minus | Multiply | Divide | Modulo => {
if let Ok(ret) = self.get_result(lhs, rhs) {

// Temporal arithmetic, e.g. Date32 + Interval
Ok(Signature{
lhs: lhs.clone(),
Expand All @@ -281,6 +289,7 @@ impl<'a> BinaryTypeCoercer<'a> {
ret,
})
} else if let Some(coerced) = temporal_coercion_strict_timezone(lhs, rhs) {

// Temporal arithmetic by first coercing to a common time representation
// e.g. Date32 - Timestamp
let ret = self.get_result(&coerced, &coerced).map_err(|e| {
Expand Down Expand Up @@ -351,6 +360,15 @@ fn is_decimal(data_type: &DataType) -> bool {
)
}

/// Returns true if both operands are Date types (Date32 or Date64)
/// Used to detect Date - Date operations which should return Int64 (days difference)
fn is_date_minus_date(lhs: &DataType, rhs: &DataType) -> bool {
matches!(
(lhs, rhs),
(DataType::Date32, DataType::Date32) | (DataType::Date64, DataType::Date64)
)
}

/// Coercion rules for mathematics operators between decimal and non-decimal types.
fn math_decimal_coercion(
lhs_type: &DataType,
Expand Down
94 changes: 94 additions & 0 deletions datafusion/physical-expr/src/expressions/binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ use arrow::datatypes::*;
use arrow::error::ArrowError;
use datafusion_common::cast::as_boolean_array;
use datafusion_common::{Result, ScalarValue, internal_err, not_impl_err};

use datafusion_expr::binary::BinaryTypeCoercer;
use datafusion_expr::interval_arithmetic::{Interval, apply_operator};
use datafusion_expr::sort_properties::ExprProperties;
Expand Down Expand Up @@ -162,6 +163,94 @@ fn boolean_op(
op(ll, rr).map(|t| Arc::new(t) as _)
}

/// Returns true if both operands are Date types (Date32 or Date64)
/// Used to detect Date - Date operations which should return Int64 (days difference)
fn is_date_minus_date(lhs: &DataType, rhs: &DataType) -> bool {
matches!(
(lhs, rhs),
(DataType::Date32, DataType::Date32) | (DataType::Date64, DataType::Date64)
)
}

/// Computes the difference between two dates and returns the result as Int64 (days)
/// This aligns with PostgreSQL, DuckDB, and MySQL behavior where date - date returns an integer
///
/// Implementation: Uses Arrow's sub_wrapping to get Duration, then converts to Int64 days
fn apply_date_subtraction(
lhs: &ColumnarValue,
rhs: &ColumnarValue,
) -> Result<ColumnarValue> {
use arrow::compute::kernels::numeric::sub_wrapping;

// Use Arrow's sub_wrapping to compute the Duration result
let duration_result = apply(lhs, rhs, sub_wrapping)?;

// Convert Duration to Int64 (days)
match duration_result {
ColumnarValue::Array(array) => {
let int64_array = duration_to_days(&array)?;
Ok(ColumnarValue::Array(int64_array))
}
ColumnarValue::Scalar(scalar) => {
// Convert scalar Duration to Int64 days
let array = scalar.to_array_of_size(1)?;
let int64_array = duration_to_days(&array)?;
let int64_scalar = ScalarValue::try_from_array(int64_array.as_ref(), 0)?;
Ok(ColumnarValue::Scalar(int64_scalar))
}
}
}

/// Converts a Duration array to Int64 days
/// Handles different Duration time units (Second, Millisecond, Microsecond, Nanosecond)
fn duration_to_days(array: &ArrayRef) -> Result<ArrayRef> {
use datafusion_common::cast::{
as_duration_microsecond_array, as_duration_millisecond_array,
as_duration_nanosecond_array, as_duration_second_array,
};

const SECONDS_PER_DAY: i64 = 86_400;
const MILLIS_PER_DAY: i64 = 86_400_000;
const MICROS_PER_DAY: i64 = 86_400_000_000;
const NANOS_PER_DAY: i64 = 86_400_000_000_000;

match array.data_type() {
DataType::Duration(TimeUnit::Second) => {
let duration_array = as_duration_second_array(array)?;
let result: Int64Array = duration_array
.iter()
.map(|v| v.map(|val| val / SECONDS_PER_DAY))
.collect();
Ok(Arc::new(result))
}
DataType::Duration(TimeUnit::Millisecond) => {
let duration_array = as_duration_millisecond_array(array)?;
let result: Int64Array = duration_array
.iter()
.map(|v| v.map(|val| val / MILLIS_PER_DAY))
.collect();
Ok(Arc::new(result))
}
DataType::Duration(TimeUnit::Microsecond) => {
let duration_array = as_duration_microsecond_array(array)?;
let result: Int64Array = duration_array
.iter()
.map(|v| v.map(|val| val / MICROS_PER_DAY))
.collect();
Ok(Arc::new(result))
}
DataType::Duration(TimeUnit::Nanosecond) => {
let duration_array = as_duration_nanosecond_array(array)?;
let result: Int64Array = duration_array
.iter()
.map(|v| v.map(|val| val / NANOS_PER_DAY))
.collect();
Ok(Arc::new(result))
}
other => internal_err!("duration_to_days expected Duration type, got: {}", other),
}
}

impl PhysicalExpr for BinaryExpr {
/// Return a reference to Any that can be used for downcasting
fn as_any(&self) -> &dyn Any {
Expand Down Expand Up @@ -251,6 +340,11 @@ impl PhysicalExpr for BinaryExpr {
match self.op {
Operator::Plus if self.fail_on_overflow => return apply(&lhs, &rhs, add),
Operator::Plus => return apply(&lhs, &rhs, add_wrapping),
// Special case: Date - Date returns Int64 (days difference)
// This aligns with PostgreSQL, DuckDB, and MySQL behavior
Operator::Minus if is_date_minus_date(&left_data_type, &right_data_type) => {
return apply_date_subtraction(&lhs, &rhs);
}
Operator::Minus if self.fail_on_overflow => return apply(&lhs, &rhs, sub),
Operator::Minus => return apply(&lhs, &rhs, sub_wrapping),
Operator::Multiply if self.fail_on_overflow => return apply(&lhs, &rhs, mul),
Expand Down
11 changes: 5 additions & 6 deletions datafusion/sqllogictest/test_files/datetime/arith_date_date.slt
Original file line number Diff line number Diff line change
@@ -1,16 +1,15 @@
# date - date → integer
# Subtract dates, producing the number of days elapsed
# date '2001-10-01' - date '2001-09-28' → 3
# This aligns with PostgreSQL, DuckDB, and MySQL behavior
# Resolved by: https://github.com/apache/datafusion/issues/19528

# note that datafusion returns Duration whereas postgres returns an int
# Tracking issue: https://github.com/apache/datafusion/issues/19528

query ?
query I
SELECT '2001-10-01'::date - '2001-09-28'::date
----
3 days 0 hours 0 mins 0 secs
3

query T
SELECT arrow_typeof('2001-10-01'::date - '2001-09-28'::date)
----
Duration(s)
Int64
14 changes: 4 additions & 10 deletions datafusion/sqllogictest/test_files/datetime/dates.slt
Original file line number Diff line number Diff line change
Expand Up @@ -94,13 +94,6 @@ caused by
Error during planning: Cannot coerce arithmetic expression Timestamp(ns) + Utf8 to valid types


# DATE minus DATE
# https://github.com/apache/arrow-rs/issues/4383
query ?
SELECT DATE '2023-04-09' - DATE '2023-04-02';
----
7 days 0 hours 0 mins 0 secs

# DATE minus Timestamp
query ?
SELECT DATE '2023-04-09' - '2000-01-01T00:00:00'::timestamp;
Expand All @@ -113,17 +106,18 @@ SELECT '2023-01-01T00:00:00'::timestamp - DATE '2021-01-01';
----
730 days 0 hours 0 mins 0.000000000 secs

# NULL with DATE arithmetic should yield NULL
query ?
# NULL with DATE arithmetic should yield NULL (but Int64 type)
query I
SELECT NULL - DATE '1984-02-28';
----
NULL

query ?
query I
SELECT DATE '1984-02-28' - NULL
----
NULL


# to_date_test
statement ok
create table to_date_t1(ts bigint) as VALUES
Expand Down