Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 3 additions & 19 deletions datafusion/expr/src/expr_schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@ use arrow::datatypes::{DataType, Field, FieldRef};
use datafusion_common::datatype::FieldExt;
use datafusion_common::metadata::FieldMetadata;
use datafusion_common::{
Column, DataFusionError, ExprSchema, Result, ScalarValue, Spans, TableReference,
not_impl_err, plan_datafusion_err, plan_err,
Column, ExprSchema, Result, ScalarValue, Spans, TableReference, not_impl_err,
plan_err,
};
use datafusion_expr_common::type_coercion::binary::BinaryTypeCoercer;
use datafusion_functions_window_common::field::WindowUDFFieldArgs;
Expand Down Expand Up @@ -628,23 +628,7 @@ fn verify_function_arguments<F: UDFCoercionExt>(
input_fields: &[FieldRef],
) -> Result<Vec<FieldRef>> {
fields_with_udf(input_fields, function).map_err(|err| {
let data_types = input_fields
.iter()
.map(|f| f.data_type())
.cloned()
.collect::<Vec<_>>();
plan_datafusion_err!(
"{} {}",
match err {
DataFusionError::Plan(msg) => msg,
err => err.to_string(),
},
utils::generate_signature_error_message(
function.name(),
function.signature(),
&data_types
)
)
utils::generate_signature_error_message(function, input_fields, err)
})
}

Expand Down
16 changes: 5 additions & 11 deletions datafusion/expr/src/type_coercion/functions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ use datafusion_common::utils::{
ListCoercion, base_type, coerced_fixed_size_list_to_list,
};
use datafusion_common::{
Result, exec_err, internal_err, plan_err, types::NativeType, utils::list_ndims,
Result, internal_err, plan_err, types::NativeType, utils::list_ndims,
};
use datafusion_expr_common::signature::ArrayFunctionArgument;
use datafusion_expr_common::type_coercion::binary::type_union_resolution;
Expand Down Expand Up @@ -313,16 +313,10 @@ fn get_valid_types_with_udf<F: UDFCoercionExt>(
func: &F,
) -> Result<Vec<Vec<DataType>>> {
let valid_types = match signature {
TypeSignature::UserDefined => match func.coerce_types(current_types) {
Ok(coerced_types) => vec![coerced_types],
Err(e) => {
return exec_err!(
"Function '{}' user-defined coercion failed with {:?}",
func.name(),
e.strip_backtrace()
);
}
},
TypeSignature::UserDefined => {
let coerced_types = func.coerce_types(current_types)?;
vec![coerced_types]
}
TypeSignature::OneOf(signatures) => {
let mut res = vec![];
let mut errors = vec![];
Expand Down
131 changes: 104 additions & 27 deletions datafusion/expr/src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,19 +23,20 @@ use std::sync::Arc;

use crate::expr::{Alias, Sort, WildcardOptions, WindowFunctionParams};
use crate::expr_rewriter::strip_outer_reference;
use crate::type_coercion::functions::UDFCoercionExt;
use crate::{
BinaryExpr, Expr, ExprSchemable, Filter, GroupingSet, LogicalPlan, Operator, and,
};
use datafusion_expr_common::signature::{Signature, TypeSignature};

use arrow::datatypes::{DataType, Field, Schema};
use arrow::datatypes::{DataType, Field, FieldRef, Schema};
use datafusion_common::tree_node::{
Transformed, TransformedResult, TreeNode, TreeNodeRecursion,
};
use datafusion_common::utils::get_at_indices;
use datafusion_common::{
Column, DFSchema, DFSchemaRef, HashMap, Result, TableReference, internal_err,
plan_err,
Column, DFSchema, DFSchemaRef, DataFusionError, HashMap, Result, TableReference,
internal_err, plan_datafusion_err, plan_err,
};

#[cfg(not(feature = "sql"))]
Expand Down Expand Up @@ -971,13 +972,57 @@ pub fn generate_signature_error_msg(
/// round(Float64)
/// round(Float32)
/// ```
pub(crate) fn generate_signature_error_message(
func_name: &str,
func_signature: &Signature,
input_expr_types: &[DataType],
) -> String {
#[expect(deprecated)]
generate_signature_error_msg(func_name, func_signature.clone(), input_expr_types)
pub(crate) fn generate_signature_error_message<F: UDFCoercionExt>(
function: &F,
input_fields: &[FieldRef],
original_error: DataFusionError,
) -> DataFusionError {
let data_types = input_fields
.iter()
.map(|f| f.data_type())
.cloned()
.collect::<Vec<_>>();
let function_call_str = format!(
"{}({})",
function.name(),
TypeSignature::join_types(&data_types, ", "),
);

// UserDefined signatures are better served returning the original error, since
// we can't know what went wrong in the custom code
if function.signature().type_signature == TypeSignature::UserDefined {
let original_error = match original_error {
// Since we're returning a Plan error we don't want any double nesting.
// TODO: is there a better way to strip backtrace & the planning prefix?
err @ DataFusionError::Plan(_) => err
.strip_backtrace()
.strip_prefix("Error during planning: ")
.unwrap()
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

strip_prefix("Error during planning: ").unwrap() can panic if the string format ever differs (or if the prefix is already stripped), turning an error-reporting path into a crash. Consider avoiding unwrap here (e.g., extract the inner Plan message directly) so planning failures remain non-panicking.

Fix This in Augment

🤖 Was this useful? React with 👍 or 👎, or 🚀 if it prevented an incident/outage.

Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

value:useful; category:bug; feedback: The Augment AI reviewer is correct! Error reporting should not cause crashes, ever. It is good to make a an attempt to cleanup the message but it should be in a safer way, as the proposed change.

.to_string(),
Comment on lines +997 to +1001
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

The use of .unwrap() here could cause a panic if the prefix is not found in the error string. While it seems safe with the current implementation of DataFusionError::Plan, it's better to avoid unwrap() in production code to make it more robust against future changes. The TODO comment also indicates this might be a concern. I suggest handling the None case gracefully.

            err @ DataFusionError::Plan(_) => {
                let s = err.strip_backtrace();
                if let Some(unprefixed) = s.strip_prefix("Error during planning: ") {
                    unprefixed.to_string()
                } else {
                    s
                }
            },

Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

value:useful; category:bug; feedback: The Gemini AI reviewer is correct! Error reporting should not cause crashes, ever. It is good to make a an attempt to cleanup the message but it should be in a safer way, as the proposed change.

err => err.strip_backtrace(),
Comment on lines +994 to +1002
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Avoid potential panic when stripping the planning prefix.

strip_prefix(...).unwrap() will panic if the prefix is absent (e.g., a DataFusionError::Plan carrying a raw message without the standard prefix). Safer to fall back to the unstripped message.

🔧 Suggested fix
-        let original_error = match original_error {
-            // Since we're returning a Plan error we don't want any double nesting.
-            // TODO: is there a better way to strip backtrace & the planning prefix?
-            err @ DataFusionError::Plan(_) => err
-                .strip_backtrace()
-                .strip_prefix("Error during planning: ")
-                .unwrap()
-                .to_string(),
-            err => err.strip_backtrace(),
-        };
+        let original_error = match original_error {
+            // Since we're returning a Plan error we don't want any double nesting.
+            // TODO: is there a better way to strip backtrace & the planning prefix?
+            err @ DataFusionError::Plan(_) => {
+                let msg = err.strip_backtrace();
+                msg.strip_prefix("Error during planning: ")
+                    .unwrap_or(&msg)
+                    .to_string()
+            }
+            err => err.strip_backtrace(),
+        };
🤖 Prompt for AI Agents
In `@datafusion/expr/src/utils.rs` around lines 994 - 1002, The match arm handling
DataFusionError::Plan can panic because strip_prefix(...).unwrap() assumes the
prefix exists; update the DataFusionError::Plan branch (where original_error is
matched) to call strip_backtrace(), then attempt strip_prefix("Error during
planning: ") safely by using map/and_then or an if let to take the stripped
result when present and otherwise fall back to the unstripped string (i.e.,
replace the .strip_prefix(...).unwrap().to_string() with logic that uses
.strip_prefix(...).map(|s| s.to_string()).unwrap_or_else(|| <original stripped
value>.to_string())). Keep the other branch using err.strip_backtrace()
unchanged and ensure the final original_error remains a String.

Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

value:useful; category:bug; feedback: The CodeRabbit AI reviewer is correct! Error reporting should not cause crashes, ever. It is good to make a an attempt to cleanup the message but it should be in a safer way, as the proposed change.

};
return plan_datafusion_err!(
"User-defined coercion of function call '{function_call_str}' failed with:\n{original_error}"
);
}

// For other signature types, we don't rely on what specifically went wrong;
// we will only show the valid signatures to better display what is allowed
// (instead of enumerating everything that potentially went wrong).
drop(original_error);

let candidate_signatures = function
.signature()
.type_signature
.to_string_repr_with_names(function.signature().parameter_names.as_deref())
.iter()
.map(|args_str| format!("\t{}({args_str})", function.name()))
.collect::<Vec<String>>()
.join("\n");

plan_datafusion_err!(
"Failed to coerce function call '{function_call_str}'. You might need to add explicit type casts.\n\tCandidate functions:\n{candidate_signatures}"
)
}

/// Splits a conjunctive [`Expr`] such as `A AND B AND C` => `[A, B, C]`
Expand Down Expand Up @@ -1735,6 +1780,35 @@ mod tests {
assert!(!can_hash(&list_union_type));
}

struct MockUdf(Signature);

impl UDFCoercionExt for MockUdf {
fn name(&self) -> &str {
"mock"
}

fn signature(&self) -> &Signature {
&self.0
}

fn coerce_types(&self, _arg_types: &[DataType]) -> Result<Vec<DataType>> {
unimplemented!()
}
}

#[test]
fn test_generate_signature_error_msg_user_defined() {
let error = generate_signature_error_message(
&MockUdf(Signature::user_defined(Volatility::Immutable)),
&[Field::new("name", DataType::Int32, true).into()],
DataFusionError::Plan("Expected 'mock' to fail".to_string()),
);

let expected = "Error during planning: User-defined coercion of function call 'mock(Int32)' failed with:
Expected 'mock' to fail";
assert!(error.to_string().starts_with(expected));
}

#[test]
fn test_generate_signature_error_msg_with_parameter_names() {
let sig = Signature::one_of(
Expand All @@ -1755,18 +1829,17 @@ mod tests {
])
.expect("valid parameter names");

// Generate error message with only 1 argument provided
let error_msg =
generate_signature_error_message("substr", &sig, &[DataType::Utf8]);

assert!(
error_msg.contains("str: Utf8, start_pos: Int64"),
"Expected 'str: Utf8, start_pos: Int64' in error message, got: {error_msg}"
);
assert!(
error_msg.contains("str: Utf8, start_pos: Int64, length: Int64"),
"Expected 'str: Utf8, start_pos: Int64, length: Int64' in error message, got: {error_msg}"
let error = generate_signature_error_message(
&MockUdf(sig),
&[Field::new("name", DataType::Utf8, true).into()],
DataFusionError::Plan("".to_string()),
);

let expected = "Error during planning: Failed to coerce function call 'mock(Utf8)'. You might need to add explicit type casts.
\tCandidate functions:
\tmock(str: Utf8, start_pos: Int64)
\tmock(str: Utf8, start_pos: Int64, length: Int64)";
assert!(error.to_string().starts_with(expected));
}

#[test]
Expand All @@ -1776,12 +1849,16 @@ mod tests {
Volatility::Immutable,
);

let error_msg =
generate_signature_error_message("my_func", &sig, &[DataType::Int32]);

assert!(
error_msg.contains("Any, Any"),
"Expected 'Any, Any' without parameter names, got: {error_msg}"
let error = generate_signature_error_message(
&MockUdf(sig),
&[Field::new("name", DataType::Int32, true).into()],
DataFusionError::Plan("".to_string()),
);

let expected = "Error during planning: Failed to coerce function call 'mock(Int32)'. You might need to add explicit type casts.
\tCandidate functions:
\tmock(Any, Any)
\tmock(Any, Any, Any)";
assert!(error.to_string().starts_with(expected));
}
}
4 changes: 2 additions & 2 deletions datafusion/optimizer/src/analyzer/type_coercion.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1830,7 +1830,7 @@ mod test {

let err = Projection::try_new(vec![udaf], empty).err().unwrap();
assert!(
err.strip_backtrace().starts_with("Error during planning: Failed to coerce arguments to satisfy a call to 'MY_AVG' function: coercion from Utf8 to the signature Uniform(1, [Float64]) failed")
err.strip_backtrace().starts_with("Error during planning: Failed to coerce function call 'MY_AVG(Utf8)'. You might need to add explicit type casts.\n\tCandidate functions:\n\tMY_AVG(Float64)")
);
Ok(())
}
Expand Down Expand Up @@ -1891,7 +1891,7 @@ mod test {
.err()
.unwrap()
.strip_backtrace();
assert!(err.starts_with("Error during planning: Failed to coerce arguments to satisfy a call to 'avg' function: coercion from Utf8 to the signature Uniform(1, [Int8, Int16, Int32, Int64, UInt8, UInt16, UInt32, UInt64, Float32, Float64]) failed"));
assert!(err.starts_with("Error during planning: Failed to coerce function call 'avg(Utf8)'. You might need to add explicit type casts.\n\tCandidate functions:\n\tavg(Int8/Int16/Int32/Int64/UInt8/UInt16/UInt32/UInt64/Float32/Float64)"));
Ok(())
}

Expand Down
15 changes: 9 additions & 6 deletions datafusion/sql/tests/sql_integration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4744,41 +4744,44 @@ fn error_message_test(sql: &str, err_msg_starts_with: &str) {
fn test_error_message_invalid_scalar_function_signature() {
error_message_test(
"select sqrt()",
"Error during planning: 'sqrt' does not support zero arguments",
"Error during planning: Failed to coerce function call 'sqrt()'",
);
error_message_test(
"select sqrt(1, 2)",
"Error during planning: Failed to coerce arguments",
"Error during planning: Failed to coerce function call 'sqrt(Int64, Int64)'",
);
}

#[test]
fn test_error_message_invalid_aggregate_function_signature() {
error_message_test(
"select sum()",
"Error during planning: Execution error: Function 'sum' user-defined coercion failed with \"Execution error: sum function requires 1 argument, got 0\"",
"Error during planning: User-defined coercion of function call 'sum()' failed with:
Execution error: sum function requires 1 argument, got 0",
);
// We keep two different prefixes because they clarify each other.
// It might be incorrect, and we should consider keeping only one.
error_message_test(
"select max(9, 3)",
"Error during planning: Execution error: Function 'max' user-defined coercion failed",
"Error during planning: User-defined coercion of function call 'max(Int64, Int64)' failed with:
Execution error: min/max was called with 2 arguments. It requires only 1.",
);
}

#[test]
fn test_error_message_invalid_window_function_signature() {
error_message_test(
"select rank(1) over()",
"Error during planning: The function 'rank' expected zero argument but received 1",
"Error during planning: Failed to coerce function call 'rank(Int64)'",
);
}

#[test]
fn test_error_message_invalid_window_aggregate_function_signature() {
error_message_test(
"select sum() over()",
"Error during planning: Execution error: Function 'sum' user-defined coercion failed with \"Execution error: sum function requires 1 argument, got 0\"",
"Error during planning: User-defined coercion of function call 'sum()' failed with:
Execution error: sum function requires 1 argument, got 0",
);
}

Expand Down
12 changes: 6 additions & 6 deletions datafusion/sqllogictest/test_files/aggregate.slt
Original file line number Diff line number Diff line change
Expand Up @@ -144,26 +144,26 @@ statement error DataFusion error: Schema error: Schema contains duplicate unqual
SELECT approx_distinct(c9) count_c9, approx_distinct(cast(c9 as varchar)) count_c9_str FROM aggregate_test_100

# csv_query_approx_percentile_cont_with_weight
statement error Failed to coerce arguments to satisfy a call to 'approx_percentile_cont_with_weight' function
statement error Failed to coerce function call 'approx_percentile_cont_with_weight\(Utf8View, Int8, Float64\)'
SELECT approx_percentile_cont_with_weight(c2, 0.95) WITHIN GROUP (ORDER BY c1) FROM aggregate_test_100

statement error Failed to coerce arguments to satisfy a call to 'approx_percentile_cont_with_weight' function
statement error Failed to coerce function call 'approx_percentile_cont_with_weight\(Int16, Utf8View, Float64\)'
SELECT approx_percentile_cont_with_weight(c1, 0.95) WITHIN GROUP (ORDER BY c3) FROM aggregate_test_100

statement error Failed to coerce arguments to satisfy a call to 'approx_percentile_cont_with_weight' function
statement error Failed to coerce function call 'approx_percentile_cont_with_weight\(Int16, Int8, Utf8View\)'
SELECT approx_percentile_cont_with_weight(c2, c1) WITHIN GROUP (ORDER BY c3) FROM aggregate_test_100

# csv_query_approx_percentile_cont_with_histogram_bins
statement error DataFusion error: Error during planning: Tdigest max_size value for 'APPROX_PERCENTILE_CONT' must be UInt > 0 literal \(got data type Int64\)\.
SELECT c1, approx_percentile_cont(0.95, -1000) WITHIN GROUP (ORDER BY c3) AS c3_p95 FROM aggregate_test_100 GROUP BY 1 ORDER BY 1

statement error Failed to coerce arguments to satisfy a call to 'approx_percentile_cont' function
statement error Failed to coerce function call 'approx_percentile_cont\(Int16, Float64, Utf8View\)'
SELECT approx_percentile_cont(0.95, c1) WITHIN GROUP (ORDER BY c3) FROM aggregate_test_100

statement error DataFusion error: Error during planning: Failed to coerce arguments to satisfy a call to 'approx_percentile_cont' function: coercion from Int16, Float64, Float64 to the signature OneOf(.*) failed(.|\n)*
statement error Failed to coerce function call 'approx_percentile_cont\(Int16, Float64, Float64\)'
SELECT approx_percentile_cont(0.95, 111.1) WITHIN GROUP (ORDER BY c3) FROM aggregate_test_100

statement error DataFusion error: Error during planning: Failed to coerce arguments to satisfy a call to 'approx_percentile_cont' function: coercion from Float64, Float64, Float64 to the signature OneOf(.*) failed(.|\n)*
statement error Failed to coerce function call 'approx_percentile_cont\(Float64, Float64, Float64\)'
SELECT approx_percentile_cont(0.95, 111.1) WITHIN GROUP (ORDER BY c12) FROM aggregate_test_100

statement error DataFusion error: Error during planning: Percentile value for 'APPROX_PERCENTILE_CONT' must be a literal
Expand Down
Loading