Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 23 additions & 2 deletions datafusion/sql/src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -406,6 +406,24 @@ impl RecursiveUnnestRewriter<'_> {
.collect()
}

/// Reports whether `expr` occupies a position where a struct unnest is permitted.
///
/// Returns `true` when either:
/// - `expr` is equal to the rewriter's root expression, or
/// - the root expression is an `Expr::Alias` whose wrapped expression is equal
///   to `expr`.
///
/// The second case is what lets `unnest(struct_col) AS alias` work: the alias is
/// simply ignored for struct unnest (matching DuckDB behavior).
fn is_at_struct_allowed_root(&self, expr: &Expr) -> bool {
    // Direct hit on the root, or exactly one alias layer wrapping this expression.
    expr == self.root_expr
        || matches!(
            self.root_expr,
            Expr::Alias(Alias { expr: aliased, .. }) if aliased.as_ref() == expr
        )
}

fn transform(
&mut self,
level: usize,
Expand Down Expand Up @@ -566,15 +584,18 @@ impl TreeNodeRewriter for RecursiveUnnestRewriter<'_> {
// instead of unnest(struct_arr_col, depth = 2)

let unnest_recursion = unnest_stack.len();
let struct_allowed = (&expr == self.root_expr) && unnest_recursion == 1;
let struct_allowed =
self.is_at_struct_allowed_root(&expr) && unnest_recursion == 1;

let mut transformed_exprs = self.transform(
unnest_recursion,
expr.schema_name().to_string(),
inner_expr,
struct_allowed,
)?;
if struct_allowed {
// Only set transformed_root_exprs for struct unnest (which returns multiple expressions).
// For list unnest (single expression), we let the normal rewrite handle the alias.
if struct_allowed && transformed_exprs.len() > 1 {
self.transformed_root_exprs = Some(transformed_exprs.clone());
}
return Ok(Transformed::new(
Expand Down
30 changes: 28 additions & 2 deletions datafusion/sqllogictest/test_files/unnest.slt
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,20 @@ select unnest(struct(1,2,3));
----
1 2 3

## Basic unnest expression in select struct with alias (alias is ignored for struct unnest)
query III
select unnest(struct(1,2,3)) as ignored_alias;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you also please add a test for the output schema, perhaps using `describe`?

Something like

 describe select unnest(struct(1,2,3));
+-------------------------------------------------------------+-----------+-------------+
| column_name                                                 | data_type | is_nullable |
+-------------------------------------------------------------+-----------+-------------+
| __unnest_placeholder(struct(Int64(1),Int64(2),Int64(3))).c0 | Int64     | YES         |
| __unnest_placeholder(struct(Int64(1),Int64(2),Int64(3))).c1 | Int64     | YES         |
| __unnest_placeholder(struct(Int64(1),Int64(2),Int64(3))).c2 | Int64     | YES         |
+-------------------------------------------------------------+-----------+-------------+
3 row(s) fetched.
Elapsed 0.007 seconds.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, sounds good. Thanks for the feedback.

----
1 2 3

## Verify schema output for struct unnest with alias (alias is ignored)
query TTT
describe select unnest(struct(1,2,3)) as ignored_alias;
----
__unnest_placeholder(struct(Int64(1),Int64(2),Int64(3))).c0 Int64 YES
__unnest_placeholder(struct(Int64(1),Int64(2),Int64(3))).c1 Int64 YES
__unnest_placeholder(struct(Int64(1),Int64(2),Int64(3))).c2 Int64 YES

## Basic unnest list expression in from clause
query I
select * from unnest([1,2,3]);
Expand Down Expand Up @@ -798,9 +812,21 @@ NULL 1
query error DataFusion error: Error during planning: Column in SELECT must be in GROUP BY or an aggregate function: While expanding wildcard, column "nested_unnest_table\.column1" must appear in the GROUP BY clause or must be part of an aggregate function, currently only "UNNEST\(nested_unnest_table\.column1\)\[c0\]" appears in the SELECT clause satisfies this requirement
select unnest(column1) c1 from nested_unnest_table group by c1.c0;

# TODO: this query should work. see issue: https://github.com/apache/datafusion/issues/12794
query error DataFusion error: Internal error: Assertion failed: struct_allowed: unnest on struct can only be applied at the root level of select expression
## Unnest struct with alias - alias is ignored (same as DuckDB behavior)
## See: https://github.com/apache/datafusion/issues/12794
query TT?
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Likewise here, it would be great to test the schema output

describe select unnest(column1) c1 from nested_unnest_table;

select unnest(column1) c1 from nested_unnest_table
----
a b {c0: c}
d e {c0: f}

## Verify schema output for struct unnest with alias (alias is ignored)
query TTT
describe select unnest(column1) c1 from nested_unnest_table;
----
__unnest_placeholder(nested_unnest_table.column1).c0 Utf8 YES
__unnest_placeholder(nested_unnest_table.column1).c1 Utf8 YES
__unnest_placeholder(nested_unnest_table.column1).c2 Struct("c0": Utf8) YES

query II??I??
select unnest(column5), * from unnest_table;
Expand Down