Skip to content

Commit 5efdfe4

Browse files
Return DestClausePrefix.MERGE when TOK_FROM is missing in getMergeDestClausePrefix
1 parent 7f0e7f1 commit 5efdfe4

File tree

5 files changed

+177
-23
lines changed

5 files changed

+177
-23
lines changed

iceberg/iceberg-handler/src/test/queries/negative/merge_with_null_check_on_joining_col.q

Lines changed: 0 additions & 5 deletions
This file was deleted.
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
2+
create table target(a int, b int, c int) stored by iceberg tblproperties('format-version'='2', 'write.merge.mode'='copy-on-write');
3+
create table source(a int, b int, c int) stored by iceberg tblproperties('format-version'='2', 'write.merge.mode'='copy-on-write');
4+
5+
-- empty plans expected: a matched row can never have a NULL joining column, so `t.a is null` is always false in a matched clause
6+
explain cbo
7+
merge into target as t using source as s on t.a = s.a and t.b = s.b
8+
when matched and t.a is null then delete;
9+
10+
explain cbo
11+
merge into target as t using source as s on t.a = s.a and t.b = s.b
12+
when matched and t.a is null then update set b = t.b + 10;
13+
--------------------------------------------------------------------
14+
15+
-- non empty plans for these queries
16+
explain cbo
17+
merge into target as t using source as s on t.a = s.a and t.b = s.b
18+
when not matched and t.a is null then insert values (1, 2, 3);
19+
20+
explain cbo
21+
merge into target as t using source as s on t.a = s.a and t.b = s.b
22+
when matched and t.a is null then delete
23+
when matched then update set b = t.b + 10
24+
when not matched then insert values (1, 2, 3);

iceberg/iceberg-handler/src/test/results/negative/merge_with_null_check_on_joining_col.q.out

Lines changed: 0 additions & 17 deletions
This file was deleted.
Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
PREHOOK: query: create table target(a int, b int, c int) stored by iceberg tblproperties('format-version'='2', 'write.merge.mode'='copy-on-write')
2+
PREHOOK: type: CREATETABLE
3+
PREHOOK: Output: database:default
4+
PREHOOK: Output: default@target
5+
POSTHOOK: query: create table target(a int, b int, c int) stored by iceberg tblproperties('format-version'='2', 'write.merge.mode'='copy-on-write')
6+
POSTHOOK: type: CREATETABLE
7+
POSTHOOK: Output: database:default
8+
POSTHOOK: Output: default@target
9+
PREHOOK: query: create table source(a int, b int, c int) stored by iceberg tblproperties('format-version'='2', 'write.merge.mode'='copy-on-write')
10+
PREHOOK: type: CREATETABLE
11+
PREHOOK: Output: database:default
12+
PREHOOK: Output: default@source
13+
POSTHOOK: query: create table source(a int, b int, c int) stored by iceberg tblproperties('format-version'='2', 'write.merge.mode'='copy-on-write')
14+
POSTHOOK: type: CREATETABLE
15+
POSTHOOK: Output: database:default
16+
POSTHOOK: Output: default@source
17+
PREHOOK: query: explain cbo
18+
merge into target as t using source as s on t.a = s.a and t.b = s.b
19+
when matched and t.a is null then delete
20+
PREHOOK: type: QUERY
21+
PREHOOK: Input: _dummy_database@_dummy_table
22+
PREHOOK: Input: default@source
23+
PREHOOK: Input: default@target
24+
PREHOOK: Output: default@target
25+
POSTHOOK: query: explain cbo
26+
merge into target as t using source as s on t.a = s.a and t.b = s.b
27+
when matched and t.a is null then delete
28+
POSTHOOK: type: QUERY
29+
POSTHOOK: Input: _dummy_database@_dummy_table
30+
POSTHOOK: Input: default@source
31+
POSTHOOK: Input: default@target
32+
POSTHOOK: Output: default@target
33+
CBO PLAN:
34+
HiveValues(tuples=[[]])
35+
36+
PREHOOK: query: explain cbo
37+
merge into target as t using source as s on t.a = s.a and t.b = s.b
38+
when matched and t.a is null then update set b = t.b + 10
39+
PREHOOK: type: QUERY
40+
PREHOOK: Input: _dummy_database@_dummy_table
41+
PREHOOK: Input: default@source
42+
PREHOOK: Input: default@target
43+
PREHOOK: Output: default@target
44+
POSTHOOK: query: explain cbo
45+
merge into target as t using source as s on t.a = s.a and t.b = s.b
46+
when matched and t.a is null then update set b = t.b + 10
47+
POSTHOOK: type: QUERY
48+
POSTHOOK: Input: _dummy_database@_dummy_table
49+
POSTHOOK: Input: default@source
50+
POSTHOOK: Input: default@target
51+
POSTHOOK: Output: default@target
52+
CBO PLAN:
53+
HiveValues(tuples=[[]])
54+
55+
PREHOOK: query: -- non empty plans for these queries
56+
explain cbo
57+
merge into target as t using source as s on t.a = s.a and t.b = s.b
58+
when not matched and t.a is null then insert values (1, 2, 3)
59+
PREHOOK: type: QUERY
60+
PREHOOK: Input: default@source
61+
PREHOOK: Input: default@target
62+
PREHOOK: Output: default@target
63+
POSTHOOK: query: -- non empty plans for these queries
64+
explain cbo
65+
merge into target as t using source as s on t.a = s.a and t.b = s.b
66+
when not matched and t.a is null then insert values (1, 2, 3)
67+
POSTHOOK: type: QUERY
68+
POSTHOOK: Input: default@source
69+
POSTHOOK: Input: default@target
70+
POSTHOOK: Output: default@target
71+
CBO PLAN:
72+
HiveProject(t__partition__spec__id=[$0], t__partition__hash=[$1], t__file__path=[$2], t__row__position=[$3], t__partition__projection=[$4], _o__c5=[1], _o__c6=[2], _o__c7=[3])
73+
HiveFilter(condition=[AND(IS NULL($5), IS NULL($6))])
74+
HiveJoin(condition=[AND(=($5, $7), =($6, $8))], joinType=[full], algorithm=[none], cost=[not available])
75+
HiveProject(t__partition__spec__id=[$7], t__partition__hash=[$8], t__file__path=[$9], t__row__position=[$10], t__partition__projection=[$11], t__a=[$0], t__b=[$1])
76+
HiveTableScan(table=[[default, target]], table:alias=[target])
77+
HiveProject(a=[$0], b=[$1])
78+
HiveTableScan(table=[[default, source]], table:alias=[s])
79+
80+
PREHOOK: query: explain cbo
81+
merge into target as t using source as s on t.a = s.a and t.b = s.b
82+
when matched and t.a is null then delete
83+
when matched then update set b = t.b + 10
84+
when not matched then insert values (1, 2, 3)
85+
PREHOOK: type: QUERY
86+
PREHOOK: Input: default@source
87+
PREHOOK: Input: default@target
88+
PREHOOK: Output: default@target
89+
POSTHOOK: query: explain cbo
90+
merge into target as t using source as s on t.a = s.a and t.b = s.b
91+
when matched and t.a is null then delete
92+
when matched then update set b = t.b + 10
93+
when not matched then insert values (1, 2, 3)
94+
POSTHOOK: type: QUERY
95+
POSTHOOK: Input: default@source
96+
POSTHOOK: Input: default@target
97+
POSTHOOK: Output: default@target
98+
CBO PLAN:
99+
HiveUnion(all=[true])
100+
HiveProject(t__partition__spec__id=[$0], t__partition__hash=[$1], t__file__path=[$2], t__row__position=[$3], t__partition__projection=[$4], t__a=[$5], t__b=[+($6, 10)], t__c=[$7])
101+
HiveJoin(condition=[AND(=($5, $8), =($6, $9))], joinType=[inner], algorithm=[none], cost=[not available])
102+
HiveProject(t__partition__spec__id=[$7], t__partition__hash=[$8], t__file__path=[$9], t__row__position=[$10], t__partition__projection=[$11], t__a=[$0], t__b=[$1], t__c=[$2])
103+
HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))])
104+
HiveTableScan(table=[[default, target]], table:alias=[target])
105+
HiveProject(a=[$0], b=[$1])
106+
HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))])
107+
HiveTableScan(table=[[default, source]], table:alias=[s])
108+
HiveProject(t__partition__spec__id=[$0], t__partition__hash=[$1], t__file__path=[$2], t__row__position=[$3], t__partition__projection=[$4], _o__c5=[1], _o__c6=[2], _o__c7=[3])
109+
HiveFilter(condition=[AND(IS NULL($5), IS NULL($6))])
110+
HiveJoin(condition=[AND(=($5, $7), =($6, $8))], joinType=[full], algorithm=[none], cost=[not available])
111+
HiveProject(t__partition__spec__id=[$7], t__partition__hash=[$8], t__file__path=[$9], t__row__position=[$10], t__partition__projection=[$11], t__a=[$0], t__b=[$1])
112+
HiveTableScan(table=[[default, target]], table:alias=[target])
113+
HiveProject(a=[$0], b=[$1])
114+
HiveTableScan(table=[[default, source]], table:alias=[s])
115+
HiveProject(t__partition__spec__id=[$0], t__partition__hash=[$1], t__file__path=[$2], t__row__position=[$3], t__partition__projection=[$4], t__a=[$5], t__b=[$6], t__c=[$7])
116+
HiveSemiJoin(condition=[=($2, $10)], joinType=[semi])
117+
HiveProject(t__partition__spec__id=[$0], t__partition__hash=[$1], t__file__path=[$2], t__row__position=[$3], t__partition__projection=[$4], t__a=[$5], t__b=[$6], t__c=[$7], a=[$8], b=[$9])
118+
HiveFilter(condition=[OR(IS NULL(OR(AND(=($5, $8), =($6, $9)), AND(IS NULL($5), IS NULL($6)))), AND(OR(<>($5, $8), <>($6, $9)), OR(IS NOT NULL($5), IS NOT NULL($6))))])
119+
HiveJoin(condition=[AND(=($5, $8), =($6, $9))], joinType=[left], algorithm=[none], cost=[not available])
120+
HiveProject(t__partition__spec__id=[$7], t__partition__hash=[$8], t__file__path=[$9], t__row__position=[$10], t__partition__projection=[$11], t__a=[$0], t__b=[$1], t__c=[$2])
121+
HiveFilter(condition=[IS NOT NULL($9)])
122+
HiveTableScan(table=[[default, target]], table:alias=[target])
123+
HiveProject(a=[$0], b=[$1])
124+
HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))])
125+
HiveTableScan(table=[[default, source]], table:alias=[s])
126+
HiveProject(t__file__path=[$0])
127+
HiveFilter(condition=[=($1, 1)])
128+
HiveProject(t__file__path=[$0], row_number_window_0=[row_number() OVER (PARTITION BY $0 ORDER BY $0 NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)])
129+
HiveJoin(condition=[AND(=($1, $3), =($2, $4))], joinType=[inner], algorithm=[none], cost=[not available])
130+
HiveProject(t__file__path=[$9], t__a=[$0], t__b=[$1])
131+
HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1), IS NOT NULL($9))])
132+
HiveTableScan(table=[[default, target]], table:alias=[target])
133+
HiveProject(a=[$0], b=[$1])
134+
HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))])
135+
HiveTableScan(table=[[default, source]], table:alias=[s])
136+
HiveProject(t__partition__spec__id=[$0], t__partition__hash=[$1], t__file__path=[$2], _o__c3=[-1:BIGINT], t__partition__projection=[$3], t__a=[$4], t__b=[$5], t__c=[$6])
137+
HiveFilter(condition=[=($7, 1)])
138+
HiveProject(t__partition__spec__id=[$0], t__partition__hash=[$1], t__file__path=[$2], t__partition__projection=[$3], t__a=[$4], t__b=[$5], t__c=[$6], row_number_window_0=[row_number() OVER (PARTITION BY $2 ORDER BY $2 NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)])
139+
HiveJoin(condition=[AND(=($4, $7), =($5, $8))], joinType=[inner], algorithm=[none], cost=[not available])
140+
HiveProject(t__partition__spec__id=[$7], t__partition__hash=[$8], t__file__path=[$9], t__partition__projection=[$11], t__a=[$0], t__b=[$1], t__c=[$2])
141+
HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))])
142+
HiveTableScan(table=[[default, target]], table:alias=[target])
143+
HiveProject(a=[$0], b=[$1])
144+
HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))])
145+
HiveTableScan(table=[[default, source]], table:alias=[s])
146+

ql/src/java/org/apache/hadoop/hive/ql/Context.java

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -362,7 +362,13 @@ private DestClausePrefix getMergeDestClausePrefix(ASTNode curNode) {
362 362
ASTNode query = (ASTNode) insert.getParent();
363 363
assert query != null && query.getType() == HiveParser.TOK_QUERY;
364 364
ASTNode from = (ASTNode) query.getFirstChildWithType(HiveParser.TOK_FROM);
365-
assert from != null : "Couldn't find a child of type FROM in the AST";
365+
366+
if (from == null) {
367+
// We are here when TOK_FROM is missing from the AST.
368+
// This can happen for merge queries with a predicate like `<joining_column> is null`
369+
// in the matched clause.
370+
return DestClausePrefix.MERGE;
371+
}
366 372

367 373
int tokFromIdx = from.getChildIndex();
368 374
for (int childIdx = tokFromIdx + 1; childIdx < query.getChildCount(); childIdx++) {

0 commit comments

Comments
 (0)