-
Notifications
You must be signed in to change notification settings - Fork 76
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: support parsing of SQL queries with APPLY (#106)
* feat: support parsing of SQL queries with APPLY This change adds support for parsing SQL queries with APPLY (a join with a correlated subquery) and for building the OuterReferences map of correlated variables present in the query's join predicates. The OuterRefs will be used while constructing Substrait plans to bind correlated variables. The change also adds a few example queries that depend on the APPLY / LATERAL operators. This change still does not map the Calcite correlated join to Substrait, as the spec for APPLY is not yet approved. As such, while the parsing of Calcite query plans will succeed after this change, the unit tests and runtime conversion will continue to fail in the final step of building the Substrait plan. Additional changes are needed to support APPLY. Refs #substrait-io/substrait/issues/357 * fix: unit test cases to validate correlated vars This change addresses review comments; the unit tests now validate the outer reference map built from Calcite plans of APPLY queries. * fix: add test for nested APPLY This change addresses review comments. A new test case that validates parsing of nested APPLY joins is added. Validation of depth information is also added to the existing tests.
- Loading branch information
Showing
6 changed files
with
247 additions
and
21 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
173 changes: 173 additions & 0 deletions
173
isthmus/src/test/java/io/substrait/isthmus/ApplyJoinPlanTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,173 @@ | ||
package io.substrait.isthmus; | ||
|
||
import java.util.Map; | ||
import org.apache.calcite.adapter.tpcds.TpcdsSchema; | ||
import org.apache.calcite.rel.RelRoot; | ||
import org.apache.calcite.rex.RexFieldAccess; | ||
import org.apache.calcite.sql.parser.SqlParseException; | ||
import org.apache.calcite.sql.parser.SqlParser; | ||
import org.apache.calcite.sql.validate.SqlConformanceEnum; | ||
import org.junit.jupiter.api.Assertions; | ||
import org.junit.jupiter.api.Test; | ||
|
||
public class ApplyJoinPlanTest { | ||
|
||
private static RelRoot getCalcitePlan(SqlToSubstrait s, TpcdsSchema schema, String sql) | ||
throws SqlParseException { | ||
var pair = s.registerSchema("tpcds", schema); | ||
var converter = s.createSqlToRelConverter(pair.left, pair.right); | ||
SqlParser parser = SqlParser.create(sql, s.parserConfig); | ||
var root = s.getBestExpRelRoot(converter, parser.parseQuery()); | ||
return root; | ||
} | ||
|
||
private static void validateOuterRef( | ||
Map<RexFieldAccess, Integer> fieldAccessDepthMap, String refName, String colName, int depth) { | ||
var entry = | ||
fieldAccessDepthMap.entrySet().stream() | ||
.filter(f -> f.getKey().getReferenceExpr().toString().equals(refName)) | ||
.filter(f -> f.getKey().getField().getName().equals(colName)) | ||
.filter(f -> f.getValue() == depth) | ||
.findFirst(); | ||
Assertions.assertTrue(entry.isPresent()); | ||
} | ||
|
||
private static Map<RexFieldAccess, Integer> buildOuterFieldRefMap(RelRoot root) { | ||
final OuterReferenceResolver resolver = new OuterReferenceResolver(); | ||
var fieldAccessDepthMap = resolver.getFieldAccessDepthMap(); | ||
Assertions.assertEquals(0, fieldAccessDepthMap.size()); | ||
resolver.apply(root.rel); | ||
return fieldAccessDepthMap; | ||
} | ||
|
||
@Test | ||
public void lateralJoinQuery() throws SqlParseException { | ||
TpcdsSchema schema = new TpcdsSchema(1.0); | ||
String sql; | ||
sql = | ||
""" | ||
SELECT ss_sold_date_sk, ss_item_sk, ss_customer_sk | ||
FROM store_sales CROSS JOIN LATERAL | ||
(select i_item_sk from item where item.i_item_sk = store_sales.ss_item_sk)"""; | ||
|
||
/* the calcite plan for the above query is: | ||
LogicalProject(SS_SOLD_DATE_SK=[$0], SS_ITEM_SK=[$2], SS_CUSTOMER_SK=[$3]) | ||
LogicalCorrelate(correlation=[$cor0], joinType=[inner], requiredColumns=[{2}]) | ||
LogicalTableScan(table=[[tpcds, STORE_SALES]]) | ||
LogicalProject(I_ITEM_SK=[$0]) | ||
LogicalFilter(condition=[=($0, $cor0.SS_ITEM_SK)]) | ||
LogicalTableScan(table=[[tpcds, ITEM]]) | ||
*/ | ||
|
||
// validate outer reference map | ||
RelRoot root = getCalcitePlan(new SqlToSubstrait(), schema, sql); | ||
Map<RexFieldAccess, Integer> fieldAccessDepthMap = buildOuterFieldRefMap(root); | ||
Assertions.assertEquals(1, fieldAccessDepthMap.size()); | ||
validateOuterRef(fieldAccessDepthMap, "$cor0", "SS_ITEM_SK", 1); | ||
|
||
// TODO validate end to end conversion | ||
var sE2E = new SqlToSubstrait(); | ||
Assertions.assertThrows( | ||
UnsupportedOperationException.class, | ||
() -> sE2E.execute(sql, "tpcds", schema), | ||
"Lateral join is not supported"); | ||
} | ||
|
||
@Test | ||
public void outerApplyQuery() throws SqlParseException { | ||
TpcdsSchema schema = new TpcdsSchema(1.0); | ||
String sql; | ||
sql = | ||
""" | ||
SELECT ss_sold_date_sk, ss_item_sk, ss_customer_sk | ||
FROM store_sales OUTER APPLY | ||
(select i_item_sk from item where item.i_item_sk = store_sales.ss_item_sk)"""; | ||
|
||
FeatureBoard featureBoard = | ||
ImmutableFeatureBoard.builder() | ||
.sqlConformanceMode(SqlConformanceEnum.SQL_SERVER_2008) | ||
.build(); | ||
SqlToSubstrait s = new SqlToSubstrait(featureBoard); | ||
RelRoot root = getCalcitePlan(s, schema, sql); | ||
|
||
Map<RexFieldAccess, Integer> fieldAccessDepthMap = buildOuterFieldRefMap(root); | ||
Assertions.assertEquals(1, fieldAccessDepthMap.size()); | ||
validateOuterRef(fieldAccessDepthMap, "$cor0", "SS_ITEM_SK", 1); | ||
|
||
// TODO validate end to end conversion | ||
Assertions.assertThrows( | ||
UnsupportedOperationException.class, | ||
() -> s.execute(sql, "tpcds", schema), | ||
"APPLY is not supported"); | ||
} | ||
|
||
@Test | ||
public void nestedApplyJoinQuery() throws SqlParseException { | ||
TpcdsSchema schema = new TpcdsSchema(1.0); | ||
String sql; | ||
sql = | ||
""" | ||
SELECT ss_sold_date_sk, ss_item_sk, ss_customer_sk | ||
FROM store_sales CROSS APPLY | ||
( SELECT i_item_sk | ||
FROM item CROSS APPLY | ||
( SELECT p_promo_sk | ||
FROM promotion | ||
WHERE p_item_sk = i_item_sk AND p_item_sk = ss_item_sk ) | ||
WHERE item.i_item_sk = store_sales.ss_item_sk )"""; | ||
|
||
/* the calcite plan for the above query is: | ||
LogicalProject(SS_SOLD_DATE_SK=[$0], SS_ITEM_SK=[$2], SS_CUSTOMER_SK=[$3]) | ||
LogicalCorrelate(correlation=[$cor2], joinType=[inner], requiredColumns=[{2}]) | ||
LogicalTableScan(table=[[tpcds, STORE_SALES]]) | ||
LogicalProject(I_ITEM_SK=[$0]) | ||
LogicalFilter(condition=[=($0, $cor2.SS_ITEM_SK)]) | ||
LogicalCorrelate(correlation=[$cor0], joinType=[inner], requiredColumns=[{0}]) | ||
LogicalTableScan(table=[[tpcds, ITEM]]) | ||
LogicalProject(P_PROMO_SK=[$0]) | ||
LogicalFilter(condition=[AND(=($4, $cor0.I_ITEM_SK), =($4, $cor2.SS_ITEM_SK))]) | ||
LogicalTableScan(table=[[tpcds, PROMOTION]]) | ||
*/ | ||
FeatureBoard featureBoard = | ||
ImmutableFeatureBoard.builder() | ||
.sqlConformanceMode(SqlConformanceEnum.SQL_SERVER_2008) | ||
.build(); | ||
SqlToSubstrait s = new SqlToSubstrait(featureBoard); | ||
RelRoot root = getCalcitePlan(s, schema, sql); | ||
|
||
Map<RexFieldAccess, Integer> fieldAccessDepthMap = buildOuterFieldRefMap(root); | ||
Assertions.assertEquals(3, fieldAccessDepthMap.size()); | ||
validateOuterRef(fieldAccessDepthMap, "$cor2", "SS_ITEM_SK", 1); | ||
validateOuterRef(fieldAccessDepthMap, "$cor2", "SS_ITEM_SK", 2); | ||
validateOuterRef(fieldAccessDepthMap, "$cor0", "I_ITEM_SK", 1); | ||
|
||
// TODO validate end to end conversion | ||
Assertions.assertThrows( | ||
UnsupportedOperationException.class, | ||
() -> s.execute(sql, "tpcds", schema), | ||
"APPLY is not supported"); | ||
} | ||
|
||
@Test | ||
public void crossApplyQuery() throws SqlParseException { | ||
TpcdsSchema schema = new TpcdsSchema(1.0); | ||
String sql; | ||
sql = | ||
""" | ||
SELECT ss_sold_date_sk, ss_item_sk, ss_customer_sk | ||
FROM store_sales CROSS APPLY | ||
(select i_item_sk from item where item.i_item_sk = store_sales.ss_item_sk)"""; | ||
|
||
FeatureBoard featureBoard = | ||
ImmutableFeatureBoard.builder() | ||
.sqlConformanceMode(SqlConformanceEnum.SQL_SERVER_2008) | ||
.build(); | ||
SqlToSubstrait s = new SqlToSubstrait(featureBoard); | ||
|
||
// TODO validate end to end conversion | ||
Assertions.assertThrows( | ||
UnsupportedOperationException.class, | ||
() -> s.execute(sql, "tpcds", schema), | ||
"APPLY is not supported"); | ||
} | ||
} |