Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,12 @@
* <li>Since version: 3.3.0</li>
* </ul>
* </li>
* <li>Name: <code>BOOLEAN_EXPRESSION</code>
* <ul>
* <li>A simple wrapper for any expression that returns boolean type.</li>
* <li>Since version: 4.1.0</li>
* </ul>
* </li>
* </ol>
*
* @since 3.3.0
Expand All @@ -145,5 +151,8 @@ public class Predicate extends GeneralScalarExpression {

public Predicate(String name, Expression[] children) {
super(name, children);
if ("BOOLEAN_EXPRESSION".equals(name)) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we should define "BOOLEAN_EXPRESSION" as a final constant.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

All other function names are hardcoded, and we just match the string literal in the code.

assert children.length == 1;
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,8 @@ public String build(Expression expr) {
case "CONTAINS" -> visitContains(build(e.children()[0]), build(e.children()[1]));
case "=", "<>", "<=>", "<", "<=", ">", ">=" ->
visitBinaryComparison(name, e.children()[0], e.children()[1]);
case "BOOLEAN_EXPRESSION" ->
build(expr.children()[0]);
case "+", "*", "/", "%", "&", "|", "^" ->
visitBinaryArithmetic(name, inputToSQL(e.children()[0]), inputToSQL(e.children()[1]));
case "-" -> {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,7 @@ object V2ExpressionUtils extends SQLConfHelper with Logging {
case "ENDS_WITH" => convertBinaryExpr(expr, EndsWith)
case "CONTAINS" => convertBinaryExpr(expr, Contains)
case "IN" => convertExpr(expr, children => In(children.head, children.tail))
case "BOOLEAN_EXPRESSION" => toCatalyst(expr.children().head)
case _ => None
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,21 @@ class V2ExpressionBuilder(e: Expression, isPredicate: Boolean = false) extends L
def build(): Option[V2Expression] = generateExpression(e, isPredicate)

def buildPredicate(): Option[V2Predicate] = {

if (isPredicate) {
val translated = build()
val translated0 = build()
val conf = SQLConf.get
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Holding the SQLConf in a local variable means that real-time changes to SQLConf will not be picked up.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is within a single method, so I don't think we want to be that dynamic. And in practice this should be run within the same session, so it won't change.

val alwaysCreateV2Predicate = conf.getConf(SQLConf.DATA_SOURCE_ALWAYS_CREATE_V2_PREDICATE)
val translated = if (alwaysCreateV2Predicate && e.dataType == BooleanType) {
translated0.map {
case p: V2Predicate => p
case other => new V2Predicate("BOOLEAN_EXPRESSION", Array(other))
}
} else {
translated0
}

val modifiedExprOpt = if (
SQLConf.get.getConf(SQLConf.DATA_SOURCE_DONT_ASSERT_ON_PREDICATE)
conf.getConf(SQLConf.DATA_SOURCE_DONT_ASSERT_ON_PREDICATE)
&& translated.isDefined
&& !translated.get.isInstanceOf[V2Predicate]) {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1695,9 +1695,19 @@ object SQLConf {
buildConf("spark.sql.dataSource.skipAssertOnPredicatePushdown")
.internal()
.doc("Enable skipping assert when expression in not translated to predicate.")
.version("4.0.0")
.booleanConf
.createWithDefault(!Utils.isTesting)

// Internal boolean conf, introduced in 4.1.0 and enabled by default. Per its doc text,
// when it is true the v2 push-down framework wraps any boolean-typed expression in a
// v2 Predicate so that the expression can be pushed down.
val DATA_SOURCE_ALWAYS_CREATE_V2_PREDICATE =
buildConf("spark.sql.dataSource.alwaysCreateV2Predicate")
.internal()
.doc("When true, the v2 push-down framework always wraps the expression that returns " +
"boolean type with a v2 Predicate so that it can be pushed down.")
.version("4.1.0")
.booleanConf
.createWithDefault(true)

// This is used to set the default data source
val DEFAULT_DATA_SOURCE_NAME = buildConf("spark.sql.sources.default")
.doc("The default data source to use in input/output.")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,38 +18,71 @@
package org.apache.spark.sql.connector

import org.apache.spark.sql.QueryTest
import org.apache.spark.sql.catalyst.expressions.Literal
import org.apache.spark.sql.catalyst.expressions.{Cast, Literal}
import org.apache.spark.sql.connector.expressions.filter.{AlwaysTrue, Predicate => V2Predicate}
import org.apache.spark.sql.execution.datasources.v2.PushablePredicate
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.test.SharedSparkSession
import org.apache.spark.sql.types.BooleanType

class PushablePredicateSuite extends QueryTest with SharedSparkSession {

test("PushablePredicate None returned - flag on") {
withSQLConf(SQLConf.DATA_SOURCE_DONT_ASSERT_ON_PREDICATE.key -> "true") {
val pushable = PushablePredicate.unapply(Literal.create("string"))
assert(!pushable.isDefined)
}
}

test("PushablePredicate success - flag on") {
withSQLConf(SQLConf.DATA_SOURCE_DONT_ASSERT_ON_PREDICATE.key -> "true") {
val pushable = PushablePredicate.unapply(Literal.create(true))
assert(pushable.isDefined)
// Runs all four combinations of the two push-down flags and checks that a literal `true`
// always yields a defined result that is an AlwaysTrue instance, regardless of the flags.
test("simple boolean expression should always return v2 Predicate") {
Seq(true, false).foreach { createV2Predicate =>
Seq(true, false).foreach { noAssert =>
withSQLConf(
SQLConf.DATA_SOURCE_ALWAYS_CREATE_V2_PREDICATE.key -> createV2Predicate.toString,
SQLConf.DATA_SOURCE_DONT_ASSERT_ON_PREDICATE.key -> noAssert.toString) {
val pushable = PushablePredicate.unapply(Literal.create(true))
assert(pushable.isDefined)
assert(pushable.get.isInstanceOf[AlwaysTrue])
}
}
}
}

test("PushablePredicate success") {
withSQLConf(SQLConf.DATA_SOURCE_DONT_ASSERT_ON_PREDICATE.key -> "false") {
val pushable = PushablePredicate.unapply(Literal.create(true))
assert(pushable.isDefined)
// Runs all four combinations of the two push-down flags against a string literal.
// The expected outcome depends only on `noAssert`; `createV2Predicate` is varied but
// does not change the assertion that is made.
test("non-boolean expression") {
Seq(true, false).foreach { createV2Predicate =>
Seq(true, false).foreach { noAssert =>
withSQLConf(
SQLConf.DATA_SOURCE_ALWAYS_CREATE_V2_PREDICATE.key -> createV2Predicate.toString,
SQLConf.DATA_SOURCE_DONT_ASSERT_ON_PREDICATE.key -> noAssert.toString) {
val catalystExpr = Literal.create("string")
if (noAssert) {
// With the assert skipped, the extractor is expected to return no predicate.
val pushable = PushablePredicate.unapply(catalystExpr)
assert(pushable.isEmpty)
} else {
// With the assert active, the extractor is expected to throw an AssertionError.
intercept[java.lang.AssertionError] {
PushablePredicate.unapply(catalystExpr)
}
}
}
}
}
}

test("PushablePredicate throws") {
withSQLConf(SQLConf.DATA_SOURCE_DONT_ASSERT_ON_PREDICATE.key -> "false") {
intercept[java.lang.AssertionError] {
PushablePredicate.unapply(Literal.create("string"))
test("non-trivial boolean expression") {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems like it fails when ANSI is off:

2025-07-01T01:53:41.0866397Z [info] - non-trivial boolean expression *** FAILED *** (21 milliseconds)
2025-07-01T01:53:41.0867934Z [info]   pushable.isDefined was false (PushablePredicateSuite.scala:73)
2025-07-01T01:53:41.0869296Z [info]   org.scalatest.exceptions.TestFailedException:
2025-07-01T01:53:41.0896382Z [info]   at org.scalatest.Assertions.newAssertionFailedException(Assertions.scala:472)
2025-07-01T01:53:41.0927708Z [info]   at org.scalatest.Assertions.newAssertionFailedException$(Assertions.scala:471)
2025-07-01T01:53:41.0929141Z [info]   at org.scalatest.Assertions$.newAssertionFailedException(Assertions.scala:1231)
2025-07-01T01:53:41.0930649Z [info]   at org.scalatest.Assertions$AssertionsHelper.macroAssert(Assertions.scala:1295)
2025-07-01T01:53:41.0932361Z [info]   at org.apache.spark.sql.connector.PushablePredicateSuite.$anonfun$new$13(PushablePredicateSuite.scala:73)
2025-07-01T01:53:41.0934090Z [info]   at org.apache.spark.sql.catalyst.SQLConfHelper.withSQLConf(SQLConfHelper.scala:56)
2025-07-01T01:53:41.0935881Z [info]   at org.apache.spark.sql.catalyst.SQLConfHelper.withSQLConf$(SQLConfHelper.scala:38)
2025-07-01T01:53:41.0938224Z [info]   at org.apache.spark.sql.connector.PushablePredicateSuite.org$apache$spark$sql$test$SQLTestUtilsBase$$super$withSQLConf(PushablePredicateSuite.scala:28)
2025-07-01T01:53:41.0940417Z [info]   at org.apache.spark.sql.test.SQLTestUtilsBase.withSQLConf(SQLTestUtils.scala:253)
2025-07-01T01:53:41.0941802Z [info]   at org.apache.spark.sql.test.SQLTestUtilsBase.withSQLConf$(SQLTestUtils.scala:251)
2025-07-01T01:53:41.0943319Z [info]   at org.apache.spark.sql.connector.PushablePredicateSuite.withSQLConf(PushablePredicateSuite.scala:28)
2025-07-01T01:53:41.0995346Z [info]   at org.apache.spark.sql.connector.PushablePredicateSuite.$anonfun$new$12(PushablePredicateSuite.scala:69)
2025-07-01T01:53:41.0997347Z [info]   at org.apache.spark.sql.connector.PushablePredicateSuite.$anonfun$new$12$adapted(PushablePredicateSuite.scala:66)
2025-07-01T01:53:41.0998896Z [info]   at scala.collection.immutable.List.foreach(List.scala:334)
2025-07-01T01:53:41.1000441Z [info]   at org.apache.spark.sql.connector.PushablePredicateSuite.$anonfun$new$11(PushablePredicateSuite.scala:66)
2025-07-01T01:53:41.1002318Z [info]   at org.apache.spark.sql.connector.PushablePredicateSuite.$anonfun$new$11$adapted(PushablePredicateSuite.scala:65)
2025-07-01T01:53:41.1003808Z [info]   at scala.collection.immutable.List.foreach(List.scala:334)
2025-07-01T01:53:41.1005547Z [info]   at org.apache.spark.sql.connector.PushablePredicateSuite.$anonfun$new$10(PushablePredicateSuite.scala:65)
2025-07-01T01:53:41.1007134Z [info]   at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.scala:18)
2025-07-01T01:53:41.1008445Z [info]   at org.scalatest.enablers.Timed$$anon$1.timeoutAfter(Timed.scala:127)
2025-07-01T01:53:41.1009782Z [info]   at org.scalatest.concurrent.TimeLimits$.failAfterImpl(TimeLimits.scala:282)
2025-07-01T01:53:41.1011135Z [info]   at org.scalatest.concurrent.TimeLimits.failAfter(TimeLimits.scala:231)
2025-07-01T01:53:41.1012467Z [info]   at org.scalatest.concurrent.TimeLimits.failAfter$(TimeLimits.scala:230)
2025-07-01T01:53:41.1013779Z [info]   at org.apache.spark.SparkFunSuite.failAfter(SparkFunSuite.scala:69)
2025-07-01T01:53:41.1015275Z [info]   at org.apache.spark.SparkFunSuite.$anonfun$test$2(SparkFunSuite.scala:155)
2025-07-01T01:53:41.1016475Z [info]   at org.scalatest.OutcomeOf.outcomeOf(OutcomeOf.scala:85)
2025-07-01T01:53:41.1017584Z [info]   at org.scalatest.OutcomeOf.outcomeOf$(OutcomeOf.scala:83)
2025-07-01T01:53:41.1018705Z [info]   at org.scalatest.OutcomeOf$.outcomeOf(OutcomeOf.scala:104)
2025-07-01T01:53:41.1019812Z [info]   at org.scalatest.Transformer.apply(Transformer.scala:22)
2025-07-01T01:53:41.1020878Z [info]   at org.scalatest.Transformer.apply(Transformer.scala:20)
2025-07-01T01:53:41.1022135Z [info]   at org.scalatest.funsuite.AnyFunSuiteLike$$anon$1.apply(AnyFunSuiteLike.scala:226)
2025-07-01T01:53:41.1023509Z [info]   at org.apache.spark.SparkFunSuite.withFixture(SparkFunSuite.scala:227)
2025-07-01T01:53:41.1025129Z [info]   at org.scalatest.funsuite.AnyFunSuiteLike.invokeWithFixture$1(AnyFunSuiteLike.scala:224)
2025-07-01T01:53:41.1040426Z [info]   at org.scalatest.funsuite.AnyFunSuiteLike.$anonfun$runTest$1(AnyFunSuiteLike.scala:236)
2025-07-01T01:53:41.1042058Z [info]   at org.scalatest.SuperEngine.runTestImpl(Engine.scala:306)
2025-07-01T01:53:41.1043439Z [info]   at org.scalatest.funsuite.AnyFunSuiteLike.runTest(AnyFunSuiteLike.scala:236)
2025-07-01T01:53:41.1045114Z [info]   at org.scalatest.funsuite.AnyFunSuiteLike.runTest$(AnyFunSuiteLike.scala:218)
2025-07-01T01:53:41.1046784Z [info]   at org.apache.spark.SparkFunSuite.org$scalatest$BeforeAndAfterEach$$super$runTest(SparkFunSuite.scala:69)
2025-07-01T01:53:41.1048366Z [info]   at org.scalatest.BeforeAndAfterEach.runTest(BeforeAndAfterEach.scala:234)
2025-07-01T01:53:41.1049792Z [info]   at org.scalatest.BeforeAndAfterEach.runTest$(BeforeAndAfterEach.scala:227)
2025-07-01T01:53:41.1051094Z [info]   at org.apache.spark.SparkFunSuite.runTest(SparkFunSuite.scala:69)
2025-07-01T01:53:41.1052533Z [info]   at org.scalatest.funsuite.AnyFunSuiteLike.$anonfun$runTests$1(AnyFunSuiteLike.scala:269)
2025-07-01T01:53:41.1053958Z [info]   at org.scalatest.SuperEngine.$anonfun$runTestsInBranch$1(Engine.scala:413)
2025-07-01T01:53:41.1055463Z [info]   at scala.collection.immutable.List.foreach(List.scala:334)
2025-07-01T01:53:41.1056718Z [info]   at org.scalatest.SuperEngine.traverseSubNodes$1(Engine.scala:401)
2025-07-01T01:53:41.1057981Z [info]   at org.scalatest.SuperEngine.runTestsInBranch(Engine.scala:396)
2025-07-01T01:53:41.1059186Z [info]   at org.scalatest.SuperEngine.runTestsImpl(Engine.scala:475)
2025-07-01T01:53:41.1060522Z [info]   at org.scalatest.funsuite.AnyFunSuiteLike.runTests(AnyFunSuiteLike.scala:269)
2025-07-01T01:53:41.1061986Z [info]   at org.scalatest.funsuite.AnyFunSuiteLike.runTests$(AnyFunSuiteLike.scala:268)
2025-07-01T01:53:41.1063360Z [info]   at org.scalatest.funsuite.AnyFunSuite.runTests(AnyFunSuite.scala:1564)
2025-07-01T01:53:41.1064491Z [info]   at org.scalatest.Suite.run(Suite.scala:1114)

https://github.com/apache/spark/actions/runs/15987607479/job/45094925023

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, let me force-enable ANSI for this suite.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay, it can only be pushed down when ANSI is on. Let me just enable ANSI for this test case:

```scala
    case Cast(child, dataType, _, evalMode)
        if evalMode == EvalMode.ANSI || Cast.canUpCast(child.dataType, dataType) =>
      generateExpression(child).map(v => new V2Cast(v, child.dataType, dataType))
```

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seq(true, false).foreach { createV2Predicate =>
Seq(true, false).foreach { noAssert =>
withSQLConf(
SQLConf.DATA_SOURCE_ALWAYS_CREATE_V2_PREDICATE.key -> createV2Predicate.toString,
SQLConf.DATA_SOURCE_DONT_ASSERT_ON_PREDICATE.key -> noAssert.toString) {
val catalystExpr = Cast(Literal.create("true"), BooleanType)
if (createV2Predicate) {
val pushable = PushablePredicate.unapply(catalystExpr)
assert(pushable.isDefined)
assert(pushable.get.isInstanceOf[V2Predicate])
} else {
if (noAssert) {
val pushable = PushablePredicate.unapply(catalystExpr)
assert(pushable.isEmpty)
} else {
intercept[java.lang.AssertionError] {
PushablePredicate.unapply(catalystExpr)
}
}
}
}
}
}
}
Expand Down