Skip to content

Commit e6109a4

Browse files
committed
feat: expose DataFrame.parse_sql_expr to python
1 parent f5fdf59 commit e6109a4

File tree

3 files changed

+55
-0
lines changed

3 files changed

+55
-0
lines changed

python/datafusion/dataframe.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -482,6 +482,28 @@ def filter(self, *predicates: Expr) -> DataFrame:
482482
df = df.filter(ensure_expr(p))
483483
return DataFrame(df)
484484

485+
def parse_sql_expr(self, expr: str) -> Expr:
486+
"""Creates logical expression from a SQL query text.
487+
488+
The expression is created and processed against the current schema.
489+
490+
Example::
491+
492+
from datafusion import col, lit
493+
df.parse_sql_expr("a > 1")
494+
495+
should produce:
496+
497+
col("a") > lit(1)
498+
499+
Args:
500+
expr: Expression string to be converted to datafusion expression
501+
502+
Returns:
503+
Logical expression .
504+
"""
505+
return Expr(self.df.parse_sql_expr(expr))
506+
485507
def with_column(self, name: str, expr: Expr) -> DataFrame:
486508
"""Add an additional column to the DataFrame.
487509

python/tests/test_dataframe.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -274,6 +274,31 @@ def test_filter(df):
274274
assert result.column(2) == pa.array([5])
275275

276276

277+
def test_parse_sql_expr(df):
278+
df1 = df.filter(df.parse_sql_expr("a > 2")).select(
279+
column("a") + column("b"),
280+
column("a") - column("b"),
281+
)
282+
283+
# execute and collect the first (and only) batch
284+
result = df1.collect()[0]
285+
286+
assert result.column(0) == pa.array([9])
287+
assert result.column(1) == pa.array([-3])
288+
289+
df.show()
290+
# verify that if there is no filter applied, internal dataframe is unchanged
291+
df2 = df.filter()
292+
assert df.df == df2.df
293+
294+
df3 = df.filter(df.parse_sql_expr("a > 1"), df.parse_sql_expr("b != 6"))
295+
result = df3.collect()[0]
296+
297+
assert result.column(0) == pa.array([2])
298+
assert result.column(1) == pa.array([5])
299+
assert result.column(2) == pa.array([5])
300+
301+
277302
def test_show_empty(df, capsys):
278303
df_empty = df.filter(column("a") > literal(3))
279304
df_empty.show()

src/dataframe.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -454,6 +454,14 @@ impl PyDataFrame {
454454
Ok(Self::new(df))
455455
}
456456

457+
fn parse_sql_expr(&self, expr: PyBackedStr) -> PyDataFusionResult<PyExpr> {
458+
self.df
459+
.as_ref()
460+
.parse_sql_expr(&expr)
461+
.map(|e| PyExpr::from(e))
462+
.map_err(PyDataFusionError::from)
463+
}
464+
457465
fn with_column(&self, name: &str, expr: PyExpr) -> PyDataFusionResult<Self> {
458466
let df = self.df.as_ref().clone().with_column(name, expr.into())?;
459467
Ok(Self::new(df))

0 commit comments

Comments
 (0)