File tree Expand file tree Collapse file tree 3 files changed +55
-0
lines changed
Expand file tree Collapse file tree 3 files changed +55
-0
lines changed Original file line number Diff line number Diff line change @@ -482,6 +482,28 @@ def filter(self, *predicates: Expr) -> DataFrame:
482482 df = df .filter (ensure_expr (p ))
483483 return DataFrame (df )
484484
485+ def parse_sql_expr (self , expr : str ) -> Expr :
486+ """Creates logical expression from a SQL query text.
487+
488+ The expression is created and processed against the current schema.
489+
490+ Example::
491+
492+ from datafusion import col, lit
493+ df.parse_sql_expr("a > 1")
494+
495+ should produce:
496+
497+ col("a") > lit(1)
498+
499+ Args:
500+ expr: Expression string to be converted to datafusion expression
501+
502+ Returns:
503+ Logical expression .
504+ """
505+ return Expr (self .df .parse_sql_expr (expr ))
506+
485507 def with_column (self , name : str , expr : Expr ) -> DataFrame :
486508 """Add an additional column to the DataFrame.
487509
Original file line number Diff line number Diff line change @@ -274,6 +274,31 @@ def test_filter(df):
274274 assert result .column (2 ) == pa .array ([5 ])
275275
276276
277+ def test_parse_sql_expr (df ):
278+ df1 = df .filter (df .parse_sql_expr ("a > 2" )).select (
279+ column ("a" ) + column ("b" ),
280+ column ("a" ) - column ("b" ),
281+ )
282+
283+ # execute and collect the first (and only) batch
284+ result = df1 .collect ()[0 ]
285+
286+ assert result .column (0 ) == pa .array ([9 ])
287+ assert result .column (1 ) == pa .array ([- 3 ])
288+
289+ df .show ()
290+ # verify that if there is no filter applied, internal dataframe is unchanged
291+ df2 = df .filter ()
292+ assert df .df == df2 .df
293+
294+ df3 = df .filter (df .parse_sql_expr ("a > 1" ), df .parse_sql_expr ("b != 6" ))
295+ result = df3 .collect ()[0 ]
296+
297+ assert result .column (0 ) == pa .array ([2 ])
298+ assert result .column (1 ) == pa .array ([5 ])
299+ assert result .column (2 ) == pa .array ([5 ])
300+
301+
277302def test_show_empty (df , capsys ):
278303 df_empty = df .filter (column ("a" ) > literal (3 ))
279304 df_empty .show ()
Original file line number Diff line number Diff line change @@ -454,6 +454,14 @@ impl PyDataFrame {
454454 Ok ( Self :: new ( df) )
455455 }
456456
457+ fn parse_sql_expr ( & self , expr : PyBackedStr ) -> PyDataFusionResult < PyExpr > {
458+ self . df
459+ . as_ref ( )
460+ . parse_sql_expr ( & expr)
461+ . map ( |e| PyExpr :: from ( e) )
462+ . map_err ( PyDataFusionError :: from)
463+ }
464+
457465 fn with_column ( & self , name : & str , expr : PyExpr ) -> PyDataFusionResult < Self > {
458466 let df = self . df . as_ref ( ) . clone ( ) . with_column ( name, expr. into ( ) ) ?;
459467 Ok ( Self :: new ( df) )
You can’t perform that action at this time.
0 commit comments