diff --git a/python/sedonadb/python/sedonadb/dataframe.py b/python/sedonadb/python/sedonadb/dataframe.py
index f1b641b97..429800637 100644
--- a/python/sedonadb/python/sedonadb/dataframe.py
+++ b/python/sedonadb/python/sedonadb/dataframe.py
@@ -518,7 +518,10 @@ def _qualified_type_name(obj):
 
 SPECIAL_CASED_SCANS = {
     "pyarrow.lib.Table": _scan_collected_default,
+    # pandas < 3.0
     "pandas.core.frame.DataFrame": _scan_collected_default,
+    # pandas >= 3.0
+    "pandas.DataFrame": _scan_collected_default,
     "geopandas.geodataframe.GeoDataFrame": _scan_geopandas,
     "polars.dataframe.frame.DataFrame": _scan_collected_default,
 }
diff --git a/python/sedonadb/tests/test_datasource.py b/python/sedonadb/tests/test_datasource.py
index 64e5b0b81..59578949e 100644
--- a/python/sedonadb/tests/test_datasource.py
+++ b/python/sedonadb/tests/test_datasource.py
@@ -74,8 +74,13 @@ def test_read_ogr_multi_file(con):
 
     with tempfile.TemporaryDirectory() as td:
         # Create partitioned files by writing Parquet first and translating
-        # one file at a time
-        con.create_data_frame(gdf).to_parquet(td, partition_by="partition")
+        # one file at a time. We need to cast partition in pandas>=3.0 because
+        # the default translation of a string column is LargeUtf8 and this is not
+        # currently supported by DataFusion partition_by.
+        con.create_data_frame(gdf).to_view("tmp_gdf", overwrite=True)
+        con.sql(
+            """SELECT idx, partition::VARCHAR AS partition, wkb_geometry FROM tmp_gdf"""
+        ).to_parquet(td, partition_by="partition")
         for parquet_path in Path(td).rglob("*.parquet"):
             fgb_path = str(parquet_path).replace(".parquet", ".fgb")
             con.read_parquet(parquet_path).to_pandas().to_file(fgb_path)