Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions python/sedonadb/python/sedonadb/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -518,7 +518,10 @@ def _qualified_type_name(obj):

# Lookup table from a dotted, module-qualified type name (a string) to the
# scan helper used for objects of that type. Every entry uses
# _scan_collected_default except the geopandas GeoDataFrame, which uses
# _scan_geopandas.
# NOTE(review): presumably keyed by the result of _qualified_type_name(obj);
# confirm against the lookup site, which is not visible here.
SPECIAL_CASED_SCANS = {
"pyarrow.lib.Table": _scan_collected_default,
# pandas < 3.0: qualified name used for the DataFrame class
"pandas.core.frame.DataFrame": _scan_collected_default,
# pandas >= 3.0: qualified name used for the same DataFrame class; both
# spellings are listed so either pandas version maps to the same helper
"pandas.DataFrame": _scan_collected_default,
"geopandas.geodataframe.GeoDataFrame": _scan_geopandas,
"polars.dataframe.frame.DataFrame": _scan_collected_default,
}
9 changes: 7 additions & 2 deletions python/sedonadb/tests/test_datasource.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,13 @@ def test_read_ogr_multi_file(con):

with tempfile.TemporaryDirectory() as td:
# Create partitioned files by writing Parquet first and translating
# one file at a time
con.create_data_frame(gdf).to_parquet(td, partition_by="partition")
# one file at a time. We need to cast partition in pandas>=3.0 because
# the default translation of a string column is LargeUtf8 and this is not
# currently supported by DataFusion partition_by.
con.create_data_frame(gdf).to_view("tmp_gdf", overwrite=True)
con.sql(
"""SELECT idx, partition::VARCHAR AS partition, wkb_geometry FROM tmp_gdf"""
).to_parquet(td, partition_by="partition")
for parquet_path in Path(td).rglob("*.parquet"):
fgb_path = str(parquet_path).replace(".parquet", ".fgb")
con.read_parquet(parquet_path).to_pandas().to_file(fgb_path)
Expand Down