|
11 | 11 | from pyarrow.lib import ArrowInvalid |
12 | 12 | from upath import UPath |
13 | 13 |
|
14 | | -from ..series.dtype import NestedDtype |
| 14 | +from ..series.ext_array import NestedExtensionArray |
15 | 15 | from ..series.packer import pack_lists |
16 | | -from ..series.utils import table_to_struct_array |
| 16 | +from ..series.utils import is_pa_type_a_list, table_to_struct_array |
17 | 17 | from .core import NestedFrame |
18 | 18 |
|
19 | 19 | # Use smaller block size for these FSSPEC filesystems. |
@@ -148,7 +148,7 @@ def read_parquet( |
148 | 148 | # if any of the columns are not list type, reject the cast |
149 | 149 | # and remove the column from the list of nested structures if |
150 | 150 | # it was added |
151 | | - if not pa.types.is_list(table.schema[i].type): |
| 151 | + if not is_pa_type_a_list(table.schema[i].type): |
152 | 152 | reject_nesting.append(nested_col) |
153 | 153 | if nested_col in nested_structures: |
154 | 154 | # remove the column from the list of nested structures |
@@ -455,28 +455,25 @@ def _cast_struct_cols_to_nested(df, reject_nesting): |
455 | 455 | """cast struct columns to nested dtype""" |
456 | 456 | # Attempt to cast struct columns to NestedDTypes |
457 | 457 | for col, dtype in df.dtypes.items(): |
458 | | - # First validate the dtype |
459 | | - # will return valueerror when not a struct-list |
460 | | - valid_dtype = True |
| 458 | + if col in reject_nesting: |
| 459 | + continue |
| 460 | + |
| 461 | + if not NestedExtensionArray.is_input_pa_type_supported(dtype.pyarrow_dtype): |
| 462 | + continue |
| 463 | + |
461 | 464 | try: |
462 | | - NestedDtype._validate_dtype(dtype.pyarrow_dtype) |
463 | | - except ValueError: |
464 | | - valid_dtype = False |
465 | | - |
466 | | - if valid_dtype and col not in reject_nesting: |
467 | | - try: |
468 | | - # Attempt to cast Struct to NestedDType |
469 | | - df = df.astype({col: NestedDtype(dtype.pyarrow_dtype)}) |
470 | | - except ValueError as err: |
471 | | - # If cast fails, the struct likely does not fit nested-pandas |
472 | | - # criteria for a valid nested column |
473 | | - raise ValueError( |
474 | | - f"Column '{col}' is a Struct, but an attempt to cast it to a NestedDType failed. " |
475 | | - "This is likely due to the struct not meeting the requirements for a nested column " |
476 | | - "(all fields should be equal length). To proceed, you may add the column to the " |
477 | | - "`reject_nesting` argument of the read_parquet function to skip the cast attempt:" |
478 | | - f" read_parquet(..., reject_nesting=['{col}'])" |
479 | | - ) from err |
| 465 | + # Attempt to cast Struct to NestedDType |
| 466 | + df[col] = NestedExtensionArray(pa.array(df[col])) |
| 467 | + except ValueError as err: |
| 468 | + # If cast fails, the struct likely does not fit nested-pandas |
| 469 | + # criteria for a valid nested column |
| 470 | + raise ValueError( |
| 471 | + f"Column '{col}' is a Struct, but an attempt to cast it to a NestedDType failed. " |
| 472 | + "This is likely due to the struct not meeting the requirements for a nested column " |
| 473 | + "(all fields should be equal length). To proceed, you may add the column to the " |
| 474 | + "`reject_nesting` argument of the read_parquet function to skip the cast attempt:" |
| 475 | + f" read_parquet(..., reject_nesting=['{col}'])" |
| 476 | + ) from err |
480 | 477 | return df |
481 | 478 |
|
482 | 479 |
|
|
0 commit comments