Skip to content

Commit 2fcb4b7

Browse files
committed
GH-48254: [Python][Parquet] Support extension types in read_schema
1 parent 8ee7aeb commit 2fcb4b7

1 file changed

Lines changed: 18 additions & 7 deletions

File tree

python/pyarrow/parquet/core.py

Lines changed: 18 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -262,9 +262,9 @@ class ParquetFile:
262262
page_checksum_verification : bool, default False
263263
If True, verify the checksum for each page read from the file.
264264
arrow_extensions_enabled : bool, default True
265-
If True, read Parquet logical types as Arrow extension types where possible,
266-
(e.g., read JSON as the canonical `arrow.json` extension type or UUID as
267-
the canonical `arrow.uuid` extension type).
265+
If True, read Parquet logical types as Arrow extension types where
266+
possible (e.g., read JSON as the canonical `arrow.json` extension type
267+
or UUID as the canonical `arrow.uuid` extension type).
268268
269269
Examples
270270
--------
@@ -2347,6 +2347,10 @@ def read_metadata(where, memory_map=False, decryption_properties=None,
23472347
If nothing passed, will be inferred based on path.
23482348
Path will try to be found in the local on-disk filesystem otherwise
23492349
it will be parsed as a URI to determine the filesystem.
2350+
arrow_extensions_enabled : bool, default True
2351+
If True, read Parquet logical types as Arrow extension types where
2352+
possible (e.g., UUID as the canonical `arrow.uuid` extension type).
2353+
If False, use the underlying storage types instead.
23502354
23512355
Returns
23522356
-------
@@ -2382,7 +2386,7 @@ def read_metadata(where, memory_map=False, decryption_properties=None,
23822386

23832387

23842388
def read_schema(where, memory_map=False, decryption_properties=None,
2385-
filesystem=None):
2389+
filesystem=None, arrow_extensions_enabled=True):
23862390
"""
23872391
Read effective Arrow schema from Parquet file metadata.
23882392
@@ -2397,6 +2401,9 @@ def read_schema(where, memory_map=False, decryption_properties=None,
23972401
If nothing passed, will be inferred based on path.
23982402
Path will try to be found in the local on-disk filesystem otherwise
23992403
it will be parsed as a URI to determine the filesystem.
2404+
arrow_extensions_enabled : bool, default True
2405+
If True, read Parquet logical types as Arrow extension types where
2406+
possible (e.g., UUID as the canonical `arrow.uuid` extension type).
24002407
24012408
Returns
24022409
-------
@@ -2422,11 +2429,15 @@ def read_schema(where, memory_map=False, decryption_properties=None,
24222429

24232430
with file_ctx:
24242431
file = ParquetFile(
2425-
where, memory_map=memory_map,
2426-
decryption_properties=decryption_properties)
2432+
where,
2433+
memory_map=memory_map,
2434+
decryption_properties=decryption_properties,
2435+
arrow_extensions_enabled=arrow_extensions_enabled,
2436+
)
2437+
if arrow_extensions_enabled:
2438+
return file.schema_arrow
24272439
return file.schema.to_arrow_schema()
24282440

2429-
24302441
__all__ = (
24312442
"ColumnChunkMetaData",
24322443
"ColumnSchema",

0 commit comments

Comments
 (0)