Skip to content

Commit

Permalink
use json_ type if available
Browse files — browse the repository at this point in the history
  • Loading branch information
tswast committed Mar 10, 2025
1 parent 0bd186a commit 7b225d7
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 2 deletions.
33 changes: 31 additions & 2 deletions google/cloud/bigquery/_pyarrow_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@
"""Shared helper functions for connecting BigQuery and pyarrow.
NOTE: This module is DEPRECATED. Please make updates in the pandas-gbq package,
instead. See: go/pandas-gbq-and-bigframes-redundancy and
instead. See: go/pandas-gbq-and-bigframes-redundancy,
https://github.com/googleapis/python-bigquery-pandas/blob/main/pandas_gbq/schema/bigquery_to_pyarrow.py
and
https://github.com/googleapis/python-bigquery-pandas/blob/main/pandas_gbq/schema/pyarrow_to_bigquery.py
"""

Expand All @@ -26,6 +28,14 @@
except ImportError:
pyarrow = None

try:
import db_dtypes # type: ignore

db_dtypes_import_exception = None
except ImportError as exc:
db_dtypes = None
db_dtypes_import_exception = exc


def pyarrow_datetime():
    """Return the pyarrow type used for timezone-naive datetime values.

    Microsecond precision with ``tz=None`` — presumably matching BigQuery's
    DATETIME scalar, which carries no timezone (TODO confirm against the
    type-mapping table below).
    """
    naive_timestamp_type = pyarrow.timestamp("us", tz=None)
    return naive_timestamp_type
Expand Down Expand Up @@ -53,6 +63,16 @@ def pyarrow_timestamp():
_ARROW_SCALAR_IDS_TO_BQ = {}

if pyarrow:
# Prefer JSON type built-in to pyarrow (adding in 19.0.0), if available.
# Otherwise, fallback to db-dtypes, where the JSONArrowType was added in 1.4.0,
# but since they might have an older db-dtypes, have string as a fallback for that.
if hasattr(pyarrow, "json_"):
json_arrow_type = pyarrow.json_(pyarrow.string())
elif hasattr(db_dtypes, "JSONArrowType"):
json_arrow_type = db_dtypes.JSONArrowType()
else:
json_arrow_type = pyarrow.string()

# This dictionary is duplicated in bigquery_storage/test/unite/test_reader.py
# When modifying it be sure to update it there as well.
# Note(todo!!): type "BIGNUMERIC"'s matching pyarrow type is added in _pandas_helpers.py
Expand All @@ -67,13 +87,14 @@ def pyarrow_timestamp():
"GEOGRAPHY": pyarrow.string,
"INT64": pyarrow.int64,
"INTEGER": pyarrow.int64,
"JSON": pyarrow.string,
"JSON": json_arrow_type,
"NUMERIC": pyarrow_numeric,
"STRING": pyarrow.string,
"TIME": pyarrow_time,
"TIMESTAMP": pyarrow_timestamp,
}

# DEPRECATED: update pandas_gbq.schema.pyarrow_to_bigquery, instead.
_ARROW_SCALAR_IDS_TO_BQ = {
# https://arrow.apache.org/docs/python/api/datatypes.html#type-classes
pyarrow.bool_().id: "BOOL",
Expand All @@ -98,6 +119,9 @@ def pyarrow_timestamp():
pyarrow.large_string().id: "STRING",
# The exact scale and precision don't matter, see below.
pyarrow.decimal128(38, scale=9).id: "NUMERIC",
# NOTE: all extension types (e.g. json_, uuid, db_dtypes.JSONArrowType)
# have the same id (31 as of version 19.0.1), so these should not be
# matched by id.
}

_BQ_TO_ARROW_SCALARS["BIGNUMERIC"] = pyarrow_bignumeric
Expand All @@ -108,6 +132,9 @@ def pyarrow_timestamp():

def bq_to_arrow_scalars(bq_scalar: str):
"""
DEPRECATED: update pandas_gbq.schema.bigquery_to_pyarrow, instead, which is
to be added in https://github.com/googleapis/python-bigquery-pandas/pull/893.
Returns:
The Arrow scalar type that the input BigQuery scalar type maps to.
If it cannot find the BigQuery scalar, return None.
Expand All @@ -117,6 +144,8 @@ def bq_to_arrow_scalars(bq_scalar: str):

def arrow_scalar_ids_to_bq(arrow_scalar: Any):
"""
DEPRECATED: update pandas_gbq.schema.pyarrow_to_bigquery, instead.
Returns:
The BigQuery scalar type that the input arrow scalar type maps to.
If it cannot find the arrow scalar, return None.
Expand Down
4 changes: 4 additions & 0 deletions tests/unit/test__pyarrow_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@ def test_bq_to_arrow_scalars(module_under_test):
module_under_test.bq_to_arrow_scalars("BIGNUMERIC")
== module_under_test.pyarrow_bignumeric
)
assert (
module_under_test.bq_to_arrow_scalars("JSON")
== module_under_test.json_arrow_type
)
assert module_under_test.bq_to_arrow_scalars("UNKNOWN_TYPE") is None


Expand Down

0 comments on commit 7b225d7

Please sign in to comment.