From 1b234c87bcd19c334f9040cf4b7438d168d03437 Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Wed, 4 Sep 2024 17:12:27 +0000 Subject: [PATCH 01/28] Adds ExternalCatalogDatasetOptions to Dataset --- google/cloud/bigquery/dataset.py | 52 ++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/google/cloud/bigquery/dataset.py b/google/cloud/bigquery/dataset.py index c49a52faf..dd56cb287 100644 --- a/google/cloud/bigquery/dataset.py +++ b/google/cloud/bigquery/dataset.py @@ -530,6 +530,7 @@ class Dataset(object): "storage_billing_model": "storageBillingModel", "max_time_travel_hours": "maxTimeTravelHours", "default_rounding_mode": "defaultRoundingMode", + "external_catalog_dataset_options": "externalCatalogDatasetOptions", } def __init__(self, dataset_ref) -> None: @@ -937,6 +938,24 @@ def _build_resource(self, filter_fields): """Generate a resource for ``update``.""" return _helpers._build_resource_from_properties(self, filter_fields) + @property + def external_catalog_dataset_options(self): + """Options defining open source compatible datasets living in the + BigQuery catalog. Contains metadata of open source database, schema + or namespace represented by the current dataset.""" + + return self._properties.get("externalCatalogDatasetOptions") + + @external_catalog_dataset_options.setter + def external_catalog_dataset_options(self, value): + if not isinstance(value, ExternalCatalogDatasetOptions) and value is not None: + raise ValueError( + "external_catalog_dataset_options must be an " + "ExternalCatalogDatasetOptions object or None. " + f"Got {repr(value)}." + ) + self._properties["externalCatalogDatasetOptions"] = value + table = _get_table_reference model = _get_model_reference @@ -1026,3 +1045,36 @@ def reference(self): model = _get_model_reference routine = _get_routine_reference + + +class ExternalCatalogDatasetOptions(object): + """Options defining open source compatible datasets living in the BigQuery + catalog. 
Contains metadata of open source database, schema or namespace + represented by the current dataset. + + Args: + defaultStorageLocationUri: + Optional. The storage location URI for all tables in the dataset. + Equivalent to hive metastore's database locationUri. Maximum length + of 1024 characters. (str) + + parameters: + Optional. A map of key value pairs defining the parameters and + properties of the open source schema. Maximum size of 2Mib. + + Raises: + ValueError: If either argument is not of type ``str``. + """ + + def __init__( + self, + default_storage_location_uri: Optional[str] = None, + parameters: Optional[dict] = None + ): + self._properties = {} + if not isinstance(default_storage_location_uri, str): + raise ValueError("Pass a string as default_storage_location_uri") + if not isinstance(parameters, dict): + raise ValueError("Pass a dict as parameters to define the schema.") + self._properties["default_storage_location_uri"] = default_storage_location_uri + self._properties["parameters"] = parameters From 8029213df8440ce84f7918b585b214a090ccc1b4 Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Thu, 5 Sep 2024 15:41:37 +0000 Subject: [PATCH 02/28] adds ExternalCatalogTableOptions class and assorted content --- google/cloud/bigquery/dataset.py | 35 +-------- google/cloud/bigquery/external_config.py | 92 ++++++++++++++++++++++++ google/cloud/bigquery/table.py | 59 +++++++++++++++ 3 files changed, 153 insertions(+), 33 deletions(-) diff --git a/google/cloud/bigquery/dataset.py b/google/cloud/bigquery/dataset.py index dd56cb287..9191defc0 100644 --- a/google/cloud/bigquery/dataset.py +++ b/google/cloud/bigquery/dataset.py @@ -27,6 +27,8 @@ from google.cloud.bigquery.routine import Routine, RoutineReference from google.cloud.bigquery.table import Table, TableReference from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration +from google.cloud.bigquery.external_config import ExternalCatalogDatasetOptions + from typing import 
Optional, List, Dict, Any, Union @@ -1045,36 +1047,3 @@ def reference(self): model = _get_model_reference routine = _get_routine_reference - - -class ExternalCatalogDatasetOptions(object): - """Options defining open source compatible datasets living in the BigQuery - catalog. Contains metadata of open source database, schema or namespace - represented by the current dataset. - - Args: - defaultStorageLocationUri: - Optional. The storage location URI for all tables in the dataset. - Equivalent to hive metastore's database locationUri. Maximum length - of 1024 characters. (str) - - parameters: - Optional. A map of key value pairs defining the parameters and - properties of the open source schema. Maximum size of 2Mib. - - Raises: - ValueError: If either argument is not of type ``str``. - """ - - def __init__( - self, - default_storage_location_uri: Optional[str] = None, - parameters: Optional[dict] = None - ): - self._properties = {} - if not isinstance(default_storage_location_uri, str): - raise ValueError("Pass a string as default_storage_location_uri") - if not isinstance(parameters, dict): - raise ValueError("Pass a dict as parameters to define the schema.") - self._properties["default_storage_location_uri"] = default_storage_location_uri - self._properties["parameters"] = parameters diff --git a/google/cloud/bigquery/external_config.py b/google/cloud/bigquery/external_config.py index a891bc232..919a27a96 100644 --- a/google/cloud/bigquery/external_config.py +++ b/google/cloud/bigquery/external_config.py @@ -1003,3 +1003,95 @@ def from_api_repr(cls, resource: dict) -> "ExternalConfig": config = cls(resource["sourceFormat"]) config._properties = copy.deepcopy(resource) return config + + +class ExternalCatalogDatasetOptions(object): + """Options defining open source compatible datasets living in the BigQuery catalog. + Contains metadata of open source database, schema or namespace represented + by the current dataset. 
+ + Args: + defaultStorageLocationUri: Optional. The storage location URI for all + tables in the dataset. Equivalent to hive metastore's database + locationUri. Maximum length of 1024 characters. (str) + parameters: Optional. A map of key value pairs defining the parameters + and properties of the open source schema. Maximum size of 2Mib. + """ + + def __init__(self, defaultStorageLocationUri: Optional[str] = None, parameters: Optional[dict] = None): + self._properties = {} + if not isinstance(defaultStorageLocationUri, (str, None)): + raise ValueError( + "Pass defaultStorageLocationUri as a 'str'." + f"Got {repr(dtype)}." + ) + if not isinstance(parameters, (dict, None)): + raise ValueError( + "Pass parameters as a ''." + f"Got {repr(dtype)}." + ) + self._properties["defaultStorageLocationUri"] = defaultStorageLocationUri + self._properties["parameters"] = parameters + + def to_api_repr(self) -> dict: + """Build an API representation of this object. + + Returns: + Dict[str, Any]: + A dictionary in the format used by the BigQuery API. + """ + config = copy.deepcopy(self._properties) + return config + +class ExternalCatalogTableOptions(object): + """Metadata about open source compatible table. The fields contained in these + options correspond to hive metastore's table level properties. + + Args: + connectionId: Optional. The connection specifying the credentials to be + used to read external storage, such as Azure Blob, Cloud Storage, or + S3. The connection is needed to read the open source table from + BigQuery Engine. The connection_id can have the form `..` or + `projects//locations//connections/`. (str) + parameters: Optional. A map of key value pairs defining the parameters + and properties of the open source table. Corresponds with hive meta + store table parameters. Maximum size of 4Mib. (dict) + storageDescriptor: Optional. A storage descriptor containing information + about the physical storage of this table. 
(StorageDescriptor) + """ + + def __init__( + self, + connectionId: Optional[str] = None, + parameters: Optional[dict] = None, + storageDescriptor: Optional[str] = None # TODO implement StorageDescriptor, correct this type hint + ): + self._properties = {} + if not isinstance(connectionId, str): + raise ValueError( + "Pass connectionId as a 'str'." + f"Got {repr(dtype)}." + ) + if not isinstance(parameters, dict): + raise ValueError( + "Pass parameters as a 'dict'." + f"Got {repr(dtype)}." + ) + if not isinstance(storageDescriptor, str): # TODO implement StorageDescriptor, correct this type hint + raise ValueError( + "Pass storageDescriptor as a 'StorageDescriptor'." + f"Got {repr(dtype)}." + ) + self._properties["connectionId"] = connectionId + self._properties["parameters"] = parameters + self._properties["storageDescriptor"] = storageDescriptor + + def to_api_repr(self) -> dict: + """Build an API representation of this object. + + Returns: + Dict[str, Any]: + A dictionary in the format used by the BigQuery API. 
+ """ + config = copy.deepcopy(self._properties) + return config diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index faf827be4..8e7dce8f3 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -69,6 +69,7 @@ from google.cloud.bigquery.schema import _build_schema_resource from google.cloud.bigquery.schema import _parse_schema_resource from google.cloud.bigquery.schema import _to_schema_fields +from google.cloud.bigquery.external_config import ExternalCatalogTableOptions if typing.TYPE_CHECKING: # pragma: NO COVER # Unconditionally import optional dependencies again to tell pytype that @@ -999,6 +1000,24 @@ def table_constraints(self) -> Optional["TableConstraints"]: table_constraints = TableConstraints.from_api_repr(table_constraints) return table_constraints + @property + def external_catalog_table_options(self): + """Options defining open source compatible datasets living in the + BigQuery catalog. Contains metadata of open source database, schema + or namespace represented by the current dataset.""" + + return self._properties.get("externalCatalogTableOptions") + + @external_catalog_table_options.setter + def external_catalog_table_options(self, value): + if not isinstance(value, ExternalCatalogTableOptions) and value is not None: + raise ValueError( + "external_catalog_table_options must be an " + "ExternalCatalogTableOptions object or None. " + f"Got {repr(value)}." + ) + self._properties["externalCatalogTableOptions"] = value + @classmethod def from_string(cls, full_table_id: str) -> "Table": """Construct a table from fully-qualified table ID. @@ -3308,3 +3327,43 @@ def _table_arg_to_table(value, default_project=None) -> Table: value = newvalue return value + + +class ExternalCatalogTableOptions + """Metadata about open source compatible table. The fields contained in these options correspond to hive metastore's table level properties. + Args: + connectionId: Optional. 
The connection specifying the credentials to be + used to read external storage, such as Azure Blob, Cloud Storage, or + S3. The connection is needed to read the open source table from + BigQuery Engine. The connection_id can have the form `..` or + `projects//locations//connections/`. (str) + parameters: Optional. A map of key value pairs defining the parameters + and properties of the open source table. Corresponds with hive meta + store table parameters. Maximum size of 4Mib. + storageDescriptor: Optional. A storage descriptor containing information + about the physical storage of this table. + """ + + def __init__(self, connectionId=None, parameters=None, storageDescriptor=None): + self._properties = {} + self.connectionId = connectionId + self.parameters = parameters + self.storageDescriptor = storageDescriptor + + if not isinstance(connectionId, str): + raise ValueError( + connectionId must be a str. + Got 'connectionId'. + ) + + if not isinstance(parameters, ): + raise ValueError( + parameters must be a . + Got 'parameters'. + ) + + if not isinstance(storageDescriptor, ): + raise ValueError( + storageDescriptor must be a . + Got 'storageDescriptor'. + ) \ No newline at end of file From 0992bbf4ab2f138e064914b0cf701318eda69326 Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Wed, 11 Sep 2024 14:13:06 +0000 Subject: [PATCH 03/28] modifies argument names to snake_case --- google/cloud/bigquery/external_config.py | 30 ++++++++++++------------ 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/google/cloud/bigquery/external_config.py b/google/cloud/bigquery/external_config.py index 919a27a96..23bbbb828 100644 --- a/google/cloud/bigquery/external_config.py +++ b/google/cloud/bigquery/external_config.py @@ -1011,18 +1011,18 @@ class ExternalCatalogDatasetOptions(object): by the current dataset. Args: - defaultStorageLocationUri: Optional. The storage location URI for all + default_storage_location_uri: Optional. 
The storage location URI for all tables in the dataset. Equivalent to hive metastore's database locationUri. Maximum length of 1024 characters. (str) parameters: Optional. A map of key value pairs defining the parameters and properties of the open source schema. Maximum size of 2Mib. """ - def __init__(self, defaultStorageLocationUri: Optional[str] = None, parameters: Optional[dict] = None): + def __init__(self, default_storage_location_uri: Optional[str] = None, parameters: Optional[dict] = None): self._properties = {} - if not isinstance(defaultStorageLocationUri, (str, None)): + if not isinstance(default_storage_location_uri, (str, None)): raise ValueError( - "Pass defaultStorageLocationUri as a 'str'." + "Pass default_storage_location_uri as a 'str' or None." f"Got {repr(dtype)}." ) if not isinstance(parameters, (dict, None)): @@ -1030,7 +1030,7 @@ def __init__(self, defaultStorageLocationUri: Optional[str] = None, parameters: "Pass parameters as a ''." f"Got {repr(dtype)}." ) - self._properties["defaultStorageLocationUri"] = defaultStorageLocationUri + self._properties["defaultStorageLocationUri"] =default_storage_location_uri self._properties["parameters"] = parameters def to_api_repr(self) -> dict: @@ -1048,7 +1048,7 @@ class ExternalCatalogTableOptions(object): options correspond to hive metastore's table level properties. Args: - connectionId: Optional. The connection specifying the credentials to be + connection_id: Optional. The connection specifying the credentials to be used to read external storage, such as Azure Blob, Cloud Storage, or S3. The connection is needed to read the open source table from BigQuery Engine. The connection_id can have the form `..` or @@ -1056,20 +1056,20 @@ class ExternalCatalogTableOptions(object): parameters: Optional. A map of key value pairs defining the parameters and properties of the open source table. Corresponds with hive meta store table parameters. Maximum size of 4Mib. (dict) - storageDescriptor: Optional. 
A storage descriptor containing information + storage_descriptor: Optional. A storage descriptor containing information about the physical storage of this table. (StorageDescriptor) """ def __init__( self, - connectionId: Optional[str] = None, + connection_id: Optional[str] = None, parameters: Optional[dict] = None, - storageDescriptor: Optional[str] = None # TODO implement StorageDescriptor, correct this type hint + storage_descriptor: Optional[str] = None # TODO implement StorageDescriptor, correct this type hint ): self._properties = {} - if not isinstance(connectionId, str): + if not isinstance(connection_id, str): raise ValueError( - "Pass connectionId as a 'str'." + "Pass connection_id as a 'str'." f"Got {repr(dtype)}." ) if not isinstance(parameters, dict): @@ -1077,14 +1077,14 @@ def __init__( "Pass parameters as a 'dict'." f"Got {repr(dtype)}." ) - if not isinstance(storageDescriptor, str): # TODO implement StorageDescriptor, correct this type hint + if not isinstance(storage_descriptor, str): # TODO implement StorageDescriptor, correct this type hint raise ValueError( - "Pass storageDescriptor as a 'StorageDescriptor'." + "Pass storage_descriptor as a 'StorageDescriptor' object." f"Got {repr(dtype)}." ) - self._properties["connectionId"] = connectionId + self._properties["connectionId"] = connection_id self._properties["parameters"] = parameters - self._properties["storageDescriptor"] = storageDescriptor + self._properties["storageDescriptor"] = storage_descriptor def to_api_repr(self) -> dict: """Build an API representation of this object. 
From 45ddd893e3b767748ddd305a55e659c90f2b00a4 Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Wed, 11 Sep 2024 14:45:31 +0000 Subject: [PATCH 04/28] replaces dtype placeholder with parameter names --- google/cloud/bigquery/external_config.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/google/cloud/bigquery/external_config.py b/google/cloud/bigquery/external_config.py index 23bbbb828..6496ac657 100644 --- a/google/cloud/bigquery/external_config.py +++ b/google/cloud/bigquery/external_config.py @@ -1023,12 +1023,12 @@ def __init__(self, default_storage_location_uri: Optional[str] = None, parameter if not isinstance(default_storage_location_uri, (str, None)): raise ValueError( "Pass default_storage_location_uri as a 'str' or None." - f"Got {repr(dtype)}." + f"Got {repr(default_storage_location_uri)}." ) if not isinstance(parameters, (dict, None)): raise ValueError( "Pass parameters as a ''." - f"Got {repr(dtype)}." + f"Got {repr(parameters)}." ) self._properties["defaultStorageLocationUri"] =default_storage_location_uri self._properties["parameters"] = parameters @@ -1070,17 +1070,17 @@ def __init__( if not isinstance(connection_id, str): raise ValueError( "Pass connection_id as a 'str'." - f"Got {repr(dtype)}." + f"Got {repr(connection_id)}." ) if not isinstance(parameters, dict): raise ValueError( "Pass parameters as a 'dict'." - f"Got {repr(dtype)}." + f"Got {repr(parameters)}." ) if not isinstance(storage_descriptor, str): # TODO implement StorageDescriptor, correct this type hint raise ValueError( "Pass storage_descriptor as a 'StorageDescriptor' object." - f"Got {repr(dtype)}." + f"Got {repr(storage_descriptor)}." 
) self._properties["connectionId"] = connection_id self._properties["parameters"] = parameters From 141146054428a0e8693aeaca27f8230068272b9c Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Wed, 11 Sep 2024 15:07:21 +0000 Subject: [PATCH 05/28] updates the inclusion of value in properties to use repr version --- google/cloud/bigquery/table.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 8e7dce8f3..1ea5c6da6 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -1016,7 +1016,7 @@ def external_catalog_table_options(self, value): "ExternalCatalogTableOptions object or None. " f"Got {repr(value)}." ) - self._properties["externalCatalogTableOptions"] = value + self._properties["externalCatalogTableOptions"] = value.to_api_repr() @classmethod def from_string(cls, full_table_id: str) -> "Table": From 20ee950ef921e31d0a7e5e00f5c9ff64bafb54e2 Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Wed, 11 Sep 2024 15:12:52 +0000 Subject: [PATCH 06/28] updates another inclusion of value in properties to use repr version --- google/cloud/bigquery/dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/google/cloud/bigquery/dataset.py b/google/cloud/bigquery/dataset.py index 9191defc0..89f43095a 100644 --- a/google/cloud/bigquery/dataset.py +++ b/google/cloud/bigquery/dataset.py @@ -956,7 +956,7 @@ def external_catalog_dataset_options(self, value): "ExternalCatalogDatasetOptions object or None. " f"Got {repr(value)}." 
) - self._properties["externalCatalogDatasetOptions"] = value + self._properties["externalCatalogDatasetOptions"] = value.to_api_repr() table = _get_table_reference From bee33eff6851b0fd9519a74dc2fe6cb401d33c8c Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Wed, 11 Sep 2024 15:42:35 +0000 Subject: [PATCH 07/28] updates type check via isinstance() or None --- google/cloud/bigquery/external_config.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/google/cloud/bigquery/external_config.py b/google/cloud/bigquery/external_config.py index 6496ac657..fb3926fcf 100644 --- a/google/cloud/bigquery/external_config.py +++ b/google/cloud/bigquery/external_config.py @@ -1020,17 +1020,17 @@ class ExternalCatalogDatasetOptions(object): def __init__(self, default_storage_location_uri: Optional[str] = None, parameters: Optional[dict] = None): self._properties = {} - if not isinstance(default_storage_location_uri, (str, None)): + if not isinstance(default_storage_location_uri, str) and default_storage_location_uri is not None: raise ValueError( - "Pass default_storage_location_uri as a 'str' or None." + "Pass default_storage_location_uri as a str or None." f"Got {repr(default_storage_location_uri)}." ) - if not isinstance(parameters, (dict, None)): + if not isinstance(parameters, dict) and parameters is not None: raise ValueError( - "Pass parameters as a ''." + "Pass parameters as a dict or None." f"Got {repr(parameters)}." 
) - self._properties["defaultStorageLocationUri"] =default_storage_location_uri + self._properties["defaultStorageLocationUri"] = default_storage_location_uri self._properties["parameters"] = parameters def to_api_repr(self) -> dict: From ee69f24b531b68e01ee0cc6a9e4f49a47a511989 Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Thu, 12 Sep 2024 12:36:07 +0000 Subject: [PATCH 08/28] adds tests related to ExternalCatalogDatasetOptions --- google/cloud/bigquery/dataset.py | 2 +- google/cloud/bigquery/external_config.py | 43 ++++++++++------- google/cloud/bigquery/table.py | 42 +--------------- tests/unit/test_external_config.py | 61 ++++++++++++++++++++++++ 4 files changed, 88 insertions(+), 60 deletions(-) diff --git a/google/cloud/bigquery/dataset.py b/google/cloud/bigquery/dataset.py index 89f43095a..6db620eeb 100644 --- a/google/cloud/bigquery/dataset.py +++ b/google/cloud/bigquery/dataset.py @@ -945,7 +945,7 @@ def external_catalog_dataset_options(self): """Options defining open source compatible datasets living in the BigQuery catalog. Contains metadata of open source database, schema or namespace represented by the current dataset.""" - + return self._properties.get("externalCatalogDatasetOptions") @external_catalog_dataset_options.setter diff --git a/google/cloud/bigquery/external_config.py b/google/cloud/bigquery/external_config.py index fb3926fcf..641463e6d 100644 --- a/google/cloud/bigquery/external_config.py +++ b/google/cloud/bigquery/external_config.py @@ -1018,17 +1018,23 @@ class ExternalCatalogDatasetOptions(object): and properties of the open source schema. Maximum size of 2Mib. 
""" - def __init__(self, default_storage_location_uri: Optional[str] = None, parameters: Optional[dict] = None): + def __init__( + self, + default_storage_location_uri: Optional[str] = None, + parameters: Optional[dict] = None, + ): self._properties = {} - if not isinstance(default_storage_location_uri, str) and default_storage_location_uri is not None: + if ( + not isinstance(default_storage_location_uri, str) + and default_storage_location_uri is not None + ): raise ValueError( "Pass default_storage_location_uri as a str or None." f"Got {repr(default_storage_location_uri)}." ) if not isinstance(parameters, dict) and parameters is not None: raise ValueError( - "Pass parameters as a dict or None." - f"Got {repr(parameters)}." + "Pass parameters as a dict or None." f"Got {repr(parameters)}." ) self._properties["defaultStorageLocationUri"] = default_storage_location_uri self._properties["parameters"] = parameters @@ -1043,6 +1049,7 @@ def to_api_repr(self) -> dict: config = copy.deepcopy(self._properties) return config + class ExternalCatalogTableOptions(object): """Metadata about open source compatible table. The fields contained in these options correspond to hive metastore's table level properties. @@ -1061,31 +1068,31 @@ class ExternalCatalogTableOptions(object): """ def __init__( - self, - connection_id: Optional[str] = None, - parameters: Optional[dict] = None, - storage_descriptor: Optional[str] = None # TODO implement StorageDescriptor, correct this type hint - ): + self, + connection_id: Optional[str] = None, + parameters: Optional[dict] = None, + storage_descriptor: Optional[ + str + ] = None, # TODO implement StorageDescriptor, correct this type hint + ): self._properties = {} if not isinstance(connection_id, str): raise ValueError( - "Pass connection_id as a 'str'." - f"Got {repr(connection_id)}." + "Pass connection_id as a 'str'." f"Got {repr(connection_id)}." ) if not isinstance(parameters, dict): - raise ValueError( - "Pass parameters as a 'dict'." 
- f"Got {repr(parameters)}." - ) - if not isinstance(storage_descriptor, str): # TODO implement StorageDescriptor, correct this type hint + raise ValueError("Pass parameters as a 'dict'." f"Got {repr(parameters)}.") + if not isinstance( + storage_descriptor, str + ): # TODO implement StorageDescriptor, correct this type hint raise ValueError( "Pass storage_descriptor as a 'StorageDescriptor' object." f"Got {repr(storage_descriptor)}." ) self._properties["connectionId"] = connection_id self._properties["parameters"] = parameters - self._properties["storageDescriptor"] = storage_descriptor - + self._properties["storageDescriptor"] = storage_descriptor + def to_api_repr(self) -> dict: """Build an API representation of this object. diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 1ea5c6da6..0846e08dd 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -1005,7 +1005,7 @@ def external_catalog_table_options(self): """Options defining open source compatible datasets living in the BigQuery catalog. Contains metadata of open source database, schema or namespace represented by the current dataset.""" - + return self._properties.get("externalCatalogTableOptions") @external_catalog_table_options.setter @@ -3327,43 +3327,3 @@ def _table_arg_to_table(value, default_project=None) -> Table: value = newvalue return value - - -class ExternalCatalogTableOptions - """Metadata about open source compatible table. The fields contained in these options correspond to hive metastore's table level properties. - Args: - connectionId: Optional. The connection specifying the credentials to be - used to read external storage, such as Azure Blob, Cloud Storage, or - S3. The connection is needed to read the open source table from - BigQuery Engine. The connection_id can have the form `..` or - `projects//locations//connections/`. (str) - parameters: Optional. 
A map of key value pairs defining the parameters - and properties of the open source table. Corresponds with hive meta - store table parameters. Maximum size of 4Mib. - storageDescriptor: Optional. A storage descriptor containing information - about the physical storage of this table. - """ - - def __init__(self, connectionId=None, parameters=None, storageDescriptor=None): - self._properties = {} - self.connectionId = connectionId - self.parameters = parameters - self.storageDescriptor = storageDescriptor - - if not isinstance(connectionId, str): - raise ValueError( - connectionId must be a str. - Got 'connectionId'. - ) - - if not isinstance(parameters, ): - raise ValueError( - parameters must be a . - Got 'parameters'. - ) - - if not isinstance(storageDescriptor, ): - raise ValueError( - storageDescriptor must be a . - Got 'storageDescriptor'. - ) \ No newline at end of file diff --git a/tests/unit/test_external_config.py b/tests/unit/test_external_config.py index 9fd16e699..6c4a4855b 100644 --- a/tests/unit/test_external_config.py +++ b/tests/unit/test_external_config.py @@ -19,6 +19,8 @@ from google.cloud.bigquery import external_config from google.cloud.bigquery import schema +import pytest + class TestExternalConfig(unittest.TestCase): SOURCE_URIS = ["gs://foo", "gs://bar"] @@ -890,3 +892,62 @@ def _copy_and_update(d, u): d = copy.deepcopy(d) d.update(u) return d + + +class TestExternalCatalogDatasetOptions: + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.external_config import ExternalCatalogDatasetOptions + + return ExternalCatalogDatasetOptions + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_ctor_defaults(self): + """Test ExternalCatalogDatasetOptions constructor with default values.""" + instance = self._make_one() + + assert instance._properties["defaultStorageLocationUri"] is None + assert instance._properties["parameters"] is None + + def test_ctor_explicit( + self, + ): + 
"""Test ExternalCatalogDatasetOptions constructor with explicit values.""" + + default_storage_location_uri = "gs://test-bucket/test-path" + parameters = {"key": "value"} + + instance = self._make_one( + default_storage_location_uri=default_storage_location_uri, + parameters=parameters, + ) + + assert ( + instance._properties["defaultStorageLocationUri"] + == default_storage_location_uri + ) + assert instance._properties["parameters"] == parameters + + def test_ctor_invalid_input(self): + """Test ExternalCatalogDatasetOptions constructor with invalid input.""" + + with pytest.raises(ValueError): + self._make_one(default_storage_location_uri=123) + with pytest.raises(ValueError): + self._make_one(parameters=123) + + def test_to_api_repr(self): + """Test ExternalCatalogDatasetOptions.to_api_repr method.""" + + default_storage_location_uri = "gs://test-bucket/test-path" + parameters = {"key": "value"} + + instance = self._make_one( + default_storage_location_uri=default_storage_location_uri, + parameters=parameters, + ) + resource = instance.to_api_repr() + assert resource["defaultStorageLocationUri"] == default_storage_location_uri + assert resource["parameters"] == parameters From f9d657bcdd46ad60b7fa60ddb53760589c00afb0 Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Thu, 12 Sep 2024 18:10:52 +0000 Subject: [PATCH 09/28] adds test suite for ExternalCatalogTableOptions and minor tweaks elsewhere --- google/cloud/bigquery/external_config.py | 34 ++++---- tests/unit/test_external_config.py | 101 +++++++++++++++++++++++ 2 files changed, 119 insertions(+), 16 deletions(-) diff --git a/google/cloud/bigquery/external_config.py b/google/cloud/bigquery/external_config.py index 641463e6d..360a5ef9a 100644 --- a/google/cloud/bigquery/external_config.py +++ b/google/cloud/bigquery/external_config.py @@ -1021,19 +1021,19 @@ class ExternalCatalogDatasetOptions(object): def __init__( self, default_storage_location_uri: Optional[str] = None, - parameters: Optional[dict] = None, 
+ parameters: Optional[Dict[str, Any]] = None, ): - self._properties = {} + self._properties = {} # type: Dict[str, Any] if ( not isinstance(default_storage_location_uri, str) and default_storage_location_uri is not None ): - raise ValueError( + raise TypeError( "Pass default_storage_location_uri as a str or None." f"Got {repr(default_storage_location_uri)}." ) if not isinstance(parameters, dict) and parameters is not None: - raise ValueError( + raise TypeError( "Pass parameters as a dict or None." f"Got {repr(parameters)}." ) self._properties["defaultStorageLocationUri"] = default_storage_location_uri @@ -1070,23 +1070,25 @@ class ExternalCatalogTableOptions(object): def __init__( self, connection_id: Optional[str] = None, - parameters: Optional[dict] = None, + parameters: Union[Dict[str, Any], None] = None, storage_descriptor: Optional[ str - ] = None, # TODO implement StorageDescriptor, correct this type hint + ] = None, # TODO implement StorageDescriptor, then correct this type hint ): - self._properties = {} - if not isinstance(connection_id, str): - raise ValueError( - "Pass connection_id as a 'str'." f"Got {repr(connection_id)}." + self._properties = {} # type: Dict[str, Any] + if not isinstance(connection_id, str) and connection_id is not None: + raise TypeError( + "Pass connection_id as a 'str' or None. " f"Got {repr(connection_id)}." ) - if not isinstance(parameters, dict): - raise ValueError("Pass parameters as a 'dict'." f"Got {repr(parameters)}.") - if not isinstance( - storage_descriptor, str + if not isinstance(parameters, dict) and parameters is not None: + raise TypeError( + "Pass parameters as a 'dict' or None. " f"Got {repr(parameters)}." + ) + if ( + not isinstance(storage_descriptor, str) and storage_descriptor is not None ): # TODO implement StorageDescriptor, correct this type hint - raise ValueError( - "Pass storage_descriptor as a 'StorageDescriptor' object." + raise TypeError( + "Pass storage_descriptor as a 'StorageDescriptor' object. 
" f"Got {repr(storage_descriptor)}." ) self._properties["connectionId"] = connection_id diff --git a/tests/unit/test_external_config.py b/tests/unit/test_external_config.py index 6c4a4855b..c7be0ce14 100644 --- a/tests/unit/test_external_config.py +++ b/tests/unit/test_external_config.py @@ -14,6 +14,7 @@ import base64 import copy +from typing import Any, Dict import unittest from google.cloud.bigquery import external_config @@ -951,3 +952,103 @@ def test_to_api_repr(self): resource = instance.to_api_repr() assert resource["defaultStorageLocationUri"] == default_storage_location_uri assert resource["parameters"] == parameters + + +class TestExternalCatalogTableOptions: + # TODO add docstring(s) + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.external_config import ExternalCatalogTableOptions + + return ExternalCatalogTableOptions + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + # connection_id: Optional[str] = None, + # parameters: Optional[dict] = None, + # storage_descriptor: Optional[ + # str + # ] = None + + @pytest.mark.parametrize( + "connection_id,parameters,storage_descriptor", + [ + ("connection123", {"key": "value"}, "placeholder"), # set all params + ("connection123", None, None), # set only one parameter at a time + (None, {"key": "value"}, None), + (None, None, "placeholder"), + (None, None, None), # all default parameters + ], + ) + def test_ctor_initialization(self, connection_id, parameters, storage_descriptor): + instance = self._make_one( + connection_id=connection_id, + parameters=parameters, + storage_descriptor=storage_descriptor, + ) + assert instance._properties == { + "connectionId": connection_id, + "parameters": parameters, + "storageDescriptor": storage_descriptor, + } + + def test_to_api_repr(self): + instance = self._make_one() + instance._properties = { + "connectionId": "connection123", + "parameters": {"key": "value"}, + "storageDescriptor": "placeholder", + } + + 
resource = instance.to_api_repr() + + assert resource == { + "connectionId": "connection123", + "parameters": {"key": "value"}, + "storageDescriptor": "placeholder", + } + + @pytest.mark.parametrize( + "connection_id, parameters, storage_descriptor, exception_class", + [ + pytest.param( + 123, + {"key": "value"}, + "placeholder", + TypeError, + id="connection_id-invalid-type", + ), + pytest.param( + "connection123", + 123, + "placeholder", + TypeError, + id="parameters-invalid-type", + ), + pytest.param( + "connection123", + {"key": "value"}, + 123, + TypeError, + id="storage_descriptor-invalid-type", + ), + ], + ) + def test_ctor_invalid_input( + self, + connection_id: str, + parameters: Dict[str, Any], + storage_descriptor: str, + exception_class: TypeError, + ): + with pytest.raises(exception_class): + external_config.ExternalCatalogTableOptions( + connection_id=connection_id, + parameters=parameters, + storage_descriptor=storage_descriptor, + ) + + +# =============================================== From 89896a3690a14369c37518cd03099c4597967af1 Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Thu, 19 Sep 2024 15:16:16 +0000 Subject: [PATCH 10/28] corrects Error type of failing test --- tests/unit/test_external_config.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/unit/test_external_config.py b/tests/unit/test_external_config.py index c7be0ce14..01fd42916 100644 --- a/tests/unit/test_external_config.py +++ b/tests/unit/test_external_config.py @@ -934,9 +934,9 @@ def test_ctor_explicit( def test_ctor_invalid_input(self): """Test ExternalCatalogDatasetOptions constructor with invalid input.""" - with pytest.raises(ValueError): + with pytest.raises(TypeError): self._make_one(default_storage_location_uri=123) - with pytest.raises(ValueError): + with pytest.raises(TypeError): self._make_one(parameters=123) def test_to_api_repr(self): From c45245908b380c58f5d9d9203c655f4e2c301a25 Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Thu, 26 
Sep 2024 21:48:13 +0000 Subject: [PATCH 11/28] forgive me... a wild mess of tests, tweaks, etc --- google/cloud/bigquery/_helpers.py | 31 ++++++++ google/cloud/bigquery/external_config.py | 49 ++++++++----- google/cloud/bigquery/schema.py | 90 ++++++++++++++++++++++++ tests/unit/test_schema.py | 36 +++++++++- 4 files changed, 188 insertions(+), 18 deletions(-) diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index 1eda80712..dfc303eb9 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -15,6 +15,7 @@ """Shared helper functions for BigQuery API classes.""" import base64 +import copy import datetime import decimal import json @@ -1004,3 +1005,33 @@ def _verify_job_config_type(job_config, expected_type, param_name="job_config"): job_config=job_config, ) ) + + +class ResourceBase: + """Base class providing the from_api_repr method.""" + + def __init__(self): + self._properties = {} + + @classmethod + def from_api_repr(cls, resource: dict): + """Factory: constructs an instance of the class (cls) + given its API representation. + + Args: + resource (Dict[str, Any]): + API representation of the object to be instantiated. + + Returns: + ResourceBase: An instance of the class initialized with data + from 'resource'. + """ + config = cls() + config._properties = copy.deepcopy(resource) + return config + + +def _isinstance_raise(value, dtype): + if not isinstance(value, dtype): + raise TypeError(f"Pass {value} as a 'repr({dtype})'. 
" f"Got {type(value)}.") + return value diff --git a/google/cloud/bigquery/external_config.py b/google/cloud/bigquery/external_config.py index 360a5ef9a..03cafdb0f 100644 --- a/google/cloud/bigquery/external_config.py +++ b/google/cloud/bigquery/external_config.py @@ -28,6 +28,8 @@ from google.cloud.bigquery._helpers import _bytes_to_json from google.cloud.bigquery._helpers import _int_or_none from google.cloud.bigquery._helpers import _str_or_none +from google.cloud.bigquery._helpers import _isinstance_raise +from google.cloud.bigquery._helpers import ResourceBase from google.cloud.bigquery.format_options import AvroOptions, ParquetOptions from google.cloud.bigquery.schema import SchemaField @@ -1005,16 +1007,16 @@ def from_api_repr(cls, resource: dict) -> "ExternalConfig": return config -class ExternalCatalogDatasetOptions(object): +class ExternalCatalogDatasetOptions(ResourceBase): """Options defining open source compatible datasets living in the BigQuery catalog. Contains metadata of open source database, schema or namespace represented by the current dataset. Args: - default_storage_location_uri: Optional. The storage location URI for all + default_storage_location_uri (Optional[str]): The storage location URI for all tables in the dataset. Equivalent to hive metastore's database locationUri. Maximum length of 1024 characters. (str) - parameters: Optional. A map of key value pairs defining the parameters + parameters (Optional[dict[str, Any]]): A map of key value pairs defining the parameters and properties of the open source schema. Maximum size of 2Mib. """ @@ -1023,22 +1025,35 @@ def __init__( default_storage_location_uri: Optional[str] = None, parameters: Optional[Dict[str, Any]] = None, ): - self._properties = {} # type: Dict[str, Any] - if ( - not isinstance(default_storage_location_uri, str) - and default_storage_location_uri is not None - ): - raise TypeError( - "Pass default_storage_location_uri as a str or None." 
- f"Got {repr(default_storage_location_uri)}." - ) - if not isinstance(parameters, dict) and parameters is not None: - raise TypeError( - "Pass parameters as a dict or None." f"Got {repr(parameters)}." - ) + self._properties = {} self._properties["defaultStorageLocationUri"] = default_storage_location_uri self._properties["parameters"] = parameters + @property + def default_storage_location_uri(self) -> Any: + """Optional. The storage location URI for all tables in the dataset. + Equivalent to hive metastore's database locationUri. Maximum length of + 1024 characters.""" + + return self._properties.get("defaultStorageLocationUri") + + @default_storage_location_uri.setter + def default_storage_location_uri(self, value: str) -> str: + value = _isinstance_raise(value, str) + self._properties["defaultStorageLocationUri"] = value + + @property + def parameters(self) -> Any: + """Optional. A map of key value pairs defining the parameters and + properties of the open source schema. Maximum size of 2Mib.""" + + return self._properties.get("parameters") + + @parameters.setter + def parameters(self, value: dict[str, Any]) -> str: + value = _isinstance_raise(value, dict) + self._properties["parameters"] = value + def to_api_repr(self) -> dict: """Build an API representation of this object. @@ -1050,7 +1065,7 @@ def to_api_repr(self) -> dict: return config -class ExternalCatalogTableOptions(object): +class ExternalCatalogTableOptions(ResourceBase): """Metadata about open source compatible table. The fields contained in these options correspond to hive metastore's table level properties. 
diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py index f5b03cbef..7c24c6b97 100644 --- a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ -15,10 +15,12 @@ """Schemas for BigQuery tables / queries.""" import collections +import copy import enum from typing import Any, Dict, Iterable, Optional, Union, cast from google.cloud.bigquery import standard_sql +from google.cloud.bigquery._helpers import ResourceBase from google.cloud.bigquery.enums import StandardSqlTypeNames @@ -588,3 +590,91 @@ def to_api_repr(self) -> dict: """ answer = {"names": list(self.names)} return answer + + +class TableSchema(ResourceBase): + """Schema of a table + + Args: + fields (list): Describes the fields in a table. + foreignTypeInfo: Optional. Specifies metadata of the foreign data type + definition in field schema + (TableFieldSchema.foreign_type_definition). + """ + + def __init__(self, fields: list = None, foreign_type_info: Optional[str] = None): + self._properties = {} + self._properties["fields"] = fields + self._properties["foreignTypeInfo"] = foreign_type_info + + @property + def fields(self) -> Any: + """Describes the fields in a table.""" + + return self._properties.get("fields") + + @fields.setter + def fields(self, value: list, dtype: str) -> str: + if not isinstance(value, list): + raise ValueError(f"Pass fields as a '{repr(dtype)}'." f"Got {type(value)}.") + self._properties["fields"] = value + + @property + def foreign_type_info(self) -> Any: + """Optional. Specifies metadata of the foreign data type definition in + field schema (TableFieldSchema.foreign_type_definition).""" + + return self._properties.get("foreignTypeInfo") + + @foreign_type_info.setter + def foreign_type_info(self, value: str, dtype: str) -> str: + if not isinstance(value, str): + raise ValueError( + f"Pass {value} as a '{repr(dtype)}'." f"Got {type(value)}." 
+ ) + self._properties["foreignTypeInfo"] = value + + def to_api_repr(self) -> dict: + """Build an API representation of this object. + + Returns: + Dict[str, Any]: + A dictionary in the format used by the BigQuery API. + """ + return copy.deepcopy(self._properties) + + +class ForeignTypeInfo(ResourceBase): + """Metadata about the foreign data type definition such as the system in which the + type is defined. + + Args: + typeSystem (str): Required. Specifies the system which defines the + foreign data type. + """ + + def __init__(self, type_system_="TYPE_SYSTEM_UNSPECIFIED"): + self._properties = {} + self._properties["typeSystem"] = type_system_ + + @property + def type_system(self): + """Required. Specifies the system which defines the foreign data + type.""" + + return self._properties.get("typeSystem") + + @type_system.setter + def type_system(self, value: str): + if not isinstance(value, str) or value is None: + raise ValueError("Pass type_system as a 'str'." f" Got {type(value)}.") + self._properties["typeSystem"] = value + + def to_api_repr(self) -> dict: + """Build an API representation of this object. + + Returns: + Dict[str, Any]: + A dictionary in the format used by the BigQuery API. 
+ """ + return copy.deepcopy(self._properties) diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py index b17cd0281..d8a7b90cc 100644 --- a/tests/unit/test_schema.py +++ b/tests/unit/test_schema.py @@ -14,7 +14,8 @@ from google.cloud import bigquery from google.cloud.bigquery.standard_sql import StandardSqlStructType -from google.cloud.bigquery.schema import PolicyTagList +from google.cloud.bigquery.schema import PolicyTagList, ForeignTypeInfo + import unittest from unittest import mock @@ -1108,3 +1109,36 @@ def test_to_api_repr_parameterized(field, api): from google.cloud.bigquery.schema import SchemaField assert SchemaField(**field).to_api_repr() == api + + +class TestForeignTypeSystem: + """TODO: add doc string.""" + + def test_foreign_type_system_constructor_valid_type_system(self): + foreign_type_info = ForeignTypeInfo("my_type_system") + assert foreign_type_info.type_system == "my_type_system" + + @pytest.mark.parametrize("value", [(42), (None)]) + def test_foreign_type_system_constructor_invalid_type_system(self, value): + foreign_type_info = ForeignTypeInfo("my_type_system") + with pytest.raises(ValueError) as exc_info: + foreign_type_info.type_system = value + assert "Pass type_system as a 'str'." 
in str(exc_info.value) + + @pytest.mark.parametrize( + "type_system, expected_api_repr", + [ + ("TYPE_SYSTEM_UNSPECIFIED", {"typeSystem": "TYPE_SYSTEM_UNSPECIFIED"}), + ("HIVE", {"typeSystem": "HIVE"}), + ], + ) + def test_to_api_repr(self, type_system, expected_api_repr): + foreign_type_info = ForeignTypeInfo(type_system) + actual = foreign_type_info.to_api_repr() + assert actual == expected_api_repr + + def test_to_api_repr_type_system_unspecified(self): + foreign_type_info = ForeignTypeInfo() + expected_api_repr = {"typeSystem": "TYPE_SYSTEM_UNSPECIFIED"} + actual = foreign_type_info.to_api_repr() + assert actual == expected_api_repr From 199e903e2540a1d7716ced42a261f4752fe0b6ae Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Wed, 2 Oct 2024 14:25:35 +0000 Subject: [PATCH 12/28] Updates isinstance_or_raise, refines ExternalCatalogDatasetOptions internals --- google/cloud/bigquery/_helpers.py | 35 +++++++++++++++++++--- google/cloud/bigquery/external_config.py | 38 ++++++++++++++---------- tests/unit/test__helpers.py | 38 ++++++++++++++++++++++++ tests/unit/test_external_config.py | 10 ++++--- 4 files changed, 98 insertions(+), 23 deletions(-) diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index dfc303eb9..d9481b8a6 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -23,7 +23,7 @@ import re import os import warnings -from typing import Optional, Union +from typing import Optional, Union, Any, Tuple from dateutil import relativedelta from google.cloud._helpers import UTC # type: ignore @@ -1031,7 +1031,34 @@ def from_api_repr(cls, resource: dict): return config -def _isinstance_raise(value, dtype): - if not isinstance(value, dtype): - raise TypeError(f"Pass {value} as a 'repr({dtype})'. " f"Got {type(value)}.") +def _isinstance_or_raise(value: Any, dtype: Optional[Union[Any, Tuple[Any, ...]]]): + """Determine whether a value type matches a given datatype or None. 
+ + Args: + value (Any): Value to be checked. + dtype (Optional[Union[Any, Tuple[Any, ...]]]): Expected data type(s). + + Returns: + Any: Returns the input value if the type check is successful. + + Raises: + TypeError: If the input value's type does not match the expected data type(s). """ + + # Simplest case + if dtype is None and value is None: + return value + + elif isinstance(dtype, tuple): + # Another simple case + if None in dtype and value is None: + return value + # Iterate through the tuple and check if value is an instance of any type + if not any(isinstance(value, t) for t in dtype if t is not None): + valid_types_str = ", ".join(str(t) for t in dtype if t is not None) + msg = f"Pass {value} as one of '{valid_types_str}' or None. Got {type(value)}." + raise TypeError(msg) + else: + if not isinstance(value, dtype): + msg = f"Pass {value} as a '{dtype}'. Got {type(value)}." + raise TypeError(msg) return value diff --git a/google/cloud/bigquery/external_config.py b/google/cloud/bigquery/external_config.py index 03cafdb0f..7df4ddac5 100644 --- a/google/cloud/bigquery/external_config.py +++ b/google/cloud/bigquery/external_config.py @@ -24,12 +24,14 @@ import copy from typing import Any, Dict, FrozenSet, Iterable, Optional, Union -from google.cloud.bigquery._helpers import _to_bytes -from google.cloud.bigquery._helpers import _bytes_to_json -from google.cloud.bigquery._helpers import _int_or_none -from google.cloud.bigquery._helpers import _str_or_none -from google.cloud.bigquery._helpers import _isinstance_raise -from google.cloud.bigquery._helpers import ResourceBase +from google.cloud.bigquery._helpers import ( + _to_bytes, + _bytes_to_json, + _int_or_none, + _str_or_none, + _isinstance_or_raise, + ResourceBase, +) from google.cloud.bigquery.format_options import AvroOptions, ParquetOptions from google.cloud.bigquery.schema import SchemaField @@ -1026,8 +1028,8 @@ def __init__( parameters: Optional[Dict[str, Any]] = None, ): self._properties = {} - 
self._properties["defaultStorageLocationUri"] = default_storage_location_uri - self._properties["parameters"] = parameters + self.default_storage_location_uri = default_storage_location_uri + self.parameters = parameters @property def default_storage_location_uri(self) -> Any: @@ -1038,8 +1040,8 @@ def default_storage_location_uri(self) -> Any: return self._properties.get("defaultStorageLocationUri") @default_storage_location_uri.setter - def default_storage_location_uri(self, value: str) -> str: - value = _isinstance_raise(value, str) + def default_storage_location_uri(self, value: str): + value = _isinstance_or_raise(value, (str, None)) self._properties["defaultStorageLocationUri"] = value @property @@ -1050,8 +1052,8 @@ def parameters(self) -> Any: return self._properties.get("parameters") @parameters.setter - def parameters(self, value: dict[str, Any]) -> str: - value = _isinstance_raise(value, dict) + def parameters(self, value: dict[str, Any]): + value = _isinstance_or_raise(value, (dict, None)) self._properties["parameters"] = value def to_api_repr(self) -> dict: @@ -1091,6 +1093,15 @@ def __init__( ] = None, # TODO implement StorageDescriptor, then correct this type hint ): self._properties = {} # type: Dict[str, Any] + self._properties["connectionId"] = connection_id + self._properties["parameters"] = parameters + self._properties["storageDescriptor"] = storage_descriptor + + # TODO: revise to create validators + @property + def connection_id(self, value: str): + + if not isinstance(connection_id, str) and connection_id is not None: raise TypeError( "Pass connection_id as a 'str' or None. " f"Got {repr(connection_id)}." @@ -1106,9 +1117,6 @@ def __init__( "Pass storage_descriptor as a 'StorageDescriptor' object. " f"Got {repr(storage_descriptor)}." 
) - self._properties["connectionId"] = connection_id - self._properties["parameters"] = parameters - self._properties["storageDescriptor"] = storage_descriptor def to_api_repr(self) -> dict: """Build an API representation of this object. diff --git a/tests/unit/test__helpers.py b/tests/unit/test__helpers.py index 0a307498f..fad28a0ad 100644 --- a/tests/unit/test__helpers.py +++ b/tests/unit/test__helpers.py @@ -24,6 +24,7 @@ from unittest import mock import google.api_core +from google.cloud.bigquery._helpers import _isinstance_or_raise @pytest.mark.skipif( @@ -1661,3 +1662,40 @@ def test_w_env_var(self): host = self._call_fut() self.assertEqual(host, HOST) + + +class Test__isinstance_or_raise: + @pytest.mark.parametrize( + "value,dtype,expected", + [ + (None, None, None), + ('hello world.uri', str, 'hello world.uri'), + (None, (str, None), None), + (None, (None, str), None), + (None, (str, None), None), + ('hello world.uri', (None, str), 'hello world.uri'), + ('hello world.uri', (str, None), 'hello world.uri'), + ], + ) + def test__valid_isinstance_or_raise(self, value, dtype, expected): + result = _isinstance_or_raise(value, dtype) + + assert result == expected + + @pytest.mark.parametrize( + "value,dtype,expected", + [ + (None, str, pytest.raises(TypeError)), + ({"key": "value"}, str, pytest.raises(TypeError)), + ({"key": "value"}, None, pytest.raises(TypeError)), + ({"key": "value"}, (str, None), pytest.raises(TypeError)), + ({"key": "value"}, (None, str), pytest.raises(TypeError)), + ], + ) + def test__invalid_isinstance_or_raise(self, value, dtype, expected): + with expected as e: + result = _isinstance_or_raise(value, dtype) + + assert result == e + + diff --git a/tests/unit/test_external_config.py b/tests/unit/test_external_config.py index 01fd42916..83328ed7c 100644 --- a/tests/unit/test_external_config.py +++ b/tests/unit/test_external_config.py @@ -933,11 +933,13 @@ def test_ctor_explicit( def test_ctor_invalid_input(self): """Test 
ExternalCatalogDatasetOptions constructor with invalid input.""" + with pytest.raises(TypeError) as e: + result = self._make_one(default_storage_location_uri=123) + assert result == e + with pytest.raises(TypeError) as e: + result = self._make_one(parameters=123) + assert result == e - with pytest.raises(TypeError): - self._make_one(default_storage_location_uri=123) - with pytest.raises(TypeError): - self._make_one(parameters=123) def test_to_api_repr(self): """Test ExternalCatalogDatasetOptions.to_api_repr method.""" From e238ba03242f7fdb57eddabae963cebf48dd0a19 Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Wed, 2 Oct 2024 16:43:54 +0000 Subject: [PATCH 13/28] Updates ExternalCatalogTableOptions and associated tests --- google/cloud/bigquery/external_config.py | 64 ++++++++++++++++-------- tests/unit/test_external_config.py | 12 +---- 2 files changed, 43 insertions(+), 33 deletions(-) diff --git a/google/cloud/bigquery/external_config.py b/google/cloud/bigquery/external_config.py index 7df4ddac5..8bec70056 100644 --- a/google/cloud/bigquery/external_config.py +++ b/google/cloud/bigquery/external_config.py @@ -1093,30 +1093,50 @@ def __init__( ] = None, # TODO implement StorageDescriptor, then correct this type hint ): self._properties = {} # type: Dict[str, Any] - self._properties["connectionId"] = connection_id - self._properties["parameters"] = parameters - self._properties["storageDescriptor"] = storage_descriptor + self.connection_id = connection_id + self.parameters = parameters + self.storage_descriptor = storage_descriptor + + @property + def connection_id(self): + """Optional. The connection specifying the credentials to be + used to read external storage, such as Azure Blob, Cloud Storage, or + S3. The connection is needed to read the open source table from + BigQuery Engine. The connection_id can have the form `..` or + `projects//locations//connections/`. 
(str) + """ + return self._properties.get("connectionId") + + @connection_id.setter + def connection_id(self, value: Optional[str]): + value = _isinstance_or_raise(value, (str, None)) + self._properties["connectionId"] = value - # TODO: revise to create validators @property - def connection_id(self, value: str): - - - if not isinstance(connection_id, str) and connection_id is not None: - raise TypeError( - "Pass connection_id as a 'str' or None. " f"Got {repr(connection_id)}." - ) - if not isinstance(parameters, dict) and parameters is not None: - raise TypeError( - "Pass parameters as a 'dict' or None. " f"Got {repr(parameters)}." - ) - if ( - not isinstance(storage_descriptor, str) and storage_descriptor is not None - ): # TODO implement StorageDescriptor, correct this type hint - raise TypeError( - "Pass storage_descriptor as a 'StorageDescriptor' object. " - f"Got {repr(storage_descriptor)}." - ) + def parameters(self) -> Any: + """Optional. A map of key value pairs defining the parameters and + properties of the open source table. Corresponds with hive meta + store table parameters. Maximum size of 4Mib.""" + + return self._properties.get("parameters") + + @parameters.setter + def parameters(self, value: Union[Dict[str, Any], None]): + value = _isinstance_or_raise(value, (dict, None)) + self._properties["parameters"] = value + + @property + def storage_descriptor(self) -> Any: + """Optional. A storage descriptor containing information about the + physical storage of this table.""" + + return self._properties.get("storageDescriptor") + + @storage_descriptor.setter + def storage_descriptor(self, value: Optional[str]): + value = _isinstance_or_raise(value, (str, None)) + self._properties["storageDescriptor"] = value + def to_api_repr(self) -> dict: """Build an API representation of this object. 
diff --git a/tests/unit/test_external_config.py b/tests/unit/test_external_config.py index 83328ed7c..ce45f76a5 100644 --- a/tests/unit/test_external_config.py +++ b/tests/unit/test_external_config.py @@ -957,8 +957,6 @@ def test_to_api_repr(self): class TestExternalCatalogTableOptions: - # TODO add docstring(s) - @staticmethod def _get_target_class(): from google.cloud.bigquery.external_config import ExternalCatalogTableOptions @@ -968,11 +966,6 @@ def _get_target_class(): def _make_one(self, *args, **kw): return self._get_target_class()(*args, **kw) - # connection_id: Optional[str] = None, - # parameters: Optional[dict] = None, - # storage_descriptor: Optional[ - # str - # ] = None @pytest.mark.parametrize( "connection_id,parameters,storage_descriptor", @@ -981,7 +974,7 @@ def _make_one(self, *args, **kw): ("connection123", None, None), # set only one parameter at a time (None, {"key": "value"}, None), (None, None, "placeholder"), - (None, None, None), # all default parameters + (None, None, None), # use default parameters ], ) def test_ctor_initialization(self, connection_id, parameters, storage_descriptor): @@ -1051,6 +1044,3 @@ def test_ctor_invalid_input( parameters=parameters, storage_descriptor=storage_descriptor, ) - - -# =============================================== From 5fc89ae7adf1040d79a30e04db96aef3cf75c0a1 Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Wed, 2 Oct 2024 17:14:51 +0000 Subject: [PATCH 14/28] Tweaks several docstrings --- google/cloud/bigquery/_helpers.py | 3 ++- google/cloud/bigquery/external_config.py | 15 ++++++++------- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index d9481b8a6..e753eada4 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -1042,7 +1042,8 @@ def _isinstance_or_raise(value: Any, dtype: Optional[Union[Any, Tuple[Any, ...]] Any: Returns the input value if the type check is successful. 
Raises: - TypeError: If the input value's type does not match the expected data type(s). """ + TypeError: If the input value's type does not match the expected data type(s). + """ # Simplest case if dtype is None and value is None: diff --git a/google/cloud/bigquery/external_config.py b/google/cloud/bigquery/external_config.py index 8bec70056..69888c3cf 100644 --- a/google/cloud/bigquery/external_config.py +++ b/google/cloud/bigquery/external_config.py @@ -1072,16 +1072,16 @@ class ExternalCatalogTableOptions(ResourceBase): options correspond to hive metastore's table level properties. Args: - connection_id: Optional. The connection specifying the credentials to be + connection_id (Optional[str]): The connection specifying the credentials to be used to read external storage, such as Azure Blob, Cloud Storage, or S3. The connection is needed to read the open source table from BigQuery Engine. The connection_id can have the form `..` or - `projects//locations//connections/`. (str) - parameters: Optional. A map of key value pairs defining the parameters + `projects//locations//connections/`. + parameters (Union[Dict[str, Any], None]): A map of key value pairs defining the parameters and properties of the open source table. Corresponds with hive meta - store table parameters. Maximum size of 4Mib. (dict) - storage_descriptor: Optional. A storage descriptor containing information - about the physical storage of this table. (StorageDescriptor) + store table parameters. Maximum size of 4Mib. + storage_descriptor (Optional[StorageDescriptor]): A storage descriptor containing information + about the physical storage of this table. """ def __init__( @@ -1116,7 +1116,8 @@ def connection_id(self, value: Optional[str]): def parameters(self) -> Any: """Optional. A map of key value pairs defining the parameters and properties of the open source table. Corresponds with hive meta - store table parameters. Maximum size of 4Mib.""" + store table parameters. Maximum size of 4Mib. 
+ """ return self._properties.get("parameters") From 68d04f06bd4143a8c391a7b2245642dcf00e2474 Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Wed, 2 Oct 2024 19:40:44 +0000 Subject: [PATCH 15/28] Adds content related to ForeignTypeInfo --- google/cloud/bigquery/schema.py | 29 ++++++++++--------- tests/unit/test_schema.py | 51 +++++++++++++++++++-------------- 2 files changed, 45 insertions(+), 35 deletions(-) diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py index 7c24c6b97..5d046e72d 100644 --- a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ -20,7 +20,7 @@ from typing import Any, Dict, Iterable, Optional, Union, cast from google.cloud.bigquery import standard_sql -from google.cloud.bigquery._helpers import ResourceBase +from google.cloud.bigquery._helpers import ResourceBase, _isinstance_or_raise from google.cloud.bigquery.enums import StandardSqlTypeNames @@ -596,16 +596,15 @@ class TableSchema(ResourceBase): """Schema of a table Args: - fields (list): Describes the fields in a table. - foreignTypeInfo: Optional. Specifies metadata of the foreign data type - definition in field schema - (TableFieldSchema.foreign_type_definition). + fields (Optional[list]): Describes the fields in a table. + foreignTypeInfo (Optional[str]): Specifies metadata of the foreign data type + definition in field schema. """ - def __init__(self, fields: list = None, foreign_type_info: Optional[str] = None): + def __init__(self, fields: Optional[list] = None, foreign_type_info: Optional[str] = None): self._properties = {} - self._properties["fields"] = fields - self._properties["foreignTypeInfo"] = foreign_type_info + self.fields = fields + self.foreign_type_info = foreign_type_info @property def fields(self) -> Any: @@ -615,8 +614,7 @@ def fields(self) -> Any: @fields.setter def fields(self, value: list, dtype: str) -> str: - if not isinstance(value, list): - raise ValueError(f"Pass fields as a '{repr(dtype)}'." 
f"Got {type(value)}.") + value = _isinstance_or_raise(value, (list, None)) self._properties["fields"] = value @property @@ -653,9 +651,9 @@ class ForeignTypeInfo(ResourceBase): foreign data type. """ - def __init__(self, type_system_="TYPE_SYSTEM_UNSPECIFIED"): + def __init__(self, type_system="TYPE_SYSTEM_UNSPECIFIED"): self._properties = {} - self._properties["typeSystem"] = type_system_ + self.type_system = type_system @property def type_system(self): @@ -666,8 +664,7 @@ def type_system(self): @type_system.setter def type_system(self, value: str): - if not isinstance(value, str) or value is None: - raise ValueError("Pass type_system as a 'str'." f" Got {type(value)}.") + value = _isinstance_or_raise(value, (str, None)) self._properties["typeSystem"] = value def to_api_repr(self) -> dict: @@ -678,3 +675,7 @@ def to_api_repr(self) -> dict: A dictionary in the format used by the BigQuery API. """ return copy.deepcopy(self._properties) + + + + diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py index d8a7b90cc..c8f8aaef8 100644 --- a/tests/unit/test_schema.py +++ b/tests/unit/test_schema.py @@ -1111,34 +1111,43 @@ def test_to_api_repr_parameterized(field, api): assert SchemaField(**field).to_api_repr() == api -class TestForeignTypeSystem: +class TestForeignTypeInfo: """TODO: add doc string.""" - def test_foreign_type_system_constructor_valid_type_system(self): - foreign_type_info = ForeignTypeInfo("my_type_system") - assert foreign_type_info.type_system == "my_type_system" + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.schema import ForeignTypeInfo + return ForeignTypeInfo + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + @pytest.mark.parametrize( + "type_system,expected", + [ + (None, None), + ("TYPE_SYSTEM_UNSPECIFIED", "TYPE_SYSTEM_UNSPECIFIED"), + ("HIVE", "HIVE"), + ], + ) + def test_ctor_valid_input(self, type_system, expected): + result = self._make_one(type_system=type_system) + + 
assert result._properties['typeSystem'] == expected - @pytest.mark.parametrize("value", [(42), (None)]) - def test_foreign_type_system_constructor_invalid_type_system(self, value): - foreign_type_info = ForeignTypeInfo("my_type_system") - with pytest.raises(ValueError) as exc_info: - foreign_type_info.type_system = value - assert "Pass type_system as a 'str'." in str(exc_info.value) + def test_ctor_invalid_input(self): + with pytest.raises(TypeError) as e: + result = self._make_one(type_system=123) + assert result == e @pytest.mark.parametrize( - "type_system, expected_api_repr", + "type_system,expected", [ ("TYPE_SYSTEM_UNSPECIFIED", {"typeSystem": "TYPE_SYSTEM_UNSPECIFIED"}), ("HIVE", {"typeSystem": "HIVE"}), + (None, {"typeSystem": None}), ], ) - def test_to_api_repr(self, type_system, expected_api_repr): - foreign_type_info = ForeignTypeInfo(type_system) - actual = foreign_type_info.to_api_repr() - assert actual == expected_api_repr - - def test_to_api_repr_type_system_unspecified(self): - foreign_type_info = ForeignTypeInfo() - expected_api_repr = {"typeSystem": "TYPE_SYSTEM_UNSPECIFIED"} - actual = foreign_type_info.to_api_repr() - assert actual == expected_api_repr + def test_to_api_repr(self, type_system, expected): + result = self._make_one(type_system=type_system) + assert result.to_api_repr() == expected From 2a5774e89eb6eb039ed9dec22d3ba39c4fe7cfce Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Thu, 3 Oct 2024 14:16:48 +0000 Subject: [PATCH 16/28] add new classes and tests --- google/cloud/bigquery/schema.py | 153 +++++++++++++++++++++++++++ tests/unit/test_external_config.py | 24 ++--- tests/unit/test_schema.py | 162 ++++++++++++++++++++++++++++- 3 files changed, 324 insertions(+), 15 deletions(-) diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py index 5d046e72d..155c85bf1 100644 --- a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ -677,5 +677,158 @@ def to_api_repr(self) -> dict: return 
copy.deepcopy(self._properties) +class StorageDescriptor(ResourceBase): + """Contains information about how a table's data is stored and accessed by open + source query engines. + Args: + inputFormat (Optional[str]): Specifies the fully qualified class name of + the InputFormat (e.g. + "org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"). The maximum + length is 128 characters. + locationUri (Optional[str]): The physical location of the table (e.g. + `gs://spark-dataproc-data/pangea-data/case_sensitive/` or + `gs://spark-dataproc-data/pangea-data/*`). The maximum length is + 2056 bytes. + outputFormat (Optional[str]): Specifies the fully qualified class name + of the OutputFormat (e.g. + "org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat"). The maximum + length is 128 characters. + serdeInfo (Optional[Any]): Serializer and deserializer information. + """ + + def __init__(self, input_format: Optional[str] = None, location_uri: Optional[str] = None, output_format: Optional[str] = None, serde_info: Optional[Any] = None): + self._properties = {} + self.input_format = input_format + self.location_uri = location_uri + self.output_format = output_format + self.serde_info = serde_info + + @property + def input_format(self) -> Any: + '''Optional. Specifies the fully qualified class name of the InputFormat + (e.g. "org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"). The maximum + length is 128 characters.''' + + return self._properties.get("inputFormat") + + @input_format.setter + def input_format(self, value: Optional[str]): + value = _isinstance_or_raise(value, (str, None)) + self._properties['inputFormat'] = value + + @property + def location_uri(self) -> Any: + '''Optional. The physical location of the table (e.g. `gs://spark- + dataproc-data/pangea-data/case_sensitive/` or `gs://spark-dataproc- + data/pangea-data/*`). 
The maximum length is 2056 bytes.''' + + return self._properties.get("locationUri") + + @location_uri.setter + def location_uri(self, value: Optional[str]): + value = _isinstance_or_raise(value, (str, None)) + self._properties['locationUri'] = value + + @property + def output_format(self) -> Any: + '''Optional. Specifies the fully qualified class name of the + OutputFormat (e.g. "org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat"). + The maximum length is 128 characters.''' + return self._properties.get("outputFormat") + + @output_format.setter + def output_format(self, value: Optional[str]): + value = _isinstance_or_raise(value, (str, None)) + self._properties['outputFormat'] = value + + @property + def serde_info(self) -> Any: + '''Optional. Serializer and deserializer information.''' + + return self._properties.get("serdeInfo") + + @serde_info.setter + def serde_info(self, value: Optional[Any]): + value = _isinstance_or_raise(value, (str, None)) #TODO fix, when serde class is done + self._properties['serdeInfo'] = value + + + def to_api_repr(self) -> dict: + """Build an API representation of this object. + + Returns: + Dict[str, Any]: + A dictionary in the format used by the BigQuery API. + """ + return copy.deepcopy(self._properties) + + +class SerDeInfo(ResourceBase): + """Serializer and deserializer information. + + Args: + serializationLibrary (str): Required. Specifies a fully-qualified class + name of the serialization library that is responsible for the + translation of data between table representation and the underlying + low-level input and output format structures. The maximum length is + 256 characters. + name (Optional[str]): Name of the SerDe. The maximum length is 256 + characters. + parameters: (Optional[dict[str, str]]): Key-value pairs that define the initialization + parameters for the serialization library. Maximum size 10 Kib. 
+ """ + + def __init__(self, serialization_library: str, name: Optional[str] = None, parameters: Optional[dict[str, str]] = None): + self._properties = {} + self.serialization_library = serialization_library + self.name = name + self.parameters = parameters + + @property + def serialization_library(self) -> Any: + '''Required. Specifies a fully-qualified class name of the serialization + library that is responsible for the translation of data between table + representation and the underlying low-level input and output format + structures. The maximum length is 256 characters.''' + + return self._properties.get('serializationLibrary') + + @serialization_library.setter + def serialization_library(self, value: str): + value = _isinstance_or_raise(value, str) + self._properties['serializationLibrary'] = value + + + @property + def name(self) -> Any: + '''Optional. Name of the SerDe. The maximum length is 256 characters.''' + + return self._properties.get('name') + + @name.setter + def name(self, value: Optional[str] = None): + value = _isinstance_or_raise(value, (str, None)) + self._properties['name'] = value + + @property + def parameters(self) -> Any: + '''Optional. Key-value pairs that define the initialization parameters + for the serialization library. Maximum size 10 Kib.''' + + return self._properties.get('parameters') + + @parameters.setter + def parameters(self, value: Optional[dict[str, str]] = None): + value = _isinstance_or_raise(value, (dict, None)) + self._properties['parameters'] = value + + def to_api_repr(self) -> dict: + """Build an API representation of this object. + + Returns: + Dict[str, Any]: + A dictionary in the format used by the BigQuery API. 
+ """ + return copy.deepcopy(self._properties) \ No newline at end of file diff --git a/tests/unit/test_external_config.py b/tests/unit/test_external_config.py index ce45f76a5..790e3d3f6 100644 --- a/tests/unit/test_external_config.py +++ b/tests/unit/test_external_config.py @@ -935,11 +935,11 @@ def test_ctor_invalid_input(self): """Test ExternalCatalogDatasetOptions constructor with invalid input.""" with pytest.raises(TypeError) as e: result = self._make_one(default_storage_location_uri=123) - assert result == e + assert "Pass" in str(e.value) + with pytest.raises(TypeError) as e: result = self._make_one(parameters=123) - assert result == e - + assert "Pass" in str(e.value) def test_to_api_repr(self): """Test ExternalCatalogDatasetOptions.to_api_repr method.""" @@ -1006,27 +1006,24 @@ def test_to_api_repr(self): } @pytest.mark.parametrize( - "connection_id, parameters, storage_descriptor, exception_class", + "connection_id, parameters, storage_descriptor", [ pytest.param( 123, - {"key": "value"}, - "placeholder", - TypeError, + {"test_key": "test_value"}, + "test placeholder", id="connection_id-invalid-type", ), pytest.param( "connection123", 123, - "placeholder", - TypeError, + "test placeholder", id="parameters-invalid-type", ), pytest.param( "connection123", - {"key": "value"}, + {"test_key": "test_value"}, 123, - TypeError, id="storage_descriptor-invalid-type", ), ], @@ -1036,11 +1033,12 @@ def test_ctor_invalid_input( connection_id: str, parameters: Dict[str, Any], storage_descriptor: str, - exception_class: TypeError, ): - with pytest.raises(exception_class): + with pytest.raises(TypeError) as e: external_config.ExternalCatalogTableOptions( connection_id=connection_id, parameters=parameters, storage_descriptor=storage_descriptor, ) + + assert "Pass" in str(e.value) diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py index c8f8aaef8..60ffc9bf6 100644 --- a/tests/unit/test_schema.py +++ b/tests/unit/test_schema.py @@ -14,7 +14,12 @@ from 
google.cloud import bigquery from google.cloud.bigquery.standard_sql import StandardSqlStructType -from google.cloud.bigquery.schema import PolicyTagList, ForeignTypeInfo +from google.cloud.bigquery.schema import ( + PolicyTagList, + ForeignTypeInfo, + StorageDescriptor, + SerDeInfo, +) import unittest from unittest import mock @@ -1138,7 +1143,7 @@ def test_ctor_valid_input(self, type_system, expected): def test_ctor_invalid_input(self): with pytest.raises(TypeError) as e: result = self._make_one(type_system=123) - assert result == e + assert "Pass" in str(e.value) @pytest.mark.parametrize( "type_system,expected", @@ -1151,3 +1156,156 @@ def test_ctor_invalid_input(self): def test_to_api_repr(self, type_system, expected): result = self._make_one(type_system=type_system) assert result.to_api_repr() == expected + + +class TestStorageDescriptor: + """Tests for the StorageDescriptor class.""" + + @staticmethod + def _get_target_class(): + return StorageDescriptor + + def _make_one(self, *args, **kwargs): + return self._get_target_class()(*args, **kwargs) + + @pytest.mark.parametrize( + "input_format,location_uri,output_format,serde_info", + [ + (None, None, None, None), + ("testpath.to.OrcInputFormat", None, None, None), + (None, "gs://test/path/", None, None), + (None, None, "testpath.to.OrcOutputFormat", None), + (None, None, None, "TODO fix serde info"), + ("testpath.to.OrcInputFormat", "gs://test/path/", "testpath.to.OrcOutputFormat", "TODO fix serde info"), + ], + ) + def test_ctor_valid_input(self, input_format, location_uri, output_format, serde_info): + storage_descriptor = self._make_one( + input_format=input_format, + location_uri=location_uri, + output_format=output_format, + serde_info=serde_info, + ) + assert storage_descriptor._properties["inputFormat"] == input_format + assert storage_descriptor._properties["locationUri"] == location_uri + assert storage_descriptor._properties["outputFormat"] == output_format + assert 
storage_descriptor._properties["serdeInfo"] == serde_info + # QUESTION: which makes more sense? check against the background dict + # OR check against the getter attribute? + assert storage_descriptor.input_format == input_format + assert storage_descriptor.location_uri == location_uri + assert storage_descriptor.output_format == output_format + assert storage_descriptor.serde_info == serde_info + + + @pytest.mark.parametrize( + "arg,value", + [ + ("input_format", 123), + ("location", 123), + ("output_format", 123), + ("serde_info", 123), + ] + ) + def test_ctor_invalid_input(self, arg, value): + with pytest.raises(TypeError) as e: + result = self._make_one(arg=value) + assert "Pass" in str(e.value) + + + def test_to_api_repr(self): + storage_descriptor = self._make_one( + input_format="input_format", + location_uri="location_uri", + output_format="output_format", + serde_info="TODO fix serde info", + ) + expected_repr = { + "inputFormat": "input_format", + "locationUri": "location_uri", + "outputFormat": "output_format", + "serdeInfo": "TODO fix serde info", + } + assert storage_descriptor.to_api_repr() == expected_repr + + + + + + ''' TODO, consider these when SERDE if finished... 
+ @pytest.mark.parametrize( + "serde_info,expected", + [ + ({"key": "value"}, {"key": "value"}), + ([1, 2, 3], [1, 2, 3]), + (123, 123), + ("string", "string"), + ], + ) + ''' + +class TestSerDeInfo: + """Tests for the SerDeInfo class.""" + + @staticmethod + def _get_target_class(): + return SerDeInfo + + def _make_one(self, *args, **kwargs): + return self._get_target_class()(*args, **kwargs) + + @pytest.mark.parametrize( + "serialization_library,name,parameters", + [ + ("testpath.to.LazySimpleSerDe", None, None), + ("testpath.to.LazySimpleSerDe", "serde_name", None), + ("testpath.to.LazySimpleSerDe", None, {"key": "value"}), + ("testpath.to.LazySimpleSerDe", "serde_name", {"key": "value"}), + ], + ) + def test_ctor_valid_input(self, serialization_library, name, parameters): + serde_info = self._make_one( + serialization_library=serialization_library, + name=name, + parameters=parameters, + ) + assert serde_info._properties["serializationLibrary"] == serialization_library + assert serde_info._properties["name"] == name + assert serde_info._properties["parameters"] == parameters + # QUESTION: which makes more sense? check against the background dict + # OR check against the getter attribute? 
+ assert serde_info.serialization_library == serialization_library + assert serde_info.name == name + assert serde_info.parameters == parameters + + @pytest.mark.parametrize( + "serialization_library,name,parameters", + [ + (123, None, None), + ("testpath.to.LazySimpleSerDe", 123, None), + ("testpath.to.LazySimpleSerDe", None, ['test', 'list']), + ("testpath.to.LazySimpleSerDe", None, 123), + ], + ) + def test_ctor_invalid_input(self, serialization_library, name, parameters): + with pytest.raises(TypeError) as e: + result = self._make_one( + serialization_library=serialization_library, + name=name, + parameters=parameters, + ) + + assert "Pass" in str(e.value) + + def test_to_api_repr(self): + serde_info = self._make_one( + serialization_library="testpath.to.LazySimpleSerDe", + name="serde_name", + parameters={"key": "value"}, + ) + expected_repr = { + "serializationLibrary": "testpath.to.LazySimpleSerDe", + "name": "serde_name", + "parameters": {"key": "value"}, + } + assert serde_info.to_api_repr() == expected_repr \ No newline at end of file From 0fcf42450810e8c0124c0f73598f6814df939dcd Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Thu, 3 Oct 2024 10:19:36 -0400 Subject: [PATCH 17/28] Update tests/unit/test_schema.py --- tests/unit/test_schema.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py index 60ffc9bf6..4e45f6fc6 100644 --- a/tests/unit/test_schema.py +++ b/tests/unit/test_schema.py @@ -1232,17 +1232,6 @@ def test_to_api_repr(self): - ''' TODO, consider these when SERDE if finished... 
- @pytest.mark.parametrize( - "serde_info,expected", - [ - ({"key": "value"}, {"key": "value"}), - ([1, 2, 3], [1, 2, 3]), - (123, 123), - ("string", "string"), - ], - ) - ''' class TestSerDeInfo: """Tests for the SerDeInfo class.""" From d7698d269ca2c65b613cf7b806eb1598f4e80fe7 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Fri, 11 Oct 2024 09:12:05 -0400 Subject: [PATCH 18/28] Update google/cloud/bigquery/_helpers.py Co-authored-by: Suzy Mueller --- google/cloud/bigquery/_helpers.py | 28 +++++++++------------------- 1 file changed, 9 insertions(+), 19 deletions(-) diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index e753eada4..ca3b220b8 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -1031,12 +1031,13 @@ def from_api_repr(cls, resource: dict): return config -def _isinstance_or_raise(value: Any, dtype: Optional[Union[Any, Tuple[Any, ...]]]): +def _isinstance_or_raise(value: Any, dtype: type, none_allowed: Optional[bool]): """Determine whether a value type matches a given datatype or None. Args: value (Any): Value to be checked. - dtype (Optional[Union[Any, Tuple[Any, ...]]]): Expected data type(s). + dtype (type): Expected data type(s). + none_allowed Optional(bool): whether value is allowed to be None. Returns: Any: Returns the input value if the type check is successful. @@ -1044,22 +1045,11 @@ def _isinstance_or_raise(value: Any, dtype: Optional[Union[Any, Tuple[Any, ...]] Raises: TypeError: If the input value's type does not match the expected data type(s). 
""" - - # Simplest case - if dtype is None and value is None: + if none_allowed and value is None: + return value + + if isinstance(value, dtype): return value - elif isinstance(dtype, tuple): - # Another simple case - if None in dtype and value is None: - return value - # Iterate through the tuple and check if value is an instance of any type - if not any(isinstance(value, t) for t in dtype if t is not None): - valid_types_str = ", ".join(str(t) for t in dtype if t is not None) - msg = f"Pass {value} as one of '{valid_types_str}' or None. Got {type(value)}." - raise TypeError(msg) - else: - if not isinstance(value, dtype): - msg = f"Pass {value} as a '{dtype}'. Got {type(value)}." - raise TypeError(msg) - return value + msg = f"Pass {value} as a '{dtype}' (or None). Got {type(value)}." # Add the 'or None' conditionally + raise TypeError(msg) From 43dc45ef17922ae23efe69b1b48351c865529f61 Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Fri, 11 Oct 2024 15:12:37 +0000 Subject: [PATCH 19/28] updates logic and tests related to _isinstance_or_raise' --- google/cloud/bigquery/_helpers.py | 19 ++++++++++++----- tests/unit/test__helpers.py | 35 +++++++++++++++---------------- 2 files changed, 31 insertions(+), 23 deletions(-) diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index ca3b220b8..b36c2b0c8 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -23,7 +23,7 @@ import re import os import warnings -from typing import Optional, Union, Any, Tuple +from typing import Optional, Union, Any, Tuple, Type from dateutil import relativedelta from google.cloud._helpers import UTC # type: ignore @@ -1031,13 +1031,18 @@ def from_api_repr(cls, resource: dict): return config -def _isinstance_or_raise(value: Any, dtype: type, none_allowed: Optional[bool]): +def _isinstance_or_raise( + value: Any, + dtype: Union[Type, Tuple[Type, ...]], + none_allowed: Optional[bool]=False, + ) -> Any: """Determine whether a 
value type matches a given datatype or None. Args: value (Any): Value to be checked. - dtype (type): Expected data type(s). - none_allowed Optional(bool): whether value is allowed to be None. + dtype (type): Expected data type or tuple of data types. + none_allowed Optional(bool): whether value is allowed to be None. Default + is False. Returns: Any: Returns the input value if the type check is successful. @@ -1051,5 +1056,9 @@ def _isinstance_or_raise(value: Any, dtype: type, none_allowed: Optional[bool]): if isinstance(value, dtype): return value - msg = f"Pass {value} as a '{dtype}' (or None). Got {type(value)}." # Add the 'or None' conditionally + or_none = '' + if none_allowed: + or_none = ' (or None)' + + msg = f"Pass {value} as a '{dtype}'{or_none}. Got {type(value)}." raise TypeError(msg) diff --git a/tests/unit/test__helpers.py b/tests/unit/test__helpers.py index fad28a0ad..e246e1df8 100644 --- a/tests/unit/test__helpers.py +++ b/tests/unit/test__helpers.py @@ -1666,35 +1666,34 @@ def test_w_env_var(self): class Test__isinstance_or_raise: @pytest.mark.parametrize( - "value,dtype,expected", + "value,dtype,none_allowed,expected", [ - (None, None, None), - ('hello world.uri', str, 'hello world.uri'), - (None, (str, None), None), - (None, (None, str), None), - (None, (str, None), None), - ('hello world.uri', (None, str), 'hello world.uri'), - ('hello world.uri', (str, None), 'hello world.uri'), + (None, str, True, None), + ('hello world.uri', str, True, 'hello world.uri'), + ('hello world.uri', str, False, 'hello world.uri'), + (None, (str, float), True, None), + ('hello world.uri', (str, float), True, 'hello world.uri'), + ('hello world.uri', (str, float), False, 'hello world.uri'), ], ) - def test__valid_isinstance_or_raise(self, value, dtype, expected): - result = _isinstance_or_raise(value, dtype) + def test__valid_isinstance_or_raise(self, value, dtype, none_allowed, expected): + result = _isinstance_or_raise(value, dtype, none_allowed=none_allowed) 
assert result == expected @pytest.mark.parametrize( - "value,dtype,expected", + "value,dtype,none_allowed,expected", [ - (None, str, pytest.raises(TypeError)), - ({"key": "value"}, str, pytest.raises(TypeError)), - ({"key": "value"}, None, pytest.raises(TypeError)), - ({"key": "value"}, (str, None), pytest.raises(TypeError)), - ({"key": "value"}, (None, str), pytest.raises(TypeError)), + (None, str, False, pytest.raises(TypeError)), + ({"key": "value"}, str, True, pytest.raises(TypeError)), + ({"key": "value"}, str, False, pytest.raises(TypeError)), + ({"key": "value"}, (str, float), True, pytest.raises(TypeError)), + ({"key": "value"}, (str, float), False, pytest.raises(TypeError)), ], ) - def test__invalid_isinstance_or_raise(self, value, dtype, expected): + def test__invalid_isinstance_or_raise(self, value, dtype, none_allowed, expected): with expected as e: - result = _isinstance_or_raise(value, dtype) + result = _isinstance_or_raise(value, dtype, none_allowed=none_allowed) assert result == e From 4f117a78e08e3305d838600a7930bf8f6bbfd6ef Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Fri, 11 Oct 2024 16:52:49 +0000 Subject: [PATCH 20/28] updates from_api_repr and a number of tests and cleans up miscellaneous cruft' --- google/cloud/bigquery/_helpers.py | 54 +++++------ google/cloud/bigquery/external_config.py | 23 +++-- google/cloud/bigquery/query.py | 1 - google/cloud/bigquery/schema.py | 116 ++++++++++++++--------- tests/unit/test__helpers.py | 14 ++- tests/unit/test_external_config.py | 7 +- tests/unit/test_schema.py | 57 ++++++----- 7 files changed, 151 insertions(+), 121 deletions(-) diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index b36c2b0c8..97933a197 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -1007,35 +1007,11 @@ def _verify_job_config_type(job_config, expected_type, param_name="job_config"): ) -class ResourceBase: - """Base class providing the from_api_repr 
method.""" - - def __init__(self): - self._properties = {} - - @classmethod - def from_api_repr(cls, resource: dict): - """Factory: constructs an instance of the class (cls) - given its API representation. - - Args: - resource (Dict[str, Any]): - API representation of the object to be instantiated. - - Returns: - ResourceBase: An instance of the class initialized with data - from 'resource'. - """ - config = cls() - config._properties = copy.deepcopy(resource) - return config - - def _isinstance_or_raise( - value: Any, - dtype: Union[Type, Tuple[Type, ...]], - none_allowed: Optional[bool]=False, - ) -> Any: + value: Any, + dtype: Union[Type, Tuple[Type, ...], None], + none_allowed: Optional[bool] = False, +) -> Any: """Determine whether a value type matches a given datatype or None. Args: @@ -1055,10 +1031,26 @@ def _isinstance_or_raise( if isinstance(value, dtype): return value - - or_none = '' + + or_none = "" if none_allowed: - or_none = ' (or None)' + or_none = " (or None)" msg = f"Pass {value} as a '{dtype}'{or_none}. Got {type(value)}." raise TypeError(msg) + + +def _from_api_repr(obj, resource: dict): + """Factory: constructs an instance of the class (cls) + given its API representation. + + Args: + resource (Dict[str, Any]): + API representation of the object to be instantiated. + + Returns: + An instance of the class initialized with data from 'resource'. 
+ """ + config = obj + config._properties = copy.deepcopy(resource) + return config diff --git a/google/cloud/bigquery/external_config.py b/google/cloud/bigquery/external_config.py index 69888c3cf..b799f8ea0 100644 --- a/google/cloud/bigquery/external_config.py +++ b/google/cloud/bigquery/external_config.py @@ -30,7 +30,7 @@ _int_or_none, _str_or_none, _isinstance_or_raise, - ResourceBase, + _from_api_repr, ) from google.cloud.bigquery.format_options import AvroOptions, ParquetOptions from google.cloud.bigquery.schema import SchemaField @@ -1009,7 +1009,7 @@ def from_api_repr(cls, resource: dict) -> "ExternalConfig": return config -class ExternalCatalogDatasetOptions(ResourceBase): +class ExternalCatalogDatasetOptions: """Options defining open source compatible datasets living in the BigQuery catalog. Contains metadata of open source database, schema or namespace represented by the current dataset. @@ -1041,7 +1041,7 @@ def default_storage_location_uri(self) -> Any: @default_storage_location_uri.setter def default_storage_location_uri(self, value: str): - value = _isinstance_or_raise(value, (str, None)) + value = _isinstance_or_raise(value, str, none_allowed=True) self._properties["defaultStorageLocationUri"] = value @property @@ -1053,7 +1053,7 @@ def parameters(self) -> Any: @parameters.setter def parameters(self, value: dict[str, Any]): - value = _isinstance_or_raise(value, (dict, None)) + value = _isinstance_or_raise(value, dict, none_allowed=True) self._properties["parameters"] = value def to_api_repr(self) -> dict: @@ -1066,8 +1066,11 @@ def to_api_repr(self) -> dict: config = copy.deepcopy(self._properties) return config + def from_api_repr(self, resource): + return _from_api_repr(self, resource) -class ExternalCatalogTableOptions(ResourceBase): + +class ExternalCatalogTableOptions: """Metadata about open source compatible table. The fields contained in these options correspond to hive metastore's table level properties. 
@@ -1109,7 +1112,7 @@ def connection_id(self): @connection_id.setter def connection_id(self, value: Optional[str]): - value = _isinstance_or_raise(value, (str, None)) + value = _isinstance_or_raise(value, str, none_allowed=True) self._properties["connectionId"] = value @property @@ -1123,7 +1126,7 @@ def parameters(self) -> Any: @parameters.setter def parameters(self, value: Union[Dict[str, Any], None]): - value = _isinstance_or_raise(value, (dict, None)) + value = _isinstance_or_raise(value, dict, none_allowed=True) self._properties["parameters"] = value @property @@ -1135,9 +1138,8 @@ def storage_descriptor(self) -> Any: @storage_descriptor.setter def storage_descriptor(self, value: Optional[str]): - value = _isinstance_or_raise(value, (str, None)) + value = _isinstance_or_raise(value, str, none_allowed=True) self._properties["storageDescriptor"] = value - def to_api_repr(self) -> dict: """Build an API representation of this object. @@ -1148,3 +1150,6 @@ def to_api_repr(self) -> dict: """ config = copy.deepcopy(self._properties) return config + + def from_api_repr(self, resource): + return _from_api_repr(self, resource) diff --git a/google/cloud/bigquery/query.py b/google/cloud/bigquery/query.py index f1090a7dc..a95c91346 100644 --- a/google/cloud/bigquery/query.py +++ b/google/cloud/bigquery/query.py @@ -1003,7 +1003,6 @@ def __init__( ): self.name = name self.range_element_type = self._parse_range_element_type(range_element_type) - print(self.range_element_type.type_._type) self.start = start self.end = end diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py index 155c85bf1..aa97f7572 100644 --- a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ -20,7 +20,10 @@ from typing import Any, Dict, Iterable, Optional, Union, cast from google.cloud.bigquery import standard_sql -from google.cloud.bigquery._helpers import ResourceBase, _isinstance_or_raise +from google.cloud.bigquery._helpers import ( + 
_isinstance_or_raise, + _from_api_repr, +) from google.cloud.bigquery.enums import StandardSqlTypeNames @@ -592,7 +595,7 @@ def to_api_repr(self) -> dict: return answer -class TableSchema(ResourceBase): +class TableSchema: """Schema of a table Args: @@ -601,7 +604,9 @@ class TableSchema(ResourceBase): definition in field schema. """ - def __init__(self, fields: Optional[list] = None, foreign_type_info: Optional[str] = None): + def __init__( + self, fields: Optional[list] = None, foreign_type_info: Optional[str] = None + ): self._properties = {} self.fields = fields self.foreign_type_info = foreign_type_info @@ -614,7 +619,7 @@ def fields(self) -> Any: @fields.setter def fields(self, value: list, dtype: str) -> str: - value = _isinstance_or_raise(value, (list, None)) + value = _isinstance_or_raise(value, list, none_allowed=True) self._properties["fields"] = value @property @@ -641,8 +646,11 @@ def to_api_repr(self) -> dict: """ return copy.deepcopy(self._properties) + def from_api_repr(self, resource): + return _from_api_repr(self, resource) -class ForeignTypeInfo(ResourceBase): + +class ForeignTypeInfo: """Metadata about the foreign data type definition such as the system in which the type is defined. @@ -664,7 +672,7 @@ def type_system(self): @type_system.setter def type_system(self, value: str): - value = _isinstance_or_raise(value, (str, None)) + value = _isinstance_or_raise(value, str, none_allowed=True) self._properties["typeSystem"] = value def to_api_repr(self) -> dict: @@ -676,8 +684,11 @@ def to_api_repr(self) -> dict: """ return copy.deepcopy(self._properties) + def from_api_repr(self, resource): + return _from_api_repr(self, resource) + -class StorageDescriptor(ResourceBase): +class StorageDescriptor: """Contains information about how a table's data is stored and accessed by open source query engines. @@ -697,63 +708,70 @@ class StorageDescriptor(ResourceBase): serdeInfo (Optional[Any]): Serializer and deserializer information. 
""" - def __init__(self, input_format: Optional[str] = None, location_uri: Optional[str] = None, output_format: Optional[str] = None, serde_info: Optional[Any] = None): + def __init__( + self, + input_format: Optional[str] = None, + location_uri: Optional[str] = None, + output_format: Optional[str] = None, + serde_info: Optional[Any] = None, + ): self._properties = {} self.input_format = input_format self.location_uri = location_uri self.output_format = output_format - self.serde_info = serde_info - + self.serde_info = serde_info + @property def input_format(self) -> Any: - '''Optional. Specifies the fully qualified class name of the InputFormat + """Optional. Specifies the fully qualified class name of the InputFormat (e.g. "org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"). The maximum - length is 128 characters.''' + length is 128 characters.""" return self._properties.get("inputFormat") @input_format.setter def input_format(self, value: Optional[str]): - value = _isinstance_or_raise(value, (str, None)) - self._properties['inputFormat'] = value + value = _isinstance_or_raise(value, str, none_allowed=True) + self._properties["inputFormat"] = value @property def location_uri(self) -> Any: - '''Optional. The physical location of the table (e.g. `gs://spark- + """Optional. The physical location of the table (e.g. `gs://spark- dataproc-data/pangea-data/case_sensitive/` or `gs://spark-dataproc- - data/pangea-data/*`). The maximum length is 2056 bytes.''' + data/pangea-data/*`). The maximum length is 2056 bytes.""" return self._properties.get("locationUri") @location_uri.setter def location_uri(self, value: Optional[str]): - value = _isinstance_or_raise(value, (str, None)) - self._properties['locationUri'] = value + value = _isinstance_or_raise(value, str, none_allowed=True) + self._properties["locationUri"] = value @property def output_format(self) -> Any: - '''Optional. Specifies the fully qualified class name of the + """Optional. 
Specifies the fully qualified class name of the OutputFormat (e.g. "org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat"). - The maximum length is 128 characters.''' + The maximum length is 128 characters.""" return self._properties.get("outputFormat") @output_format.setter def output_format(self, value: Optional[str]): - value = _isinstance_or_raise(value, (str, None)) - self._properties['outputFormat'] = value + value = _isinstance_or_raise(value, str, none_allowed=True) + self._properties["outputFormat"] = value @property def serde_info(self) -> Any: - '''Optional. Serializer and deserializer information.''' + """Optional. Serializer and deserializer information.""" return self._properties.get("serdeInfo") @serde_info.setter def serde_info(self, value: Optional[Any]): - value = _isinstance_or_raise(value, (str, None)) #TODO fix, when serde class is done - self._properties['serdeInfo'] = value - + value = _isinstance_or_raise( + value, str, none_allowed=True + ) # TODO fix, when serde class is done + self._properties["serdeInfo"] = value def to_api_repr(self) -> dict: """Build an API representation of this object. @@ -764,8 +782,11 @@ def to_api_repr(self) -> dict: """ return copy.deepcopy(self._properties) + def from_api_repr(self, resource): + return _from_api_repr(self, resource) + -class SerDeInfo(ResourceBase): +class SerDeInfo: """Serializer and deserializer information. Args: @@ -780,49 +801,53 @@ class SerDeInfo(ResourceBase): parameters for the serialization library. Maximum size 10 Kib. 
""" - def __init__(self, serialization_library: str, name: Optional[str] = None, parameters: Optional[dict[str, str]] = None): + def __init__( + self, + serialization_library: str, + name: Optional[str] = None, + parameters: Optional[dict[str, str]] = None, + ): self._properties = {} - self.serialization_library = serialization_library + self.serialization_library = serialization_library self.name = name self.parameters = parameters @property def serialization_library(self) -> Any: - '''Required. Specifies a fully-qualified class name of the serialization + """Required. Specifies a fully-qualified class name of the serialization library that is responsible for the translation of data between table representation and the underlying low-level input and output format - structures. The maximum length is 256 characters.''' + structures. The maximum length is 256 characters.""" - return self._properties.get('serializationLibrary') + return self._properties.get("serializationLibrary") @serialization_library.setter def serialization_library(self, value: str): value = _isinstance_or_raise(value, str) - self._properties['serializationLibrary'] = value + self._properties["serializationLibrary"] = value - @property def name(self) -> Any: - '''Optional. Name of the SerDe. The maximum length is 256 characters.''' + """Optional. Name of the SerDe. The maximum length is 256 characters.""" - return self._properties.get('name') + return self._properties.get("name") @name.setter def name(self, value: Optional[str] = None): - value = _isinstance_or_raise(value, (str, None)) - self._properties['name'] = value + value = _isinstance_or_raise(value, str, none_allowed=True) + self._properties["name"] = value @property def parameters(self) -> Any: - '''Optional. Key-value pairs that define the initialization parameters - for the serialization library. Maximum size 10 Kib.''' + """Optional. Key-value pairs that define the initialization parameters + for the serialization library. 
Maximum size 10 Kib.""" - return self._properties.get('parameters') + return self._properties.get("parameters") @parameters.setter def parameters(self, value: Optional[dict[str, str]] = None): - value = _isinstance_or_raise(value, (dict, None)) - self._properties['parameters'] = value + value = _isinstance_or_raise(value, dict, none_allowed=True) + self._properties["parameters"] = value def to_api_repr(self) -> dict: """Build an API representation of this object. @@ -831,4 +856,7 @@ def to_api_repr(self) -> dict: Dict[str, Any]: A dictionary in the format used by the BigQuery API. """ - return copy.deepcopy(self._properties) \ No newline at end of file + return copy.deepcopy(self._properties) + + def from_api_repr(self, resource): + return _from_api_repr(self, resource) diff --git a/tests/unit/test__helpers.py b/tests/unit/test__helpers.py index e246e1df8..276f1991d 100644 --- a/tests/unit/test__helpers.py +++ b/tests/unit/test__helpers.py @@ -1669,16 +1669,16 @@ class Test__isinstance_or_raise: "value,dtype,none_allowed,expected", [ (None, str, True, None), - ('hello world.uri', str, True, 'hello world.uri'), - ('hello world.uri', str, False, 'hello world.uri'), + ("hello world.uri", str, True, "hello world.uri"), + ("hello world.uri", str, False, "hello world.uri"), (None, (str, float), True, None), - ('hello world.uri', (str, float), True, 'hello world.uri'), - ('hello world.uri', (str, float), False, 'hello world.uri'), + ("hello world.uri", (str, float), True, "hello world.uri"), + ("hello world.uri", (str, float), False, "hello world.uri"), ], ) def test__valid_isinstance_or_raise(self, value, dtype, none_allowed, expected): result = _isinstance_or_raise(value, dtype, none_allowed=none_allowed) - + assert result == expected @pytest.mark.parametrize( @@ -1694,7 +1694,5 @@ def test__valid_isinstance_or_raise(self, value, dtype, none_allowed, expected): def test__invalid_isinstance_or_raise(self, value, dtype, none_allowed, expected): with expected as e: result 
= _isinstance_or_raise(value, dtype, none_allowed=none_allowed) - - assert result == e - + assert result == e diff --git a/tests/unit/test_external_config.py b/tests/unit/test_external_config.py index 790e3d3f6..6944eeaca 100644 --- a/tests/unit/test_external_config.py +++ b/tests/unit/test_external_config.py @@ -934,11 +934,11 @@ def test_ctor_explicit( def test_ctor_invalid_input(self): """Test ExternalCatalogDatasetOptions constructor with invalid input.""" with pytest.raises(TypeError) as e: - result = self._make_one(default_storage_location_uri=123) + self._make_one(default_storage_location_uri=123) assert "Pass" in str(e.value) with pytest.raises(TypeError) as e: - result = self._make_one(parameters=123) + self._make_one(parameters=123) assert "Pass" in str(e.value) def test_to_api_repr(self): @@ -966,7 +966,6 @@ def _get_target_class(): def _make_one(self, *args, **kw): return self._get_target_class()(*args, **kw) - @pytest.mark.parametrize( "connection_id,parameters,storage_descriptor", [ @@ -1040,5 +1039,5 @@ def test_ctor_invalid_input( parameters=parameters, storage_descriptor=storage_descriptor, ) - + assert "Pass" in str(e.value) diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py index 4e45f6fc6..d29702e0a 100644 --- a/tests/unit/test_schema.py +++ b/tests/unit/test_schema.py @@ -16,7 +16,7 @@ from google.cloud.bigquery.standard_sql import StandardSqlStructType from google.cloud.bigquery.schema import ( PolicyTagList, - ForeignTypeInfo, + # ForeignTypeInfo, StorageDescriptor, SerDeInfo, ) @@ -1122,6 +1122,7 @@ class TestForeignTypeInfo: @staticmethod def _get_target_class(): from google.cloud.bigquery.schema import ForeignTypeInfo + return ForeignTypeInfo def _make_one(self, *args, **kw): @@ -1137,12 +1138,12 @@ def _make_one(self, *args, **kw): ) def test_ctor_valid_input(self, type_system, expected): result = self._make_one(type_system=type_system) - - assert result._properties['typeSystem'] == expected + + assert 
result._properties["typeSystem"] == expected def test_ctor_invalid_input(self): with pytest.raises(TypeError) as e: - result = self._make_one(type_system=123) + self._make_one(type_system=123) assert "Pass" in str(e.value) @pytest.mark.parametrize( @@ -1176,10 +1177,17 @@ def _make_one(self, *args, **kwargs): (None, "gs://test/path/", None, None), (None, None, "testpath.to.OrcOutputFormat", None), (None, None, None, "TODO fix serde info"), - ("testpath.to.OrcInputFormat", "gs://test/path/", "testpath.to.OrcOutputFormat", "TODO fix serde info"), + ( + "testpath.to.OrcInputFormat", + "gs://test/path/", + "testpath.to.OrcOutputFormat", + "TODO fix serde info", + ), ], ) - def test_ctor_valid_input(self, input_format, location_uri, output_format, serde_info): + def test_ctor_valid_input( + self, input_format, location_uri, output_format, serde_info + ): storage_descriptor = self._make_one( input_format=input_format, location_uri=location_uri, @@ -1197,22 +1205,27 @@ def test_ctor_valid_input(self, input_format, location_uri, output_format, serde assert storage_descriptor.output_format == output_format assert storage_descriptor.serde_info == serde_info - @pytest.mark.parametrize( - "arg,value", + "input_format,location_uri,output_format,serde_info", [ - ("input_format", 123), - ("location", 123), - ("output_format", 123), - ("serde_info", 123), - ] + (123, None, None, None), + (None, 123, None, None), + (None, None, 123, None), + (None, None, None, 123), + ], ) - def test_ctor_invalid_input(self, arg, value): + def test_ctor_invalid_input( + self, input_format, location_uri, output_format, serde_info + ): with pytest.raises(TypeError) as e: - result = self._make_one(arg=value) + self._make_one( + input_format=input_format, + location_uri=location_uri, + output_format=output_format, + serde_info=serde_info, + ) assert "Pass" in str(e.value) - def test_to_api_repr(self): storage_descriptor = self._make_one( input_format="input_format", @@ -1229,10 +1242,6 @@ def 
test_to_api_repr(self): assert storage_descriptor.to_api_repr() == expected_repr - - - - class TestSerDeInfo: """Tests for the SerDeInfo class.""" @@ -1272,18 +1281,18 @@ def test_ctor_valid_input(self, serialization_library, name, parameters): [ (123, None, None), ("testpath.to.LazySimpleSerDe", 123, None), - ("testpath.to.LazySimpleSerDe", None, ['test', 'list']), + ("testpath.to.LazySimpleSerDe", None, ["test", "list"]), ("testpath.to.LazySimpleSerDe", None, 123), ], ) def test_ctor_invalid_input(self, serialization_library, name, parameters): with pytest.raises(TypeError) as e: - result = self._make_one( + self._make_one( serialization_library=serialization_library, name=name, parameters=parameters, ) - + assert "Pass" in str(e.value) def test_to_api_repr(self): @@ -1297,4 +1306,4 @@ def test_to_api_repr(self): "name": "serde_name", "parameters": {"key": "value"}, } - assert serde_info.to_api_repr() == expected_repr \ No newline at end of file + assert serde_info.to_api_repr() == expected_repr From defa38c1da7baa22d4b50aafb689a7c99b741a09 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Mon, 14 Oct 2024 15:44:09 -0400 Subject: [PATCH 21/28] Update google/cloud/bigquery/_helpers.py Co-authored-by: Suzy Mueller --- google/cloud/bigquery/_helpers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index 97933a197..03dd1fd5e 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -1009,7 +1009,7 @@ def _verify_job_config_type(job_config, expected_type, param_name="job_config"): def _isinstance_or_raise( value: Any, - dtype: Union[Type, Tuple[Type, ...], None], + dtype: Union[Type, Tuple[Type, ...]], none_allowed: Optional[bool] = False, ) -> Any: """Determine whether a value type matches a given datatype or None. 
From 14d1bd884ed6b81d4e276a90f92ab46e004128cb Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Wed, 30 Oct 2024 14:10:10 +0000 Subject: [PATCH 22/28] Most recent round of tweaks and experiments --- google/cloud/bigquery/_helpers.py | 4 +- google/cloud/bigquery/dataset.py | 28 +++++--- google/cloud/bigquery/external_config.py | 18 ++++- google/cloud/bigquery/table.py | 22 +++--- tests/unit/Untitled-2.py | 0 tests/unit/test__helpers.py | 23 +++++- tests/unit/test_dataset.py | 49 +++++++++++++ tests/unit/test_external_config.py | 17 +++++ tests/unit/test_table.py | 92 ++++++++++++++++++++++++ 9 files changed, 232 insertions(+), 21 deletions(-) create mode 100644 tests/unit/Untitled-2.py diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index 03dd1fd5e..d1d018f7b 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -1040,7 +1040,7 @@ def _isinstance_or_raise( raise TypeError(msg) -def _from_api_repr(obj, resource: dict): +def _from_api_repr(cls, resource: dict): """Factory: constructs an instance of the class (cls) given its API representation. @@ -1051,6 +1051,6 @@ def _from_api_repr(obj, resource: dict): Returns: An instance of the class initialized with data from 'resource'. 
""" - config = obj + config = cls config._properties = copy.deepcopy(resource) return config diff --git a/google/cloud/bigquery/dataset.py b/google/cloud/bigquery/dataset.py index 6db620eeb..01f508466 100644 --- a/google/cloud/bigquery/dataset.py +++ b/google/cloud/bigquery/dataset.py @@ -23,6 +23,7 @@ import google.cloud._helpers # type: ignore from google.cloud.bigquery import _helpers +from google.cloud.bigquery._helpers import _isinstance_or_raise, _get_sub_prop from google.cloud.bigquery.model import ModelReference from google.cloud.bigquery.routine import Routine, RoutineReference from google.cloud.bigquery.table import Table, TableReference @@ -946,17 +947,28 @@ def external_catalog_dataset_options(self): BigQuery catalog. Contains metadata of open source database, schema or namespace represented by the current dataset.""" - return self._properties.get("externalCatalogDatasetOptions") + prop = _helpers._get_sub_prop( + self._properties, ["externalCatalogDatasetOptions"] + ) + # self._PROPERTY_TO_API_FIELD["external_catalog_dataset_options"] + # ) + + if prop is not None: + prop = ExternalCatalogDatasetOptions().from_api_repr(prop) + print("DINOSAUR dataset.py prop: ", prop, type(prop)) + return prop + + # prop = self._get_sub_prop("destinationEncryptionConfiguration") + # if prop is not None: + # prop = EncryptionConfiguration.from_api_repr(prop) + # return prop @external_catalog_dataset_options.setter def external_catalog_dataset_options(self, value): - if not isinstance(value, ExternalCatalogDatasetOptions) and value is not None: - raise ValueError( - "external_catalog_dataset_options must be an " - "ExternalCatalogDatasetOptions object or None. " - f"Got {repr(value)}." 
- ) - self._properties["externalCatalogDatasetOptions"] = value.to_api_repr() + value = _isinstance_or_raise(value, ExternalCatalogDatasetOptions) + self._properties[ + self._PROPERTY_TO_API_FIELD["external_catalog_dataset_options"] + ] = value.to_api_repr() table = _get_table_reference diff --git a/google/cloud/bigquery/external_config.py b/google/cloud/bigquery/external_config.py index b799f8ea0..cf3daceef 100644 --- a/google/cloud/bigquery/external_config.py +++ b/google/cloud/bigquery/external_config.py @@ -1066,6 +1066,7 @@ def to_api_repr(self) -> dict: config = copy.deepcopy(self._properties) return config + @classmethod def from_api_repr(self, resource): return _from_api_repr(self, resource) @@ -1151,5 +1152,18 @@ def to_api_repr(self) -> dict: config = copy.deepcopy(self._properties) return config - def from_api_repr(self, resource): - return _from_api_repr(self, resource) + @classmethod + def from_api_repr(cls, resource: dict) -> "TODO": + """Factory: constructs an instance of the class (cls) + given its API representation. + + Args: + resource (Dict[str, Any]): + API representation of the object to be instantiated. + + Returns: + An instance of the class initialized with data from 'resource'. 
+ """ + config = cls() + config._properties = copy.deepcopy(resource) + return config diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 0846e08dd..6b9c63408 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -59,6 +59,7 @@ import google.cloud._helpers # type: ignore from google.cloud.bigquery import _helpers +from google.cloud.bigquery._helpers import _isinstance_or_raise from google.cloud.bigquery import _pandas_helpers from google.cloud.bigquery import _versions_helpers from google.cloud.bigquery import exceptions as bq_exceptions @@ -407,6 +408,7 @@ class Table(_TableBase): "view_query": "view", "require_partition_filter": "requirePartitionFilter", "table_constraints": "tableConstraints", + "external_catalog_table_options": "externalCatalogTableOptions", } def __init__(self, table_ref, schema=None) -> None: @@ -1006,17 +1008,21 @@ def external_catalog_table_options(self): BigQuery catalog. Contains metadata of open source database, schema or namespace represented by the current dataset.""" - return self._properties.get("externalCatalogTableOptions") + prop = self._properties.get( + self._PROPERTY_TO_API_FIELD["external_catalog_table_options"] + ) + if prop is not None: + prop = ExternalCatalogTableOptions.from_api_repr(prop) + return prop @external_catalog_table_options.setter def external_catalog_table_options(self, value): - if not isinstance(value, ExternalCatalogTableOptions) and value is not None: - raise ValueError( - "external_catalog_table_options must be an " - "ExternalCatalogTableOptions object or None. " - f"Got {repr(value)}." 
- ) - self._properties["externalCatalogTableOptions"] = value.to_api_repr() + value = _isinstance_or_raise( + value, ExternalCatalogTableOptions, none_allowed=False + ) + self._properties[ + self._PROPERTY_TO_API_FIELD["external_catalog_table_options"] + ] = value.to_api_repr() @classmethod def from_string(cls, full_table_id: str) -> "Table": diff --git a/tests/unit/Untitled-2.py b/tests/unit/Untitled-2.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/unit/test__helpers.py b/tests/unit/test__helpers.py index 276f1991d..5343f8efd 100644 --- a/tests/unit/test__helpers.py +++ b/tests/unit/test__helpers.py @@ -24,7 +24,10 @@ from unittest import mock import google.api_core -from google.cloud.bigquery._helpers import _isinstance_or_raise +from google.cloud.bigquery._helpers import ( + _isinstance_or_raise, + _from_api_repr, +) @pytest.mark.skipif( @@ -1696,3 +1699,21 @@ def test__invalid_isinstance_or_raise(self, value, dtype, none_allowed, expected result = _isinstance_or_raise(value, dtype, none_allowed=none_allowed) assert result == e + + +class _MockClass: + def __init__(self): + self._properties = {} + + +@pytest.fixture +def mock_class(): + return _MockClass + + +class Test__from_api_repr: + def test_from_api_repr(self, mock_class): + resource = {"foo": "bar", "baz": {"qux": 1}} + config = _from_api_repr(mock_class, resource) + assert config._properties == resource + assert config._properties is not resource diff --git a/tests/unit/test_dataset.py b/tests/unit/test_dataset.py index c0164bc73..0c32939c2 100644 --- a/tests/unit/test_dataset.py +++ b/tests/unit/test_dataset.py @@ -1014,6 +1014,55 @@ def test_from_string_legacy_string(self): with self.assertRaises(ValueError): cls.from_string("string-project:string_dataset") + API_REPR = { + "datasetReference": {"projectId": "project", "datasetId": "dataset-id"}, + "labels": {}, + "externalCatalogDatasetOptions": { + "defaultStorageLocationUri": "gs://test-bucket/test-path", + "parameters": 
{"key": "value"}, + }, + } + + def test_external_catalog_dataset_options_setter(self): + from google.cloud.bigquery.external_config import ExternalCatalogDatasetOptions + + dataset = self._make_one(self.DS_REF) + external_dataset_catalog_options = ExternalCatalogDatasetOptions( + default_storage_location_uri="gs://test-bucket/test-path", + parameters={"key": "value"}, + ) + + # test the setter + dataset.external_catalog_dataset_options = external_dataset_catalog_options + expected = self.API_REPR + result = dataset.to_api_repr() + assert result == expected + + def test_external_catalog_dataset_options_getter(self): + from google.cloud.bigquery.external_config import ExternalCatalogDatasetOptions + + dataset = self._make_one(self.DS_REF) + external_dataset_catalog_options = ExternalCatalogDatasetOptions( + default_storage_location_uri="gs://test-bucket/test-path", + parameters={"key": "value"}, + ) + dataset.external_catalog_dataset_options = external_dataset_catalog_options + print("DINOSAUR test_dataset.py dataset: ", dataset, type(dataset)) + expected = external_dataset_catalog_options + result = dataset.external_catalog_dataset_options + + assert result == expected + + def test_external_catalog_dataset_options_from_api_repr(self): + from google.cloud.bigquery.external_config import ExternalCatalogDatasetOptions + + resource = self.API_REPR + + dataset = self._make_one(self.DS_REF) + dataset = dataset.from_api_repr(resource) + result = dataset.external_catalog_dataset_options + assert result == resource["externalCatalogDatasetOptions"] + def test__build_resource_w_custom_field(self): dataset = self._make_one(self.DS_REF) dataset._properties["newAlphaProperty"] = "unreleased property" diff --git a/tests/unit/test_external_config.py b/tests/unit/test_external_config.py index 6944eeaca..03b5da69f 100644 --- a/tests/unit/test_external_config.py +++ b/tests/unit/test_external_config.py @@ -955,6 +955,14 @@ def test_to_api_repr(self): assert 
resource["defaultStorageLocationUri"] == default_storage_location_uri assert resource["parameters"] == parameters + def test_from_api_repr(self): + instance = self._make_one() + resource = { + "defaultStorageLocationUri": "gs://test-bucket/test-path", + "parameters": {"key": "value"}, + } + instance.from_api_repr(resource) + class TestExternalCatalogTableOptions: @staticmethod @@ -1041,3 +1049,12 @@ def test_ctor_invalid_input( ) assert "Pass" in str(e.value) + + def test_from_api_repr(self): + instance = self._make_one() + resource = { + "connectionId": "connection123", + "parameters": {"key": "value"}, + "storageDescriptor": "placeholder", + } + instance.from_api_repr(resource) diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index d6febcfb1..2b5645173 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -5822,3 +5822,95 @@ def test_table_reference_to_bqstorage_v1_stable(table_path): for klass in (mut.TableReference, mut.Table, mut.TableListItem): got = klass.from_string(table_path).to_bqstorage() assert got == expected + + +@pytest.fixture(scope="class") +def external_catalog_table_options(): + from google.cloud.bigquery.external_config import ExternalCatalogTableOptions + + return ExternalCatalogTableOptions( + connection_id="connection123", + parameters={"key": "value"}, + storage_descriptor="placeholder", + ) + + +class TestExternalCatalogTableOptions: + PROJECT = "project_id" + DS_ID = "dataset_id" + TABLE_NAME = "table_name" + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.table import Table + + return Table + + @classmethod + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_external_catalog_table_options_getter( + self, external_catalog_table_options + ): + from google.cloud.bigquery.external_config import ExternalCatalogTableOptions + + # create objects for the test + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = 
dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + + # Confirm that external catalog table options have not been set + assert table.external_catalog_table_options is None + + # Add an ExternalCatalogTableOptions object to the table. + table._properties[ + "externalCatalogTableOptions" + ] = external_catalog_table_options + table_repr = table.to_api_repr() + + # Extract the ecto object. + ecto_output = table_repr["externalCatalogTableOptions"] + + # Confirm that external catalog table options are an + # ExternalCatalogTableOptions object + assert isinstance(ecto_output, ExternalCatalogTableOptions) + + expected = { + "connectionId": "connection123", + "parameters": {"key": "value"}, + "storageDescriptor": "placeholder", + } + result = ecto_output.to_api_repr() + + # Confirm that the api_repr of the ecto_output matches the inputs + assert result == expected + + def test_external_catalog_table_options_setter( + self, external_catalog_table_options + ): + # from google.cloud.bigquery.external_config import ExternalCatalogTableOptions + + # create objects for the test + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + + # Add an ExternalCatalogTableOptions object to the table. + table.external_catalog_table_options = external_catalog_table_options + expected = { + "tableReference": { + "projectId": "project_id", + "datasetId": "dataset_id", + "tableId": "table_name", + }, + "labels": {}, + "externalCatalogTableOptions": { + "connectionId": "connection123", + "parameters": {"key": "value"}, + "storageDescriptor": "placeholder", + }, + } + # Confirm that the api_repr of the ecto_output matches the inputs + result = table.to_api_repr() + assert result == expected From 1b7ba09bdef499be401252c3feb723a1839254f1 Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Fri, 1 Nov 2024 19:53:54 +0000 Subject: [PATCH 23/28] Updates from futures import annotation. 
--- google/cloud/bigquery/external_config.py | 2 +- google/cloud/bigquery/schema.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigquery/external_config.py b/google/cloud/bigquery/external_config.py index cf3daceef..4a42bf2fe 100644 --- a/google/cloud/bigquery/external_config.py +++ b/google/cloud/bigquery/external_config.py @@ -18,7 +18,7 @@ Job.configuration.query.tableDefinitions. """ -from __future__ import absolute_import +from __future__ import absolute_import, annotations import base64 import copy diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py index aa97f7572..d17e39d12 100644 --- a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ -14,6 +14,8 @@ """Schemas for BigQuery tables / queries.""" +from __future__ import annotations + import collections import copy import enum From 79bbeb22f040df5bf0a6e9bfd0af3ba4bbb9ed64 Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Mon, 4 Nov 2024 14:59:51 +0000 Subject: [PATCH 24/28] Updates from_api_repr() and external_config tests --- google/cloud/bigquery/external_config.py | 15 +++- tests/unit/test_external_config.py | 88 +++++++++++++----------- 2 files changed, 62 insertions(+), 41 deletions(-) diff --git a/google/cloud/bigquery/external_config.py b/google/cloud/bigquery/external_config.py index 4a42bf2fe..e322737c7 100644 --- a/google/cloud/bigquery/external_config.py +++ b/google/cloud/bigquery/external_config.py @@ -1067,9 +1067,20 @@ def to_api_repr(self) -> dict: return config @classmethod - def from_api_repr(self, resource): - return _from_api_repr(self, resource) + def from_api_repr(cls, resource: dict) -> "TODO": + """Factory: constructs an instance of the class (cls) + given its API representation. + + Args: + resource (Dict[str, Any]): + API representation of the object to be instantiated. + Returns: + An instance of the class initialized with data from 'resource'. 
+ """ + config = cls() + config._properties = copy.deepcopy(resource) + return config class ExternalCatalogTableOptions: """Metadata about open source compatible table. The fields contained in these diff --git a/tests/unit/test_external_config.py b/tests/unit/test_external_config.py index 03b5da69f..285c62ae4 100644 --- a/tests/unit/test_external_config.py +++ b/tests/unit/test_external_config.py @@ -18,6 +18,10 @@ import unittest from google.cloud.bigquery import external_config +from google.cloud.bigquery.external_config import ( + ExternalCatalogDatasetOptions, + ExternalCatalogTableOptions +) from google.cloud.bigquery import schema import pytest @@ -905,41 +909,41 @@ def _get_target_class(): def _make_one(self, *args, **kw): return self._get_target_class()(*args, **kw) - def test_ctor_defaults(self): - """Test ExternalCatalogDatasetOptions constructor with default values.""" - instance = self._make_one() - - assert instance._properties["defaultStorageLocationUri"] is None - assert instance._properties["parameters"] is None - def test_ctor_explicit( + @pytest.mark.parametrize( + "default_storage_location_uri,parameters", + [ + ("gs://test-bucket/test-path", {"key": "value"}), # set all params + ("gs://test-bucket/test-path", None), # set only one parameter at a time + (None, {"key": "value"}), + (None, None), # use default parameters + ], + ) + def test_ctor_initialization( self, + default_storage_location_uri, + parameters, ): """Test ExternalCatalogDatasetOptions constructor with explicit values.""" - - default_storage_location_uri = "gs://test-bucket/test-path" - parameters = {"key": "value"} - instance = self._make_one( default_storage_location_uri=default_storage_location_uri, parameters=parameters, ) - assert ( - instance._properties["defaultStorageLocationUri"] - == default_storage_location_uri - ) - assert instance._properties["parameters"] == parameters + assert instance._properties == { + "defaultStorageLocationUri": default_storage_location_uri, + 
"parameters": parameters, + } def test_ctor_invalid_input(self): """Test ExternalCatalogDatasetOptions constructor with invalid input.""" with pytest.raises(TypeError) as e: self._make_one(default_storage_location_uri=123) - assert "Pass" in str(e.value) + assert "Pass " in str(e.value) with pytest.raises(TypeError) as e: self._make_one(parameters=123) - assert "Pass" in str(e.value) + assert "Pass " in str(e.value) def test_to_api_repr(self): """Test ExternalCatalogDatasetOptions.to_api_repr method.""" @@ -961,14 +965,15 @@ def test_from_api_repr(self): "defaultStorageLocationUri": "gs://test-bucket/test-path", "parameters": {"key": "value"}, } - instance.from_api_repr(resource) + result = instance.from_api_repr(resource) + + assert isinstance(result, ExternalCatalogDatasetOptions) + assert result._properties == resource class TestExternalCatalogTableOptions: @staticmethod def _get_target_class(): - from google.cloud.bigquery.external_config import ExternalCatalogTableOptions - return ExternalCatalogTableOptions def _make_one(self, *args, **kw): @@ -996,22 +1001,6 @@ def test_ctor_initialization(self, connection_id, parameters, storage_descriptor "storageDescriptor": storage_descriptor, } - def test_to_api_repr(self): - instance = self._make_one() - instance._properties = { - "connectionId": "connection123", - "parameters": {"key": "value"}, - "storageDescriptor": "placeholder", - } - - resource = instance.to_api_repr() - - assert resource == { - "connectionId": "connection123", - "parameters": {"key": "value"}, - "storageDescriptor": "placeholder", - } - @pytest.mark.parametrize( "connection_id, parameters, storage_descriptor", [ @@ -1048,7 +1037,24 @@ def test_ctor_invalid_input( storage_descriptor=storage_descriptor, ) - assert "Pass" in str(e.value) + assert "Pass " in str(e.value) + + def test_to_api_repr(self): + instance = self._make_one() + instance._properties = { + "connectionId": "connection123", + "parameters": {"key": "value"}, + "storageDescriptor": 
"placeholder", + } + + resource = instance.to_api_repr() + + assert resource == { + "connectionId": "connection123", + "parameters": {"key": "value"}, + "storageDescriptor": "placeholder", + } + def test_from_api_repr(self): instance = self._make_one() @@ -1057,4 +1063,8 @@ def test_from_api_repr(self): "parameters": {"key": "value"}, "storageDescriptor": "placeholder", } - instance.from_api_repr(resource) + result = instance.from_api_repr(resource) + + assert isinstance(result, ExternalCatalogTableOptions) + assert result._properties == resource + From d71d9045e43aefceadc1870a7a1934a3d743a7c0 Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Mon, 4 Nov 2024 16:32:40 +0000 Subject: [PATCH 25/28] Updates external_catalog_dataset functions in dataset.py and tests. --- google/cloud/bigquery/dataset.py | 10 +--------- tests/unit/test_dataset.py | 26 ++++++++++++-------------- 2 files changed, 13 insertions(+), 23 deletions(-) diff --git a/google/cloud/bigquery/dataset.py b/google/cloud/bigquery/dataset.py index 01f508466..686b00a72 100644 --- a/google/cloud/bigquery/dataset.py +++ b/google/cloud/bigquery/dataset.py @@ -950,18 +950,11 @@ def external_catalog_dataset_options(self): prop = _helpers._get_sub_prop( self._properties, ["externalCatalogDatasetOptions"] ) - # self._PROPERTY_TO_API_FIELD["external_catalog_dataset_options"] - # ) if prop is not None: prop = ExternalCatalogDatasetOptions().from_api_repr(prop) - print("DINOSAUR dataset.py prop: ", prop, type(prop)) return prop - # prop = self._get_sub_prop("destinationEncryptionConfiguration") - # if prop is not None: - # prop = EncryptionConfiguration.from_api_repr(prop) - # return prop @external_catalog_dataset_options.setter def external_catalog_dataset_options(self, value): @@ -970,10 +963,9 @@ def external_catalog_dataset_options(self, value): self._PROPERTY_TO_API_FIELD["external_catalog_dataset_options"] ] = value.to_api_repr() - table = _get_table_reference + table = _get_table_reference model = 
_get_model_reference - routine = _get_routine_reference def __repr__(self): diff --git a/tests/unit/test_dataset.py b/tests/unit/test_dataset.py index 0c32939c2..0ed00db73 100644 --- a/tests/unit/test_dataset.py +++ b/tests/unit/test_dataset.py @@ -1027,13 +1027,12 @@ def test_external_catalog_dataset_options_setter(self): from google.cloud.bigquery.external_config import ExternalCatalogDatasetOptions dataset = self._make_one(self.DS_REF) - external_dataset_catalog_options = ExternalCatalogDatasetOptions( + ecdo_obj = ExternalCatalogDatasetOptions( default_storage_location_uri="gs://test-bucket/test-path", parameters={"key": "value"}, ) - # test the setter - dataset.external_catalog_dataset_options = external_dataset_catalog_options + dataset.external_catalog_dataset_options = ecdo_obj expected = self.API_REPR result = dataset.to_api_repr() assert result == expected @@ -1042,26 +1041,25 @@ def test_external_catalog_dataset_options_getter(self): from google.cloud.bigquery.external_config import ExternalCatalogDatasetOptions dataset = self._make_one(self.DS_REF) - external_dataset_catalog_options = ExternalCatalogDatasetOptions( + ecdo_obj = ExternalCatalogDatasetOptions( default_storage_location_uri="gs://test-bucket/test-path", parameters={"key": "value"}, ) - dataset.external_catalog_dataset_options = external_dataset_catalog_options - print("DINOSAUR test_dataset.py dataset: ", dataset, type(dataset)) - expected = external_dataset_catalog_options - result = dataset.external_catalog_dataset_options - + dataset.external_catalog_dataset_options = ecdo_obj + expected = ecdo_obj._properties + result = dataset.external_catalog_dataset_options._properties + assert result == expected def test_external_catalog_dataset_options_from_api_repr(self): from google.cloud.bigquery.external_config import ExternalCatalogDatasetOptions resource = self.API_REPR - - dataset = self._make_one(self.DS_REF) - dataset = dataset.from_api_repr(resource) - result = 
dataset.external_catalog_dataset_options - assert result == resource["externalCatalogDatasetOptions"] + klass = self._get_target_class() + dataset = klass.from_api_repr(resource) + result = dataset.external_catalog_dataset_options._properties + expected = resource["externalCatalogDatasetOptions"] + assert result == expected def test__build_resource_w_custom_field(self): dataset = self._make_one(self.DS_REF) From b0a7fb11b4c5b6423427a67af6694741877520cf Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Wed, 6 Nov 2024 16:29:53 +0000 Subject: [PATCH 26/28] Adds fixtures, tests, corrections to classes and tests --- google/cloud/bigquery/dataset.py | 8 +-- google/cloud/bigquery/external_config.py | 26 ++++--- google/cloud/bigquery/schema.py | 57 +++++++++++---- tests/unit/test_dataset.py | 4 +- tests/unit/test_external_config.py | 88 +++++++++++++++++------- tests/unit/test_schema.py | 33 ++++----- tests/unit/test_table.py | 44 +++++++++--- 7 files changed, 181 insertions(+), 79 deletions(-) diff --git a/google/cloud/bigquery/dataset.py b/google/cloud/bigquery/dataset.py index 686b00a72..c790c74e2 100644 --- a/google/cloud/bigquery/dataset.py +++ b/google/cloud/bigquery/dataset.py @@ -23,7 +23,7 @@ import google.cloud._helpers # type: ignore from google.cloud.bigquery import _helpers -from google.cloud.bigquery._helpers import _isinstance_or_raise, _get_sub_prop +from google.cloud.bigquery._helpers import _isinstance_or_raise from google.cloud.bigquery.model import ModelReference from google.cloud.bigquery.routine import Routine, RoutineReference from google.cloud.bigquery.table import Table, TableReference @@ -955,15 +955,15 @@ def external_catalog_dataset_options(self): prop = ExternalCatalogDatasetOptions().from_api_repr(prop) return prop - @external_catalog_dataset_options.setter def external_catalog_dataset_options(self, value): - value = _isinstance_or_raise(value, ExternalCatalogDatasetOptions) + value = _isinstance_or_raise( + value, 
ExternalCatalogDatasetOptions, none_allowed=True + ) self._properties[ self._PROPERTY_TO_API_FIELD["external_catalog_dataset_options"] ] = value.to_api_repr() - table = _get_table_reference model = _get_model_reference routine = _get_routine_reference diff --git a/google/cloud/bigquery/external_config.py b/google/cloud/bigquery/external_config.py index e322737c7..59b715995 100644 --- a/google/cloud/bigquery/external_config.py +++ b/google/cloud/bigquery/external_config.py @@ -30,10 +30,10 @@ _int_or_none, _str_or_none, _isinstance_or_raise, - _from_api_repr, + _get_sub_prop, ) from google.cloud.bigquery.format_options import AvroOptions, ParquetOptions -from google.cloud.bigquery.schema import SchemaField +from google.cloud.bigquery.schema import SchemaField, StorageDescriptor class ExternalSourceFormat(object): @@ -1067,7 +1067,7 @@ def to_api_repr(self) -> dict: return config @classmethod - def from_api_repr(cls, resource: dict) -> "TODO": + def from_api_repr(cls, resource: dict) -> ExternalCatalogDatasetOptions: """Factory: constructs an instance of the class (cls) given its API representation. @@ -1082,6 +1082,7 @@ def from_api_repr(cls, resource: dict) -> "TODO": config._properties = copy.deepcopy(resource) return config + class ExternalCatalogTableOptions: """Metadata about open source compatible table. The fields contained in these options correspond to hive metastore's table level properties. @@ -1104,7 +1105,7 @@ def __init__( connection_id: Optional[str] = None, parameters: Union[Dict[str, Any], None] = None, storage_descriptor: Optional[ - str + StorageDescriptor ] = None, # TODO implement StorageDescriptor, then correct this type hint ): self._properties = {} # type: Dict[str, Any] @@ -1146,12 +1147,19 @@ def storage_descriptor(self) -> Any: """Optional. 
A storage descriptor containing information about the physical storage of this table.""" - return self._properties.get("storageDescriptor") + prop = _get_sub_prop(self._properties, ["storageDescriptor"]) + + if prop is not None: + prop = StorageDescriptor().from_api_repr(prop) + return prop @storage_descriptor.setter - def storage_descriptor(self, value: Optional[str]): - value = _isinstance_or_raise(value, str, none_allowed=True) - self._properties["storageDescriptor"] = value + def storage_descriptor(self, value): + value = _isinstance_or_raise(value, StorageDescriptor, none_allowed=True) + if value is not None: + self._properties["storageDescriptor"] = value.to_api_repr() + else: + self._properties["storageDescriptor"] = value def to_api_repr(self) -> dict: """Build an API representation of this object. @@ -1164,7 +1172,7 @@ def to_api_repr(self) -> dict: return config @classmethod - def from_api_repr(cls, resource: dict) -> "TODO": + def from_api_repr(cls, resource: dict) -> ExternalCatalogTableOptions: """Factory: constructs an instance of the class (cls) given its API representation. diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py index d17e39d12..dc6241214 100644 --- a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ -25,6 +25,7 @@ from google.cloud.bigquery._helpers import ( _isinstance_or_raise, _from_api_repr, + _get_sub_prop, ) from google.cloud.bigquery.enums import StandardSqlTypeNames @@ -715,7 +716,7 @@ def __init__( input_format: Optional[str] = None, location_uri: Optional[str] = None, output_format: Optional[str] = None, - serde_info: Optional[Any] = None, + serde_info: Optional[SerDeInfo] = None, ): self._properties = {} self.input_format = input_format @@ -766,14 +767,20 @@ def output_format(self, value: Optional[str]): def serde_info(self) -> Any: """Optional. 
Serializer and deserializer information.""" - return self._properties.get("serdeInfo") + prop = _get_sub_prop(self._properties, ["serDeInfo"]) + print(f"DINOSAUR in SD: {prop}\n\n{self._properties}") + if prop is not None: + prop = SerDeInfo().from_api_repr(prop) + + return prop @serde_info.setter - def serde_info(self, value: Optional[Any]): - value = _isinstance_or_raise( - value, str, none_allowed=True - ) # TODO fix, when serde class is done - self._properties["serdeInfo"] = value + def serde_info(self, value): + value = _isinstance_or_raise(value, SerDeInfo, none_allowed=True) + if value is not None: + self._properties["serDeInfo"] = value.to_api_repr() + else: + self._properties["serDeInfo"] = value def to_api_repr(self) -> dict: """Build an API representation of this object. @@ -784,8 +791,21 @@ def to_api_repr(self) -> dict: """ return copy.deepcopy(self._properties) - def from_api_repr(self, resource): - return _from_api_repr(self, resource) + @classmethod + def from_api_repr(cls, resource: dict) -> StorageDescriptor: + """Factory: constructs an instance of the class (cls) + given its API representation. + + Args: + resource (Dict[str, Any]): + API representation of the object to be instantiated. + + Returns: + An instance of the class initialized with data from 'resource'. 
+ """ + config = cls() + config._properties = copy.deepcopy(resource) + return config class SerDeInfo: @@ -825,7 +845,7 @@ def serialization_library(self) -> Any: @serialization_library.setter def serialization_library(self, value: str): - value = _isinstance_or_raise(value, str) + value = _isinstance_or_raise(value, str, none_allowed=False) self._properties["serializationLibrary"] = value @property @@ -860,5 +880,18 @@ def to_api_repr(self) -> dict: """ return copy.deepcopy(self._properties) - def from_api_repr(self, resource): - return _from_api_repr(self, resource) + @classmethod + def from_api_repr(cls, resource: dict) -> SerDeInfo: + """Factory: constructs an instance of the class (cls) + given its API representation. + + Args: + resource (Dict[str, Any]): + API representation of the object to be instantiated. + + Returns: + An instance of the class initialized with data from 'resource'. + """ + config = cls() + config._properties = copy.deepcopy(resource) + return config diff --git a/tests/unit/test_dataset.py b/tests/unit/test_dataset.py index 0ed00db73..09652b880 100644 --- a/tests/unit/test_dataset.py +++ b/tests/unit/test_dataset.py @@ -1048,12 +1048,10 @@ def test_external_catalog_dataset_options_getter(self): dataset.external_catalog_dataset_options = ecdo_obj expected = ecdo_obj._properties result = dataset.external_catalog_dataset_options._properties - + assert result == expected def test_external_catalog_dataset_options_from_api_repr(self): - from google.cloud.bigquery.external_config import ExternalCatalogDatasetOptions - resource = self.API_REPR klass = self._get_target_class() dataset = klass.from_api_repr(resource) diff --git a/tests/unit/test_external_config.py b/tests/unit/test_external_config.py index 285c62ae4..4e5e285eb 100644 --- a/tests/unit/test_external_config.py +++ b/tests/unit/test_external_config.py @@ -14,15 +14,16 @@ import base64 import copy -from typing import Any, Dict +from typing import Any, Dict, Optional import unittest from 
google.cloud.bigquery import external_config from google.cloud.bigquery.external_config import ( ExternalCatalogDatasetOptions, - ExternalCatalogTableOptions + ExternalCatalogTableOptions, ) from google.cloud.bigquery import schema +from google.cloud.bigquery.schema import StorageDescriptor, SerDeInfo import pytest @@ -909,7 +910,6 @@ def _get_target_class(): def _make_one(self, *args, **kw): return self._get_target_class()(*args, **kw) - @pytest.mark.parametrize( "default_storage_location_uri,parameters", [ @@ -971,6 +971,23 @@ def test_from_api_repr(self): assert result._properties == resource +@pytest.fixture +def _make_storage_descriptor(): + serdeinfo = SerDeInfo( + serialization_library="testpath.to.LazySimpleSerDe", + name="serde_lib_name", + parameters={"key": "value"}, + ) + + obj = StorageDescriptor( + input_format="testpath.to.OrcInputFormat", + location_uri="gs://test/path/", + output_format="testpath.to.OrcOutputFormat", + serde_info=serdeinfo, + ) + return obj + + class TestExternalCatalogTableOptions: @staticmethod def _get_target_class(): @@ -982,24 +999,38 @@ def _make_one(self, *args, **kw): @pytest.mark.parametrize( "connection_id,parameters,storage_descriptor", [ - ("connection123", {"key": "value"}, "placeholder"), # set all params + ( + "connection123", + {"key": "value"}, + "_make_storage_descriptor", + ), # set all params ("connection123", None, None), # set only one parameter at a time (None, {"key": "value"}, None), - (None, None, "placeholder"), + (None, None, "_make_storage_descriptor"), (None, None, None), # use default parameters ], ) - def test_ctor_initialization(self, connection_id, parameters, storage_descriptor): + def test_ctor_initialization( + self, connection_id, parameters, storage_descriptor, request + ): + if storage_descriptor == "_make_storage_descriptor": + storage_descriptor = request.getfixturevalue(storage_descriptor) + instance = self._make_one( connection_id=connection_id, parameters=parameters, 
storage_descriptor=storage_descriptor, ) - assert instance._properties == { - "connectionId": connection_id, - "parameters": parameters, - "storageDescriptor": storage_descriptor, - } + + assert instance._properties["connectionId"] == connection_id + assert instance._properties["parameters"] == parameters + if storage_descriptor is not None: + assert ( + instance._properties["storageDescriptor"] + == storage_descriptor.to_api_repr() + ) + else: + assert instance._properties["storageDescriptor"] == storage_descriptor @pytest.mark.parametrize( "connection_id, parameters, storage_descriptor", @@ -1007,13 +1038,13 @@ def test_ctor_initialization(self, connection_id, parameters, storage_descriptor pytest.param( 123, {"test_key": "test_value"}, - "test placeholder", + "_make_storage_descriptor", id="connection_id-invalid-type", ), pytest.param( "connection123", 123, - "test placeholder", + "_make_storage_descriptor", id="parameters-invalid-type", ), pytest.param( @@ -1028,8 +1059,11 @@ def test_ctor_invalid_input( self, connection_id: str, parameters: Dict[str, Any], - storage_descriptor: str, + storage_descriptor: Optional[StorageDescriptor], + request, ): + if storage_descriptor == "_make_storage_descriptor": + storage_descriptor = request.getfixturevalue(storage_descriptor) with pytest.raises(TypeError) as e: external_config.ExternalCatalogTableOptions( connection_id=connection_id, @@ -1039,32 +1073,36 @@ def test_ctor_invalid_input( assert "Pass " in str(e.value) - def test_to_api_repr(self): + def test_to_api_repr(self, _make_storage_descriptor): instance = self._make_one() + instance._properties = { "connectionId": "connection123", "parameters": {"key": "value"}, - "storageDescriptor": "placeholder", + "storageDescriptor": _make_storage_descriptor.to_api_repr(), } resource = instance.to_api_repr() - - assert resource == { + expected = { "connectionId": "connection123", "parameters": {"key": "value"}, - "storageDescriptor": "placeholder", + "storageDescriptor": 
_make_storage_descriptor.to_api_repr(), } + assert resource == expected - - def test_from_api_repr(self): + def test_from_api_repr(self, _make_storage_descriptor): instance = self._make_one() + storage_descriptor = _make_storage_descriptor resource = { "connectionId": "connection123", "parameters": {"key": "value"}, - "storageDescriptor": "placeholder", + "storageDescriptor": storage_descriptor, } result = instance.from_api_repr(resource) - assert isinstance(result, ExternalCatalogTableOptions) - assert result._properties == resource - + assert result._properties["connectionId"] == "connection123" + assert result._properties["parameters"] == {"key": "value"} + assert ( + result._properties["storageDescriptor"].to_api_repr() + == storage_descriptor.to_api_repr() + ) diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py index d29702e0a..fa5cf3fa4 100644 --- a/tests/unit/test_schema.py +++ b/tests/unit/test_schema.py @@ -1169,6 +1169,12 @@ def _get_target_class(): def _make_one(self, *args, **kwargs): return self._get_target_class()(*args, **kwargs) + SERDEINFO = SerDeInfo( + serialization_library="testpath.to.LazySimpleSerDe", + name="serde_lib_name", + parameters={"key": "value"}, + ) + @pytest.mark.parametrize( "input_format,location_uri,output_format,serde_info", [ @@ -1176,12 +1182,12 @@ def _make_one(self, *args, **kwargs): ("testpath.to.OrcInputFormat", None, None, None), (None, "gs://test/path/", None, None), (None, None, "testpath.to.OrcOutputFormat", None), - (None, None, None, "TODO fix serde info"), + (None, None, None, SERDEINFO), ( "testpath.to.OrcInputFormat", "gs://test/path/", "testpath.to.OrcOutputFormat", - "TODO fix serde info", + SERDEINFO, ), ], ) @@ -1197,13 +1203,12 @@ def test_ctor_valid_input( assert storage_descriptor._properties["inputFormat"] == input_format assert storage_descriptor._properties["locationUri"] == location_uri assert storage_descriptor._properties["outputFormat"] == output_format - assert 
storage_descriptor._properties["serdeInfo"] == serde_info - # QUESTION: which makes more sense? check against the background dict - # OR check against the getter attribute? - assert storage_descriptor.input_format == input_format - assert storage_descriptor.location_uri == location_uri - assert storage_descriptor.output_format == output_format - assert storage_descriptor.serde_info == serde_info + if serde_info is not None: + assert ( + storage_descriptor._properties["serDeInfo"] == serde_info.to_api_repr() + ) + else: + assert storage_descriptor._properties["serDeInfo"] == serde_info @pytest.mark.parametrize( "input_format,location_uri,output_format,serde_info", @@ -1231,14 +1236,15 @@ def test_to_api_repr(self): input_format="input_format", location_uri="location_uri", output_format="output_format", - serde_info="TODO fix serde info", + serde_info=self.SERDEINFO, ) expected_repr = { "inputFormat": "input_format", "locationUri": "location_uri", "outputFormat": "output_format", - "serdeInfo": "TODO fix serde info", + "serDeInfo": self.SERDEINFO.to_api_repr(), } + assert storage_descriptor.to_api_repr() == expected_repr @@ -1270,11 +1276,6 @@ def test_ctor_valid_input(self, serialization_library, name, parameters): assert serde_info._properties["serializationLibrary"] == serialization_library assert serde_info._properties["name"] == name assert serde_info._properties["parameters"] == parameters - # QUESTION: which makes more sense? check against the background dict - # OR check against the getter attribute? 
- assert serde_info.serialization_library == serialization_library - assert serde_info.name == name - assert serde_info.parameters == parameters @pytest.mark.parametrize( "serialization_library,name,parameters", diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index 2b5645173..dafeb9aa0 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -31,6 +31,7 @@ from google.cloud.bigquery import exceptions from google.cloud.bigquery.table import TableReference from google.cloud.bigquery.dataset import DatasetReference +from google.cloud.bigquery.schema import SerDeInfo, StorageDescriptor def _mock_client(): @@ -2011,7 +2012,7 @@ def _make_one( path=None, schema=None, table=None, - **kwargs + **kwargs, ): from google.cloud.bigquery.table import TableReference @@ -5824,14 +5825,31 @@ def test_table_reference_to_bqstorage_v1_stable(table_path): assert got == expected -@pytest.fixture(scope="class") -def external_catalog_table_options(): +@pytest.fixture +def _make_storage_descriptor(): + serdeinfo = SerDeInfo( + serialization_library="testpath.to.LazySimpleSerDe", + name="serde_lib_name", + parameters={"key": "value"}, + ) + + obj = StorageDescriptor( + input_format="testpath.to.OrcInputFormat", + location_uri="gs://test/path/", + output_format="testpath.to.OrcOutputFormat", + serde_info=serdeinfo, + ) + return obj + + +@pytest.fixture() +def external_catalog_table_options(_make_storage_descriptor): from google.cloud.bigquery.external_config import ExternalCatalogTableOptions return ExternalCatalogTableOptions( connection_id="connection123", parameters={"key": "value"}, - storage_descriptor="placeholder", + storage_descriptor=_make_storage_descriptor, ) @@ -5851,7 +5869,10 @@ def _make_one(self, *args, **kw): return self._get_target_class()(*args, **kw) def test_external_catalog_table_options_getter( - self, external_catalog_table_options + self, + external_catalog_table_options, + _make_storage_descriptor, + request, ): from 
google.cloud.bigquery.external_config import ExternalCatalogTableOptions @@ -5876,21 +5897,24 @@ def test_external_catalog_table_options_getter( # ExternalCatalogTableOptions object assert isinstance(ecto_output, ExternalCatalogTableOptions) + storage_descriptor = request.getfixturevalue("_make_storage_descriptor") + expected = { "connectionId": "connection123", "parameters": {"key": "value"}, - "storageDescriptor": "placeholder", + "storageDescriptor": storage_descriptor.to_api_repr(), } result = ecto_output.to_api_repr() # Confirm that the api_repr of the ecto_output matches the inputs + print(f"DINOSAUR : {result}\n\n{expected}") assert result == expected def test_external_catalog_table_options_setter( - self, external_catalog_table_options + self, + external_catalog_table_options, + _make_storage_descriptor, ): - # from google.cloud.bigquery.external_config import ExternalCatalogTableOptions - # create objects for the test dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) @@ -5908,7 +5932,7 @@ def test_external_catalog_table_options_setter( "externalCatalogTableOptions": { "connectionId": "connection123", "parameters": {"key": "value"}, - "storageDescriptor": "placeholder", + "storageDescriptor": _make_storage_descriptor.to_api_repr(), }, } # Confirm that the api_repr of the ecto_output matches the inputs From d0d96fa2bcf364278758461563302004a5bed579 Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Wed, 6 Nov 2024 19:14:51 +0000 Subject: [PATCH 27/28] Updates comments and adds a to_api_repr test --- tests/unit/test_dataset.py | 10 ++++++++++ tests/unit/test_external_config.py | 4 ++++ tests/unit/test_schema.py | 15 +++++++++++---- 3 files changed, 25 insertions(+), 4 deletions(-) diff --git a/tests/unit/test_dataset.py b/tests/unit/test_dataset.py index 09652b880..228122a52 100644 --- a/tests/unit/test_dataset.py +++ b/tests/unit/test_dataset.py @@ -1052,6 +1052,14 @@ def
test_external_catalog_dataset_options_getter(self): assert result == expected def test_external_catalog_dataset_options_from_api_repr(self): + resource = self.API_REPR + klass = self._get_target_class() + dataset = klass.from_api_repr(resource) + result = dataset.external_catalog_dataset_options.to_api_repr() + expected = resource["externalCatalogDatasetOptions"] + assert result == expected + + def test_external_catalog_dataset_options_to_api_repr(self): resource = self.API_REPR klass = self._get_target_class() dataset = klass.from_api_repr(resource) @@ -1059,6 +1067,8 @@ def test_external_catalog_dataset_options_from_api_repr(self): expected = resource["externalCatalogDatasetOptions"] assert result == expected + + def test__build_resource_w_custom_field(self): dataset = self._make_one(self.DS_REF) dataset._properties["newAlphaProperty"] = "unreleased property" diff --git a/tests/unit/test_external_config.py b/tests/unit/test_external_config.py index 4e5e285eb..22c5e4427 100644 --- a/tests/unit/test_external_config.py +++ b/tests/unit/test_external_config.py @@ -939,10 +939,13 @@ def test_ctor_invalid_input(self): """Test ExternalCatalogDatasetOptions constructor with invalid input.""" with pytest.raises(TypeError) as e: self._make_one(default_storage_location_uri=123) + + # Looking for the first word from the string "Pass as..." assert "Pass " in str(e.value) with pytest.raises(TypeError) as e: self._make_one(parameters=123) + # Looking for the first word from the string "Pass as..." assert "Pass " in str(e.value) def test_to_api_repr(self): @@ -1071,6 +1074,7 @@ def test_ctor_invalid_input( storage_descriptor=storage_descriptor, ) + # Looking for the first word from the string "Pass as..." 
assert "Pass " in str(e.value) def test_to_api_repr(self, _make_storage_descriptor): diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py index fa5cf3fa4..e299341af 100644 --- a/tests/unit/test_schema.py +++ b/tests/unit/test_schema.py @@ -1144,7 +1144,9 @@ def test_ctor_valid_input(self, type_system, expected): def test_ctor_invalid_input(self): with pytest.raises(TypeError) as e: self._make_one(type_system=123) - assert "Pass" in str(e.value) + + # Looking for the first word from the string "Pass as..." + assert "Pass " in str(e.value) @pytest.mark.parametrize( "type_system,expected", @@ -1229,7 +1231,9 @@ def test_ctor_invalid_input( output_format=output_format, serde_info=serde_info, ) - assert "Pass" in str(e.value) + + # Looking for the first word from the string "Pass as..." + assert "Pass " in str(e.value) def test_to_api_repr(self): storage_descriptor = self._make_one( @@ -1246,6 +1250,7 @@ def test_to_api_repr(self): } assert storage_descriptor.to_api_repr() == expected_repr + # TODO: needs a from_api_repr() test. class TestSerDeInfo: @@ -1293,8 +1298,8 @@ def test_ctor_invalid_input(self, serialization_library, name, parameters): name=name, parameters=parameters, ) - - assert "Pass" in str(e.value) + # Looking for the first word from the string "Pass as..." + assert "Pass " in str(e.value) def test_to_api_repr(self): serde_info = self._make_one( @@ -1308,3 +1313,5 @@ def test_to_api_repr(self): "parameters": {"key": "value"}, } assert serde_info.to_api_repr() == expected_repr + + # TODO: needs a from_api_repr() test. 
\ No newline at end of file From 116de788e03c3dbe26f50545d14e5ebe529d4973 Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Wed, 13 Nov 2024 10:03:19 +0000 Subject: [PATCH 28/28] Revises test for additional clarity --- tests/unit/test_dataset.py | 34 +++++++++++++++++++--------------- tests/unit/test_schema.py | 3 ++- 2 files changed, 21 insertions(+), 16 deletions(-) diff --git a/tests/unit/test_dataset.py b/tests/unit/test_dataset.py index 228122a52..24f1a9c87 100644 --- a/tests/unit/test_dataset.py +++ b/tests/unit/test_dataset.py @@ -650,6 +650,16 @@ class TestDataset(unittest.TestCase): DS_ID = "dataset-id" DS_REF = DatasetReference(PROJECT, DS_ID) KMS_KEY_NAME = "projects/1/locations/us/keyRings/1/cryptoKeys/1" + DEFAULT_STORAGE_LOCATION_URI = "gs://test-bucket/test-path" + PARAMETERS = {"key": "value"} + API_REPR = { + "datasetReference": {"projectId": "project", "datasetId": "dataset-id"}, + "labels": {}, + "externalCatalogDatasetOptions": { + "defaultStorageLocationUri": DEFAULT_STORAGE_LOCATION_URI, + "parameters": PARAMETERS, + }, + } @staticmethod def _get_target_class(): @@ -1014,27 +1024,23 @@ def test_from_string_legacy_string(self): with self.assertRaises(ValueError): cls.from_string("string-project:string_dataset") - API_REPR = { - "datasetReference": {"projectId": "project", "datasetId": "dataset-id"}, - "labels": {}, - "externalCatalogDatasetOptions": { - "defaultStorageLocationUri": "gs://test-bucket/test-path", - "parameters": {"key": "value"}, - }, - } - def test_external_catalog_dataset_options_setter(self): from google.cloud.bigquery.external_config import ExternalCatalogDatasetOptions dataset = self._make_one(self.DS_REF) + + # GIVEN the parameters DEFAULT_STORAGE_LOCATION_URI and PARAMETERS + # WHEN an ExternalCatalogDatasetOptions obj is created + # and added to a dataset. 
ecdo_obj = ExternalCatalogDatasetOptions( - default_storage_location_uri="gs://test-bucket/test-path", - parameters={"key": "value"}, + default_storage_location_uri=self.DEFAULT_STORAGE_LOCATION_URI, + parameters=self.PARAMETERS, ) - dataset.external_catalog_dataset_options = ecdo_obj - expected = self.API_REPR + + # THEN the api representation of the dataset will match API_REPR result = dataset.to_api_repr() + expected = self.API_REPR assert result == expected def test_external_catalog_dataset_options_getter(self): @@ -1067,8 +1073,6 @@ def test_external_catalog_dataset_options_to_api_repr(self): expected = resource["externalCatalogDatasetOptions"] assert result == expected - - def test__build_resource_w_custom_field(self): dataset = self._make_one(self.DS_REF) dataset._properties["newAlphaProperty"] = "unreleased property" diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py index e299341af..ee0538b3f 100644 --- a/tests/unit/test_schema.py +++ b/tests/unit/test_schema.py @@ -1250,6 +1250,7 @@ def test_to_api_repr(self): } assert storage_descriptor.to_api_repr() == expected_repr + # TODO: needs a from_api_repr() test. @@ -1314,4 +1315,4 @@ def test_to_api_repr(self): } assert serde_info.to_api_repr() == expected_repr - # TODO: needs a from_api_repr() test. \ No newline at end of file + # TODO: needs a from_api_repr() test.