diff --git a/README.md b/README.md index 6acee1f6..1074aabd 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ [![codecov](https://codecov.io/gh/mitchelllisle/sparkdantic/graph/badge.svg?token=O6PPQX4FEX)](https://codecov.io/gh/mitchelllisle/sparkdantic) [![PyPI version](https://badge.fury.io/py/sparkdantic.svg)](https://badge.fury.io/py/sparkdantic) -> 1️⃣ version: 2.6.0 +> 1️⃣ version: 2.6.1 > ✍️ author: Mitchell Lisle diff --git a/pyproject.toml b/pyproject.toml index 039cb5c9..fadd356e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "sparkdantic" -version = "2.6.0" +version = "2.6.1" description = "A pydantic -> spark schema library" authors = ["Mitchell Lisle "] readme = "README.md" diff --git a/setup.cfg b/setup.cfg index fed74a56..a789ac79 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 2.6.0 +current_version = 2.6.1 commit = True tag = False diff --git a/src/sparkdantic/__init__.py b/src/sparkdantic/__init__.py index 99fd24a9..49436f6b 100644 --- a/src/sparkdantic/__init__.py +++ b/src/sparkdantic/__init__.py @@ -1,4 +1,4 @@ -__version__ = '2.6.0' +__version__ = '2.6.1' __author__ = 'Mitchell Lisle' __email__ = 'm.lisle90@gmail.com' diff --git a/src/sparkdantic/model.py b/src/sparkdantic/model.py index d826b5e2..fe4368aa 100644 --- a/src/sparkdantic/model.py +++ b/src/sparkdantic/model.py @@ -206,9 +206,6 @@ def create_json_spark_schema( annotation_or_return_type = _get_annotation_or_return_type(info) field_type = _get_union_type_arg(annotation_or_return_type) - description = getattr(info, 'description', None) - comment = {'comment': description} if description else {} - spark_type: Union[str, Dict[str, Any]] try: @@ -248,7 +245,7 @@ def create_json_spark_schema( 'name': name, 'type': spark_type, 'nullable': nullable, - 'metadata': comment, + 'metadata': _json_field_metadata(info), } fields.append(struct_field) return { @@ -627,3 +624,14 @@ def json_schema_to_ddl(json_schema: 
Dict[str, Any]) -> str: field_ddls.append(field_ddl) return ','.join(field_ddls) + + +def _json_field_metadata(info: ComputedFieldInfo) -> dict[str, str]: + description = getattr(info, 'description', None) + metadata = {'comment': description} if description else {} + + examples = getattr(info, 'examples', None) + if examples: + metadata['examples'] = examples + + return metadata diff --git a/tests/test_field_descriptions.py b/tests/test_field_descriptions.py deleted file mode 100644 index f6f0cf82..00000000 --- a/tests/test_field_descriptions.py +++ /dev/null @@ -1,53 +0,0 @@ -from pyspark.sql.types import StringType, StructField, StructType - -from sparkdantic import SparkField, SparkModel - - -class DescriptionModel(SparkModel): - field_with_description: str = SparkField(description='This is a test description.') - field_without_description: str = SparkField() - - -def test_spark_schema_contains_field_descriptions(): - expected_schema = StructType( - [ - StructField( - 'field_with_description', - StringType(), - False, - metadata={'comment': 'This is a test description.'}, - ), - StructField( - 'field_without_description', - StringType(), - False, - metadata={}, - ), - ] - ) - - actual_schema = DescriptionModel.model_spark_schema() - assert actual_schema == expected_schema - - -def test_spark_schema_json_contains_field_descriptions(): - expected_json_schema = { - 'type': 'struct', - 'fields': [ - { - 'name': 'field_with_description', - 'type': 'string', - 'nullable': False, - 'metadata': {'comment': 'This is a test description.'}, - }, - { - 'name': 'field_without_description', - 'type': 'string', - 'nullable': False, - 'metadata': {}, - }, - ], - } - - actual_json_schema = DescriptionModel.model_json_spark_schema() - assert actual_json_schema == expected_json_schema diff --git a/tests/test_field_metadata.py b/tests/test_field_metadata.py new file mode 100644 index 00000000..64d0d06c --- /dev/null +++ b/tests/test_field_metadata.py @@ -0,0 +1,88 @@ +from 
from pyspark.sql.types import StringType, StructField, StructType

from sparkdantic import SparkField, SparkModel


class DescriptionModel(SparkModel):
    # One field per combination of description / examples, plus a bare field.
    field_with_description: str = SparkField(description='This is a test description.')
    field_with_examples: str = SparkField(examples=['test'])
    field_with_description_and_examples: str = SparkField(
        description='testing description',
        examples=['testing example'],
    )

    field_without_metadata: str = SparkField()


def test_spark_schema_contains_field_metadata():
    """The Spark schema carries description/examples in each field's metadata."""
    # Insertion order mirrors the field declaration order on the model.
    metadata_by_field = {
        'field_with_description': {'comment': 'This is a test description.'},
        'field_with_examples': {'examples': ['test']},
        'field_with_description_and_examples': {
            'comment': 'testing description',
            'examples': ['testing example'],
        },
        'field_without_metadata': {},
    }
    expected = StructType(
        [
            StructField(field_name, StringType(), False, metadata=field_metadata)
            for field_name, field_metadata in metadata_by_field.items()
        ]
    )

    actual = DescriptionModel.model_spark_schema()
    assert actual == expected


def test_spark_schema_json_contains_field_metadata():
    """The JSON Spark schema carries description/examples in each field's metadata."""

    def string_field(field_name, field_metadata):
        # Every field on the model is a non-nullable string.
        return {
            'name': field_name,
            'type': 'string',
            'nullable': False,
            'metadata': field_metadata,
        }

    expected = {
        'type': 'struct',
        'fields': [
            string_field(
                'field_with_description',
                {'comment': 'This is a test description.'},
            ),
            string_field('field_with_examples', {'examples': ['test']}),
            string_field(
                'field_with_description_and_examples',
                {
                    'comment': 'testing description',
                    'examples': ['testing example'],
                },
            ),
            string_field('field_without_metadata', {}),
        ],
    }

    actual = DescriptionModel.model_json_spark_schema()
    assert actual == expected