From 4f082b724f22b2bb99b5d304d5f7a0e525e20fa3 Mon Sep 17 00:00:00 2001 From: Andre Manoel Date: Fri, 6 Mar 2026 12:49:14 -0300 Subject: [PATCH 1/2] fix: handle discriminated unions in oneOf pruning validator The pruning validator modifies instances in-place during oneOf validation. When trying a wrong variant, it strips properties needed by the correct variant, causing all variants to fail. Add a discriminator-aware oneOf validator that reads the discriminator mapping to select the correct variant directly, skipping the try-all-variants loop that causes the corruption. Fixes #375 --- .../processing/gsonschema/validators.py | 45 ++++++++++++++- .../processing/gsonschema/test_validators.py | 57 +++++++++++++++++++ 2 files changed, 101 insertions(+), 1 deletion(-) diff --git a/packages/data-designer-engine/src/data_designer/engine/processing/gsonschema/validators.py b/packages/data-designer-engine/src/data_designer/engine/processing/gsonschema/validators.py index 9a167537c..210d65cda 100644 --- a/packages/data-designer-engine/src/data_designer/engine/processing/gsonschema/validators.py +++ b/packages/data-designer-engine/src/data_designer/engine/processing/gsonschema/validators.py @@ -59,6 +59,39 @@ def prune_additional_properties( logger.info(f"{n_removed} unspecified properties removed from data object.") +def _validate_one_of_with_discriminator( + validator: Any, one_of: list[JSONSchemaT], instance: DataObjectT, schema: JSONSchemaT +) -> Any: + """Validate a oneOf using the discriminator to select the correct variant. + + Standard oneOf tries all variants, which combined with in-place pruning + can corrupt the instance (pruning from a failed variant removes properties + needed by the correct variant). When a discriminator is present, this + validator selects the matching variant directly. + """ + discriminator = schema.get("discriminator") + if not discriminator or not isinstance(discriminator, dict) or not isinstance(instance, dict): + yield from lazy.jsonschema.Draft202012Validator.VALIDATORS["oneOf"](validator, one_of, instance, schema) + return + + prop_name = discriminator.get("propertyName") + mapping = discriminator.get("mapping", {}) + if not prop_name or prop_name not in instance or not mapping: + yield from lazy.jsonschema.Draft202012Validator.VALIDATORS["oneOf"](validator, one_of, instance, schema) + return + + matched_ref = mapping.get(str(instance[prop_name])) + if matched_ref is None: + yield lazy.jsonschema.ValidationError( + f"{instance[prop_name]!r} is not a valid value for discriminator {prop_name!r}", + ) + return + + matched_schema = {"$ref": matched_ref} + errs = list(validator.descend(instance, matched_schema)) + yield from errs + + def extend_jsonschema_validator_with_pruning(validator): """Modify behavior of a jsonschema.Validator to use pruning. @@ -67,6 +100,10 @@ def extend_jsonschema_validator_with_pruning(validator): extra, unspecified fiends when `additionalProperties: False` is set in the validating schema. + When a oneOf has a discriminator, the discriminator is used to select + the correct variant directly, preventing in-place pruning from + corrupting the instance during failed variant checks. + Args: validator (Type[jsonschema.Validator): A validator class to extend with pruning behavior. @@ -75,7 +112,13 @@ def extend_jsonschema_validator_with_pruning(validator): Type[jsonschema.Validator]: A validator class that will prune extra fields. """ - return lazy.jsonschema.validators.extend(validator, {"additionalProperties": prune_additional_properties}) + return lazy.jsonschema.validators.extend( + validator, + { + "additionalProperties": prune_additional_properties, + "oneOf": _validate_one_of_with_discriminator, + }, + ) def _get_decimal_info_from_anyof(schema: dict) -> tuple[bool, int | None]: diff --git a/packages/data-designer-engine/tests/engine/processing/gsonschema/test_validators.py b/packages/data-designer-engine/tests/engine/processing/gsonschema/test_validators.py index 7b95e3396..a0ca23fb3 100644 --- a/packages/data-designer-engine/tests/engine/processing/gsonschema/test_validators.py +++ b/packages/data-designer-engine/tests/engine/processing/gsonschema/test_validators.py @@ -198,6 +198,63 @@ def test_invalid_data_type(): validate(data, schema, pruning=True, no_extra_properties=True) +DISCRIMINATED_UNION_SCHEMA = { + "type": "object", + "properties": { + "items": { + "type": "array", + "items": { + "oneOf": [{"$ref": "#/$defs/AlphaItem"}, {"$ref": "#/$defs/BetaItem"}], + "discriminator": { + "propertyName": "kind", + "mapping": {"alpha": "#/$defs/AlphaItem", "beta": "#/$defs/BetaItem"}, + }, + }, + }, + }, + "$defs": { + "AlphaItem": { + "type": "object", + "properties": { + "kind": {"type": "string", "const": "alpha"}, + "name": {"type": "string"}, + "alpha_detail": {"type": "string"}, + }, + "required": ["kind", "name", "alpha_detail"], + }, + "BetaItem": { + "type": "object", + "properties": { + "kind": {"type": "string", "const": "beta"}, + "name": {"type": "string"}, + "beta_tags": {"type": "array", "items": {"type": "string"}}, + }, + "required": ["kind", "name", "beta_tags"], + }, + }, +} + + +@pytest.mark.parametrize( + "item,expected_keys", + [ + ({"kind": "alpha", "name": "A", "alpha_detail": "d", "beta_tags": ["leak"]}, {"kind", "name", "alpha_detail"}), + ({"kind": "beta", "name": "B", "beta_tags": ["t"], "alpha_detail": "leak"}, {"kind", "name", "beta_tags"}), + ], + ids=["alpha_with_leaked_beta_field", "beta_with_leaked_alpha_field"], +) +def test_discriminated_union_prunes_leaked_properties(item: dict, expected_keys: set) -> None: + data = {"items": [item]} + result = validate(data, DISCRIMINATED_UNION_SCHEMA, pruning=True, no_extra_properties=True) + assert set(result["items"][0].keys()) == expected_keys + + +def test_discriminated_union_invalid_discriminator_value() -> None: + data = {"items": [{"kind": "gamma", "name": "G"}]} + with pytest.raises(JSONSchemaValidationError): + validate(data, DISCRIMINATED_UNION_SCHEMA, pruning=True, no_extra_properties=True) + + def test_normalize_decimal_anyof_fields() -> None: """Test that Decimal-like anyOf fields are normalized to floats with proper precision.""" schema = { From 302776fe5515af88d752aba6ba3c2565d6166cd5 Mon Sep 17 00:00:00 2001 From: Andre Manoel Date: Fri, 6 Mar 2026 13:12:57 -0300 Subject: [PATCH 2/2] test: add regression test for non-discriminated oneOf fallback --- .../processing/gsonschema/test_validators.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/packages/data-designer-engine/tests/engine/processing/gsonschema/test_validators.py b/packages/data-designer-engine/tests/engine/processing/gsonschema/test_validators.py index a0ca23fb3..ee9d18647 100644 --- a/packages/data-designer-engine/tests/engine/processing/gsonschema/test_validators.py +++ b/packages/data-designer-engine/tests/engine/processing/gsonschema/test_validators.py @@ -255,6 +255,24 @@ def test_discriminated_union_invalid_discriminator_value() -> None: validate(data, DISCRIMINATED_UNION_SCHEMA, pruning=True, no_extra_properties=True) +def test_non_discriminated_one_of_fallback() -> None: + schema = { + "type": "object", + "properties": { + "value": { + "oneOf": [ + {"type": "string"}, + {"type": "number"}, + ], + }, + }, + } + assert validate({"value": "hello"}, schema, pruning=True)["value"] == "hello" + assert validate({"value": 42}, schema, pruning=True)["value"] == 42 + with pytest.raises(JSONSchemaValidationError): + validate({"value": []}, schema, pruning=True) + + def test_normalize_decimal_anyof_fields() -> None: """Test that Decimal-like anyOf fields are normalized to floats with proper precision.""" schema = {