Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,39 @@ def prune_additional_properties(
logger.info(f"{n_removed} unspecified properties removed from data object.")


def _validate_one_of_with_discriminator(
validator: Any, one_of: list[JSONSchemaT], instance: DataObjectT, schema: JSONSchemaT
) -> Any:
Comment on lines +62 to +64
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Return type annotation too broad for a generator

_validate_one_of_with_discriminator is a generator function (uses yield/yield from), so its return type should reflect that rather than Any. This improves type-checker support and documents the generator contract.

Suggested change
def _validate_one_of_with_discriminator(
validator: Any, one_of: list[JSONSchemaT], instance: DataObjectT, schema: JSONSchemaT
) -> Any:
def _validate_one_of_with_discriminator(
validator: Any, one_of: list[JSONSchemaT], instance: DataObjectT, schema: JSONSchemaT
) -> Iterator[lazy.jsonschema.ValidationError]:

You'll also need to add Iterator to the typing import at the top of the file.

Prompt To Fix With AI
This is a comment left during a code review.
Path: packages/data-designer-engine/src/data_designer/engine/processing/gsonschema/validators.py
Line: 62-64

Comment:
Return type annotation too broad for a generator

`_validate_one_of_with_discriminator` is a generator function (uses `yield`/`yield from`), so its return type should reflect that rather than `Any`. This improves type-checker support and documents the generator contract.

```suggestion
def _validate_one_of_with_discriminator(
    validator: Any, one_of: list[JSONSchemaT], instance: DataObjectT, schema: JSONSchemaT
) -> Iterator[lazy.jsonschema.ValidationError]:
```

You'll also need to add `Iterator` to the `typing` import at the top of the file.

How can I resolve this? If you propose a fix, please make it concise.

"""Validate a oneOf using the discriminator to select the correct variant.

Standard oneOf tries all variants, which combined with in-place pruning
can corrupt the instance (pruning from a failed variant removes properties
needed by the correct variant). When a discriminator is present, this
validator selects the matching variant directly.
"""
discriminator = schema.get("discriminator")
if not discriminator or not isinstance(discriminator, dict) or not isinstance(instance, dict):
yield from lazy.jsonschema.Draft202012Validator.VALIDATORS["oneOf"](validator, one_of, instance, schema)
return

prop_name = discriminator.get("propertyName")
mapping = discriminator.get("mapping", {})
if not prop_name or prop_name not in instance or not mapping:
yield from lazy.jsonschema.Draft202012Validator.VALIDATORS["oneOf"](validator, one_of, instance, schema)
return

matched_ref = mapping.get(str(instance[prop_name]))
if matched_ref is None:
yield lazy.jsonschema.ValidationError(
f"{instance[prop_name]!r} is not a valid value for discriminator {prop_name!r}",
)
return

matched_schema = {"$ref": matched_ref}
errs = list(validator.descend(instance, matched_schema))
yield from errs


def extend_jsonschema_validator_with_pruning(validator):
"""Modify behavior of a jsonschema.Validator to use pruning.

Expand All @@ -67,6 +100,10 @@ def extend_jsonschema_validator_with_pruning(validator):
extra, unspecified fiends when `additionalProperties: False` is
set in the validating schema.

When a oneOf has a discriminator, the discriminator is used to select
the correct variant directly, preventing in-place pruning from
corrupting the instance during failed variant checks.

Args:
validator (Type[jsonschema.Validator): A validator class
to extend with pruning behavior.
Expand All @@ -75,7 +112,13 @@ def extend_jsonschema_validator_with_pruning(validator):
Type[jsonschema.Validator]: A validator class that will
prune extra fields.
"""
return lazy.jsonschema.validators.extend(validator, {"additionalProperties": prune_additional_properties})
return lazy.jsonschema.validators.extend(
validator,
{
"additionalProperties": prune_additional_properties,
"oneOf": _validate_one_of_with_discriminator,
},
)


def _get_decimal_info_from_anyof(schema: dict) -> tuple[bool, int | None]:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,81 @@ def test_invalid_data_type():
validate(data, schema, pruning=True, no_extra_properties=True)


DISCRIMINATED_UNION_SCHEMA = {
"type": "object",
"properties": {
"items": {
"type": "array",
"items": {
"oneOf": [{"$ref": "#/$defs/AlphaItem"}, {"$ref": "#/$defs/BetaItem"}],
"discriminator": {
"propertyName": "kind",
"mapping": {"alpha": "#/$defs/AlphaItem", "beta": "#/$defs/BetaItem"},
},
},
},
},
"$defs": {
"AlphaItem": {
"type": "object",
"properties": {
"kind": {"type": "string", "const": "alpha"},
"name": {"type": "string"},
"alpha_detail": {"type": "string"},
},
"required": ["kind", "name", "alpha_detail"],
},
"BetaItem": {
"type": "object",
"properties": {
"kind": {"type": "string", "const": "beta"},
"name": {"type": "string"},
"beta_tags": {"type": "array", "items": {"type": "string"}},
},
"required": ["kind", "name", "beta_tags"],
},
},
}


@pytest.mark.parametrize(
"item,expected_keys",
[
({"kind": "alpha", "name": "A", "alpha_detail": "d", "beta_tags": ["leak"]}, {"kind", "name", "alpha_detail"}),
({"kind": "beta", "name": "B", "beta_tags": ["t"], "alpha_detail": "leak"}, {"kind", "name", "beta_tags"}),
],
ids=["alpha_with_leaked_beta_field", "beta_with_leaked_alpha_field"],
)
def test_discriminated_union_prunes_leaked_properties(item: dict, expected_keys: set) -> None:
data = {"items": [item]}
result = validate(data, DISCRIMINATED_UNION_SCHEMA, pruning=True, no_extra_properties=True)
assert set(result["items"][0].keys()) == expected_keys


def test_discriminated_union_invalid_discriminator_value() -> None:
data = {"items": [{"kind": "gamma", "name": "G"}]}
with pytest.raises(JSONSchemaValidationError):
validate(data, DISCRIMINATED_UNION_SCHEMA, pruning=True, no_extra_properties=True)


def test_non_discriminated_one_of_fallback() -> None:
schema = {
"type": "object",
"properties": {
"value": {
"oneOf": [
{"type": "string"},
{"type": "number"},
],
},
},
}
assert validate({"value": "hello"}, schema, pruning=True)["value"] == "hello"
assert validate({"value": 42}, schema, pruning=True)["value"] == 42
with pytest.raises(JSONSchemaValidationError):
validate({"value": []}, schema, pruning=True)


def test_normalize_decimal_anyof_fields() -> None:
"""Test that Decimal-like anyOf fields are normalized to floats with proper precision."""
schema = {
Expand Down