Skip to content

Commit 4414601

Browse files
committed
Always enable metaschema format checking
In the implementation prior to this change, passing `--disable-formats` affected not only the "actual" schema validator, but also the validator built to evaluate the schema against its metaschema. As a result, `--disable-formats *` and similar options allowed schemas to be used which should have been caught as invalid by the metaschema check. Furthermore, the metaschema check used the customized format checker, which has extensions for date and time evaluation added, so any other customizations to format checking would also be implicitly shared with the metaschema check. To resolve this, refactor format checker building so that it can be used more directly for the metaschema check, and add test cases to confirm that a bad regex in a `pattern` is always rejected, even when `--disable-formats regex` or similar is used.
1 parent cc12d98 commit 4414601

File tree

3 files changed

+51
-22
lines changed

3 files changed

+51
-22
lines changed

src/check_jsonschema/formats/__init__.py

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -66,13 +66,10 @@ def make_format_checker(
6666
if not opts.enabled:
6767
return None
6868

69-
# copy the base checker
70-
base_checker = get_base_format_checker(schema_dialect)
71-
checker = copy.deepcopy(base_checker)
69+
# customize around regex checking first
70+
checker = format_checker_for_regex_impl(opts.regex_impl)
7271

73-
# replace the regex check
74-
del checker.checkers["regex"]
75-
checker.checks("regex")(opts.regex_impl.check_format)
72+
# add other custom format checks
7673
checker.checks("date-time")(validate_rfc3339)
7774
checker.checks("time")(validate_time)
7875

@@ -83,3 +80,18 @@ def make_format_checker(
8380
del checker.checkers[checkname]
8481

8582
return checker
83+
84+
85+
def format_checker_for_regex_impl(
86+
regex_impl: RegexImplementation, schema_dialect: str | None = None
87+
) -> jsonschema.FormatChecker:
88+
# convert to a schema-derived format checker, and copy it
89+
# for safe modification
90+
base_checker = get_base_format_checker(schema_dialect)
91+
checker = copy.deepcopy(base_checker)
92+
93+
# replace the regex check
94+
del checker.checkers["regex"]
95+
checker.checks("regex")(regex_impl.check_format)
96+
97+
return checker

src/check_jsonschema/schema_loader/main.py

Lines changed: 19 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
import jsonschema
1010

1111
from ..builtin_schemas import get_builtin_schema
12-
from ..formats import FormatOptions, make_format_checker
12+
from ..formats import FormatOptions, format_checker_for_regex_impl, make_format_checker
1313
from ..parsers import ParserSet
1414
from ..regex_variants import RegexImplementation
1515
from ..utils import is_url_ish
@@ -153,10 +153,7 @@ def _get_validator(
153153
) -> jsonschema.protocols.Validator:
154154
retrieval_uri = self.get_schema_retrieval_uri()
155155
schema = self.get_schema()
156-
157-
schema_dialect = schema.get("$schema")
158-
if schema_dialect is not None and not isinstance(schema_dialect, str):
159-
schema_dialect = None
156+
schema_dialect = _dialect_of_schema(schema)
160157

161158
# format checker (which may be None)
162159
format_checker = make_format_checker(format_opts, schema_dialect)
@@ -170,12 +167,8 @@ def _get_validator(
170167
if self.validator_class is None:
171168
# get the correct validator class and check the schema under its metaschema
172169
validator_cls = jsonschema.validators.validator_for(schema)
173-
_check_schema(
174-
validator_cls,
175-
schema,
176-
format_checker=format_checker,
177-
regex_impl=regex_impl,
178-
)
170+
171+
_check_schema(validator_cls, schema, regex_impl=regex_impl)
179172
else:
180173
# for a user-provided validator class, don't check_schema
181174
# on the grounds that it might *not* be valid but the user wants to use
@@ -206,28 +199,40 @@ def _check_schema(
206199
validator_cls: type[jsonschema.protocols.Validator],
207200
schema: dict[str, t.Any],
208201
*,
209-
format_checker: jsonschema.FormatChecker | None,
210202
regex_impl: RegexImplementation,
211203
) -> None:
212204
"""A variant definition of Validator.check_schema which uses the regex
213205
implementation and format checker specified."""
206+
# construct the metaschema validator class (with customized regex impl)
214207
schema_validator_cls = jsonschema.validators.validator_for(
215208
validator_cls.META_SCHEMA, default=validator_cls
216209
)
217210
schema_validator_cls = _extend_with_pattern_implementation(
218211
schema_validator_cls, regex_impl
219212
)
220213

221-
if format_checker is None:
222-
format_checker = schema_validator_cls.FORMAT_CHECKER
214+
# construct a specialized format checker (again, customized regex impl)
215+
metaschema_dialect = _dialect_of_schema(validator_cls.META_SCHEMA)
216+
format_checker = format_checker_for_regex_impl(regex_impl, metaschema_dialect)
223217

218+
# now, construct and apply the actual validator
224219
schema_validator = schema_validator_cls(
225220
validator_cls.META_SCHEMA, format_checker=format_checker
226221
)
227222
for error in schema_validator.iter_errors(schema):
228223
raise jsonschema.exceptions.SchemaError.create_from(error)
229224

230225

226+
def _dialect_of_schema(schema: dict[str, t.Any] | bool) -> str | None:
227+
if not isinstance(schema, dict):
228+
return None
229+
230+
schema_dialect = schema.get("$schema")
231+
if schema_dialect is not None and not isinstance(schema_dialect, str):
232+
schema_dialect = None
233+
return schema_dialect
234+
235+
231236
class BuiltinSchemaLoader(SchemaLoader):
232237
def __init__(self, schema_name: str, *, base_uri: str | None = None) -> None:
233238
self.schema_name = schema_name

tests/acceptance/test_invalid_schema_files.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
import pytest
2+
3+
14
def test_checker_non_json_schemafile(run_line, tmp_path):
25
foo = tmp_path / "foo.json"
36
bar = tmp_path / "bar.json"
@@ -31,13 +34,22 @@ def test_checker_invalid_schemafile_scheme(run_line, tmp_path):
3134
assert "only supports http, https" in res.stderr
3235

3336

34-
def test_checker_invalid_schemafile_due_to_bad_regex(run_line, tmp_path):
37+
@pytest.mark.parametrize(
38+
"add_args",
39+
[
40+
pytest.param([], id="noargs"),
41+
# ensure that this works even when regex checking is disabled
42+
pytest.param(["--disable-formats", "*"], id="all-formats-disabled"),
43+
pytest.param(["--disable-formats", "regex"], id="regex-format-disabled"),
44+
],
45+
)
46+
def test_checker_invalid_schemafile_due_to_bad_regex(run_line, tmp_path, add_args):
3547
foo = tmp_path / "foo.json"
3648
bar = tmp_path / "bar.json"
3749
# too many backslash escapes -- not a valid Unicode-mode regex
3850
foo.write_text(r'{"properties": {"foo": {"pattern": "\\\\p{N}"}}}')
3951
bar.write_text("{}")
4052

41-
res = run_line(["check-jsonschema", "--schemafile", str(foo), str(bar)])
53+
res = run_line(["check-jsonschema", "--schemafile", str(foo), str(bar), *add_args])
4254
assert res.exit_code == 1
4355
assert "schemafile was not valid" in res.stderr

0 commit comments

Comments
 (0)