diff --git a/CHANGELOG.md b/CHANGELOG.md index c7d7677b..180d7b94 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### 🛠 Breaking changes ### 🎉 New features +- Add `validate_query_filter_to_json`, a query-filter validator that returns JSON for frontends ([#224](https://github.com/roostorg/osprey/pull/224) by [@haileyok](https://github.com/haileyok)) - Add Postgres execution result store ([#171](https://github.com/roostorg/osprey/pull/171) by [@serendipty01](https://github.com/serendipty01)) - Add `ParseInt` UDF — converts a numeric string to an integer ([#190](https://github.com/roostorg/osprey/pull/190) by [@bealsbe](https://github.com/bealsbe)) - Add `StringSlice` UDF which extracts a substring by index range ([#189](https://github.com/roostorg/osprey/pull/189) by [@bealsbe](https://github.com/bealsbe)) diff --git a/osprey_worker/src/osprey/engine/query_language/query_filter_json.py b/osprey_worker/src/osprey/engine/query_language/query_filter_json.py new file mode 100644 index 00000000..854e4348 --- /dev/null +++ b/osprey_worker/src/osprey/engine/query_language/query_filter_json.py @@ -0,0 +1,150 @@ +from typing import Any, Dict, Mapping, Type + +from osprey.engine.ast import grammar +from osprey.engine.ast_validator.validation_context import ( + ValidatedSources, + ValidationError, + ValidationFailed, +) + +from . import parse_query_to_validated_ast + +# `parse_query_to_validated_ast` prepends `Query = ` so a bare expression +# parses as a module-level assignment; subtract this from line-1 columns so +# reported spans match the caller's source. 
+_QUERY_PREFIX_LEN = len('Query = ') + +_COMPARATOR_KINDS: Mapping[Type[grammar.Comparator], str] = { + grammar.Equals: 'Equals', + grammar.NotEquals: 'NotEquals', + grammar.LessThan: 'LessThan', + grammar.LessThanEquals: 'LessThanEquals', + grammar.GreaterThan: 'GreaterThan', + grammar.GreaterThanEquals: 'GreaterThanEquals', + grammar.In: 'In', + grammar.NotIn: 'NotIn', +} + +_BOOLEAN_OPERAND_KINDS: Mapping[Type[grammar.BooleanOperand], str] = { + grammar.And: 'And', + grammar.Or: 'Or', +} + +_UNARY_OPERATOR_KINDS: Mapping[Type[grammar.UnaryOperator], str] = { + grammar.Not: 'Not', + grammar.USub: 'USub', +} + + +def validate_query_filter_to_json(source: str, rules_sources: ValidatedSources) -> Dict[str, Any]: + """Validate an SML query-filter source and return errors + AST as JSON. + + Returns a dict with: + errors: list of `{kind, message, hint, span}` - one entry per + parse or validation failure. `kind` is 'syntax' for parse + errors, 'validation' for semantic errors. + + ast: the serialized expression tree on success. Will return `None` when + an error is encountered. 
+ """ + try: + validated_sources = parse_query_to_validated_ast(source, rules_sources=rules_sources) + except ValidationFailed as e: + return {'errors': [_serialize_error(err) for err in e.errors], 'ast': None} + + return {'errors': [], 'ast': _unwrap_query_ast(validated_sources)} + + +def _serialize_span(span: grammar.Span) -> Dict[str, Any]: + col = span.start_pos + 1 + if span.start_line == 1: + col = max(1, col - _QUERY_PREFIX_LEN) + return {'start': {'line': span.start_line, 'col': col}} + + +def _serialize_error(e: ValidationError) -> Dict[str, Any]: + kind = 'syntax' if e.validator_class is None else 'validation' + return { + 'kind': kind, + 'message': e.message, + 'hint': e.hint or None, + 'span': _serialize_span(e.span), + } + + +def _serialize_expression(node: grammar.Expression) -> Dict[str, Any]: + span = _serialize_span(node.span) + + if isinstance(node, grammar.String): + return {'kind': 'String', 'value': node.value, 'span': span} + if isinstance(node, grammar.Number): + return {'kind': 'Number', 'value': node.value, 'span': span} + if isinstance(node, grammar.Boolean): + return {'kind': 'Boolean', 'value': node.value, 'span': span} + if isinstance(node, grammar.None_): + return {'kind': 'None', 'span': span} + if isinstance(node, grammar.List): + return { + 'kind': 'List', + 'items': [_serialize_expression(i) for i in node.items], + 'span': span, + } + if isinstance(node, grammar.Name): + return {'kind': 'Name', 'identifier': node.identifier, 'span': span} + if isinstance(node, grammar.Attribute): + return { + 'kind': 'Attribute', + 'name': _serialize_expression(node.name), + 'attribute': node.attribute, + 'span': span, + } + if isinstance(node, grammar.BinaryComparison): + return { + 'kind': 'BinaryComparison', + 'left': _serialize_expression(node.left), + 'right': _serialize_expression(node.right), + 'comparator': _COMPARATOR_KINDS[type(node.comparator)], + 'span': span, + } + if isinstance(node, grammar.BooleanOperation): + return { + 'kind': 
'BooleanOperation', + 'operand': _BOOLEAN_OPERAND_KINDS[type(node.operand)], + 'values': [_serialize_expression(v) for v in node.values], + 'span': span, + } + if isinstance(node, grammar.UnaryOperation): + return { + 'kind': 'UnaryOperation', + 'operator': _UNARY_OPERATOR_KINDS[type(node.operator)], + 'operand': _serialize_expression(node.operand), + 'span': span, + } + if isinstance(node, grammar.Call): + return { + 'kind': 'Call', + 'func': _serialize_expression(node.func), + 'arguments': [ + { + 'kind': 'Keyword', + 'name': arg.name, + 'value': _serialize_expression(arg.value), + 'span': _serialize_span(arg.span), + } + for arg in node.arguments + ], + 'span': span, + } + + raise NotImplementedError(f'Cannot serialize AST node type: {type(node).__name__}') + + +def _unwrap_query_ast(validated_sources: ValidatedSources) -> Dict[str, Any]: + ast_root = validated_sources.sources.get_entry_point().ast_root + assign = ast_root.statements[0] + assert isinstance(assign, grammar.Assign) + return { + 'kind': 'Root', + 'expression': _serialize_expression(assign.value), + 'span': _serialize_span(assign.value.span), + } diff --git a/osprey_worker/src/osprey/engine/query_language/tests/test_query_filter_json.py b/osprey_worker/src/osprey/engine/query_language/tests/test_query_filter_json.py new file mode 100644 index 00000000..154ad0bc --- /dev/null +++ b/osprey_worker/src/osprey/engine/query_language/tests/test_query_filter_json.py @@ -0,0 +1,210 @@ +from typing import Any, Callable, List + +import pytest +from osprey.engine.ast_validator.validators.imports_must_not_have_cycles import ImportsMustNotHaveCycles +from osprey.engine.ast_validator.validators.unique_stored_names import UniqueStoredNames +from osprey.engine.ast_validator.validators.validate_call_kwargs import ValidateCallKwargs +from osprey.engine.ast_validator.validators.validate_dynamic_calls_have_annotated_rvalue import ( + ValidateDynamicCallsHaveAnnotatedRValue, +) +from 
osprey.engine.ast_validator.validators.validate_static_types import ValidateStaticTypes +from osprey.engine.ast_validator.validators.variables_must_be_defined import VariablesMustBeDefined +from osprey.engine.query_language.query_filter_json import validate_query_filter_to_json +from osprey.engine.query_language.tests.conftest import MakeRulesSourcesFunction + +pytestmark: List[Callable[[Any], Any]] = [ + pytest.mark.use_standard_rules_validators(), + pytest.mark.use_validators( + [ + UniqueStoredNames, + ValidateStaticTypes, + ValidateCallKwargs, + ImportsMustNotHaveCycles, + ValidateDynamicCallsHaveAnnotatedRValue, + VariablesMustBeDefined, + ] + ), +] + + +_STR_FEATURE = ('UserName', '"default"') +_INT_FEATURE = ('ActionCount', '0') + + +def test_binary_comparison_success(make_rules_sources: MakeRulesSourcesFunction) -> None: + result = validate_query_filter_to_json('UserName == "alice"', make_rules_sources([_STR_FEATURE])) + + assert result['errors'] == [] + assert result['ast'] == { + 'kind': 'Root', + 'span': {'start': {'line': 1, 'col': 1}}, + 'expression': { + 'kind': 'BinaryComparison', + 'span': {'start': {'line': 1, 'col': 1}}, + 'comparator': 'Equals', + 'left': { + 'kind': 'Name', + 'span': {'start': {'line': 1, 'col': 1}}, + 'identifier': 'UserName', + }, + 'right': { + 'kind': 'String', + 'span': {'start': {'line': 1, 'col': 13}}, + 'value': 'alice', + }, + }, + } + + +@pytest.mark.parametrize( + ('comparator_source', 'expected_kind'), + [ + ('==', 'Equals'), + ('!=', 'NotEquals'), + ('<', 'LessThan'), + ('<=', 'LessThanEquals'), + ('>', 'GreaterThan'), + ('>=', 'GreaterThanEquals'), + ], +) +def test_comparator_kinds( + make_rules_sources: MakeRulesSourcesFunction, comparator_source: str, expected_kind: str +) -> None: + result = validate_query_filter_to_json( + f'ActionCount {comparator_source} 1', make_rules_sources([_INT_FEATURE]) + ) + assert result['errors'] == [] + assert result['ast'] is not None + assert 
result['ast']['expression']['comparator'] == expected_kind + + +def test_in_and_not_in(make_rules_sources: MakeRulesSourcesFunction) -> None: + in_result = validate_query_filter_to_json('"alice" in UserName', make_rules_sources([_STR_FEATURE])) + assert in_result['errors'] == [] + assert in_result['ast'] is not None + assert in_result['ast']['expression']['comparator'] == 'In' + + not_in_result = validate_query_filter_to_json( + '"alice" not in UserName', make_rules_sources([_STR_FEATURE]) + ) + assert not_in_result['errors'] == [] + assert not_in_result['ast'] is not None + assert not_in_result['ast']['expression']['comparator'] == 'NotIn' + + +def test_boolean_operation_and(make_rules_sources: MakeRulesSourcesFunction) -> None: + result = validate_query_filter_to_json( + 'UserName == "a" and ActionCount > 5', make_rules_sources([_STR_FEATURE, _INT_FEATURE]) + ) + assert result['errors'] == [] + expr = result['ast']['expression'] + assert expr['kind'] == 'BooleanOperation' + assert expr['operand'] == 'And' + assert len(expr['values']) == 2 + assert all(v['kind'] == 'BinaryComparison' for v in expr['values']) + + +def test_boolean_operation_or_flattens(make_rules_sources: MakeRulesSourcesFunction) -> None: + # `A or B or C` flattens into a single BooleanOperation with three values, + # matching Python's own ast.BoolOp shape. 
+ result = validate_query_filter_to_json( + 'UserName == "a" or UserName == "b" or UserName == "c"', make_rules_sources([_STR_FEATURE]) + ) + assert result['errors'] == [] + expr = result['ast']['expression'] + assert expr['kind'] == 'BooleanOperation' + assert expr['operand'] == 'Or' + assert len(expr['values']) == 3 + + +def test_unary_not(make_rules_sources: MakeRulesSourcesFunction) -> None: + result = validate_query_filter_to_json('not ActionCount == 1', make_rules_sources([_INT_FEATURE])) + assert result['errors'] == [] + expr = result['ast']['expression'] + assert expr['kind'] == 'UnaryOperation' + assert expr['operator'] == 'Not' + assert expr['operand']['kind'] == 'BinaryComparison' + + +def test_unary_usub(make_rules_sources: MakeRulesSourcesFunction) -> None: + result = validate_query_filter_to_json('-ActionCount == -1', make_rules_sources([_INT_FEATURE])) + assert result['errors'] == [] + expr = result['ast']['expression'] + assert expr['kind'] == 'BinaryComparison' + assert expr['left']['kind'] == 'UnaryOperation' + assert expr['left']['operator'] == 'USub' + + +def test_list_literal(make_rules_sources: MakeRulesSourcesFunction) -> None: + # `x in ["a", "b", "c"]` — the list is a literal on the right, the feature + # on the left. Osprey's grammar for `in` normalises these into a + # BinaryComparison; the serializer preserves the shape. 
+ result = validate_query_filter_to_json( + 'UserName in ["a", "b", "c"]', make_rules_sources([_STR_FEATURE]) + ) + assert result['errors'] == [] + expr = result['ast']['expression'] + assert expr['kind'] == 'BinaryComparison' + list_side = expr['right'] if expr['right']['kind'] == 'List' else expr['left'] + assert list_side['kind'] == 'List' + assert [item['value'] for item in list_side['items']] == ['a', 'b', 'c'] + + +def test_boolean_and_none_literals(make_rules_sources: MakeRulesSourcesFunction) -> None: + bool_result = validate_query_filter_to_json( + 'IsVerified == True', make_rules_sources([('IsVerified', 'True')]) + ) + assert bool_result['errors'] == [] + assert bool_result['ast']['expression']['right'] == { + 'kind': 'Boolean', + 'span': {'start': {'line': 1, 'col': 15}}, + 'value': True, + } + + none_result = validate_query_filter_to_json( + 'Nullable == None', make_rules_sources([('Nullable', 'None')]) + ) + assert none_result['errors'] == [] + assert none_result['ast']['expression']['right']['kind'] == 'None' + + +def test_unknown_feature_is_validation_error(make_rules_sources: MakeRulesSourcesFunction) -> None: + result = validate_query_filter_to_json('Unknown == "x"', make_rules_sources([_STR_FEATURE])) + + assert result['ast'] is None + assert len(result['errors']) >= 1 + assert all(e['kind'] == 'validation' for e in result['errors']) + + +def test_all_literal_comparison_is_validation_error(make_rules_sources: MakeRulesSourcesFunction) -> None: + result = validate_query_filter_to_json('1 == 2', make_rules_sources([_INT_FEATURE])) + + assert result['ast'] is None + assert len(result['errors']) >= 1 + assert all(e['kind'] == 'validation' for e in result['errors']) + + +def test_syntax_error_is_reported(make_rules_sources: MakeRulesSourcesFunction) -> None: + # `a b` — invalid expression syntax (two names with no operator). Python's + # parser raises SyntaxError which osprey surfaces as OspreySyntaxError. 
+ result = validate_query_filter_to_json('UserName UserName', make_rules_sources([_STR_FEATURE])) + + assert result['ast'] is None + assert len(result['errors']) == 1 + assert result['errors'][0]['kind'] == 'syntax' + + +def test_span_columns_are_relative_to_user_input(make_rules_sources: MakeRulesSourcesFunction) -> None: + # The `Query = ` prefix osprey adds internally is 8 characters. The user + # sees their source starting at col 1, so `UserName` should report as + # starting at col 1 even though osprey internally sees it at col 9. + result = validate_query_filter_to_json('UserName == "x"', make_rules_sources([_STR_FEATURE])) + name_span = result['ast']['expression']['left']['span'] + assert name_span == {'start': {'line': 1, 'col': 1}} + + +def test_error_hint_is_null_when_absent(make_rules_sources: MakeRulesSourcesFunction) -> None: + # Ensure the `hint: None` JSON field is present (not omitted) when the + # underlying error carried no hint. + result = validate_query_filter_to_json('UserName UserName', make_rules_sources([_STR_FEATURE])) + assert 'hint' in result['errors'][0] diff --git a/uv.lock b/uv.lock index d410aafb..20a73952 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 3 +revision = 2 requires-python = ">=3.11" resolution-markers = [ "python_full_version >= '4' and platform_machine == 'x86_64'", @@ -498,7 +498,7 @@ dependencies = [ [[package]] name = "example-plugins" version = "0.1.0" -source = { editable = "example_plugins" } +source = { virtual = "example_plugins" } dependencies = [ { name = "pluggy" }, ] @@ -925,6 +925,7 @@ dependencies = [ ] sdist = { url = "https://files.pythonhosted.org/packages/a3/1c/c42834d4fee45c5cf2d9546e97e879a1cbcdecfd16eb1a12144dcb91edae/grpcio-1.49.1.tar.gz", hash = "sha256:d4725fc9ec8e8822906ae26bb26f5546891aa7fbc3443de970cc556d43a5c99f", size = 22059239, upload-time = "2022-09-22T03:02:44.376Z" } wheels = [ + { url = 
"https://files.pythonhosted.org/packages/2d/e2/aaccddb8b06637625d847dbb5fe76ec3d15a74d89d983f5202f3666706e3/grpcio-1.49.1-cp311-cp311-linux_armv7l.whl", hash = "sha256:9fb17ff8c0d56099ac6ebfa84f670c5a62228d6b5c695cf21c02160c2ac1446b", size = 73399185, upload-time = "2022-09-22T02:57:56.219Z" }, { url = "https://files.pythonhosted.org/packages/90/0f/4d614d59f500835cbd27cb90743fb6b299098b0f22b8fd058d3586c933c0/grpcio-1.49.1-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:075f2d06e3db6b48a2157a1bcd52d6cbdca980dd18988fe6afdb41795d51625f", size = 4296299, upload-time = "2022-09-22T02:58:01.417Z" }, { url = "https://files.pythonhosted.org/packages/4d/ea/359a98f8b3b4ff9a2f457a0d20ed81775a64149fbb7617177ed23d9d10c9/grpcio-1.49.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dc79b2b37d779ac42341ddef40ad5bf0966a64af412c89fc2b062e3ddabb093f", size = 4656437, upload-time = "2022-09-22T02:58:06.23Z" }, { url = "https://files.pythonhosted.org/packages/fc/89/4952d2dff95f5b95db5943b2d1b55c82a485830b992f52f212b33616b523/grpcio-1.49.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:49b301740cf5bc8fed4fee4c877570189ae3951432d79fa8e524b09353659811", size = 4888051, upload-time = "2022-09-22T02:58:11.411Z" }, @@ -1053,6 +1054,7 @@ dependencies = [ ] sdist = { url = "https://files.pythonhosted.org/packages/6c/e4/3416d25aebc4477141a491fae2c9494c5e0437a706c59103a936aac7d072/grpcio-tools-1.49.1.tar.gz", hash = "sha256:84cc64e5b46bad43d5d7bd2fd772b656eba0366961187a847e908e2cb735db91", size = 2252679, upload-time = "2022-09-22T03:03:00.279Z" } wheels = [ + { url = "https://files.pythonhosted.org/packages/e4/c1/ba298fe650b67c9e31a7ad88b2fe1d8d22ff2c6a9e131604054835397dfc/grpcio_tools-1.49.1-cp311-cp311-linux_armv7l.whl", hash = "sha256:9e5c13809ab2f245398e8446c4c3b399a62d591db651e46806cccf52a700452e", size = 36912892, upload-time = "2022-09-22T03:00:51.237Z" }, { url = 
"https://files.pythonhosted.org/packages/9c/8b/a45a39bf7d1c4956d48179831e4da88c3f6ee14dbdcb273e575bbeb7de20/grpcio_tools-1.49.1-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:ab3d0ee9623720ee585fdf3753b3755d3144a4a8ae35bca8e3655fa2f41056be", size = 2025040, upload-time = "2022-09-22T03:00:55.219Z" }, { url = "https://files.pythonhosted.org/packages/6d/7f/89dc6036b91f8cbada98b06801ac2f5db60885000feaf88f9d7cabe665b7/grpcio_tools-1.49.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:13e13b3643e7577a3ec13b79689eb4d7548890b1e104c04b9ed6557a3c3dd452", size = 2370982, upload-time = "2022-09-22T03:00:59.807Z" }, { url = "https://files.pythonhosted.org/packages/01/98/4730bfff6bcd3163db8c3d70689e19a1a5f419152316edfc1f13ff06a5d7/grpcio_tools-1.49.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:a64bab81b220c50033f584f57978ebbea575f09c1ccee765cd5c462177988098", size = 2731915, upload-time = "2022-09-22T03:01:05.44Z" }, @@ -1570,7 +1572,7 @@ wheels = [ [[package]] name = "osprey-rpc" version = "0.1.0" -source = { editable = "osprey_rpc" } +source = { virtual = "osprey_rpc" } [[package]] name = "osprey-worker" @@ -1584,7 +1586,7 @@ dependencies = [ [package.metadata] requires-dist = [ { name = "flask-cors", specifier = ">=6.0.1" }, - { name = "osprey-rpc", editable = "osprey_rpc" }, + { name = "osprey-rpc", virtual = "osprey_rpc" }, ] [[package]]