Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### 🛠 Breaking changes

### 🎉 New features
- Add `validate_query_filter_to_json`, a query-filter validator that returns JSON for frontends ([#224](https://github.com/roostorg/osprey/pull/224) by [@haileyok](https://github.com/haileyok))
- Add Postgres execution result store ([#171](https://github.com/roostorg/osprey/pull/171) by [@serendipty01](https://github.com/serendipty01))
- Add `ParseInt` UDF — converts a numeric string to an integer ([#190](https://github.com/roostorg/osprey/pull/190) by [@bealsbe](https://github.com/bealsbe))
- Add `StringSlice` UDF which extracts a substring by index range ([#189](https://github.com/roostorg/osprey/pull/189) by [@bealsbe](https://github.com/bealsbe))
Expand Down
150 changes: 150 additions & 0 deletions osprey_worker/src/osprey/engine/query_language/query_filter_json.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
from typing import Any, Dict, Mapping, Type

from osprey.engine.ast import grammar
from osprey.engine.ast_validator.validation_context import (
ValidatedSources,
ValidationError,
ValidationFailed,
)

from . import parse_query_to_validated_ast

# `parse_query_to_validated_ast` prepends `Query = ` so a bare expression
# parses as a module-level assignment; subtract this from line-1 columns so
# reported spans match the caller's source.
_QUERY_PREFIX_LEN = len('Query = ')

# Comparator node class -> the string emitted as `comparator` on a serialized
# BinaryComparison. Looked up by exact `type(...)`, so a comparator class that
# is missing here surfaces as a KeyError during serialization.
_COMPARATOR_KINDS: Mapping[Type[grammar.Comparator], str] = {
    grammar.Equals: 'Equals',
    grammar.NotEquals: 'NotEquals',
    grammar.LessThan: 'LessThan',
    grammar.LessThanEquals: 'LessThanEquals',
    grammar.GreaterThan: 'GreaterThan',
    grammar.GreaterThanEquals: 'GreaterThanEquals',
    grammar.In: 'In',
    grammar.NotIn: 'NotIn',
}

# Boolean operand class -> the string emitted as `operand` on a serialized
# BooleanOperation.
_BOOLEAN_OPERAND_KINDS: Mapping[Type[grammar.BooleanOperand], str] = {
    grammar.And: 'And',
    grammar.Or: 'Or',
}

# Unary operator class -> the string emitted as `operator` on a serialized
# UnaryOperation.
_UNARY_OPERATOR_KINDS: Mapping[Type[grammar.UnaryOperator], str] = {
    grammar.Not: 'Not',
    grammar.USub: 'USub',
}


def validate_query_filter_to_json(source: str, rules_sources: ValidatedSources) -> Dict[str, Any]:
    """Validate an SML query-filter source and return errors + AST as JSON.

    Args:
        source: The bare query-filter expression, exactly as the caller wrote it.
        rules_sources: The validated rules sources handed through to
            `parse_query_to_validated_ast`.

    Returns:
        A dict with exactly two keys:

        errors: list of `{kind, message, hint, span}` - one entry per
            parse or validation failure, empty on success. `kind` is
            'syntax' for parse errors, 'validation' for semantic errors.
            `hint` is always present and is `None` when the error has no hint.

        ast: the serialized expression tree on success, or `None` when any
            error is encountered.

    Raises:
        NotImplementedError: if the validated expression contains a node type
            the serializer does not handle. Serialization happens outside the
            `try`, so this is not folded into `errors`.
    """
    try:
        validated_sources = parse_query_to_validated_ast(source, rules_sources=rules_sources)
    except ValidationFailed as e:
        # Parsing/validation failed: report every collected error and no AST.
        return {'errors': [_serialize_error(err) for err in e.errors], 'ast': None}

    return {'errors': [], 'ast': _unwrap_query_ast(validated_sources)}


def _serialize_span(span: grammar.Span) -> Dict[str, Any]:
    """Serialize the start of `span` as `{'start': {'line', 'col'}}`.

    The column is `span.start_pos + 1`. On line 1 the length of the injected
    `Query = ` prefix is subtracted, clamped so the column never drops below 1.
    Other lines are reported unchanged.
    """
    line = span.start_line
    column = span.start_pos + 1
    # Only the first line carries the injected prefix.
    column = max(1, column - _QUERY_PREFIX_LEN) if line == 1 else column
    return {'start': {'line': line, 'col': column}}


def _serialize_error(e: ValidationError) -> Dict[str, Any]:
    """Convert a `ValidationError` into a `{kind, message, hint, span}` dict.

    `kind` is 'syntax' when the error has no `validator_class`, otherwise
    'validation'. A falsy hint (e.g. an empty string) is reported as `None`,
    so the `hint` key is always present.
    """
    if e.validator_class is None:
        kind = 'syntax'
    else:
        kind = 'validation'

    hint = e.hint or None
    return {'kind': kind, 'message': e.message, 'hint': hint, 'span': _serialize_span(e.span)}


def _serialize_expression(node: grammar.Expression) -> Dict[str, Any]:
    """Recursively convert an expression node into a JSON-friendly dict.

    Every result starts with a `kind` key and ends with a `span` key; the
    node-specific fields sit in between. Child expressions are serialized
    with the same function.

    Raises:
        NotImplementedError: if `node` is not one of the grammar node types
            handled below.
    """
    # The span is computed up front for every node, before any type dispatch.
    location = _serialize_span(node.span)
    serialized: Dict[str, Any]

    if isinstance(node, grammar.String):
        serialized = {'kind': 'String', 'value': node.value}
    elif isinstance(node, grammar.Number):
        serialized = {'kind': 'Number', 'value': node.value}
    elif isinstance(node, grammar.Boolean):
        serialized = {'kind': 'Boolean', 'value': node.value}
    elif isinstance(node, grammar.None_):
        serialized = {'kind': 'None'}
    elif isinstance(node, grammar.List):
        serialized = {
            'kind': 'List',
            'items': [_serialize_expression(item) for item in node.items],
        }
    elif isinstance(node, grammar.Name):
        serialized = {'kind': 'Name', 'identifier': node.identifier}
    elif isinstance(node, grammar.Attribute):
        serialized = {
            'kind': 'Attribute',
            'name': _serialize_expression(node.name),
            'attribute': node.attribute,
        }
    elif isinstance(node, grammar.BinaryComparison):
        serialized = {
            'kind': 'BinaryComparison',
            'left': _serialize_expression(node.left),
            'right': _serialize_expression(node.right),
            'comparator': _COMPARATOR_KINDS[type(node.comparator)],
        }
    elif isinstance(node, grammar.BooleanOperation):
        serialized = {
            'kind': 'BooleanOperation',
            'operand': _BOOLEAN_OPERAND_KINDS[type(node.operand)],
            'values': [_serialize_expression(value) for value in node.values],
        }
    elif isinstance(node, grammar.UnaryOperation):
        serialized = {
            'kind': 'UnaryOperation',
            'operator': _UNARY_OPERATOR_KINDS[type(node.operator)],
            'operand': _serialize_expression(node.operand),
        }
    elif isinstance(node, grammar.Call):
        # The callee is serialized before its arguments.
        callee = _serialize_expression(node.func)
        keyword_arguments = [
            {
                'kind': 'Keyword',
                'name': keyword.name,
                'value': _serialize_expression(keyword.value),
                'span': _serialize_span(keyword.span),
            }
            for keyword in node.arguments
        ]
        serialized = {'kind': 'Call', 'func': callee, 'arguments': keyword_arguments}
    else:
        raise NotImplementedError(f'Cannot serialize AST node type: {type(node).__name__}')

    # `span` is appended last so it stays the final key of every node.
    serialized['span'] = location
    return serialized


def _unwrap_query_ast(validated_sources: ValidatedSources) -> Dict[str, Any]:
    """Serialize the query expression held by the entry point's first statement.

    The first statement of the entry-point AST is expected to be an
    assignment; its right-hand side is the expression that gets serialized.
    The returned `Root` node carries that expression and the expression's span.
    """
    entry_point = validated_sources.sources.get_entry_point()
    first_statement = entry_point.ast_root.statements[0]
    assert isinstance(first_statement, grammar.Assign)

    query_expression = first_statement.value
    return {
        'kind': 'Root',
        'expression': _serialize_expression(query_expression),
        'span': _serialize_span(query_expression.span),
    }
Original file line number Diff line number Diff line change
@@ -0,0 +1,210 @@
from typing import Any, Callable, List

import pytest
from osprey.engine.ast_validator.validators.imports_must_not_have_cycles import ImportsMustNotHaveCycles
from osprey.engine.ast_validator.validators.unique_stored_names import UniqueStoredNames
from osprey.engine.ast_validator.validators.validate_call_kwargs import ValidateCallKwargs
from osprey.engine.ast_validator.validators.validate_dynamic_calls_have_annotated_rvalue import (
ValidateDynamicCallsHaveAnnotatedRValue,
)
from osprey.engine.ast_validator.validators.validate_static_types import ValidateStaticTypes
from osprey.engine.ast_validator.validators.variables_must_be_defined import VariablesMustBeDefined
from osprey.engine.query_language.query_filter_json import validate_query_filter_to_json
from osprey.engine.query_language.tests.conftest import MakeRulesSourcesFunction

# Module-wide pytest marks applied to every test in this file.
# NOTE(review): `use_standard_rules_validators` and `use_validators` are
# project-defined marks, presumably consumed by the `make_rules_sources`
# fixture machinery in conftest - confirm there.
pytestmark: List[Callable[[Any], Any]] = [
    pytest.mark.use_standard_rules_validators(),
    pytest.mark.use_validators(
        [
            UniqueStoredNames,
            ValidateStaticTypes,
            ValidateCallKwargs,
            ImportsMustNotHaveCycles,
            ValidateDynamicCallsHaveAnnotatedRValue,
            VariablesMustBeDefined,
        ]
    ),
]


# Feature tuples handed to `make_rules_sources`; presumably (feature name,
# source text of its value) - verify against the fixture.
_STR_FEATURE = ('UserName', '"default"')
_INT_FEATURE = ('ActionCount', '0')


def test_binary_comparison_success(make_rules_sources: MakeRulesSourcesFunction) -> None:
    # A simple equality filter yields no errors and a Root node wrapping a
    # BinaryComparison whose spans are relative to the user's own source.
    rules_sources = make_rules_sources([_STR_FEATURE])
    result = validate_query_filter_to_json('UserName == "alice"', rules_sources)

    expected_left = {
        'kind': 'Name',
        'span': {'start': {'line': 1, 'col': 1}},
        'identifier': 'UserName',
    }
    expected_right = {
        'kind': 'String',
        'span': {'start': {'line': 1, 'col': 13}},
        'value': 'alice',
    }
    expected_ast = {
        'kind': 'Root',
        'span': {'start': {'line': 1, 'col': 1}},
        'expression': {
            'kind': 'BinaryComparison',
            'span': {'start': {'line': 1, 'col': 1}},
            'comparator': 'Equals',
            'left': expected_left,
            'right': expected_right,
        },
    }

    assert result['errors'] == []
    assert result['ast'] == expected_ast


@pytest.mark.parametrize(
    ('comparator_source', 'expected_kind'),
    [
        ('==', 'Equals'),
        ('!=', 'NotEquals'),
        ('<', 'LessThan'),
        ('<=', 'LessThanEquals'),
        ('>', 'GreaterThan'),
        ('>=', 'GreaterThanEquals'),
    ],
)
def test_comparator_kinds(
    make_rules_sources: MakeRulesSourcesFunction, comparator_source: str, expected_kind: str
) -> None:
    # Each comparison operator in source maps to its named comparator kind.
    query = f'ActionCount {comparator_source} 1'
    result = validate_query_filter_to_json(query, make_rules_sources([_INT_FEATURE]))

    assert result['errors'] == []
    ast = result['ast']
    assert ast is not None
    assert ast['expression']['comparator'] == expected_kind


def test_in_and_not_in(make_rules_sources: MakeRulesSourcesFunction) -> None:
    # `in` and `not in` serialize to the 'In' and 'NotIn' comparators.
    cases = (
        ('"alice" in UserName', 'In'),
        ('"alice" not in UserName', 'NotIn'),
    )
    for query, expected_comparator in cases:
        result = validate_query_filter_to_json(query, make_rules_sources([_STR_FEATURE]))
        assert result['errors'] == []
        assert result['ast'] is not None
        assert result['ast']['expression']['comparator'] == expected_comparator


def test_boolean_operation_and(make_rules_sources: MakeRulesSourcesFunction) -> None:
    # `A and B` becomes one BooleanOperation holding both comparisons.
    rules_sources = make_rules_sources([_STR_FEATURE, _INT_FEATURE])
    result = validate_query_filter_to_json('UserName == "a" and ActionCount > 5', rules_sources)

    assert result['errors'] == []
    root_expression = result['ast']['expression']
    assert root_expression['kind'] == 'BooleanOperation'
    assert root_expression['operand'] == 'And'

    operands = root_expression['values']
    assert len(operands) == 2
    assert [operand['kind'] for operand in operands] == ['BinaryComparison', 'BinaryComparison']


def test_boolean_operation_or_flattens(make_rules_sources: MakeRulesSourcesFunction) -> None:
    # `A or B or C` flattens into a single BooleanOperation with three values,
    # matching Python's own ast.BoolOp shape.
    query = 'UserName == "a" or UserName == "b" or UserName == "c"'
    result = validate_query_filter_to_json(query, make_rules_sources([_STR_FEATURE]))

    assert result['errors'] == []
    root_expression = result['ast']['expression']
    assert root_expression['kind'] == 'BooleanOperation'
    assert root_expression['operand'] == 'Or'
    assert len(root_expression['values']) == 3


def test_unary_not(make_rules_sources: MakeRulesSourcesFunction) -> None:
    # A leading `not` wraps the whole comparison in a UnaryOperation.
    rules_sources = make_rules_sources([_INT_FEATURE])
    result = validate_query_filter_to_json('not ActionCount == 1', rules_sources)

    assert result['errors'] == []
    negation = result['ast']['expression']
    assert negation['kind'] == 'UnaryOperation'
    assert negation['operator'] == 'Not'
    assert negation['operand']['kind'] == 'BinaryComparison'


def test_unary_usub(make_rules_sources: MakeRulesSourcesFunction) -> None:
    # Unary minus applies to the operand only, so the comparison stays the
    # root and its left side is the USub operation.
    rules_sources = make_rules_sources([_INT_FEATURE])
    result = validate_query_filter_to_json('-ActionCount == -1', rules_sources)

    assert result['errors'] == []
    comparison = result['ast']['expression']
    assert comparison['kind'] == 'BinaryComparison'

    left_side = comparison['left']
    assert left_side['kind'] == 'UnaryOperation'
    assert left_side['operator'] == 'USub'


def test_list_literal(make_rules_sources: MakeRulesSourcesFunction) -> None:
    # `x in ["a", "b", "c"]` — the list is a literal on the right, the feature
    # on the left. Osprey's grammar for `in` normalises these into a
    # BinaryComparison; the serializer preserves the shape.
    rules_sources = make_rules_sources([_STR_FEATURE])
    result = validate_query_filter_to_json('UserName in ["a", "b", "c"]', rules_sources)

    assert result['errors'] == []
    comparison = result['ast']['expression']
    assert comparison['kind'] == 'BinaryComparison'

    # Accept the list on either side of the comparison.
    if comparison['right']['kind'] == 'List':
        list_side = comparison['right']
    else:
        list_side = comparison['left']
    assert list_side['kind'] == 'List'

    item_values = [item['value'] for item in list_side['items']]
    assert item_values == ['a', 'b', 'c']


def test_boolean_and_none_literals(make_rules_sources: MakeRulesSourcesFunction) -> None:
    # Boolean literal: the right-hand side is checked in full, span included.
    bool_sources = make_rules_sources([('IsVerified', 'True')])
    bool_result = validate_query_filter_to_json('IsVerified == True', bool_sources)
    assert bool_result['errors'] == []
    expected_boolean = {
        'kind': 'Boolean',
        'span': {'start': {'line': 1, 'col': 15}},
        'value': True,
    }
    assert bool_result['ast']['expression']['right'] == expected_boolean

    # None literal: only the node kind is checked.
    none_sources = make_rules_sources([('Nullable', 'None')])
    none_result = validate_query_filter_to_json('Nullable == None', none_sources)
    assert none_result['errors'] == []
    assert none_result['ast']['expression']['right']['kind'] == 'None'


def test_unknown_feature_is_validation_error(make_rules_sources: MakeRulesSourcesFunction) -> None:
    # Referencing a feature the rules do not define fails validation: no AST,
    # and every reported error is of kind 'validation'.
    rules_sources = make_rules_sources([_STR_FEATURE])
    result = validate_query_filter_to_json('Unknown == "x"', rules_sources)

    assert result['ast'] is None
    errors = result['errors']
    assert len(errors) >= 1
    assert {error['kind'] for error in errors} == {'validation'}


def test_all_literal_comparison_is_validation_error(make_rules_sources: MakeRulesSourcesFunction) -> None:
    # A comparison between two literals is rejected as a validation error.
    rules_sources = make_rules_sources([_INT_FEATURE])
    result = validate_query_filter_to_json('1 == 2', rules_sources)

    assert result['ast'] is None
    errors = result['errors']
    assert len(errors) >= 1
    assert {error['kind'] for error in errors} == {'validation'}


def test_syntax_error_is_reported(make_rules_sources: MakeRulesSourcesFunction) -> None:
    # `a b` — invalid expression syntax (two names with no operator). Python's
    # parser raises SyntaxError which osprey surfaces as OspreySyntaxError.
    rules_sources = make_rules_sources([_STR_FEATURE])
    result = validate_query_filter_to_json('UserName UserName', rules_sources)

    assert result['ast'] is None
    errors = result['errors']
    assert len(errors) == 1
    assert errors[0]['kind'] == 'syntax'


def test_span_columns_are_relative_to_user_input(make_rules_sources: MakeRulesSourcesFunction) -> None:
    # The `Query = ` prefix osprey adds internally is 8 characters. The user
    # sees their source starting at col 1, so `UserName` should report as
    # starting at col 1 even though osprey internally sees it at col 9.
    rules_sources = make_rules_sources([_STR_FEATURE])
    result = validate_query_filter_to_json('UserName == "x"', rules_sources)

    left_operand = result['ast']['expression']['left']
    assert left_operand['span'] == {'start': {'line': 1, 'col': 1}}


def test_error_hint_is_null_when_absent(make_rules_sources: MakeRulesSourcesFunction) -> None:
    # Ensure the `hint: None` JSON field is present (not omitted) when the
    # underlying error carried no hint.
    rules_sources = make_rules_sources([_STR_FEATURE])
    result = validate_query_filter_to_json('UserName UserName', rules_sources)

    first_error = result['errors'][0]
    assert 'hint' in first_error
Loading
Loading