def validate_filter_dims(query_string=None) -> dict:
    """
    Helper function to validate the dimension filter (dims) parameter

    :param query_string: `str` of comma-separated `dimension:value` pairs,
                         e.g. `dim1:value1,dim2:value2`

    :returns: `dict` of dimension name to (string) value; empty `dict`
              when no query string was passed
    """

    if query_string is None:
        LOGGER.debug('dims is empty')
        return {}

    if not isinstance(query_string, str):
        msg = 'dimension query must be string'
        LOGGER.debug(msg)
        raise ValueError(msg)

    dims = {}
    for item in query_string.split(','):
        # only the first colon separates name from value, so a value may
        # itself contain colons
        name, colon, level = (part.strip() for part in item.partition(':'))

        if not colon:
            problem = "filter dimension and value must be separated by a colon ':' "  # noqa
        elif not (name and level):
            problem = f"Empty key or value in pair: '{item}'"
        elif name in dims:
            problem = f"Duplicate key found: '{name}'"
        else:
            dims[name] = level
            continue

        # single exit point for every malformed pair
        LOGGER.debug(problem)
        raise ValueError(problem)

    return dims
import (APIRequest, API, F_COVERAGEJSON, F_HTML, F_JSON, F_JSONLD, - validate_datetime, validate_bbox) + validate_datetime, validate_bbox, validate_filter_dims) LOGGER = logging.getLogger(__name__) @@ -298,6 +298,11 @@ def get_collection_edr_query(api: API, request: APIRequest, if isinstance(parameternames, str): parameternames = parameternames.split(',') + LOGGER.debug('Processing dims parameter') + dims = request.params.get('dims') + if dims: + dims = validate_filter_dims(dims) + bbox = None if query_type in ['cube', 'locations']: LOGGER.debug('Processing cube bbox') @@ -364,6 +369,7 @@ def get_collection_edr_query(api: API, request: APIRequest, format_=request.format, datetime_=datetime_, select_properties=parameternames, + dims=dims, wkt=wkt, z=z, bbox=bbox, diff --git a/pygeoapi/openapi.py b/pygeoapi/openapi.py index 6492e3b8c..281a35407 100644 --- a/pygeoapi/openapi.py +++ b/pygeoapi/openapi.py @@ -609,6 +609,21 @@ def get_oas_30_parameters(cfg: dict, locale_: str): 'type': 'string' } }, + 'dims': { + 'name': 'dims', + 'in': 'query', + 'description': 'Allows to select dims from multi dimensional EDR', + 'required': False, + 'style': 'form', + 'explode': False, + 'schema': { + 'type': 'string', + 'items': { + 'type': 'string' + }, + 'format': 'dim1:value1,dim2:value2' + } + }, 'bbox': { 'name': 'bbox', 'in': 'query', diff --git a/pygeoapi/provider/base.py b/pygeoapi/provider/base.py index 538c076a9..be6f11834 100644 --- a/pygeoapi/provider/base.py +++ b/pygeoapi/provider/base.py @@ -74,6 +74,7 @@ def __init__(self, provider_def): self.properties = provider_def.get('properties', []) self.file_types = provider_def.get('file_types', []) self._fields = {} + self._dims = {} self.filename = None # for coverage providers diff --git a/pygeoapi/provider/base_edr.py b/pygeoapi/provider/base_edr.py index 96f6417fb..cb458fbb0 100644 --- a/pygeoapi/provider/base_edr.py +++ b/pygeoapi/provider/base_edr.py @@ -94,6 +94,7 @@ def query(self, **kwargs): :param wkt: 
def get_dims(self):
    """
    Provide the filterable dimensions of the dataset

    Every entry of `self._data.coords` that is not the configured time,
    x or y field is reported as a filterable dimension. The result is
    stored in `self._dims` and returned as-is on later calls once it is
    non-empty.

    :returns: dict of dimensions keyed by coordinate name, each with
              `type` (Python type of the first coordinate value),
              `title`, `x-ogc-unit` and `values`
    """

    if self._dims:
        return self._dims

    axis_fields = (self.time_field, self.x_field, self.y_field)

    for key, value in self._data.coords.items():
        if key in axis_fields:
            continue

        # tolist() returns a bare scalar for a 0-d coordinate; wrap it so
        # scalar and 1-d coordinates are handled the same way
        values = value.values.tolist()
        if not isinstance(values, list):
            values = [values]

        if not values:
            # nothing to filter on, and no first value to infer a type from
            LOGGER.debug(f'Skipping dim without values: {key}')
            continue

        LOGGER.debug('Adding filterable dim')
        value_type = type(values[0])
        LOGGER.debug(f"""key: {key} with type: {value_type}""")

        self._dims[key] = {
            'type': value_type,
            'title': value.attrs.get('long_name'),
            'x-ogc-unit': value.attrs.get('units'),
            'values': values
        }

    return self._dims
import ( _to_datetime_string, @@ -66,6 +67,7 @@ def position(self, **kwargs): :param wkt: `shapely.geometry` WKT geometry :param datetime_: temporal (datestamp or extent) :param select_properties: list of parameters + :param dims: dict of dimensions to filter :param z: vertical level(s) :param format_: data format of output @@ -96,6 +98,8 @@ def position(self, **kwargs): LOGGER.debug('Processing parameter-name') select_properties = kwargs.get('select_properties') + dims = kwargs.get('dims') + # example of fetching instance passed # TODO: apply accordingly instance = kwargs.get('instance') @@ -114,6 +118,30 @@ def position(self, **kwargs): else: data = self._data + if dims: + string_query = {} + if isinstance(dims, dict): + for coord, level in dims.items(): + if coord in self._dims: + if self._dims[coord]['type'](level) in self._dims[coord]['values']: # noqa + if self._dims[coord]['type'] == str: + string_query[coord] = self._dims[coord]['type'](level) # noqa + else: + query_params[coord] = self._dims[coord]['type'](level) # noqa + else: + raise ProviderInvalidQueryError( + user_msg=( + f"Invalid Value '{level}' for Dimension Parameter '{coord}'. 
" # noqa + f"Valid Values are '{self._dims[coord]['values']}'" # noqa + ) + ) + + data = data.sel(string_query) + else: + raise ProviderInvalidQueryError(user_msg=f"""Invalid Dimension Parameter '{coord}'""") # noqa + + LOGGER.debug(query_params) + if self.time_field in query_params: remaining_query = { key: val for key, val in query_params.items() @@ -150,6 +178,7 @@ def position(self, **kwargs): bbox = wkt.bounds out_meta = { 'bbox': [bbox[0], bbox[1], bbox[2], bbox[3]], + 'dims': dims, "time": time, "driver": "xarray", "height": height, @@ -203,6 +232,8 @@ def cube(self, **kwargs): if datetime_ is not None: query_params[self.time_field] = self._make_datetime(datetime_) + dims = kwargs.get('dims') + LOGGER.debug(f'query parameters: {query_params}') try: if select_properties: @@ -210,6 +241,29 @@ def cube(self, **kwargs): data = self._data[[*select_properties]] else: data = self._data + + if dims: + string_query = {} + if isinstance(dims, dict): + for coord, level in dims.items(): + if coord in self._dims: + if self._dims[coord]['type'](level) in self._dims[coord]['values']: # noqa + if self._dims[coord]['type'] == str: + string_query[coord] = self._dims[coord]['type'](level) # noqa + else: + query_params[coord] = self._dims[coord]['type'](level) # noqa + else: + raise ProviderInvalidQueryError( + user_msg=( + f"Invalid Value '{level}' for Dimension Parameter '{coord}'. 
def test_validate_filter_dims():
    """Exercise validate_filter_dims with well-formed and malformed input"""

    no_colon = "filter dimension and value must be separated by a colon ':' "  # noqa

    # queries that must parse, paired with the mapping they must produce
    accepted = [
        ('key1:val1', {'key1': 'val1'}),
        ('key1:val1,key2:val2', {'key1': 'val1', 'key2': 'val2'}),
        (None, {}),
    ]
    for query, expected in accepted:
        assert validate_filter_dims(query) == expected

    # (query, expected message, whether the exact exception class is checked)
    rejected = [
        (123, 'dimension query must be string', True),
        ('key1val1', no_colon, True),
        ('key1:val1,key1:val2', "Duplicate key found: 'key1'", False),
        (':val1,key1:val2', "Empty key or value in pair: ':val1'", False),
        ('key1:,key1:val2', "Empty key or value in pair: 'key1:'", False),
        ('', no_colon, False),
    ]
    for query, message, exact_type in rejected:
        with pytest.raises(ValueError, match=message) as excinfo:
            validate_filter_dims(query)
        if exact_type:
            assert excinfo.type == ValueError