Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Key completion for dataset access #3973

Merged
merged 9 commits into from
Jul 4, 2024
2 changes: 2 additions & 0 deletions RELEASE.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
* Updated error message for invalid catalog entries.
* Updated error message for catalog entries when the dataset class is not found with hints on how to resolve the issue.
* Fixed a bug in the `DataCatalog` `shallow_copy()` method to ensure it returns the type of the used catalog and doesn't cast it to `DataCatalog`.
* Implemented key completion support for accessing datasets in the `DataCatalog`.


## Breaking changes to the API

Expand Down
20 changes: 14 additions & 6 deletions kedro/io/data_catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,28 +105,36 @@ def __init__(
"""Return a _FrozenDatasets instance from some datasets collections.
Each collection could either be another _FrozenDatasets or a dictionary.
"""
self._original_names: set[str] = set()
for collection in datasets_collections:
if isinstance(collection, _FrozenDatasets):
self.__dict__.update(collection.__dict__)
self._original_names.update(collection._original_names)
else:
# Non-word characters in dataset names are replaced with `__`
# for easy access to transcoded/prefixed datasets.
self.__dict__.update(
{
_sub_nonword_chars(dataset_name): dataset
for dataset_name, dataset in collection.items()
}
)
for dataset_name, dataset in collection.items():
self.__dict__[_sub_nonword_chars(dataset_name)] = dataset
self._original_names.add(dataset_name)

# Don't allow users to add/change attributes on the fly
def __setattr__(self, key: str, value: Any) -> None:
    """Reject attribute assignment, except for the ``_original_names`` slot.

    Args:
        key: Name of the attribute being assigned.
        value: Value being assigned; only stored when ``key`` is
            ``"_original_names"``.

    Raises:
        AttributeError: For every other ``key``. The hint in the message
            depends on whether ``key`` is already present in ``__dict__``.
    """
    # Internal bookkeeping is the only attribute allowed through.
    if key == "_original_names":
        super().__setattr__(key, value)
        return

    hint = (
        "Please change datasets through configuration."
        if key in self.__dict__
        else "Please use DataCatalog.add() instead."
    )
    raise AttributeError("Operation not allowed! " + hint)

def _ipython_key_completions_(self) -> list[str]:
    """Return the original dataset names for IPython key completion.

    Returns:
        The names stored in ``_original_names``, sorted so the result is
        stable across runs (iteration order of a set of strings is not).
    """
    return sorted(self._original_names)
datajoely marked this conversation as resolved.
Show resolved Hide resolved

def __getitem__(self, key: str) -> Any:
    """Return the dataset stored under ``key``.

    ``key`` is passed through ``_sub_nonword_chars`` before the lookup, so
    a dataset can be fetched by its original name as well as by its
    attribute-style name.

    Args:
        key: Dataset name to look up.

    Returns:
        The object stored in ``__dict__`` under the normalised name.

    Raises:
        KeyError: If no dataset matches ``key``. The error carries the key
            exactly as the caller passed it.
    """
    attr_name = _sub_nonword_chars(key)
    # `_original_names` shares `__dict__` with the datasets but is internal
    # bookkeeping, so it must not be reachable through item access.
    if attr_name == "_original_names":
        raise KeyError(key)
    try:
        return self.__dict__[attr_name]
    except KeyError:
        # Report the caller's key rather than the normalised one.
        raise KeyError(key) from None
noklam marked this conversation as resolved.
Show resolved Hide resolved


class DataCatalog:
"""``DataCatalog`` stores instances of ``AbstractDataset`` implementations
Expand Down
24 changes: 24 additions & 0 deletions tests/io/test_data_catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -442,6 +442,30 @@ class MyDataCatalog(DataCatalog):
copy = data_catalog.shallow_copy()
assert isinstance(copy, MyDataCatalog)

def test_key_completions(self, data_catalog_from_config):
    """Item access and IPython key completions on ``catalog.datasets``."""
    catalog = data_catalog_from_config

    # Datasets that come from the catalog configuration.
    for configured_name in ("boats", "cars"):
        assert isinstance(catalog.datasets[configured_name], CSVDataset)

    feed = {
        "params:model_options": [1, 2, 4],
        "params:model_options.random_state": [0, 42, 67],
    }
    catalog.add_feed_dict(feed)

    # One lookup uses the original name, the other a partly normalised one.
    for lookup_key in (
        "params:model_options",
        "params__model_options.random_state",
    ):
        assert isinstance(catalog.datasets[lookup_key], MemoryDataset)

    expected_completions = {"boats", "cars", *feed}
    completions = catalog.datasets._ipython_key_completions_()
    assert set(completions) == expected_completions


class TestDataCatalogFromConfig:
def test_from_sane_config(self, data_catalog_from_config, dummy_dataframe):
Expand Down