From 7966783f73d008e0d68a447f5c668de1a05913f9 Mon Sep 17 00:00:00 2001
From: BastienGimbert
Date: Wed, 29 Oct 2025 19:22:02 +0100
Subject: [PATCH] feat: add list_organization_datasets method and corresponding
 test

---
 src/huggingface_hub/hf_api.py | 56 +++++++++++++++++++++++++++++++++++
 tests/test_hf_api.py          |  7 +++++
 2 files changed, 63 insertions(+)

diff --git a/src/huggingface_hub/hf_api.py b/src/huggingface_hub/hf_api.py
index 17b55ce3a5..8245f161aa 100644
--- a/src/huggingface_hub/hf_api.py
+++ b/src/huggingface_hub/hf_api.py
@@ -9580,6 +9580,61 @@ def list_organization_members(self, organization: str, token: Union[bool, str, N
         ):
             yield User(**member)
 
+    @validate_hf_hub_args
+    def list_organization_datasets(
+        self,
+        organization: str,
+        *,
+        sort: Optional[str] = None,
+        search: Optional[str] = None,
+        token: Union[bool, str, None] = None,
+    ) -> Iterable[DatasetInfo]:
+        """
+        List datasets of an organization on the Hub.
+
+        Args:
+            organization (`str`):
+                Name of the organization to list the datasets of.
+            sort (`str`, *optional*):
+                Sorting criteria for the datasets. Supported values include:
+                `modified`, `created`, `alphabetical`, `likes`, `downloads`,
+                `most_rows`, `least_rows`.
+            search (`str`, *optional*):
+                Search query to filter datasets by name or description.
+            token (`bool` or `str`, *optional*):
+                A valid user access token (string). Defaults to the locally saved
+                token, which is the recommended method for authentication (see
+                https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
+                To disable authentication, pass `False`.
+
+        Returns:
+            `Iterable[DatasetInfo]`: A generator yielding [`DatasetInfo`] objects
+            for each dataset in the organization.
+
+        Raises:
+            [`HfHubHTTPError`]:
+                HTTP 404 If the organization does not exist on the Hub.
+        """
+        # Only forward the filters the caller actually provided.
+        params: dict[str, Any] = {}
+        if sort is not None:
+            params["sort"] = sort
+        if search is not None:
+            params["search"] = search
+
+        # A single request is issued; the "datasets" key of the JSON payload holds the results.
+        r = get_session().get(
+            f"{constants.ENDPOINT}/api/organizations/{organization}/datasets-json",
+            params=params,
+            headers=self._build_hf_headers(token=token),
+        )
+        hf_raise_for_status(r)
+        data = r.json()
+        for ds in data.get("datasets", []):
+            # Always hand `siblings` to DatasetInfo, as None when the payload omits it.
+            ds.setdefault("siblings", None)
+            yield DatasetInfo(**ds)
+
     def list_user_followers(self, username: str, token: Union[bool, str, None] = None) -> Iterable[User]:
         """
         Get the list of followers of a user on the Hub.
@@ -10845,6 +10900,7 @@ def _parse_revision_from_pr_url(pr_url: str) -> str:
 get_organization_overview = api.get_organization_overview
 list_organization_followers = api.list_organization_followers
 list_organization_members = api.list_organization_members
+list_organization_datasets = api.list_organization_datasets
 list_user_followers = api.list_user_followers
 list_user_following = api.list_user_following
 
diff --git a/tests/test_hf_api.py b/tests/test_hf_api.py
index e700dde3b0..39adb6680f 100644
--- a/tests/test_hf_api.py
+++ b/tests/test_hf_api.py
@@ -4198,6 +4198,13 @@ def test_organization_followers(self) -> None:
         assert first_follower.fullname
         assert first_follower.avatar_url
 
+    def test_list_organization_datasets(self) -> None:
+        datasets = list(self.api.list_organization_datasets("openai", sort="downloads", search="gsm"))
+        # Materializing the generator above already exercises the request and parsing;
+        # every yielded entry must carry a non-empty dataset id.
+        for dataset in datasets:
+            assert dataset.id
+
     def test_user_followers(self) -> None:
         followers = self.api.list_user_followers("clem")
         assert len(list(followers)) > 500