From 3f5083073b9748aeed8a68a8c52f833aac31bd16 Mon Sep 17 00:00:00 2001 From: Flora Thiebaut Date: Wed, 14 Jan 2026 14:26:08 +0000 Subject: [PATCH 01/54] feat: update OAuth 2.0 providers --- .../connected_services/api.spec.yaml | 7 +- .../connected_services/apispec.py | 6 +- .../connected_services/provider_adapters.py | 232 +++++++++--------- 3 files changed, 122 insertions(+), 123 deletions(-) diff --git a/components/renku_data_services/connected_services/api.spec.yaml b/components/renku_data_services/connected_services/api.spec.yaml index d783c68b3..59b88afa0 100644 --- a/components/renku_data_services/connected_services/api.spec.yaml +++ b/components/renku_data_services/connected_services/api.spec.yaml @@ -428,9 +428,10 @@ components: enum: - "gitlab" - "github" - - "drive" - - "onedrive" - - "dropbox" + - "google" + # - "drive" + # - "onedrive" + # - "dropbox" - "generic_oidc" example: "gitlab" ApplicationSlug: diff --git a/components/renku_data_services/connected_services/apispec.py b/components/renku_data_services/connected_services/apispec.py index f6f9c5417..9f26b8751 100644 --- a/components/renku_data_services/connected_services/apispec.py +++ b/components/renku_data_services/connected_services/apispec.py @@ -1,6 +1,6 @@ # generated by datamodel-codegen: # filename: api.spec.yaml -# timestamp: 2025-09-05T11:16:18+00:00 +# timestamp: 2026-01-14T14:25:25+00:00 from __future__ import annotations @@ -31,9 +31,7 @@ class AppInstallation(BaseAPISpec): class ProviderKind(Enum): gitlab = "gitlab" github = "github" - drive = "drive" - onedrive = "onedrive" - dropbox = "dropbox" + google = "google" generic_oidc = "generic_oidc" diff --git a/components/renku_data_services/connected_services/provider_adapters.py b/components/renku_data_services/connected_services/provider_adapters.py index b49fea2a2..88ebdbe86 100644 --- a/components/renku_data_services/connected_services/provider_adapters.py +++ b/components/renku_data_services/connected_services/provider_adapters.py 
@@ -120,119 +120,119 @@ def api_validate_app_installations_response(self, response: Response) -> models. return external_models.GitHubAppInstallationList.model_validate(response.json()).to_app_installation_list() -class GoogleDriveAdapter(ProviderAdapter): - """Adapter for Google Drive OAuth2 clients.""" - - user_info_endpoint = "userinfo" - - @property - def authorization_url(self) -> str: - """The authorization URL for the OAuth2 protocol.""" - return "https://accounts.google.com/o/oauth2/auth" - - @property - def authorization_url_extra_params(self) -> dict[str, str]: - """Extra parameters to add to the auth url.""" - return {"access_type": "offline"} - - @property - def token_endpoint_url(self) -> str: - """The token endpoint URL for the OAuth2 protocol.""" - return "https://oauth2.googleapis.com/token" - - @property - def api_url(self) -> str: - """The URL used for API calls on the Resource Server.""" - return "https://www.googleapis.com/oauth2/v2/" - - @property - def api_common_headers(self) -> dict[str, str] | None: - """The HTTP headers used for API calls on the Resource Server.""" - return { - "Accept": "application/json", - "Content-Type": "application/json", - } - - def api_validate_account_response(self, response: Response) -> models.ConnectedAccount: - """Validates and returns the connected account response from the Resource Server.""" - return external_models.GoogleDriveConnectedAccount.model_validate(response.json()).to_connected_account() - - -class OneDriveAdapter(ProviderAdapter): - """Adapter for One Drive OAuth2 clients.""" - - user_info_endpoint = "userinfo" - - @property - def authorization_url(self) -> str: - """The authorization URL for the OAuth2 protocol.""" - return "https://login.microsoftonline.com/common/oauth2/v2.0/authorize" - - @property - def authorization_url_extra_params(self) -> dict[str, str]: - """Extra parameters to add to the auth url.""" - return {"access_type": "offline"} - - @property - def token_endpoint_url(self) -> 
str: - """The token endpoint URL for the OAuth2 protocol.""" - return "https://login.microsoftonline.com/common/oauth2/v2.0/token" - - @property - def api_url(self) -> str: - """The URL used for API calls on the Resource Server.""" - return "https://graph.microsoft.com/oidc/" - - @property - def api_common_headers(self) -> dict[str, str] | None: - """The HTTP headers used for API calls on the Resource Server.""" - return { - "Accept": "application/json", - "Content-Type": "application/json", - } - - def api_validate_account_response(self, response: Response) -> models.ConnectedAccount: - """Validates and returns the connected account response from the Resource Server.""" - return external_models.OneDriveConnectedAccount.model_validate(response.json()).to_connected_account() - - -class DropboxAdapter(ProviderAdapter): - """Adapter for Dropbox OAuth2 clients.""" - - user_info_endpoint = "userinfo" - user_info_method = "POST" - - @property - def authorization_url(self) -> str: - """The authorization URL for the OAuth2 protocol.""" - return "https://www.dropbox.com/oauth2/authorize" - - @property - def authorization_url_extra_params(self) -> dict[str, str]: - """Extra parameters to add to the auth url.""" - return {"access_type": "offline"} - - @property - def token_endpoint_url(self) -> str: - """The token endpoint URL for the OAuth2 protocol.""" - return "https://api.dropboxapi.com/oauth2/token" - - @property - def api_url(self) -> str: - """The URL used for API calls on the Resource Server.""" - return "https://api.dropboxapi.com/2/openid/" - - @property - def api_common_headers(self) -> dict[str, str] | None: - """The HTTP headers used for API calls on the Resource Server.""" - return { - "Accept": "application/json", - "Content-Type": "application/json", - } - - def api_validate_account_response(self, response: Response) -> models.ConnectedAccount: - """Validates and returns the connected account response from the Resource Server.""" - return 
external_models.DropboxConnectedAccount.model_validate(response.json()).to_connected_account() +# class GoogleDriveAdapter(ProviderAdapter): +# """Adapter for Google Drive OAuth2 clients.""" + +# user_info_endpoint = "userinfo" + +# @property +# def authorization_url(self) -> str: +# """The authorization URL for the OAuth2 protocol.""" +# return "https://accounts.google.com/o/oauth2/auth" + +# @property +# def authorization_url_extra_params(self) -> dict[str, str]: +# """Extra parameters to add to the auth url.""" +# return {"access_type": "offline"} + +# @property +# def token_endpoint_url(self) -> str: +# """The token endpoint URL for the OAuth2 protocol.""" +# return "https://oauth2.googleapis.com/token" + +# @property +# def api_url(self) -> str: +# """The URL used for API calls on the Resource Server.""" +# return "https://www.googleapis.com/oauth2/v2/" + +# @property +# def api_common_headers(self) -> dict[str, str] | None: +# """The HTTP headers used for API calls on the Resource Server.""" +# return { +# "Accept": "application/json", +# "Content-Type": "application/json", +# } + +# def api_validate_account_response(self, response: Response) -> models.ConnectedAccount: +# """Validates and returns the connected account response from the Resource Server.""" +# return external_models.GoogleDriveConnectedAccount.model_validate(response.json()).to_connected_account() + + +# class OneDriveAdapter(ProviderAdapter): +# """Adapter for One Drive OAuth2 clients.""" + +# user_info_endpoint = "userinfo" + +# @property +# def authorization_url(self) -> str: +# """The authorization URL for the OAuth2 protocol.""" +# return "https://login.microsoftonline.com/common/oauth2/v2.0/authorize" + +# @property +# def authorization_url_extra_params(self) -> dict[str, str]: +# """Extra parameters to add to the auth url.""" +# return {"access_type": "offline"} + +# @property +# def token_endpoint_url(self) -> str: +# """The token endpoint URL for the OAuth2 protocol.""" +# return 
"https://login.microsoftonline.com/common/oauth2/v2.0/token" + +# @property +# def api_url(self) -> str: +# """The URL used for API calls on the Resource Server.""" +# return "https://graph.microsoft.com/oidc/" + +# @property +# def api_common_headers(self) -> dict[str, str] | None: +# """The HTTP headers used for API calls on the Resource Server.""" +# return { +# "Accept": "application/json", +# "Content-Type": "application/json", +# } + +# def api_validate_account_response(self, response: Response) -> models.ConnectedAccount: +# """Validates and returns the connected account response from the Resource Server.""" +# return external_models.OneDriveConnectedAccount.model_validate(response.json()).to_connected_account() + + +# class DropboxAdapter(ProviderAdapter): +# """Adapter for Dropbox OAuth2 clients.""" + +# user_info_endpoint = "userinfo" +# user_info_method = "POST" + +# @property +# def authorization_url(self) -> str: +# """The authorization URL for the OAuth2 protocol.""" +# return "https://www.dropbox.com/oauth2/authorize" + +# @property +# def authorization_url_extra_params(self) -> dict[str, str]: +# """Extra parameters to add to the auth url.""" +# return {"access_type": "offline"} + +# @property +# def token_endpoint_url(self) -> str: +# """The token endpoint URL for the OAuth2 protocol.""" +# return "https://api.dropboxapi.com/oauth2/token" + +# @property +# def api_url(self) -> str: +# """The URL used for API calls on the Resource Server.""" +# return "https://api.dropboxapi.com/2/openid/" + +# @property +# def api_common_headers(self) -> dict[str, str] | None: +# """The HTTP headers used for API calls on the Resource Server.""" +# return { +# "Accept": "application/json", +# "Content-Type": "application/json", +# } + +# def api_validate_account_response(self, response: Response) -> models.ConnectedAccount: +# """Validates and returns the connected account response from the Resource Server.""" +# return 
external_models.DropboxConnectedAccount.model_validate(response.json()).to_connected_account() class GenericOidcAdapter(ProviderAdapter): @@ -316,9 +316,9 @@ def __get_httpx_client(cls) -> Client: _adapter_map: dict[models.ProviderKind, type[ProviderAdapter]] = { models.ProviderKind.gitlab: GitLabAdapter, models.ProviderKind.github: GitHubAdapter, - models.ProviderKind.drive: GoogleDriveAdapter, - models.ProviderKind.onedrive: OneDriveAdapter, - models.ProviderKind.dropbox: DropboxAdapter, + # models.ProviderKind.drive: GoogleDriveAdapter, + # models.ProviderKind.onedrive: OneDriveAdapter, + # models.ProviderKind.dropbox: DropboxAdapter, models.ProviderKind.generic_oidc: GenericOidcAdapter, } From 2410b8002685d1d3da3af33d1c9f605675558fcf Mon Sep 17 00:00:00 2001 From: Flora Thiebaut Date: Wed, 14 Jan 2026 14:42:17 +0000 Subject: [PATCH 02/54] add migration --- .../connected_services/external_models.py | 54 +++++++++---------- .../connected_services/models.py | 9 ++-- ...26c2f3_upgrade_oauth_provider_kind_enum.py | 39 ++++++++++++++ 3 files changed, 71 insertions(+), 31 deletions(-) create mode 100644 components/renku_data_services/migrations/versions/58ad5426c2f3_upgrade_oauth_provider_kind_enum.py diff --git a/components/renku_data_services/connected_services/external_models.py b/components/renku_data_services/connected_services/external_models.py index 19cb51b5a..95362257b 100644 --- a/components/renku_data_services/connected_services/external_models.py +++ b/components/renku_data_services/connected_services/external_models.py @@ -63,43 +63,43 @@ def to_app_installation_list(self) -> models.AppInstallationList: ) -class GoogleDriveConnectedAccount(BaseModel): - """OAuth2 connected account model for google drive.""" +# class GoogleDriveConnectedAccount(BaseModel): +# """OAuth2 connected account model for google drive.""" - name: str - email: str +# name: str +# email: str - def to_connected_account(self) -> models.ConnectedAccount: - """Returns the corresponding 
ConnectedAccount object.""" - return models.ConnectedAccount(username=self.name, web_url=f"mailto:{self.email}") +# def to_connected_account(self) -> models.ConnectedAccount: +# """Returns the corresponding ConnectedAccount object.""" +# return models.ConnectedAccount(username=self.name, web_url=f"mailto:{self.email}") -class OneDriveConnectedAccount(BaseModel): - """OAuth2 connected account model for onedrive.""" +# class OneDriveConnectedAccount(BaseModel): +# """OAuth2 connected account model for onedrive.""" - givenname: str - familyname: str - email: str +# givenname: str +# familyname: str +# email: str - def to_connected_account(self) -> models.ConnectedAccount: - """Returns the corresponding ConnectedAccount object.""" - return models.ConnectedAccount( - username=" ".join(filter(None, [self.givenname, self.familyname])), web_url=f"mailto:{self.email}" - ) +# def to_connected_account(self) -> models.ConnectedAccount: +# """Returns the corresponding ConnectedAccount object.""" +# return models.ConnectedAccount( +# username=" ".join(filter(None, [self.givenname, self.familyname])), web_url=f"mailto:{self.email}" +# ) -class DropboxConnectedAccount(BaseModel): - """OAuth2 connected account model for dropbox.""" +# class DropboxConnectedAccount(BaseModel): +# """OAuth2 connected account model for dropbox.""" - family_name: str | None - given_name: str | None - email: str +# family_name: str | None +# given_name: str | None +# email: str - def to_connected_account(self) -> models.ConnectedAccount: - """Returns the corresponding ConnectedAccount object.""" - return models.ConnectedAccount( - username=" ".join(filter(None, [self.given_name, self.family_name])), web_url=f"mailto:{self.email}" - ) +# def to_connected_account(self) -> models.ConnectedAccount: +# """Returns the corresponding ConnectedAccount object.""" +# return models.ConnectedAccount( +# username=" ".join(filter(None, [self.given_name, self.family_name])), web_url=f"mailto:{self.email}" +# ) class 
GenericOIDCConnectedAccount(BaseModel): diff --git a/components/renku_data_services/connected_services/models.py b/components/renku_data_services/connected_services/models.py index c7f3768a0..51ddc0ef0 100644 --- a/components/renku_data_services/connected_services/models.py +++ b/components/renku_data_services/connected_services/models.py @@ -15,9 +15,10 @@ class ProviderKind(StrEnum): gitlab = "gitlab" github = "github" - drive = "drive" - onedrive = "onedrive" - dropbox = "dropbox" + google = "google" + # drive = "drive" + # onedrive = "onedrive" + # dropbox = "dropbox" generic_oidc = "generic_oidc" @@ -99,7 +100,7 @@ class ConnectedAccount: web_url: str -class OAuth2TokenSet(dict): +class OAuth2TokenSet(dict[str, Any]): """OAuth2 token set model.""" @classmethod diff --git a/components/renku_data_services/migrations/versions/58ad5426c2f3_upgrade_oauth_provider_kind_enum.py b/components/renku_data_services/migrations/versions/58ad5426c2f3_upgrade_oauth_provider_kind_enum.py new file mode 100644 index 000000000..ad1520a5e --- /dev/null +++ b/components/renku_data_services/migrations/versions/58ad5426c2f3_upgrade_oauth_provider_kind_enum.py @@ -0,0 +1,39 @@ +"""upgrade oauth provider kind enum + +Revision ID: 58ad5426c2f3 +Revises: 9b18adb58e63 +Create Date: 2026-01-14 14:35:29.539830 + +""" + +from alembic import op + +# revision identifiers, used by Alembic. 
+revision = "58ad5426c2f3" +down_revision = "9b18adb58e63" +branch_labels = None +depends_on = None + +# NOTE: Postgres does not allow removing values from an enum + + +def upgrade() -> None: + op.execute("DELETE FROM connected_services.oauth2_clients WHERE kind = 'drive'") + op.execute("DELETE FROM connected_services.oauth2_clients WHERE kind = 'onedrive'") + op.execute("DELETE FROM connected_services.oauth2_clients WHERE kind = 'dropbox'") + op.execute("ALTER TYPE providerkind RENAME TO providerkind_old") + op.execute("CREATE TYPE providerkind AS ENUM ('gitlab', 'github', 'google', 'generic_oidc')") + op.execute( + "ALTER TABLE connected_services.oauth2_clients ALTER COLUMN kind SET DATA TYPE providerkind USING kind::text::providerkind" + ) + op.execute("DROP TYPE providerkind_old CASCADE") + + +def downgrade() -> None: + op.execute("DELETE FROM connected_services.oauth2_clients WHERE kind = 'google'") + op.execute("ALTER TYPE providerkind RENAME TO providerkind_old") + op.execute("CREATE TYPE providerkind AS ENUM ('gitlab', 'github', 'drive', 'onedrive', 'dropbox', 'generic_oidc')") + op.execute( + "ALTER TABLE connected_services.oauth2_clients ALTER COLUMN kind SET DATA TYPE providerkind USING kind::text::providerkind" + ) + op.execute("DROP TYPE providerkind_old CASCADE") From 6cf939eb3e340147600c699d6fe614bbf1d5d074 Mon Sep 17 00:00:00 2001 From: Flora Thiebaut Date: Wed, 14 Jan 2026 15:33:05 +0000 Subject: [PATCH 03/54] fix --- ...426c2f3_upgrade_oauth_provider_kind_enum.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/components/renku_data_services/migrations/versions/58ad5426c2f3_upgrade_oauth_provider_kind_enum.py b/components/renku_data_services/migrations/versions/58ad5426c2f3_upgrade_oauth_provider_kind_enum.py index ad1520a5e..8c38fd8c9 100644 --- a/components/renku_data_services/migrations/versions/58ad5426c2f3_upgrade_oauth_provider_kind_enum.py +++ 
b/components/renku_data_services/migrations/versions/58ad5426c2f3_upgrade_oauth_provider_kind_enum.py @@ -7,6 +7,9 @@ """ from alembic import op +from sqlalchemy.exc import OperationalError + +from renku_data_services.app_config import logging # revision identifiers, used by Alembic. revision = "58ad5426c2f3" @@ -14,13 +17,22 @@ branch_labels = None depends_on = None +logger = logging.getLogger(__name__) + # NOTE: Postgres does not allow removing values from an enum def upgrade() -> None: - op.execute("DELETE FROM connected_services.oauth2_clients WHERE kind = 'drive'") - op.execute("DELETE FROM connected_services.oauth2_clients WHERE kind = 'onedrive'") - op.execute("DELETE FROM connected_services.oauth2_clients WHERE kind = 'dropbox'") + connection = op.get_bind() + with connection.begin_nested() as tx: + try: + op.execute("DELETE FROM connected_services.oauth2_clients WHERE kind = 'drive'") + op.execute("DELETE FROM connected_services.oauth2_clients WHERE kind = 'onedrive'") + op.execute("DELETE FROM connected_services.oauth2_clients WHERE kind = 'dropbox'") + tx.commit() + except OperationalError as err: + logger.debug(f"Skipped DELETE section from migration of the connected_services.oauth2_clients table: {err}") + tx.rollback() op.execute("ALTER TYPE providerkind RENAME TO providerkind_old") op.execute("CREATE TYPE providerkind AS ENUM ('gitlab', 'github', 'google', 'generic_oidc')") op.execute( From db0496eebb61dfa0bb80afb10c32a2b4188184b7 Mon Sep 17 00:00:00 2001 From: Flora Thiebaut Date: Thu, 15 Jan 2026 09:43:46 +0000 Subject: [PATCH 04/54] adjust error when not implemented --- .../connected_services/provider_adapters.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/components/renku_data_services/connected_services/provider_adapters.py b/components/renku_data_services/connected_services/provider_adapters.py index 88ebdbe86..f4efec955 100644 --- a/components/renku_data_services/connected_services/provider_adapters.py +++ 
b/components/renku_data_services/connected_services/provider_adapters.py @@ -313,6 +313,7 @@ def __get_httpx_client(cls) -> Client: return cls._httpx_client +# TODO: test that this map is exhaustive _adapter_map: dict[models.ProviderKind, type[ProviderAdapter]] = { models.ProviderKind.gitlab: GitLabAdapter, models.ProviderKind.github: GitHubAdapter, @@ -330,5 +331,7 @@ def get_provider_adapter(client: schemas.OAuth2ClientORM) -> ProviderAdapter: if not client.url: raise errors.ValidationError(message=f"URL not defined for provider {client.id}.") - adapter_class = _adapter_map[client.kind] + adapter_class = _adapter_map.get(client.kind) + if adapter_class is None: + raise errors.ProgrammingError(message=f"Provider adapter not implemented for kind {client.kind}.") return adapter_class(client_url=client.url, oidc_issuer_url=client.oidc_issuer_url) From cc51d0781b25ea1bfa00d6bdfcac1ed7dabeff3f Mon Sep 17 00:00:00 2001 From: Flora Thiebaut Date: Thu, 15 Jan 2026 09:49:06 +0000 Subject: [PATCH 05/54] reactivate google adapters --- .../connected_services/external_models.py | 11 +++++ .../connected_services/provider_adapters.py | 44 +++++++++++++++++-- 2 files changed, 52 insertions(+), 3 deletions(-) diff --git a/components/renku_data_services/connected_services/external_models.py b/components/renku_data_services/connected_services/external_models.py index 95362257b..1e64e2da9 100644 --- a/components/renku_data_services/connected_services/external_models.py +++ b/components/renku_data_services/connected_services/external_models.py @@ -63,6 +63,17 @@ def to_app_installation_list(self) -> models.AppInstallationList: ) +class GoogleConnectedAccount(BaseModel): + """OAuth2 connected account model for Google.""" + + name: str + email: str + + def to_connected_account(self) -> models.ConnectedAccount: + """Returns the corresponding ConnectedAccount object.""" + return models.ConnectedAccount(username=self.name, web_url=f"mailto:{self.email}") + + # class 
GoogleDriveConnectedAccount(BaseModel): # """OAuth2 connected account model for google drive.""" diff --git a/components/renku_data_services/connected_services/provider_adapters.py b/components/renku_data_services/connected_services/provider_adapters.py index f4efec955..a3eac6855 100644 --- a/components/renku_data_services/connected_services/provider_adapters.py +++ b/components/renku_data_services/connected_services/provider_adapters.py @@ -1,6 +1,5 @@ """Adapters for each kind of OAuth2 client.""" -import logging from abc import ABC, abstractmethod from typing import Any from urllib.parse import urljoin, urlparse, urlunparse @@ -8,11 +7,10 @@ from httpx import Client, Response from renku_data_services import errors +from renku_data_services.app_config import logging from renku_data_services.connected_services import external_models, models from renku_data_services.connected_services import orm as schemas -logger = logging.getLogger(__name__) - class ProviderAdapter(ABC): """Defines the functionality of OAuth2 client adapters.""" @@ -120,6 +118,46 @@ def api_validate_app_installations_response(self, response: Response) -> models. 
return external_models.GitHubAppInstallationList.model_validate(response.json()).to_app_installation_list() +class GoogleAdapter(ProviderAdapter): + """Adapter for Google OAuth2 clients.""" + + user_info_endpoint = "userinfo" + + @property + def authorization_url(self) -> str: + """The authorization URL for the OAuth2 protocol.""" + return "https://accounts.google.com/o/oauth2/auth" + + @property + def authorization_url_extra_params(self) -> dict[str, str]: + """Extra parameters to add to the auth url.""" + return {"access_type": "offline"} + + @property + def token_endpoint_url(self) -> str: + """The token endpoint URL for the OAuth2 protocol.""" + return "https://oauth2.googleapis.com/token" + + @property + def api_url(self) -> str: + """The URL used for API calls on the Resource Server.""" + return "https://www.googleapis.com/oauth2/v2/" + + @property + def api_common_headers(self) -> dict[str, str] | None: + """The HTTP headers used for API calls on the Resource Server.""" + return { + "Accept": "application/json", + "Content-Type": "application/json", + } + + def api_validate_account_response(self, response: Response) -> models.ConnectedAccount: + """Validates and returns the connected account response from the Resource Server.""" + logger = logging.getLogger(self.__class__.__name__) + logger.warning(f"Account response: {response.json()}") + return external_models.GoogleConnectedAccount.model_validate(response.json()).to_connected_account() + + # class GoogleDriveAdapter(ProviderAdapter): # """Adapter for Google Drive OAuth2 clients.""" From 1794569bcf0718501365d070905828885fa9d88e Mon Sep 17 00:00:00 2001 From: Flora Thiebaut Date: Thu, 15 Jan 2026 09:55:15 +0000 Subject: [PATCH 06/54] oops --- .../renku_data_services/connected_services/provider_adapters.py | 1 + 1 file changed, 1 insertion(+) diff --git a/components/renku_data_services/connected_services/provider_adapters.py b/components/renku_data_services/connected_services/provider_adapters.py index 
a3eac6855..8824760fa 100644 --- a/components/renku_data_services/connected_services/provider_adapters.py +++ b/components/renku_data_services/connected_services/provider_adapters.py @@ -355,6 +355,7 @@ def __get_httpx_client(cls) -> Client: _adapter_map: dict[models.ProviderKind, type[ProviderAdapter]] = { models.ProviderKind.gitlab: GitLabAdapter, models.ProviderKind.github: GitHubAdapter, + models.ProviderKind.google: GoogleAdapter, # models.ProviderKind.drive: GoogleDriveAdapter, # models.ProviderKind.onedrive: OneDriveAdapter, # models.ProviderKind.dropbox: DropboxAdapter, From b439034ec7d6519df3ce84f9b40eed4d5932f48b Mon Sep 17 00:00:00 2001 From: Flora Thiebaut Date: Thu, 15 Jan 2026 10:32:03 +0000 Subject: [PATCH 07/54] adjust --- .../renku_data_services/connected_services/blueprints.py | 2 +- .../renku_data_services/connected_services/external_models.py | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/components/renku_data_services/connected_services/blueprints.py b/components/renku_data_services/connected_services/blueprints.py index 73d871a5d..8a71f3588 100644 --- a/components/renku_data_services/connected_services/blueprints.py +++ b/components/renku_data_services/connected_services/blueprints.py @@ -202,7 +202,7 @@ async def _get_account(_: Request, user: base_models.APIUser, connection_id: ULI account = await client.get_connected_account() match account: case OAuthHttpError() as err: - raise errors.InvalidTokenError(message=f"OAuth error getting the connected accoun: {err}") + raise errors.InvalidTokenError(message=f"OAuth error getting the connected account: {err}") case account: return validated_json(apispec.ConnectedAccount, account) diff --git a/components/renku_data_services/connected_services/external_models.py b/components/renku_data_services/connected_services/external_models.py index 1e64e2da9..83c89ec85 100644 --- a/components/renku_data_services/connected_services/external_models.py +++ 
b/components/renku_data_services/connected_services/external_models.py @@ -66,12 +66,11 @@ def to_app_installation_list(self) -> models.AppInstallationList: class GoogleConnectedAccount(BaseModel): """OAuth2 connected account model for Google.""" - name: str email: str def to_connected_account(self) -> models.ConnectedAccount: """Returns the corresponding ConnectedAccount object.""" - return models.ConnectedAccount(username=self.name, web_url=f"mailto:{self.email}") + return models.ConnectedAccount(username=self.email, web_url="") # class GoogleDriveConnectedAccount(BaseModel): From a105f2018e9161bb7f95c69522058c544e0ee234 Mon Sep 17 00:00:00 2001 From: Flora Thiebaut Date: Thu, 15 Jan 2026 14:24:22 +0000 Subject: [PATCH 08/54] wip: placeholder to handle oauth phase 1 --- components/renku_data_services/notebooks/core_sessions.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/components/renku_data_services/notebooks/core_sessions.py b/components/renku_data_services/notebooks/core_sessions.py index 2b216eedc..2503b3934 100644 --- a/components/renku_data_services/notebooks/core_sessions.py +++ b/components/renku_data_services/notebooks/core_sessions.py @@ -270,7 +270,13 @@ async def get_data_sources( dcs: dict[str, RCloneStorage] = {} dcs_secrets: dict[str, list[DataConnectorSecret]] = {} user_secret_key: str | None = None + + logger = logging.getLogger(get_data_sources.__name__) + async for dc in data_connectors_stream: + if dc.data_connector.storage.configuration["type"] == "drive": + logger.warning(f"Skipping drive DC {str(dc.data_connector.id)}.") + continue mount_folder = ( dc.data_connector.storage.target_path if PurePosixPath(dc.data_connector.storage.target_path).is_absolute() From ad792d78fb0deaa72625f9a1f4793dc5368cab2a Mon Sep 17 00:00:00 2001 From: Flora Thiebaut Date: Thu, 15 Jan 2026 15:19:10 +0000 Subject: [PATCH 09/54] wip: handle access token (no refresh) --- .../notebooks/core_sessions.py | 50 +++++++++++++++++-- 1 file changed, 47 
insertions(+), 3 deletions(-) diff --git a/components/renku_data_services/notebooks/core_sessions.py b/components/renku_data_services/notebooks/core_sessions.py index 2503b3934..f2de4df35 100644 --- a/components/renku_data_services/notebooks/core_sessions.py +++ b/components/renku_data_services/notebooks/core_sessions.py @@ -21,6 +21,11 @@ from renku_data_services.app_config import logging from renku_data_services.base_models import RESET, AnonymousAPIUser, APIUser, AuthenticatedAPIUser, ResetType from renku_data_services.base_models.metrics import MetricsService +from renku_data_services.connected_services.db import ConnectedServicesRepository +from renku_data_services.connected_services.models import OAuth2TokenSet, ProviderKind +from renku_data_services.connected_services.oauth_http import ( + OAuthHttpFactoryError, +) from renku_data_services.crc.db import ClusterRepository, ResourcePoolRepository from renku_data_services.crc.models import ( ClusterSettings, @@ -263,6 +268,8 @@ async def get_data_sources( work_dir: PurePosixPath, data_connectors_overrides: list[SessionDataConnectorOverride], user_repo: UserRepo, + connected_services_repo: ConnectedServicesRepository, + image_check_repo: ImageCheckRepository, ) -> SessionExtraResources: """Generate cloud storage related resources.""" data_sources: list[DataSource] = [] @@ -274,9 +281,43 @@ async def get_data_sources( logger = logging.getLogger(get_data_sources.__name__) async for dc in data_connectors_stream: + configuration = dc.data_connector.storage.configuration if dc.data_connector.storage.configuration["type"] == "drive": - logger.warning(f"Skipping drive DC {str(dc.data_connector.id)}.") - continue + # TODO: move some logic to the repo, see how it is done for images + providers = await connected_services_repo.get_oauth2_clients(user=user) + drive_provider = next(filter(lambda p: p.kind == ProviderKind.google, providers), None) + connections = await connected_services_repo.get_oauth2_connections(user=user) 
+ drive_connection = next( + filter(lambda c: drive_provider is not None and c.provider_id == drive_provider.id, connections), None + ) + if drive_connection is None: + logger.warning( + f"Skipping Google Drive DC {str(dc.data_connector.id)} because no OAuth connection found." + ) + continue + token_set: OAuth2TokenSet | None = None + client_or_error = await image_check_repo.oauth_client_factory.for_user_connection( + user=user, connection_id=drive_connection.id + ) + match client_or_error: + case OAuthHttpFactoryError() as err: + logger.info(f"Error getting oauth client for user={user} connection={drive_connection.id}: {err}") + case client: + token_set = await client.get_token() + if not token_set: + logger.warning( + f"Skipping Google Drive DC {str(dc.data_connector.id)} because the connection is not active." + ) + continue + logger.warning(f"Adjusting rclone configuration for DC {str(dc.data_connector.id)}.") + configuration["drive"] = configuration.get("drive") or "drive" + token_config = { + "access_token": token_set.access_token, + "token_type": "Bearer", + } + if token_set.expires_at_iso: + token_config["expiry"] = token_set.expires_at_iso + configuration["token"] = json.dumps(token_config) mount_folder = ( dc.data_connector.storage.target_path if PurePosixPath(dc.data_connector.storage.target_path).is_absolute() @@ -285,7 +326,7 @@ async def get_data_sources( dcs[str(dc.data_connector.id)] = RCloneStorage( source_path=dc.data_connector.storage.source_path, mount_folder=mount_folder, - configuration=dc.data_connector.storage.configuration, + configuration=configuration, readonly=dc.data_connector.storage.readonly, name=dc.data_connector.name, secrets={str(secret.secret_id): secret.name for secret in dc.secrets}, @@ -828,6 +869,9 @@ async def start_session( work_dir=work_dir, data_connectors_overrides=launch_request.data_connectors_overrides or [], user_repo=user_repo, + # TODO: maybe get the dependency explicitly + 
connected_services_repo=image_check_repo.connected_services_repo, + image_check_repo=image_check_repo, ) ) From c6678ffde290a5593901e8d2a6f64b467ca5d341 Mon Sep 17 00:00:00 2001 From: Flora Thiebaut Date: Thu, 15 Jan 2026 15:37:17 +0000 Subject: [PATCH 10/54] fixes --- components/renku_data_services/notebooks/core_sessions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/components/renku_data_services/notebooks/core_sessions.py b/components/renku_data_services/notebooks/core_sessions.py index f2de4df35..3319de207 100644 --- a/components/renku_data_services/notebooks/core_sessions.py +++ b/components/renku_data_services/notebooks/core_sessions.py @@ -304,13 +304,13 @@ async def get_data_sources( logger.info(f"Error getting oauth client for user={user} connection={drive_connection.id}: {err}") case client: token_set = await client.get_token() - if not token_set: + if not token_set or not token_set.access_token: logger.warning( f"Skipping Google Drive DC {str(dc.data_connector.id)} because the connection is not active." 
) continue logger.warning(f"Adjusting rclone configuration for DC {str(dc.data_connector.id)}.") - configuration["drive"] = configuration.get("drive") or "drive" + configuration["scope"] = configuration.get("drive") or "drive" token_config = { "access_token": token_set.access_token, "token_type": "Bearer", From 1b467754317986989116dc1f7b950155364afaed Mon Sep 17 00:00:00 2001 From: Flora Thiebaut Date: Fri, 16 Jan 2026 09:21:30 +0000 Subject: [PATCH 11/54] exp: token_endpoint --- .../connected_services/blueprints.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/components/renku_data_services/connected_services/blueprints.py b/components/renku_data_services/connected_services/blueprints.py index 8a71f3588..c7819b7ba 100644 --- a/components/renku_data_services/connected_services/blueprints.py +++ b/components/renku_data_services/connected_services/blueprints.py @@ -245,3 +245,13 @@ async def _get_installations( return body, installations_list.total_count return "/oauth2/connections//installations", ["GET"], _get_installations + + def post_token_endpoint(self) -> BlueprintFactoryResponse: + """OAuth 2.0 token endpoint to support applications running in sessions.""" + + async def _post_token_endpoint(request: Request, connection_id: ULID) -> JSONResponse: + logger.warning(f"post_token_endpoint: connection_id = {str(connection_id)}") + logger.warning(f"post_token_endpoint: request body = {request.body.decode("utf-8")}") + raise NotImplementedError("TODO: post_token_endpoint()") + + return "/oauth2/connections//token_endpoint", ["POST"], _post_token_endpoint From e384564be713a9815a49b8ca021ea04331cf9b3d Mon Sep 17 00:00:00 2001 From: Flora Thiebaut Date: Fri, 16 Jan 2026 09:48:41 +0000 Subject: [PATCH 12/54] exp: log more --- .../renku_data_services/connected_services/blueprints.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/components/renku_data_services/connected_services/blueprints.py 
b/components/renku_data_services/connected_services/blueprints.py index c7819b7ba..657381441 100644 --- a/components/renku_data_services/connected_services/blueprints.py +++ b/components/renku_data_services/connected_services/blueprints.py @@ -249,8 +249,11 @@ async def _get_installations( def post_token_endpoint(self) -> BlueprintFactoryResponse: """OAuth 2.0 token endpoint to support applications running in sessions.""" + # TODO: handle this `post_token_endpoint: request body = client_id=AAA&client_secret=BBB&grant_type=refresh_token&refresh_token=hello-world` # noqa E501 async def _post_token_endpoint(request: Request, connection_id: ULID) -> JSONResponse: logger.warning(f"post_token_endpoint: connection_id = {str(connection_id)}") + logger.warning(f"post_token_endpoint: request headers = {list(request.headers.keys())}") + logger.warning(f"post_token_endpoint: request content-type = {request.headers.get("content-type")}") logger.warning(f"post_token_endpoint: request body = {request.body.decode("utf-8")}") raise NotImplementedError("TODO: post_token_endpoint()") From 56f2b0b2fd8ac1b1934f6a3e14fa3d90feed6bbc Mon Sep 17 00:00:00 2001 From: Flora Thiebaut Date: Fri, 16 Jan 2026 10:07:04 +0000 Subject: [PATCH 13/54] exp: try validation --- .../connected_services/apispec_extras.py | 27 +++++++++++++++++++ .../connected_services/blueprints.py | 9 ++++--- 2 files changed, 33 insertions(+), 3 deletions(-) create mode 100644 components/renku_data_services/connected_services/apispec_extras.py diff --git a/components/renku_data_services/connected_services/apispec_extras.py b/components/renku_data_services/connected_services/apispec_extras.py new file mode 100644 index 000000000..1115c064c --- /dev/null +++ b/components/renku_data_services/connected_services/apispec_extras.py @@ -0,0 +1,27 @@ +"""Extra definitions for the API spec.""" + +from __future__ import annotations + +from enum import StrEnum + +from pydantic import ConfigDict + +from 
renku_data_services.connected_services.apispec_base import BaseAPISpec + + +class PostTokenGrantType(StrEnum): + """Grant type for token refresh.""" + + refresh_token = "refresh_token" # nosec B105 + + +class PostTokenRequest(BaseAPISpec): + """Body for a refresh token request.""" + + model_config = ConfigDict( + extra="forbid", + ) + client_id: str + client_secret: str + grant_type: PostTokenGrantType + refresh_token: str diff --git a/components/renku_data_services/connected_services/blueprints.py b/components/renku_data_services/connected_services/blueprints.py index 657381441..8b83e7f38 100644 --- a/components/renku_data_services/connected_services/blueprints.py +++ b/components/renku_data_services/connected_services/blueprints.py @@ -17,7 +17,7 @@ from renku_data_services.base_api.misc import validate_query from renku_data_services.base_api.pagination import PaginationRequest, paginate from renku_data_services.base_models.validation import validate_and_dump, validated_json -from renku_data_services.connected_services import apispec +from renku_data_services.connected_services import apispec, apispec_extras from renku_data_services.connected_services.apispec_base import AuthorizeParams, CallbackParams from renku_data_services.connected_services.core import validate_oauth2_client_patch, validate_unsaved_oauth2_client from renku_data_services.connected_services.db import ConnectedServicesRepository @@ -249,12 +249,15 @@ async def _get_installations( def post_token_endpoint(self) -> BlueprintFactoryResponse: """OAuth 2.0 token endpoint to support applications running in sessions.""" - # TODO: handle this `post_token_endpoint: request body = client_id=AAA&client_secret=BBB&grant_type=refresh_token&refresh_token=hello-world` # noqa E501 - async def _post_token_endpoint(request: Request, connection_id: ULID) -> JSONResponse: + @validate(form=apispec_extras.PostTokenRequest) + async def _post_token_endpoint( + request: Request, body: apispec_extras.PostTokenRequest, 
connection_id: ULID + ) -> JSONResponse: logger.warning(f"post_token_endpoint: connection_id = {str(connection_id)}") logger.warning(f"post_token_endpoint: request headers = {list(request.headers.keys())}") logger.warning(f"post_token_endpoint: request content-type = {request.headers.get("content-type")}") logger.warning(f"post_token_endpoint: request body = {request.body.decode("utf-8")}") + logger.warning(f"post_token_endpoint: request body = {body}") raise NotImplementedError("TODO: post_token_endpoint()") return "/oauth2/connections//token_endpoint", ["POST"], _post_token_endpoint From 7c6434600039beb3f6d26957a323d6807cde4402 Mon Sep 17 00:00:00 2001 From: Flora Thiebaut Date: Fri, 16 Jan 2026 11:43:07 +0100 Subject: [PATCH 14/54] exp: comments --- .../connected_services/apispec_extras.py | 4 +--- .../connected_services/blueprints.py | 14 ++++++++++---- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/components/renku_data_services/connected_services/apispec_extras.py b/components/renku_data_services/connected_services/apispec_extras.py index 1115c064c..fc4acf5d2 100644 --- a/components/renku_data_services/connected_services/apispec_extras.py +++ b/components/renku_data_services/connected_services/apispec_extras.py @@ -19,9 +19,7 @@ class PostTokenRequest(BaseAPISpec): """Body for a refresh token request.""" model_config = ConfigDict( - extra="forbid", + extra="allow", ) - client_id: str - client_secret: str grant_type: PostTokenGrantType refresh_token: str diff --git a/components/renku_data_services/connected_services/blueprints.py b/components/renku_data_services/connected_services/blueprints.py index 8b83e7f38..67b9742f6 100644 --- a/components/renku_data_services/connected_services/blueprints.py +++ b/components/renku_data_services/connected_services/blueprints.py @@ -254,10 +254,16 @@ async def _post_token_endpoint( request: Request, body: apispec_extras.PostTokenRequest, connection_id: ULID ) -> JSONResponse: 
logger.warning(f"post_token_endpoint: connection_id = {str(connection_id)}") - logger.warning(f"post_token_endpoint: request headers = {list(request.headers.keys())}") - logger.warning(f"post_token_endpoint: request content-type = {request.headers.get("content-type")}") - logger.warning(f"post_token_endpoint: request body = {request.body.decode("utf-8")}") - logger.warning(f"post_token_endpoint: request body = {body}") + logger.warning(f"post_token_endpoint: request body grant_type = {body.grant_type.value}") + logger.warning(f"post_token_endpoint: request body refresh_token = {len(body.refresh_token)}") + + # TODO: + # 1. Decode the refresh_token value -> RenkuTokens + # 2. Validate the access_token -> if valid, send back the new OAuth 2.0 access token + # and the new encoded refresh_token + # 3. If access_token is expired, use the renku refresh_token -> if new tokens are valid, + # send back the new OAuth 2.0 access token and the new encoded refresh_token + raise NotImplementedError("TODO: post_token_endpoint()") return "/oauth2/connections//token_endpoint", ["POST"], _post_token_endpoint From 17b66178f91f72ab855ae6cff8313de492524257 Mon Sep 17 00:00:00 2001 From: Flora Thiebaut Date: Fri, 16 Jan 2026 12:55:07 +0000 Subject: [PATCH 15/54] exp: try to do auth step 1 --- .../connected_services/apispec_extras.py | 23 +++++++++++++++++++ .../connected_services/blueprints.py | 22 ++++++++++++++++-- 2 files changed, 43 insertions(+), 2 deletions(-) diff --git a/components/renku_data_services/connected_services/apispec_extras.py b/components/renku_data_services/connected_services/apispec_extras.py index fc4acf5d2..39112e754 100644 --- a/components/renku_data_services/connected_services/apispec_extras.py +++ b/components/renku_data_services/connected_services/apispec_extras.py @@ -2,7 +2,9 @@ from __future__ import annotations +import base64 from enum import StrEnum +from typing import Self from pydantic import ConfigDict @@ -23,3 +25,24 @@ class 
PostTokenRequest(BaseAPISpec): ) grant_type: PostTokenGrantType refresh_token: str + + +class RenkuTokens(BaseAPISpec): + """Represents a set of authentication tokens used in Renku.""" + + model_config = ConfigDict( + extra="forbid", + ) + access_token: str + refresh_token: str + + def encode(self) -> str: + """Encode the Renku tokens as a single URL-safe string.""" + as_json = self.model_dump_json() + return base64.urlsafe_b64encode(as_json.encode("utf-8")).decode("utf-8") + + @classmethod + def decode(cls, encoded: str) -> Self: + """Decode a single string into a set of Renku tokens.""" + json_raw = base64.urlsafe_b64decode(encoded.encode("utf-8")) + return cls.model_validate_json(json_raw) diff --git a/components/renku_data_services/connected_services/blueprints.py b/components/renku_data_services/connected_services/blueprints.py index 67b9742f6..6d0bf0fe3 100644 --- a/components/renku_data_services/connected_services/blueprints.py +++ b/components/renku_data_services/connected_services/blueprints.py @@ -1,7 +1,7 @@ """Connected services blueprint.""" from dataclasses import dataclass -from typing import Any +from typing import Any, cast from urllib.parse import unquote, urlparse, urlunparse from sanic import HTTPResponse, Request, empty, json, redirect @@ -255,7 +255,25 @@ async def _post_token_endpoint( ) -> JSONResponse: logger.warning(f"post_token_endpoint: connection_id = {str(connection_id)}") logger.warning(f"post_token_endpoint: request body grant_type = {body.grant_type.value}") - logger.warning(f"post_token_endpoint: request body refresh_token = {len(body.refresh_token)}") + logger.warning(f"post_token_endpoint: request body refresh_token = {body.refresh_token}") + + renku_tokens = apispec_extras.RenkuTokens.decode(body.refresh_token) + logger.warning(f"post_token_endpoint: renku_tokens = {renku_tokens}") + request.headers[self.authenticator.token_field] = renku_tokens.access_token + + access_token: str | None = None + try: + user = await 
self.authenticator.authenticate( + access_token=renku_tokens.access_token or "", request=request + ) + user = cast(base_models.APIUser, user) + if user.is_authenticated and user.access_token: + access_token = user.access_token + except Exception as err: + logger.error(f"Got authenticate error: {err.__class__}.") + raise + + logger.warning(f"post_token_endpoint: access_token = {access_token}") # TODO: # 1. Decode the refresh_token value -> RenkuTokens From 5037a7e3ce5ecdb524404fe58fcba3486d24fa76 Mon Sep 17 00:00:00 2001 From: Flora Thiebaut Date: Fri, 16 Jan 2026 13:32:43 +0000 Subject: [PATCH 16/54] try to log other attemtps --- .../connected_services/blueprints.py | 78 +++++++++++-------- 1 file changed, 45 insertions(+), 33 deletions(-) diff --git a/components/renku_data_services/connected_services/blueprints.py b/components/renku_data_services/connected_services/blueprints.py index 6d0bf0fe3..a8b2fb730 100644 --- a/components/renku_data_services/connected_services/blueprints.py +++ b/components/renku_data_services/connected_services/blueprints.py @@ -249,39 +249,51 @@ async def _get_installations( def post_token_endpoint(self) -> BlueprintFactoryResponse: """OAuth 2.0 token endpoint to support applications running in sessions.""" - @validate(form=apispec_extras.PostTokenRequest) - async def _post_token_endpoint( - request: Request, body: apispec_extras.PostTokenRequest, connection_id: ULID - ) -> JSONResponse: + # @validate(form=apispec_extras.PostTokenRequest) + # async def _post_token_endpoint( + # request: Request, body: apispec_extras.PostTokenRequest, connection_id: ULID + # ) -> JSONResponse: + async def _post_token_endpoint(request: Request, connection_id: ULID) -> JSONResponse: logger.warning(f"post_token_endpoint: connection_id = {str(connection_id)}") - logger.warning(f"post_token_endpoint: request body grant_type = {body.grant_type.value}") - logger.warning(f"post_token_endpoint: request body refresh_token = {body.refresh_token}") - - renku_tokens = 
apispec_extras.RenkuTokens.decode(body.refresh_token) - logger.warning(f"post_token_endpoint: renku_tokens = {renku_tokens}") - request.headers[self.authenticator.token_field] = renku_tokens.access_token - - access_token: str | None = None - try: - user = await self.authenticator.authenticate( - access_token=renku_tokens.access_token or "", request=request - ) - user = cast(base_models.APIUser, user) - if user.is_authenticated and user.access_token: - access_token = user.access_token - except Exception as err: - logger.error(f"Got authenticate error: {err.__class__}.") - raise - - logger.warning(f"post_token_endpoint: access_token = {access_token}") - - # TODO: - # 1. Decode the refresh_token value -> RenkuTokens - # 2. Validate the access_token -> if valid, send back the new OAuth 2.0 access token - # and the new encoded refresh_token - # 3. If access_token is expired, use the renku refresh_token -> if new tokens are valid, - # send back the new OAuth 2.0 access token and the new encoded refresh_token - - raise NotImplementedError("TODO: post_token_endpoint()") + logger.warning(f"post_token_endpoint: request headers = {list(request.headers.keys())}") + logger.warning(f"post_token_endpoint: request content-type = {request.headers.get("content-type")}") + logger.warning(f"post_token_endpoint: request body = {request.body!r}") + + @validate(form=apispec_extras.PostTokenRequest) + async def _inner( + request: Request, body: apispec_extras.PostTokenRequest, connection_id: ULID + ) -> JSONResponse: + logger.warning(f"post_token_endpoint: request body grant_type = {body.grant_type.value}") + logger.warning(f"post_token_endpoint: request body refresh_token = {body.refresh_token}") + + renku_tokens = apispec_extras.RenkuTokens.decode(body.refresh_token) + logger.warning(f"post_token_endpoint: renku_tokens = {renku_tokens}") + request.headers[self.authenticator.token_field] = renku_tokens.access_token + + access_token: str | None = None + try: + user = await 
self.authenticator.authenticate( + access_token=renku_tokens.access_token or "", request=request + ) + user = cast(base_models.APIUser, user) + if user.is_authenticated and user.access_token: + access_token = user.access_token + except Exception as err: + logger.error(f"Got authenticate error: {err.__class__}.") + raise + + logger.warning(f"post_token_endpoint: access_token = {access_token}") + + # TODO: + # 1. Decode the refresh_token value -> RenkuTokens + # 2. Validate the access_token -> if valid, send back the new OAuth 2.0 access token + # and the new encoded refresh_token + # 3. If access_token is expired, use the renku refresh_token -> if new tokens are valid, + # send back the new OAuth 2.0 access token and the new encoded refresh_token + + raise NotImplementedError("TODO: post_token_endpoint()") + + res = await _inner(request=request, connection_id=connection_id) # type: ignore + return res return "/oauth2/connections//token_endpoint", ["POST"], _post_token_endpoint From 1242786eceb08f329820893d72b3d9c62e75343b Mon Sep 17 00:00:00 2001 From: Flora Thiebaut Date: Fri, 16 Jan 2026 13:48:43 +0000 Subject: [PATCH 17/54] fix? 
--- components/renku_data_services/connected_services/blueprints.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/components/renku_data_services/connected_services/blueprints.py b/components/renku_data_services/connected_services/blueprints.py index a8b2fb730..b1de44129 100644 --- a/components/renku_data_services/connected_services/blueprints.py +++ b/components/renku_data_services/connected_services/blueprints.py @@ -293,7 +293,7 @@ async def _inner( raise NotImplementedError("TODO: post_token_endpoint()") - res = await _inner(request=request, connection_id=connection_id) # type: ignore + res = await _inner(request, connection_id=connection_id) # type: ignore return res return "/oauth2/connections//token_endpoint", ["POST"], _post_token_endpoint From dbcfff9172b2d89a1a2e9ffc5494d1dedd3e3462 Mon Sep 17 00:00:00 2001 From: Flora Thiebaut Date: Fri, 16 Jan 2026 14:08:45 +0000 Subject: [PATCH 18/54] fix? sanic quirks... --- .../renku_data_services/connected_services/apispec_extras.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/components/renku_data_services/connected_services/apispec_extras.py b/components/renku_data_services/connected_services/apispec_extras.py index 39112e754..7ce82accb 100644 --- a/components/renku_data_services/connected_services/apispec_extras.py +++ b/components/renku_data_services/connected_services/apispec_extras.py @@ -35,6 +35,8 @@ class RenkuTokens(BaseAPISpec): ) access_token: str refresh_token: str + client_id: str | None + client_secret: str | None def encode(self) -> str: """Encode the Renku tokens as a single URL-safe string.""" From f71ac6bc1f530b54027097ffabc01ba16fffcd23 Mon Sep 17 00:00:00 2001 From: Flora Thiebaut Date: Fri, 16 Jan 2026 14:29:06 +0000 Subject: [PATCH 19/54] try to return a valid response --- .../connected_services/apispec_extras.py | 13 ++++++++ .../connected_services/blueprints.py | 33 +++++++++++++++---- 2 files changed, 40 insertions(+), 6 deletions(-) diff --git 
a/components/renku_data_services/connected_services/apispec_extras.py b/components/renku_data_services/connected_services/apispec_extras.py index 7ce82accb..19c02680a 100644 --- a/components/renku_data_services/connected_services/apispec_extras.py +++ b/components/renku_data_services/connected_services/apispec_extras.py @@ -27,6 +27,19 @@ class PostTokenRequest(BaseAPISpec): refresh_token: str +class PostTokenResponse(BaseAPISpec): + """Response for a refresh token request.""" + + model_config = ConfigDict( + extra="forbid", + ) + access_token: str + token_type: str + expires_in: int + refresh_token: str + scope: str | None + + class RenkuTokens(BaseAPISpec): """Represents a set of authentication tokens used in Renku.""" diff --git a/components/renku_data_services/connected_services/blueprints.py b/components/renku_data_services/connected_services/blueprints.py index b1de44129..2e584f295 100644 --- a/components/renku_data_services/connected_services/blueprints.py +++ b/components/renku_data_services/connected_services/blueprints.py @@ -1,6 +1,8 @@ """Connected services blueprint.""" +import math from dataclasses import dataclass +from datetime import UTC, datetime from typing import Any, cast from urllib.parse import unquote, urlparse, urlunparse @@ -270,19 +272,38 @@ async def _inner( logger.warning(f"post_token_endpoint: renku_tokens = {renku_tokens}") request.headers[self.authenticator.token_field] = renku_tokens.access_token - access_token: str | None = None + user: base_models.APIUser | None = None try: - user = await self.authenticator.authenticate( + _user = await self.authenticator.authenticate( access_token=renku_tokens.access_token or "", request=request ) - user = cast(base_models.APIUser, user) - if user.is_authenticated and user.access_token: - access_token = user.access_token + _user = cast(base_models.APIUser, user) + if _user.is_authenticated and _user.access_token: + user = _user except Exception as err: logger.error(f"Got authenticate error: 
{err.__class__}.") raise - logger.warning(f"post_token_endpoint: access_token = {access_token}") + logger.warning(f"post_token_endpoint: user = {user}") + + if user is not None and user.is_authenticated: + client = await self.oauth_client_factory.for_user_connection_raise(user, connection_id) + oauth_token = await client.get_token() + access_token = oauth_token.access_token + if access_token is None: + raise errors.ProgrammingError(message="Unexpected error: access token not present.") + result: dict[str, str | int] = { + "access_token": access_token, + "token_type": str(oauth_token.get("token_type")) or "Bearer", + "refresh_token": renku_tokens.encode(), + } + if oauth_token.get("scope"): + result["scope"] = oauth_token["scope"] + if oauth_token.expires_at: + exp = datetime.fromtimestamp(oauth_token.expires_at, UTC) + expires_in = exp - datetime.now(UTC) + result["expires_in"] = math.ceil(expires_in.total_seconds()) + return validated_json(apispec_extras.PostTokenResponse, result) # TODO: # 1. 
Decode the refresh_token value -> RenkuTokens From 72a0bfebc098f2f165354fa97e1f6d9f500761fe Mon Sep 17 00:00:00 2001 From: Flora Thiebaut Date: Fri, 16 Jan 2026 14:42:47 +0000 Subject: [PATCH 20/54] fix, oops --- .../connected_services/apispec_extras.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/components/renku_data_services/connected_services/apispec_extras.py b/components/renku_data_services/connected_services/apispec_extras.py index 19c02680a..f961797d9 100644 --- a/components/renku_data_services/connected_services/apispec_extras.py +++ b/components/renku_data_services/connected_services/apispec_extras.py @@ -6,7 +6,7 @@ from enum import StrEnum from typing import Self -from pydantic import ConfigDict +from pydantic import ConfigDict, Field from renku_data_services.connected_services.apispec_base import BaseAPISpec @@ -25,6 +25,8 @@ class PostTokenRequest(BaseAPISpec): ) grant_type: PostTokenGrantType refresh_token: str + client_id: str | None = Field(None) + client_secret: str | None = Field(None) class PostTokenResponse(BaseAPISpec): @@ -48,8 +50,6 @@ class RenkuTokens(BaseAPISpec): ) access_token: str refresh_token: str - client_id: str | None - client_secret: str | None def encode(self) -> str: """Encode the Renku tokens as a single URL-safe string.""" From 83d4b0df545dc250a5a31789168bbd371a6138dd Mon Sep 17 00:00:00 2001 From: Flora Thiebaut Date: Fri, 16 Jan 2026 14:53:49 +0000 Subject: [PATCH 21/54] fix --- .../renku_data_services/connected_services/blueprints.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/components/renku_data_services/connected_services/blueprints.py b/components/renku_data_services/connected_services/blueprints.py index 2e584f295..cfa0f1b39 100644 --- a/components/renku_data_services/connected_services/blueprints.py +++ b/components/renku_data_services/connected_services/blueprints.py @@ -274,10 +274,12 @@ async def _inner( user: base_models.APIUser | None = None try: - _user 
= await self.authenticator.authenticate( - access_token=renku_tokens.access_token or "", request=request + _user = cast( + base_models.APIUser, + await self.authenticator.authenticate( + access_token=renku_tokens.access_token or "", request=request + ), ) - _user = cast(base_models.APIUser, user) if _user.is_authenticated and _user.access_token: user = _user except Exception as err: From 30b19178becf00c7135d2bb478b2c54eb7170332 Mon Sep 17 00:00:00 2001 From: Flora Thiebaut Date: Mon, 19 Jan 2026 10:12:20 +0000 Subject: [PATCH 22/54] wip: handle renku token refresh --- bases/renku_data_services/data_api/app.py | 1 + .../connected_services/blueprints.py | 136 ++++++++++-------- 2 files changed, 77 insertions(+), 60 deletions(-) diff --git a/bases/renku_data_services/data_api/app.py b/bases/renku_data_services/data_api/app.py index 8e2c8778d..22d55d9bc 100644 --- a/bases/renku_data_services/data_api/app.py +++ b/bases/renku_data_services/data_api/app.py @@ -185,6 +185,7 @@ def register_all_handlers(app: Sanic, dm: DependencyManager) -> Sanic: connected_services_repo=dm.connected_services_repo, oauth_client_factory=dm.oauth_http_client_factory, authenticator=dm.authenticator, + nb_config=dm.config.nb_config, ) repositories = RepositoriesBP( name="repositories", diff --git a/components/renku_data_services/connected_services/blueprints.py b/components/renku_data_services/connected_services/blueprints.py index cfa0f1b39..fbc543bbf 100644 --- a/components/renku_data_services/connected_services/blueprints.py +++ b/components/renku_data_services/connected_services/blueprints.py @@ -6,6 +6,7 @@ from typing import Any, cast from urllib.parse import unquote, urlparse, urlunparse +import httpx from sanic import HTTPResponse, Request, empty, json, redirect from sanic.response import JSONResponse from sanic_ext import validate @@ -28,6 +29,7 @@ OAuthHttpError, OAuthHttpFactoryError, ) +from renku_data_services.notebooks.config import NotebooksConfig logger = 
logging.getLogger(__name__) @@ -161,6 +163,7 @@ class OAuth2ConnectionsBP(CustomBlueprint): connected_services_repo: ConnectedServicesRepository oauth_client_factory: OAuthHttpClientFactory authenticator: base_models.Authenticator + nb_config: NotebooksConfig def get_all(self) -> BlueprintFactoryResponse: """List all OAuth2 connections.""" @@ -251,72 +254,85 @@ async def _get_installations( def post_token_endpoint(self) -> BlueprintFactoryResponse: """OAuth 2.0 token endpoint to support applications running in sessions.""" - # @validate(form=apispec_extras.PostTokenRequest) - # async def _post_token_endpoint( - # request: Request, body: apispec_extras.PostTokenRequest, connection_id: ULID - # ) -> JSONResponse: - async def _post_token_endpoint(request: Request, connection_id: ULID) -> JSONResponse: + @validate(form=apispec_extras.PostTokenRequest) + async def _post_token_endpoint( + request: Request, body: apispec_extras.PostTokenRequest, connection_id: ULID + ) -> JSONResponse: logger.warning(f"post_token_endpoint: connection_id = {str(connection_id)}") logger.warning(f"post_token_endpoint: request headers = {list(request.headers.keys())}") logger.warning(f"post_token_endpoint: request content-type = {request.headers.get("content-type")}") logger.warning(f"post_token_endpoint: request body = {request.body!r}") - @validate(form=apispec_extras.PostTokenRequest) - async def _inner( - request: Request, body: apispec_extras.PostTokenRequest, connection_id: ULID - ) -> JSONResponse: - logger.warning(f"post_token_endpoint: request body grant_type = {body.grant_type.value}") - logger.warning(f"post_token_endpoint: request body refresh_token = {body.refresh_token}") - - renku_tokens = apispec_extras.RenkuTokens.decode(body.refresh_token) - logger.warning(f"post_token_endpoint: renku_tokens = {renku_tokens}") - request.headers[self.authenticator.token_field] = renku_tokens.access_token - - user: base_models.APIUser | None = None - try: - _user = cast( - base_models.APIUser, 
- await self.authenticator.authenticate( - access_token=renku_tokens.access_token or "", request=request - ), + logger.warning(f"post_token_endpoint: request body grant_type = {body.grant_type.value}") + logger.warning(f"post_token_endpoint: request body refresh_token = {body.refresh_token}") + + renku_tokens = apispec_extras.RenkuTokens.decode(body.refresh_token) + logger.warning(f"post_token_endpoint: renku_tokens = {renku_tokens}") + request.headers[self.authenticator.token_field] = renku_tokens.access_token + + user: base_models.APIUser | None = None + try: + _user = cast( + base_models.APIUser, + await self.authenticator.authenticate( + access_token=renku_tokens.access_token or "", request=request + ), + ) + if _user.is_authenticated and _user.access_token: + user = _user + except Exception as err: + logger.error(f"Got authenticate error: {err.__class__}.") + raise + + logger.warning(f"post_token_endpoint: user = {user}") + + # Try to refresh the Renku access token + if user is None and renku_tokens.refresh_token: + renku_base_url = "https://" + self.nb_config.sessions.ingress.host + renku_base_url = renku_base_url.rstrip("/") + renku_realm = self.nb_config.keycloak_realm + renku_auth_token_uri = f"{renku_base_url}/auth/realms/{renku_realm}/protocol/openid-connect/token" + + async with httpx.AsyncClient(timeout=10) as http: + auth = ( + self.nb_config.sessions.git_proxy.renku_client_id, + self.nb_config.sessions.git_proxy.renku_client_secret, ) - if _user.is_authenticated and _user.access_token: - user = _user - except Exception as err: - logger.error(f"Got authenticate error: {err.__class__}.") - raise - - logger.warning(f"post_token_endpoint: user = {user}") - - if user is not None and user.is_authenticated: - client = await self.oauth_client_factory.for_user_connection_raise(user, connection_id) - oauth_token = await client.get_token() - access_token = oauth_token.access_token - if access_token is None: - raise errors.ProgrammingError(message="Unexpected 
error: access token not present.") - result: dict[str, str | int] = { - "access_token": access_token, - "token_type": str(oauth_token.get("token_type")) or "Bearer", - "refresh_token": renku_tokens.encode(), + payload = { + "grant_type": "refresh_token", + "refresh_token": renku_tokens.refresh_token, } - if oauth_token.get("scope"): - result["scope"] = oauth_token["scope"] - if oauth_token.expires_at: - exp = datetime.fromtimestamp(oauth_token.expires_at, UTC) - expires_in = exp - datetime.now(UTC) - result["expires_in"] = math.ceil(expires_in.total_seconds()) - return validated_json(apispec_extras.PostTokenResponse, result) - - # TODO: - # 1. Decode the refresh_token value -> RenkuTokens - # 2. Validate the access_token -> if valid, send back the new OAuth 2.0 access token - # and the new encoded refresh_token - # 3. If access_token is expired, use the renku refresh_token -> if new tokens are valid, - # send back the new OAuth 2.0 access token and the new encoded refresh_token - - raise NotImplementedError("TODO: post_token_endpoint()") - - res = await _inner(request, connection_id=connection_id) # type: ignore - return res + response = await http.post(renku_auth_token_uri, auth=auth, data=payload) + logger.warning(f"Get refresh response from Keycloak: {response}") + logger.warning(f"Get refresh response from Keycloak: {response.json()}") + pass + + if user is not None and user.is_authenticated: + client = await self.oauth_client_factory.for_user_connection_raise(user, connection_id) + oauth_token = await client.get_token() + access_token = oauth_token.access_token + if access_token is None: + raise errors.ProgrammingError(message="Unexpected error: access token not present.") + result: dict[str, str | int] = { + "access_token": access_token, + "token_type": str(oauth_token.get("token_type")) or "Bearer", + "refresh_token": renku_tokens.encode(), + } + if oauth_token.get("scope"): + result["scope"] = oauth_token["scope"] + if oauth_token.expires_at: + exp = 
datetime.fromtimestamp(oauth_token.expires_at, UTC) + expires_in = exp - datetime.now(UTC) + result["expires_in"] = math.ceil(expires_in.total_seconds()) + return validated_json(apispec_extras.PostTokenResponse, result) + + # TODO: + # 1. Decode the refresh_token value -> RenkuTokens + # 2. Validate the access_token -> if valid, send back the new OAuth 2.0 access token + # and the new encoded refresh_token + # 3. If access_token is expired, use the renku refresh_token -> if new tokens are valid, + # send back the new OAuth 2.0 access token and the new encoded refresh_token + + raise NotImplementedError("TODO: post_token_endpoint()") return "/oauth2/connections//token_endpoint", ["POST"], _post_token_endpoint From d1aa79651e5acd2276b4f2ddc569e57b08a5834d Mon Sep 17 00:00:00 2001 From: Flora Thiebaut Date: Mon, 19 Jan 2026 11:58:04 +0000 Subject: [PATCH 23/54] exp: use refresh token --- .../connected_services/apispec_extras.py | 1 + .../connected_services/blueprints.py | 47 +++++++++++++++++-- 2 files changed, 45 insertions(+), 3 deletions(-) diff --git a/components/renku_data_services/connected_services/apispec_extras.py b/components/renku_data_services/connected_services/apispec_extras.py index f961797d9..72d3d9760 100644 --- a/components/renku_data_services/connected_services/apispec_extras.py +++ b/components/renku_data_services/connected_services/apispec_extras.py @@ -39,6 +39,7 @@ class PostTokenResponse(BaseAPISpec): token_type: str expires_in: int refresh_token: str + refresh_expires_in: int | None = Field(None) scope: str | None diff --git a/components/renku_data_services/connected_services/blueprints.py b/components/renku_data_services/connected_services/blueprints.py index fbc543bbf..00c3d1d86 100644 --- a/components/renku_data_services/connected_services/blueprints.py +++ b/components/renku_data_services/connected_services/blueprints.py @@ -33,6 +33,18 @@ logger = logging.getLogger(__name__) +EXP: dict[str, str | int] = { + "access_token": "AAA", + 
"expires_in": 1800, + "refresh_expires_in": 86400, + "refresh_token": "BBB", + "token_type": "Bearer", + "id_token": "CCC", + "not-before-policy": 0, + "session_state": "6b46b29f-58e3-49d8-aada-bb8a2c2a258b", + "scope": "openid microprofile-jwt email profile", +} + @dataclass(kw_only=True) class OAuth2ClientsBP(CustomBlueprint): @@ -302,10 +314,39 @@ async def _post_token_endpoint( "grant_type": "refresh_token", "refresh_token": renku_tokens.refresh_token, } - response = await http.post(renku_auth_token_uri, auth=auth, data=payload) + response = await http.post(renku_auth_token_uri, auth=auth, data=payload, follow_redirects=True) logger.warning(f"Get refresh response from Keycloak: {response}") logger.warning(f"Get refresh response from Keycloak: {response.json()}") - pass + if 200 <= response.status_code < 300: + try: + parsed_response = apispec_extras.PostTokenResponse.model_validate_json(response.content) + except Exception as err: + logger.error(f"Failed to parse refreshed Renku tokens: {err.__class__}.") + raise + try: + renku_tokens.access_token = parsed_response.access_token + renku_tokens.refresh_token = parsed_response.refresh_token + request.headers[self.authenticator.token_field] = renku_tokens.access_token + _user = cast( + base_models.APIUser, + await self.authenticator.authenticate( + access_token=renku_tokens.access_token or "", request=request + ), + ) + if _user.is_authenticated and _user.access_token: + user = _user + except Exception as err: + logger.error(f"Got authenticate error: {err.__class__}.") + raise + else: + # Handle bad response: Get refresh response from Keycloak: + # Get refresh response from Keycloak: + # {'error': 'invalid_grant', 'error_description': 'Invalid refresh token'} + logger.error( + f"Got error from refreshing Renku tokens: HTTP {response.status_code}; {response.json()}." 
+ ) + raise errors.UnauthorizedError() + logger.warning(f"post_token_endpoint: user = {user}") if user is not None and user.is_authenticated: client = await self.oauth_client_factory.for_user_connection_raise(user, connection_id) @@ -333,6 +374,6 @@ async def _post_token_endpoint( # 3. If access_token is expired, use the renku refresh_token -> if new tokens are valid, # send back the new OAuth 2.0 access token and the new encoded refresh_token - raise NotImplementedError("TODO: post_token_endpoint()") + raise errors.UnauthorizedError() return "/oauth2/connections//token_endpoint", ["POST"], _post_token_endpoint From 4f7ddddedcda6027dad78da13b2ea6ff66e51cb5 Mon Sep 17 00:00:00 2001 From: Flora Thiebaut Date: Mon, 19 Jan 2026 12:12:14 +0000 Subject: [PATCH 24/54] fix --- .../renku_data_services/connected_services/apispec_extras.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/components/renku_data_services/connected_services/apispec_extras.py b/components/renku_data_services/connected_services/apispec_extras.py index 72d3d9760..252e650c8 100644 --- a/components/renku_data_services/connected_services/apispec_extras.py +++ b/components/renku_data_services/connected_services/apispec_extras.py @@ -33,7 +33,7 @@ class PostTokenResponse(BaseAPISpec): """Response for a refresh token request.""" model_config = ConfigDict( - extra="forbid", + extra="allow", ) access_token: str token_type: str From 51cd1b0866cf5193781001590eef09d6199aed85 Mon Sep 17 00:00:00 2001 From: Flora Thiebaut Date: Mon, 19 Jan 2026 12:25:12 +0000 Subject: [PATCH 25/54] cleanup --- .../connected_services/blueprints.py | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/components/renku_data_services/connected_services/blueprints.py b/components/renku_data_services/connected_services/blueprints.py index 00c3d1d86..d6fe14d2f 100644 --- a/components/renku_data_services/connected_services/blueprints.py +++ 
b/components/renku_data_services/connected_services/blueprints.py @@ -33,18 +33,6 @@ logger = logging.getLogger(__name__) -EXP: dict[str, str | int] = { - "access_token": "AAA", - "expires_in": 1800, - "refresh_expires_in": 86400, - "refresh_token": "BBB", - "token_type": "Bearer", - "id_token": "CCC", - "not-before-policy": 0, - "session_state": "6b46b29f-58e3-49d8-aada-bb8a2c2a258b", - "scope": "openid microprofile-jwt email profile", -} - @dataclass(kw_only=True) class OAuth2ClientsBP(CustomBlueprint): @@ -362,6 +350,8 @@ async def _post_token_endpoint( if oauth_token.get("scope"): result["scope"] = oauth_token["scope"] if oauth_token.expires_at: + # TODO: handle if parsed_response.refresh_expires_in < expires_in + # This should be rare, but we should use the lowest value to be safe. exp = datetime.fromtimestamp(oauth_token.expires_at, UTC) expires_in = exp - datetime.now(UTC) result["expires_in"] = math.ceil(expires_in.total_seconds()) From c2888a3f6015c541b5b7ba7e80a6be94a74ee304 Mon Sep 17 00:00:00 2001 From: Flora Thiebaut Date: Mon, 19 Jan 2026 12:33:35 +0000 Subject: [PATCH 26/54] try: refresh method --- .../notebooks/core_sessions.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/components/renku_data_services/notebooks/core_sessions.py b/components/renku_data_services/notebooks/core_sessions.py index 3319de207..dd10129bb 100644 --- a/components/renku_data_services/notebooks/core_sessions.py +++ b/components/renku_data_services/notebooks/core_sessions.py @@ -21,6 +21,7 @@ from renku_data_services.app_config import logging from renku_data_services.base_models import RESET, AnonymousAPIUser, APIUser, AuthenticatedAPIUser, ResetType from renku_data_services.base_models.metrics import MetricsService +from renku_data_services.connected_services.apispec_extras import RenkuTokens from renku_data_services.connected_services.db import ConnectedServicesRepository from renku_data_services.connected_services.models import 
OAuth2TokenSet, ProviderKind from renku_data_services.connected_services.oauth_http import ( @@ -312,11 +313,19 @@ async def get_data_sources( logger.warning(f"Adjusting rclone configuration for DC {str(dc.data_connector.id)}.") configuration["scope"] = configuration.get("drive") or "drive" token_config = { - "access_token": token_set.access_token, + # "access_token": token_set.access_token, + "access_token": "fake_one", "token_type": "Bearer", } - if token_set.expires_at_iso: - token_config["expiry"] = token_set.expires_at_iso + if user.access_token and user.refresh_token: + renku_tokens = RenkuTokens( + access_token=user.access_token, + refresh_token=user.refresh_token, + ) + token_config["refresh_token"] = renku_tokens.encode() + token_config["expiry"] = "2026-01-01T14:19:16.114854+01:00" + # if token_set.expires_at_iso: + # token_config["expiry"] = token_set.expires_at_iso configuration["token"] = json.dumps(token_config) mount_folder = ( dc.data_connector.storage.target_path From 22f39b0366400a254f45f28117802bddc8b15fe4 Mon Sep 17 00:00:00 2001 From: Flora Thiebaut Date: Mon, 19 Jan 2026 12:59:45 +0000 Subject: [PATCH 27/54] fix token_url --- components/renku_data_services/notebooks/core_sessions.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/components/renku_data_services/notebooks/core_sessions.py b/components/renku_data_services/notebooks/core_sessions.py index dd10129bb..12c07f197 100644 --- a/components/renku_data_services/notebooks/core_sessions.py +++ b/components/renku_data_services/notebooks/core_sessions.py @@ -262,6 +262,7 @@ async def __get_gitlab_image_pull_secret( async def get_data_sources( + request: Request, nb_config: NotebooksConfig, user: AnonymousAPIUser | AuthenticatedAPIUser, server_name: str, @@ -327,6 +328,9 @@ async def get_data_sources( # if token_set.expires_at_iso: # token_config["expiry"] = token_set.expires_at_iso configuration["token"] = json.dumps(token_config) + configuration["token_url"] = request.url_for( + 
"oauth2_connections.post_token_endpoint", connection_id=drive_connection.id + ) mount_folder = ( dc.data_connector.storage.target_path if PurePosixPath(dc.data_connector.storage.target_path).is_absolute() @@ -871,6 +875,7 @@ async def start_session( # Data connectors session_extras = session_extras.concat( await get_data_sources( + request=request, nb_config=nb_config, server_name=server_name, user=user, From 2634b6da5ec943cafe1c9b22cdef15e459ce56bc Mon Sep 17 00:00:00 2001 From: Flora Thiebaut Date: Wed, 21 Jan 2026 11:53:02 +0000 Subject: [PATCH 28/54] wip: handle dropbox --- .../connected_services/api.spec.yaml | 7 +- .../connected_services/apispec.py | 7 +- .../connected_services/models.py | 6 +- .../connected_services/provider_adapters.py | 64 ++++++++++--------- .../versions/fddfe7960a8b_squash_me.py | 23 +++++++ 5 files changed, 68 insertions(+), 39 deletions(-) create mode 100644 components/renku_data_services/migrations/versions/fddfe7960a8b_squash_me.py diff --git a/components/renku_data_services/connected_services/api.spec.yaml b/components/renku_data_services/connected_services/api.spec.yaml index 59b88afa0..0a996e7ad 100644 --- a/components/renku_data_services/connected_services/api.spec.yaml +++ b/components/renku_data_services/connected_services/api.spec.yaml @@ -426,13 +426,14 @@ components: ProviderKind: type: string enum: - - "gitlab" + - "dropbox" + - "generic_oidc" - "github" + - "gitlab" - "google" # - "drive" # - "onedrive" - # - "dropbox" - - "generic_oidc" + # - "dopbox" example: "gitlab" ApplicationSlug: description: | diff --git a/components/renku_data_services/connected_services/apispec.py b/components/renku_data_services/connected_services/apispec.py index 9f26b8751..7e187633a 100644 --- a/components/renku_data_services/connected_services/apispec.py +++ b/components/renku_data_services/connected_services/apispec.py @@ -1,6 +1,6 @@ # generated by datamodel-codegen: # filename: api.spec.yaml -# timestamp: 2026-01-14T14:25:25+00:00 +# 
timestamp: 2026-01-21T11:52:43+00:00 from __future__ import annotations @@ -29,10 +29,11 @@ class AppInstallation(BaseAPISpec): class ProviderKind(Enum): - gitlab = "gitlab" + dropbox = "dropbox" + generic_oidc = "generic_oidc" github = "github" + gitlab = "gitlab" google = "google" - generic_oidc = "generic_oidc" class ConnectionStatus(Enum): diff --git a/components/renku_data_services/connected_services/models.py b/components/renku_data_services/connected_services/models.py index 51ddc0ef0..044a08df7 100644 --- a/components/renku_data_services/connected_services/models.py +++ b/components/renku_data_services/connected_services/models.py @@ -13,13 +13,13 @@ class ProviderKind(StrEnum): """The kind of platform we connnect to.""" - gitlab = "gitlab" + dropbox = "dropbox" + generic_oidc = "generic_oidc" github = "github" + gitlab = "gitlab" google = "google" # drive = "drive" # onedrive = "onedrive" - # dropbox = "dropbox" - generic_oidc = "generic_oidc" class ConnectionStatus(StrEnum): diff --git a/components/renku_data_services/connected_services/provider_adapters.py b/components/renku_data_services/connected_services/provider_adapters.py index 8824760fa..8feb8ec00 100644 --- a/components/renku_data_services/connected_services/provider_adapters.py +++ b/components/renku_data_services/connected_services/provider_adapters.py @@ -11,6 +11,8 @@ from renku_data_services.connected_services import external_models, models from renku_data_services.connected_services import orm as schemas +logger = logging.getLogger(__name__) + class ProviderAdapter(ABC): """Defines the functionality of OAuth2 client adapters.""" @@ -234,43 +236,45 @@ def api_validate_account_response(self, response: Response) -> models.ConnectedA # return external_models.OneDriveConnectedAccount.model_validate(response.json()).to_connected_account() -# class DropboxAdapter(ProviderAdapter): -# """Adapter for Dropbox OAuth2 clients.""" +class DropboxAdapter(ProviderAdapter): + """Adapter for Dropbox OAuth2 
clients.""" -# user_info_endpoint = "userinfo" -# user_info_method = "POST" + user_info_endpoint = "userinfo" + user_info_method = "POST" -# @property -# def authorization_url(self) -> str: -# """The authorization URL for the OAuth2 protocol.""" -# return "https://www.dropbox.com/oauth2/authorize" + @property + def authorization_url(self) -> str: + """The authorization URL for the OAuth2 protocol.""" + return "https://www.dropbox.com/oauth2/authorize" -# @property -# def authorization_url_extra_params(self) -> dict[str, str]: -# """Extra parameters to add to the auth url.""" -# return {"access_type": "offline"} + @property + def authorization_url_extra_params(self) -> dict[str, str]: + """Extra parameters to add to the auth url.""" + return {"access_type": "offline"} -# @property -# def token_endpoint_url(self) -> str: -# """The token endpoint URL for the OAuth2 protocol.""" -# return "https://api.dropboxapi.com/oauth2/token" + @property + def token_endpoint_url(self) -> str: + """The token endpoint URL for the OAuth2 protocol.""" + return "https://api.dropboxapi.com/oauth2/token" -# @property -# def api_url(self) -> str: -# """The URL used for API calls on the Resource Server.""" -# return "https://api.dropboxapi.com/2/openid/" + @property + def api_url(self) -> str: + """The URL used for API calls on the Resource Server.""" + return "https://api.dropboxapi.com/2/openid/" -# @property -# def api_common_headers(self) -> dict[str, str] | None: -# """The HTTP headers used for API calls on the Resource Server.""" -# return { -# "Accept": "application/json", -# "Content-Type": "application/json", -# } + @property + def api_common_headers(self) -> dict[str, str] | None: + """The HTTP headers used for API calls on the Resource Server.""" + return { + "Accept": "application/json", + "Content-Type": "application/json", + } -# def api_validate_account_response(self, response: Response) -> models.ConnectedAccount: -# """Validates and returns the connected account response 
from the Resource Server.""" -# return external_models.DropboxConnectedAccount.model_validate(response.json()).to_connected_account() + def api_validate_account_response(self, response: Response) -> models.ConnectedAccount: + """Validates and returns the connected account response from the Resource Server.""" + logger.getChild(self.__class__.__name__).warning(f"Account response: {response.json()}") + raise NotImplementedError() + # return external_models.DropboxConnectedAccount.model_validate(response.json()).to_connected_account() class GenericOidcAdapter(ProviderAdapter): diff --git a/components/renku_data_services/migrations/versions/fddfe7960a8b_squash_me.py b/components/renku_data_services/migrations/versions/fddfe7960a8b_squash_me.py new file mode 100644 index 000000000..147ac40fc --- /dev/null +++ b/components/renku_data_services/migrations/versions/fddfe7960a8b_squash_me.py @@ -0,0 +1,23 @@ +"""squash me + +Revision ID: fddfe7960a8b +Revises: 58ad5426c2f3 +Create Date: 2026-01-21 11:52:05.169734 + +""" + +from alembic import op + +# revision identifiers, used by Alembic. 
+revision = "fddfe7960a8b" +down_revision = "58ad5426c2f3" +branch_labels = None +depends_on = None + + +def upgrade() -> None: + op.execute("ALTER TYPE providerkind ADD VALUE 'dropbox'") + + +def downgrade() -> None: + pass From 47ea52a15471c90a9fdcd644125d787ff2a2405e Mon Sep 17 00:00:00 2001 From: Flora Thiebaut Date: Wed, 21 Jan 2026 12:27:40 +0000 Subject: [PATCH 29/54] fix get_provider_adapter --- .../connected_services/provider_adapters.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/components/renku_data_services/connected_services/provider_adapters.py b/components/renku_data_services/connected_services/provider_adapters.py index 8feb8ec00..d6ed74687 100644 --- a/components/renku_data_services/connected_services/provider_adapters.py +++ b/components/renku_data_services/connected_services/provider_adapters.py @@ -357,13 +357,13 @@ def __get_httpx_client(cls) -> Client: # TODO: test that this map is exhaustive _adapter_map: dict[models.ProviderKind, type[ProviderAdapter]] = { - models.ProviderKind.gitlab: GitLabAdapter, + models.ProviderKind.dropbox: DropboxAdapter, + models.ProviderKind.generic_oidc: GenericOidcAdapter, models.ProviderKind.github: GitHubAdapter, + models.ProviderKind.gitlab: GitLabAdapter, models.ProviderKind.google: GoogleAdapter, # models.ProviderKind.drive: GoogleDriveAdapter, # models.ProviderKind.onedrive: OneDriveAdapter, - # models.ProviderKind.dropbox: DropboxAdapter, - models.ProviderKind.generic_oidc: GenericOidcAdapter, } From 81cc5f6a8e319825d0a635e684ff76ccef11cced Mon Sep 17 00:00:00 2001 From: Flora Thiebaut Date: Wed, 21 Jan 2026 12:54:45 +0000 Subject: [PATCH 30/54] fix token_access_type --- .../renku_data_services/connected_services/provider_adapters.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/components/renku_data_services/connected_services/provider_adapters.py b/components/renku_data_services/connected_services/provider_adapters.py index d6ed74687..d107202e6 100644 --- 
a/components/renku_data_services/connected_services/provider_adapters.py +++ b/components/renku_data_services/connected_services/provider_adapters.py @@ -250,7 +250,7 @@ def authorization_url(self) -> str: @property def authorization_url_extra_params(self) -> dict[str, str]: """Extra parameters to add to the auth url.""" - return {"access_type": "offline"} + return {"token_access_type": "offline"} @property def token_endpoint_url(self) -> str: From 2431ff138e307bbdfe0f47a51969fed2607f28e8 Mon Sep 17 00:00:00 2001 From: Flora Thiebaut Date: Wed, 21 Jan 2026 13:09:10 +0000 Subject: [PATCH 31/54] fix post --- .../connected_services/oauth_http.py | 11 +++++++++-- .../connected_services/oauth_test_script.py | 2 +- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/components/renku_data_services/connected_services/oauth_http.py b/components/renku_data_services/connected_services/oauth_http.py index 436306868..60f1226ee 100644 --- a/components/renku_data_services/connected_services/oauth_http.py +++ b/components/renku_data_services/connected_services/oauth_http.py @@ -41,7 +41,7 @@ from renku_data_services.users.db import APIUser from renku_data_services.utils import cryptography as crypt -logger = logging.getLogger(__file__) +logger = logging.getLogger(__name__) class OAuthHttpFactoryError(StrEnum): @@ -274,7 +274,14 @@ async def get_connected_account(self) -> OAuthHttpError | models.ConnectedAccoun request_url = urljoin(self.adapter.api_url, self.adapter.user_info_endpoint) try: if self.adapter.user_info_method == "POST": - response = await self._delegate.post(request_url, headers=self.adapter.api_common_headers) + # NOTE: we need to remove "Content-Type" from the headers (empty post) + headers: dict[str, str] | None = None + if self.adapter.api_common_headers: + headers = dict() + for key, value in self.adapter.api_common_headers.items(): + if key.lower() != "content-type": + headers[key] = value + response = await self._delegate.post(request_url, 
headers=headers) else: response = await self.get(request_url, headers=self.adapter.api_common_headers) except OAuthError as e: diff --git a/test/components/renku_data_services/connected_services/oauth_test_script.py b/test/components/renku_data_services/connected_services/oauth_test_script.py index ef29f62b7..336341068 100644 --- a/test/components/renku_data_services/connected_services/oauth_test_script.py +++ b/test/components/renku_data_services/connected_services/oauth_test_script.py @@ -66,7 +66,7 @@ ### --------------------------------------------------------------------- deps = DependencyManager.from_env() -logger = logging.getLogger(__file__) +logger = logging.getLogger(__name__) factory = DefaultOAuthHttpClientFactory(deps.config.secrets.encryption_key, deps.config.db.async_session_maker) From bfdf53c52b4fb39e7d496ae7a816bd0b37fe39ee Mon Sep 17 00:00:00 2001 From: Flora Thiebaut Date: Wed, 21 Jan 2026 13:38:21 +0000 Subject: [PATCH 32/54] handle dropbox data connectors --- .../connected_services/external_models.py | 16 ++++++---------- .../connected_services/provider_adapters.py | 9 +-------- .../notebooks/core_sessions.py | 2 +- 3 files changed, 8 insertions(+), 19 deletions(-) diff --git a/components/renku_data_services/connected_services/external_models.py b/components/renku_data_services/connected_services/external_models.py index 83c89ec85..0a238ac99 100644 --- a/components/renku_data_services/connected_services/external_models.py +++ b/components/renku_data_services/connected_services/external_models.py @@ -98,18 +98,14 @@ def to_connected_account(self) -> models.ConnectedAccount: # ) -# class DropboxConnectedAccount(BaseModel): -# """OAuth2 connected account model for dropbox.""" +class DropboxConnectedAccount(BaseModel): + """OAuth2 connected account model for dropbox.""" -# family_name: str | None -# given_name: str | None -# email: str + email: str -# def to_connected_account(self) -> models.ConnectedAccount: -# """Returns the corresponding 
ConnectedAccount object.""" -# return models.ConnectedAccount( -# username=" ".join(filter(None, [self.given_name, self.family_name])), web_url=f"mailto:{self.email}" -# ) + def to_connected_account(self) -> models.ConnectedAccount: + """Returns the corresponding ConnectedAccount object.""" + return models.ConnectedAccount(username=self.email, web_url="") class GenericOIDCConnectedAccount(BaseModel): diff --git a/components/renku_data_services/connected_services/provider_adapters.py b/components/renku_data_services/connected_services/provider_adapters.py index d107202e6..6ef408120 100644 --- a/components/renku_data_services/connected_services/provider_adapters.py +++ b/components/renku_data_services/connected_services/provider_adapters.py @@ -7,12 +7,9 @@ from httpx import Client, Response from renku_data_services import errors -from renku_data_services.app_config import logging from renku_data_services.connected_services import external_models, models from renku_data_services.connected_services import orm as schemas -logger = logging.getLogger(__name__) - class ProviderAdapter(ABC): """Defines the functionality of OAuth2 client adapters.""" @@ -155,8 +152,6 @@ def api_common_headers(self) -> dict[str, str] | None: def api_validate_account_response(self, response: Response) -> models.ConnectedAccount: """Validates and returns the connected account response from the Resource Server.""" - logger = logging.getLogger(self.__class__.__name__) - logger.warning(f"Account response: {response.json()}") return external_models.GoogleConnectedAccount.model_validate(response.json()).to_connected_account() @@ -272,9 +267,7 @@ def api_common_headers(self) -> dict[str, str] | None: def api_validate_account_response(self, response: Response) -> models.ConnectedAccount: """Validates and returns the connected account response from the Resource Server.""" - logger.getChild(self.__class__.__name__).warning(f"Account response: {response.json()}") - raise NotImplementedError() - # return 
external_models.DropboxConnectedAccount.model_validate(response.json()).to_connected_account() + return external_models.DropboxConnectedAccount.model_validate(response.json()).to_connected_account() class GenericOidcAdapter(ProviderAdapter): diff --git a/components/renku_data_services/notebooks/core_sessions.py b/components/renku_data_services/notebooks/core_sessions.py index 12c07f197..461291a25 100644 --- a/components/renku_data_services/notebooks/core_sessions.py +++ b/components/renku_data_services/notebooks/core_sessions.py @@ -284,7 +284,7 @@ async def get_data_sources( async for dc in data_connectors_stream: configuration = dc.data_connector.storage.configuration - if dc.data_connector.storage.configuration["type"] == "drive": + if dc.data_connector.storage.configuration["type"] in ["drive", "dropbox"]: # TODO: move some logic to the repo, see how it is done for images providers = await connected_services_repo.get_oauth2_clients(user=user) drive_provider = next(filter(lambda p: p.kind == ProviderKind.google, providers), None) From 91ded2ee747e35bcb548906712bd30abfb138ccf Mon Sep 17 00:00:00 2001 From: Flora Thiebaut Date: Wed, 21 Jan 2026 13:55:51 +0000 Subject: [PATCH 33/54] quick & dirty fix --- .../renku_data_services/notebooks/core_sessions.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/components/renku_data_services/notebooks/core_sessions.py b/components/renku_data_services/notebooks/core_sessions.py index 461291a25..52df801d9 100644 --- a/components/renku_data_services/notebooks/core_sessions.py +++ b/components/renku_data_services/notebooks/core_sessions.py @@ -286,8 +286,13 @@ async def get_data_sources( configuration = dc.data_connector.storage.configuration if dc.data_connector.storage.configuration["type"] in ["drive", "dropbox"]: # TODO: move some logic to the repo, see how it is done for images + provider_kind = ( + ProviderKind.google + if dc.data_connector.storage.configuration["type"] == "drive" + else 
ProviderKind.dropbox + ) providers = await connected_services_repo.get_oauth2_clients(user=user) - drive_provider = next(filter(lambda p: p.kind == ProviderKind.google, providers), None) + drive_provider = next(filter(lambda p: p.kind == provider_kind, providers), None) connections = await connected_services_repo.get_oauth2_connections(user=user) drive_connection = next( filter(lambda c: drive_provider is not None and c.provider_id == drive_provider.id, connections), None @@ -312,7 +317,8 @@ async def get_data_sources( ) continue logger.warning(f"Adjusting rclone configuration for DC {str(dc.data_connector.id)}.") - configuration["scope"] = configuration.get("drive") or "drive" + if provider_kind == ProviderKind.google: + configuration["scope"] = configuration.get("drive") or "drive" token_config = { # "access_token": token_set.access_token, "access_token": "fake_one", From d4ff99df19fc86b3c32f59908ef577c800d7e8fd Mon Sep 17 00:00:00 2001 From: Flora Thiebaut Date: Thu, 22 Jan 2026 10:18:27 +0000 Subject: [PATCH 34/54] add test_provider_adapters.py --- .../test_provider_adapters.py | 34 +++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 test/components/renku_data_services/connected_services/test_provider_adapters.py diff --git a/test/components/renku_data_services/connected_services/test_provider_adapters.py b/test/components/renku_data_services/connected_services/test_provider_adapters.py new file mode 100644 index 000000000..fa55250d4 --- /dev/null +++ b/test/components/renku_data_services/connected_services/test_provider_adapters.py @@ -0,0 +1,34 @@ +"""Tests for provider adapters.""" + +from datetime import UTC, datetime + +import pytest +from ulid import ULID + +from renku_data_services.connected_services import models +from renku_data_services.connected_services import orm as schemas +from renku_data_services.connected_services.provider_adapters import get_provider_adapter + + +@pytest.mark.parametrize("provider_kind", 
list(models.ProviderKind)) +def test_get_provider_adapter_maps_all_providers(provider_kind: models.ProviderKind) -> None: + client = schemas.OAuth2ClientORM( + id=ULID(), + client_id=f"c-{provider_kind.value}", + display_name=provider_kind.value, + created_by_id="", + kind=provider_kind, + scope="", + url="https://dev.renku.ch", + use_pkce=False, + app_slug="", + client_secret=None, + creation_date=datetime.now(UTC), + updated_at=datetime.now(UTC), + image_registry_url=None, + oidc_issuer_url=None, + ) + + adapter = get_provider_adapter(client) + + assert adapter is not None From d3261fcf9f5bb76dc6e1b348afd714b4c057ebbe Mon Sep 17 00:00:00 2001 From: Flora Thiebaut Date: Thu, 22 Jan 2026 13:31:21 +0000 Subject: [PATCH 35/54] refactor: cleanup handling code --- bases/renku_data_services/data_api/app.py | 1 + .../data_api/dependencies.py | 8 ++ .../connected_services/db.py | 58 +++++++++-- .../connected_services/models.py | 21 ++-- .../notebooks/blueprints.py | 3 + .../notebooks/core_sessions.py | 77 ++------------- .../notebooks/data_sources.py | 99 +++++++++++++++++++ 7 files changed, 182 insertions(+), 85 deletions(-) create mode 100644 components/renku_data_services/notebooks/data_sources.py diff --git a/bases/renku_data_services/data_api/app.py b/bases/renku_data_services/data_api/app.py index 22d55d9bc..f4645abbc 100644 --- a/bases/renku_data_services/data_api/app.py +++ b/bases/renku_data_services/data_api/app.py @@ -214,6 +214,7 @@ def register_all_handlers(app: Sanic, dm: DependencyManager) -> Sanic: data_connector_repo=dm.data_connector_repo, data_connector_secret_repo=dm.data_connector_secret_repo, git_provider_helper=dm.git_provider_helper, + data_source_repo=dm.data_source_repo, image_check_repo=dm.image_check_repo, internal_gitlab_authenticator=dm.gitlab_authenticator, metrics=dm.metrics, diff --git a/bases/renku_data_services/data_api/dependencies.py b/bases/renku_data_services/data_api/dependencies.py index 2c27d7f57..f7221b4ef 100644 --- 
a/bases/renku_data_services/data_api/dependencies.py +++ b/bases/renku_data_services/data_api/dependencies.py @@ -54,6 +54,7 @@ from renku_data_services.notebooks.api.classes.data_service import DummyGitProviderHelper, GitProviderHelper from renku_data_services.notebooks.config import GitProviderHelperProto, get_clusters from renku_data_services.notebooks.constants import AMALTHEA_SESSION_GVK, JUPYTER_SESSION_GVK +from renku_data_services.notebooks.data_sources import DataSourceRepository from renku_data_services.notebooks.image_check import ImageCheckRepository from renku_data_services.notifications.db import NotificationsRepository from renku_data_services.platform.db import PlatformRepository, UrlRedirectRepository @@ -141,6 +142,7 @@ class DependencyManager: data_connector_repo: DataConnectorRepository data_connector_secret_repo: DataConnectorSecretRepository cluster_repo: ClusterRepository + data_source_repo: DataSourceRepository image_check_repo: ImageCheckRepository metrics_repo: MetricsRepository metrics: StagingMetricsService @@ -392,6 +394,11 @@ def from_env(cls) -> DependencyManager: secret_service_public_key=config.secrets.public_key, authz=authz, ) + data_source_repo = DataSourceRepository( + nb_config=config.nb_config, + connected_services_repo=connected_services_repo, + oauth_client_factory=oauth_http_client_factory, + ) image_check_repo = ImageCheckRepository( nb_config=config.nb_config, connected_services_repo=connected_services_repo, @@ -439,6 +446,7 @@ def from_env(cls) -> DependencyManager: data_connector_repo=data_connector_repo, data_connector_secret_repo=data_connector_secret_repo, cluster_repo=cluster_repo, + data_source_repo=data_source_repo, image_check_repo=image_check_repo, metrics_repo=metrics_repo, metrics=metrics, diff --git a/components/renku_data_services/connected_services/db.py b/components/renku_data_services/connected_services/db.py index bfb773f0d..2c782954b 100644 --- a/components/renku_data_services/connected_services/db.py 
+++ b/components/renku_data_services/connected_services/db.py @@ -241,7 +241,7 @@ async def get_oauth2_connection(self, connection_id: ULID, user: base_models.API return connection async def get_provider_for_image(self, user: APIUser, image: Image) -> models.ImageProvider | None: - """Find a provider supporting the given an image.""" + """Find a provider supporting the given image.""" registry_urls = [f"http://{image.hostname}", f"https://{image.hostname}"] async with self.session_maker() as session: stmt = ( @@ -273,6 +273,50 @@ async def get_provider_for_image(self, user: APIUser, image: Image) -> models.Im str(row.OAuth2ClientORM.image_registry_url), # above query makes it non-nil ) + async def get_provider_for_kind( + self, user: APIUser, provider_kind: models.ProviderKind + ) -> models.ServiceProvider | None: + """Find a service provider of a given kind.""" + async with self.session_maker() as session: + # First, match an established connection if it exists + stmt = ( + select(schemas.OAuth2ConnectionORM) + .join(schemas.OAuth2ClientORM) + .where(schemas.OAuth2ConnectionORM.user_id == user.id) + .where(schemas.OAuth2ConnectionORM.status == models.ConnectionStatus.connected.value) + .where(schemas.OAuth2ClientORM.kind == provider_kind.value) + .limit(1) + ) + res = await session.scalars(stmt) + connection = res.one_or_none() + if connection is not None: + return models.ServiceProvider( + provider=connection.client.dump(), + connected_user=models.ConnectedUser(connection=connection.dump(), user=user), + ) + # Otherwise, match the first suitable provider + provider_stmt = ( + select(schemas.OAuth2ClientORM).where(schemas.OAuth2ClientORM.kind == provider_kind.value).limit(1) + ) + provider_res = await session.scalars(provider_stmt) + provider = provider_res.one_or_none() + if provider is not None: + return models.ServiceProvider( + provider=provider.dump(), + connected_user=None, + ) + return None + + async def get_token_set(self, user: APIUser, connection_id: 
ULID) -> models.OAuth2TokenSet | None: + """Returns the token set from a given OAuth2 connection.""" + client_or_error = await self.oauth_client_factory.for_user_connection(user=user, connection_id=connection_id) + match client_or_error: + case OAuthHttpFactoryError() as err: + logger.info(f"Error getting oauth client for user={user} connection={connection_id}: {err}") + return None + case client: + return await client.get_token() + async def get_image_repo_client(self, image_provider: models.ImageProvider) -> ImageRepoDockerAPI: """Create a image repository client for the given user and image provider.""" url = urlparse(image_provider.registry_url) @@ -282,15 +326,9 @@ async def get_image_repo_client(self, image_provider: models.ImageProvider) -> I user = image_provider.connected_user.user conn = image_provider.connected_user.connection access_token: str | None = None - client_or_error = await self.oauth_client_factory.for_user_connection(user, conn.id) - match client_or_error: - case OAuthHttpFactoryError() as err: - logger.info(f"Error getting oauth client for user={user} connection={conn.id}: {err}") - - case client: - token_set = await client.get_token() - access_token = token_set.access_token - + token_set = await self.get_token_set(user=user, connection_id=conn.id) + if token_set is not None: + access_token = token_set.access_token if access_token: logger.debug(f"Use connection {conn.id} to {image_provider.provider.id} for user {user.id}") repo_api = repo_api.with_oauth2_token(access_token) diff --git a/components/renku_data_services/connected_services/models.py b/components/renku_data_services/connected_services/models.py index 044a08df7..aedaacd38 100644 --- a/components/renku_data_services/connected_services/models.py +++ b/components/renku_data_services/connected_services/models.py @@ -172,12 +172,11 @@ def is_connected(self) -> bool: @dataclass(frozen=True, eq=True) -class ImageProvider: - """Result when retrieving provider information for an image.""" 
+class ServiceProvider: + """Result when retrieving provider information for a connected service.""" provider: OAuth2Client connected_user: ConnectedUser | None - registry_url: str def is_connected(self) -> bool: """Returns whether the connection exists and is in status 'connected'.""" @@ -186,10 +185,18 @@ def is_connected(self) -> bool: @property def connection(self) -> OAuth2Connection | None: """Return the connection if present.""" - if self.connected_user: - return self.connected_user.connection - else: - return None + return self.connected_user.connection if self.connected_user else None + + def __str__(self) -> str: + conn = f"connection={self.connection.id}" if self.connection else "connection=None" + return f"ServiceProvider(provider={self.provider.id}/{self.provider.kind}, {conn})" + + +@dataclass(frozen=True, eq=True) +class ImageProvider(ServiceProvider): + """Result when retrieving provider information for an image.""" + + registry_url: str def __str__(self) -> str: conn = f"connection={self.connection.id}" if self.connection else "connection=None" diff --git a/components/renku_data_services/notebooks/blueprints.py b/components/renku_data_services/notebooks/blueprints.py index b9676a8f4..cf04d3e13 100644 --- a/components/renku_data_services/notebooks/blueprints.py +++ b/components/renku_data_services/notebooks/blueprints.py @@ -30,6 +30,7 @@ start_session, validate_session_post_request, ) +from renku_data_services.notebooks.data_sources import DataSourceRepository from renku_data_services.notebooks.errors.intermittent import AnonymousUserPatchError from renku_data_services.notebooks.image_check import ImageCheckRepository from renku_data_services.project.db import ProjectRepository, ProjectSessionSecretRepository @@ -202,6 +203,7 @@ class NotebooksNewBP(CustomBlueprint): data_connector_secret_repo: DataConnectorSecretRepository git_provider_helper: GitProviderHelperProto oauth_client_factory: OAuthHttpClientFactory + data_source_repo: 
DataSourceRepository image_check_repo: ImageCheckRepository project_repo: ProjectRepository project_session_secret_repo: ProjectSessionSecretRepository @@ -239,6 +241,7 @@ async def _handler( user_repo=self.user_repo, metrics=self.metrics, image_check_repo=self.image_check_repo, + data_source_repo=self.data_source_repo, ) status = 201 if created else 200 return json(session.as_apispec().model_dump(exclude_none=True, mode="json"), status) diff --git a/components/renku_data_services/notebooks/core_sessions.py b/components/renku_data_services/notebooks/core_sessions.py index 52df801d9..b8647849c 100644 --- a/components/renku_data_services/notebooks/core_sessions.py +++ b/components/renku_data_services/notebooks/core_sessions.py @@ -21,12 +21,6 @@ from renku_data_services.app_config import logging from renku_data_services.base_models import RESET, AnonymousAPIUser, APIUser, AuthenticatedAPIUser, ResetType from renku_data_services.base_models.metrics import MetricsService -from renku_data_services.connected_services.apispec_extras import RenkuTokens -from renku_data_services.connected_services.db import ConnectedServicesRepository -from renku_data_services.connected_services.models import OAuth2TokenSet, ProviderKind -from renku_data_services.connected_services.oauth_http import ( - OAuthHttpFactoryError, -) from renku_data_services.crc.db import ClusterRepository, ResourcePoolRepository from renku_data_services.crc.models import ( ClusterSettings, @@ -85,6 +79,7 @@ State, Storage, ) +from renku_data_services.notebooks.data_sources import DataSourceRepository from renku_data_services.notebooks.image_check import ImageCheckRepository from renku_data_services.notebooks.models import ( ExtraSecret, @@ -270,8 +265,7 @@ async def get_data_sources( work_dir: PurePosixPath, data_connectors_overrides: list[SessionDataConnectorOverride], user_repo: UserRepo, - connected_services_repo: ConnectedServicesRepository, - image_check_repo: ImageCheckRepository, + data_source_repo: 
DataSourceRepository, ) -> SessionExtraResources: """Generate cloud storage related resources.""" data_sources: list[DataSource] = [] @@ -279,64 +273,12 @@ async def get_data_sources( dcs: dict[str, RCloneStorage] = {} dcs_secrets: dict[str, list[DataConnectorSecret]] = {} user_secret_key: str | None = None - - logger = logging.getLogger(get_data_sources.__name__) - async for dc in data_connectors_stream: - configuration = dc.data_connector.storage.configuration - if dc.data_connector.storage.configuration["type"] in ["drive", "dropbox"]: - # TODO: move some logic to the repo, see how it is done for images - provider_kind = ( - ProviderKind.google - if dc.data_connector.storage.configuration["type"] == "drive" - else ProviderKind.dropbox - ) - providers = await connected_services_repo.get_oauth2_clients(user=user) - drive_provider = next(filter(lambda p: p.kind == provider_kind, providers), None) - connections = await connected_services_repo.get_oauth2_connections(user=user) - drive_connection = next( - filter(lambda c: drive_provider is not None and c.provider_id == drive_provider.id, connections), None - ) - if drive_connection is None: - logger.warning( - f"Skipping Google Drive DC {str(dc.data_connector.id)} because no OAuth connection found." - ) - continue - token_set: OAuth2TokenSet | None = None - client_or_error = await image_check_repo.oauth_client_factory.for_user_connection( - user=user, connection_id=drive_connection.id - ) - match client_or_error: - case OAuthHttpFactoryError() as err: - logger.info(f"Error getting oauth client for user={user} connection={drive_connection.id}: {err}") - case client: - token_set = await client.get_token() - if not token_set or not token_set.access_token: - logger.warning( - f"Skipping Google Drive DC {str(dc.data_connector.id)} because the connection is not active." 
- ) - continue - logger.warning(f"Adjusting rclone configuration for DC {str(dc.data_connector.id)}.") - if provider_kind == ProviderKind.google: - configuration["scope"] = configuration.get("drive") or "drive" - token_config = { - # "access_token": token_set.access_token, - "access_token": "fake_one", - "token_type": "Bearer", - } - if user.access_token and user.refresh_token: - renku_tokens = RenkuTokens( - access_token=user.access_token, - refresh_token=user.refresh_token, - ) - token_config["refresh_token"] = renku_tokens.encode() - token_config["expiry"] = "2026-01-01T14:19:16.114854+01:00" - # if token_set.expires_at_iso: - # token_config["expiry"] = token_set.expires_at_iso - configuration["token"] = json.dumps(token_config) - configuration["token_url"] = request.url_for( - "oauth2_connections.post_token_endpoint", connection_id=drive_connection.id - ) + configuration = await data_source_repo.handle_configuration( + request=request, user=user, data_connector=dc.data_connector + ) + if configuration is None: + continue mount_folder = ( dc.data_connector.storage.target_path if PurePosixPath(dc.data_connector.storage.target_path).is_absolute() @@ -805,6 +747,7 @@ async def start_session( user_repo: UserRepo, metrics: MetricsService, image_check_repo: ImageCheckRepository, + data_source_repo: DataSourceRepository, ) -> tuple[AmaltheaSessionV1Alpha1, bool]: """Start an Amalthea session. 
@@ -889,9 +832,7 @@ async def start_session( work_dir=work_dir, data_connectors_overrides=launch_request.data_connectors_overrides or [], user_repo=user_repo, - # TODO: maybe get the dependency explicitly - connected_services_repo=image_check_repo.connected_services_repo, - image_check_repo=image_check_repo, + data_source_repo=data_source_repo, ) ) diff --git a/components/renku_data_services/notebooks/data_sources.py b/components/renku_data_services/notebooks/data_sources.py new file mode 100644 index 000000000..160b939d6 --- /dev/null +++ b/components/renku_data_services/notebooks/data_sources.py @@ -0,0 +1,99 @@ +"""Handling of data sources which require an OAuth2 connection.""" + +import json +from typing import Any + +from sanic import Request + +from renku_data_services.app_config import logging +from renku_data_services.base_models.core import APIUser +from renku_data_services.connected_services.apispec_extras import RenkuTokens +from renku_data_services.connected_services.db import ConnectedServicesRepository +from renku_data_services.connected_services.models import ProviderKind +from renku_data_services.connected_services.oauth_http import ( + OAuthHttpClientFactory, +) +from renku_data_services.data_connectors.models import DataConnector, GlobalDataConnector +from renku_data_services.notebooks.config import NotebooksConfig + +logger = logging.getLogger(__name__) + + +class DataSourceRepository: + """Repository for checking session images with rich responses.""" + + def __init__( + self, + nb_config: NotebooksConfig, + connected_services_repo: ConnectedServicesRepository, + oauth_client_factory: OAuthHttpClientFactory, + ) -> None: + self.nb_config = nb_config + self.connected_services_repo = connected_services_repo + self.oauth_client_factory = oauth_client_factory + + async def handle_configuration( + self, request: Request, user: APIUser, data_connector: DataConnector | GlobalDataConnector + ) -> dict[str, Any] | None: + """Ajusts the configuration of 
the input data connector if it requires an OAuth2 connection.""" + # NOTE: do not hanlde global data connectors + if data_connector.namespace is None: + return data_connector.storage.configuration + + provider_kind: ProviderKind | None = None + match data_connector.storage.configuration["type"]: + case "drive": + provider_kind = ProviderKind.google + case "dropbox": + provider_kind = ProviderKind.dropbox + case _: + pass + + if provider_kind is None: + return data_connector.storage.configuration + + configuration = data_connector.storage.configuration + provider = await self.connected_services_repo.get_provider_for_kind(user=user, provider_kind=provider_kind) + if provider is None: + logger.info( + f"Skipping data connector {str(data_connector.id)} of type " + f"{data_connector.storage.configuration["type"]} " + f"because no provider of kind {provider_kind.value} was found." + ) + return None + connection = provider.connected_user.connection if provider.connected_user else None + if connection is None: + logger.info( + f"Skipping data connector {str(data_connector.id)} of type " + f"{data_connector.storage.configuration["type"]} " + f"because no active connection was found; user needs to connect with {provider.provider.id}." + ) + return None + token_set = await self.connected_services_repo.get_token_set(user=user, connection_id=connection.id) + if not token_set or not token_set.access_token: + logger.info( + f"Skipping data connector {str(data_connector.id)} of type " + f"{data_connector.storage.configuration["type"]} " + f"because the connection is not active; user needs to re-connect with {provider.provider.id}." 
+ ) + return None + logger.info(f"Adjusting rclone configuration for data connector {str(data_connector.id)}.") + if provider_kind == ProviderKind.google: + configuration["scope"] = configuration.get("scope") or "drive" + token_config = { + "access_token": token_set.access_token, + "token_type": "Bearer", + } + if user.access_token and user.refresh_token: + renku_tokens = RenkuTokens( + access_token=user.access_token, + refresh_token=user.refresh_token, + ) + token_config["refresh_token"] = renku_tokens.encode() + if token_set.expires_at_iso: + token_config["expiry"] = token_set.expires_at_iso + configuration["token"] = json.dumps(token_config) + configuration["token_url"] = request.url_for( + "oauth2_connections.post_token_endpoint", connection_id=connection.id + ) + return configuration From a145298d09c69a38b97e22afffe9b02f45df368d Mon Sep 17 00:00:00 2001 From: Flora Thiebaut Date: Thu, 22 Jan 2026 13:46:05 +0000 Subject: [PATCH 36/54] fix async issue --- components/renku_data_services/connected_services/db.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/components/renku_data_services/connected_services/db.py b/components/renku_data_services/connected_services/db.py index 2c782954b..cba3afd94 100644 --- a/components/renku_data_services/connected_services/db.py +++ b/components/renku_data_services/connected_services/db.py @@ -5,6 +5,7 @@ from sqlalchemy import and_, select from sqlalchemy.ext.asyncio import AsyncSession +from sqlalchemy.orm import selectinload from ulid import ULID import renku_data_services.base_models as base_models @@ -285,6 +286,7 @@ async def get_provider_for_kind( .where(schemas.OAuth2ConnectionORM.user_id == user.id) .where(schemas.OAuth2ConnectionORM.status == models.ConnectionStatus.connected.value) .where(schemas.OAuth2ClientORM.kind == provider_kind.value) + .options(selectinload(schemas.OAuth2ConnectionORM.client)) .limit(1) ) res = await session.scalars(stmt) From 66f05d3dae84e5d20976b357c785ed81cec71ef2 Mon Sep 17 00:00:00 
2001 From: Flora Thiebaut Date: Thu, 22 Jan 2026 14:32:39 +0000 Subject: [PATCH 37/54] experimental: patch oauth2 configs --- .../k8s/client_interfaces.py | 4 ++++ components/renku_data_services/k8s/clients.py | 9 +++++++++ .../notebooks/api/classes/k8s_client.py | 4 ++++ .../notebooks/core_sessions.py | 18 ++++++++++++++++++ 4 files changed, 35 insertions(+) diff --git a/components/renku_data_services/k8s/client_interfaces.py b/components/renku_data_services/k8s/client_interfaces.py index 9bdaf05b7..c038e85ba 100644 --- a/components/renku_data_services/k8s/client_interfaces.py +++ b/components/renku_data_services/k8s/client_interfaces.py @@ -51,6 +51,10 @@ async def patch_resource_quota( class SecretClient(Protocol): """Methods to manipulate Secret kubernetes resources.""" + async def get_secret(self, secret: K8sObjectMeta) -> K8sSecret | None: + """Get a secret.""" + ... + async def create_secret(self, secret: K8sSecret) -> K8sSecret: """Create a secret.""" ... diff --git a/components/renku_data_services/k8s/clients.py b/components/renku_data_services/k8s/clients.py index 7285d1304..73cf7637a 100644 --- a/components/renku_data_services/k8s/clients.py +++ b/components/renku_data_services/k8s/clients.py @@ -107,6 +107,11 @@ class K8sSecretClient(SecretClient): def __init__(self, client: K8sClient) -> None: self.__client = client + async def get_secret(self, secret: K8sObjectMeta) -> K8sSecret | None: + """Get a secret.""" + res = await self.__client.get(secret) + return K8sSecret.from_k8s_object(res) if res is not None else None + async def create_secret(self, secret: K8sSecret) -> K8sSecret: """Create a secret.""" @@ -211,6 +216,10 @@ async def patch_resource_quota( """Update a resource quota.""" raise NotImplementedError() + async def get_secret(self, secret: K8sObjectMeta) -> K8sSecret | None: + """Get a secret.""" + raise NotImplementedError() + async def create_secret(self, secret: K8sSecret) -> K8sSecret: """Create a secret.""" raise NotImplementedError() 
diff --git a/components/renku_data_services/notebooks/api/classes/k8s_client.py b/components/renku_data_services/notebooks/api/classes/k8s_client.py index 63af8bdb2..86cc2f646 100644 --- a/components/renku_data_services/notebooks/api/classes/k8s_client.py +++ b/components/renku_data_services/notebooks/api/classes/k8s_client.py @@ -410,6 +410,10 @@ async def patch_image_pull_secret(self, session_name: str, gitlab_token: GitlabT await secret.patch(patch, type="json") + async def get_secret(self, secret: K8sObjectMeta) -> K8sSecret | None: + """Get a secret.""" + return await self.__secrets_client.get_secret(secret) + async def create_secret(self, secret: K8sSecret) -> K8sSecret: """Create a secret.""" diff --git a/components/renku_data_services/notebooks/core_sessions.py b/components/renku_data_services/notebooks/core_sessions.py index b8647849c..73bd2f480 100644 --- a/components/renku_data_services/notebooks/core_sessions.py +++ b/components/renku_data_services/notebooks/core_sessions.py @@ -1181,6 +1181,24 @@ async def patch_session( # TODO: but that we do not save these overrides (e.g. as annotations) means that # TODO: we cannot patch data connectors upon resume. # TODO: If we did, we would lose the user's provided overrides (e.g. unsaved credentials). 
+ # Experimental here: + paused_session = await nb_config.k8s_v2_client.get_session(session_id, user.id) + if paused_session is None: + logger.error(f"Paused session {session_id} not found!") + else: + secret_prefix = f"{server_name}-ds-" + dss = paused_session.spec.dataSources or [] + dc_ids: list[ULID] = [] + for ds in dss: + if ds.secretRef is not None: + name = ds.secretRef.name + if name.startswith(secret_prefix): + ulid = name[len(secret_prefix) :] + try: + dc_ids.append(ULID.from_str(ulid.upper())) + except ValueError: + logger.warning(f"Could not parse {ulid.upper()} as a ULID.") + logger.info(f"Found mounted data connectors: {dc_ids}.") # More init containers session_extras = session_extras.concat( From 3ab743f94309c63241630c55a61a43ff5fc8ce5d Mon Sep 17 00:00:00 2001 From: Flora Thiebaut Date: Fri, 23 Jan 2026 10:13:43 +0000 Subject: [PATCH 38/54] try to read existing dc secrets --- .../notebooks/blueprints.py | 1 + .../notebooks/core_sessions.py | 63 ++++++++++++++++++- 2 files changed, 63 insertions(+), 1 deletion(-) diff --git a/components/renku_data_services/notebooks/blueprints.py b/components/renku_data_services/notebooks/blueprints.py index cf04d3e13..1e50a62c0 100644 --- a/components/renku_data_services/notebooks/blueprints.py +++ b/components/renku_data_services/notebooks/blueprints.py @@ -303,6 +303,7 @@ async def _handler( internal_gitlab_user=internal_gitlab_user, nb_config=self.nb_config, git_provider_helper=self.git_provider_helper, + data_connector_secret_repo=self.data_connector_secret_repo, project_repo=self.project_repo, project_session_secret_repo=self.project_session_secret_repo, rp_repo=self.rp_repo, diff --git a/components/renku_data_services/notebooks/core_sessions.py b/components/renku_data_services/notebooks/core_sessions.py index 73bd2f480..166cfc54b 100644 --- a/components/renku_data_services/notebooks/core_sessions.py +++ b/components/renku_data_services/notebooks/core_sessions.py @@ -34,7 +34,7 @@ ) from 
renku_data_services.data_connectors.models import DataConnectorSecret, DataConnectorWithSecrets from renku_data_services.errors import ValidationError, errors -from renku_data_services.k8s.models import K8sSecret, sanitizer +from renku_data_services.k8s.models import ClusterConnection, K8sSecret, sanitizer from renku_data_services.notebooks import apispec, core from renku_data_services.notebooks.api.amalthea_patches import git_proxy, init_containers from renku_data_services.notebooks.api.amalthea_patches.init_containers import user_secrets_extras @@ -352,6 +352,56 @@ async def get_data_sources( ) +async def patch_data_sources( + session: AmaltheaSessionV1Alpha1, + cluster: ClusterConnection, + nb_config: NotebooksConfig, + data_connectors_stream: AsyncIterator[DataConnectorWithSecrets], + # request: Request, + # nb_config: NotebooksConfig, + # user: AnonymousAPIUser | AuthenticatedAPIUser, + # server_name: str, + # data_connectors_stream: AsyncIterator[DataConnectorWithSecrets], + # work_dir: PurePosixPath, + # data_connectors_overrides: list[SessionDataConnectorOverride], + # user_repo: UserRepo, + # data_source_repo: DataSourceRepository, +) -> SessionExtraResources: + """Handle updating data sources definitions when resuming a session.""" + # Experimental here: + server_name = session.metadata.name + secret_prefix = f"{server_name}-ds-" + dss = session.spec.dataSources or [] + mounted_dcs: list[tuple[ULID, str]] = [] + for ds in dss: + if ds.secretRef is not None: + name = ds.secretRef.name + if name.startswith(secret_prefix): + ulid = name[len(secret_prefix) :] + try: + mounted_dcs.append(ULID.from_str(ulid.upper(), name)) + except ValueError: + logger.warning(f"Could not parse {ulid.upper()} as a ULID.") + logger.info(f"Found mounted data connectors: {[str(u) for u, _ in mounted_dcs]}.") + async for dc in data_connectors_stream: + dc_id = dc.data_connector.id + mounted_dc = next(filter(lambda tup: tup[0] == dc_id, mounted_dcs), None) + if mounted_dc is None: + 
continue + _, secret_name = mounted_dc + logger.info(f"Patching DC secret {secret_name}.") + k8s_secret = await nb_config.k8s_v2_client.get_secret( + K8sSecret.from_v1_secret(V1Secret(metadata=V1ObjectMeta(name=secret_name)), cluster) + ) + if k8s_secret is None: + logger.warning(f"Could not read secret {secret_name} for patching, skipping!") + continue + s_data = k8s_secret.to_v1_secret().data + s_string_data = k8s_secret.to_v1_secret().string_data + logger.info(f"Got secret: s_data={s_data}, s_string_data={s_string_data}") + return SessionExtraResources() + + async def request_dc_secret_creation( user: AuthenticatedAPIUser | AnonymousAPIUser, nb_config: NotebooksConfig, @@ -1059,6 +1109,7 @@ async def patch_session( internal_gitlab_user: APIUser, nb_config: NotebooksConfig, git_provider_helper: GitProviderHelperProto, + data_connector_secret_repo: DataConnectorSecretRepository, project_repo: ProjectRepository, project_session_secret_repo: ProjectSessionSecretRepository, rp_repo: ResourcePoolRepository, @@ -1160,6 +1211,7 @@ async def patch_session( session_secrets = await project_session_secret_repo.get_all_session_secrets_from_project( user=user, project_id=project.id ) + data_connectors_stream = data_connector_secret_repo.get_data_connectors_with_secrets(user, project.id) git_providers = await git_provider_helper.get_providers(user=user) repositories = repositories_from_project(project, git_providers) @@ -1181,6 +1233,15 @@ async def patch_session( # TODO: but that we do not save these overrides (e.g. as annotations) means that # TODO: we cannot patch data connectors upon resume. # TODO: If we did, we would lose the user's provided overrides (e.g. unsaved credentials). 
+ session_extras = session_extras.concat( + await patch_data_sources( + session=session, + cluster=cluster, + nb_config=nb_config, + data_connectors_stream=data_connectors_stream, + ) + ) + # Experimental here: paused_session = await nb_config.k8s_v2_client.get_session(session_id, user.id) if paused_session is None: From b1be5e890e72141a198e7d658ffab677889a9236 Mon Sep 17 00:00:00 2001 From: Flora Thiebaut Date: Fri, 23 Jan 2026 10:28:08 +0000 Subject: [PATCH 39/54] fix ULID oopsie --- components/renku_data_services/notebooks/core_sessions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/components/renku_data_services/notebooks/core_sessions.py b/components/renku_data_services/notebooks/core_sessions.py index 166cfc54b..8d39aee24 100644 --- a/components/renku_data_services/notebooks/core_sessions.py +++ b/components/renku_data_services/notebooks/core_sessions.py @@ -379,7 +379,7 @@ async def patch_data_sources( if name.startswith(secret_prefix): ulid = name[len(secret_prefix) :] try: - mounted_dcs.append(ULID.from_str(ulid.upper(), name)) + mounted_dcs.append((ULID.from_str(ulid.upper()), name)) except ValueError: logger.warning(f"Could not parse {ulid.upper()} as a ULID.") logger.info(f"Found mounted data connectors: {[str(u) for u, _ in mounted_dcs]}.") From 96eec5350f0e70ba15d6b67f9ae2633499bdf94e Mon Sep 17 00:00:00 2001 From: Flora Thiebaut Date: Fri, 23 Jan 2026 10:58:29 +0000 Subject: [PATCH 40/54] wip: decode existing rclone config --- .../notebooks/blueprints.py | 1 + .../notebooks/core_sessions.py | 23 ++++++++++++++++--- .../notebooks/data_sources.py | 15 +++++++++++- 3 files changed, 35 insertions(+), 4 deletions(-) diff --git a/components/renku_data_services/notebooks/blueprints.py b/components/renku_data_services/notebooks/blueprints.py index 1e50a62c0..a06236d55 100644 --- a/components/renku_data_services/notebooks/blueprints.py +++ b/components/renku_data_services/notebooks/blueprints.py @@ -310,6 +310,7 @@ async def _handler( 
session_repo=self.session_repo, metrics=self.metrics, image_check_repo=self.image_check_repo, + data_source_repo=self.data_source_repo, ) return json(new_session.as_apispec().model_dump(exclude_none=True, mode="json")) diff --git a/components/renku_data_services/notebooks/core_sessions.py b/components/renku_data_services/notebooks/core_sessions.py index 8d39aee24..73eb02382 100644 --- a/components/renku_data_services/notebooks/core_sessions.py +++ b/components/renku_data_services/notebooks/core_sessions.py @@ -357,6 +357,7 @@ async def patch_data_sources( cluster: ClusterConnection, nb_config: NotebooksConfig, data_connectors_stream: AsyncIterator[DataConnectorWithSecrets], + data_source_repo: DataSourceRepository, # request: Request, # nb_config: NotebooksConfig, # user: AnonymousAPIUser | AuthenticatedAPIUser, @@ -384,6 +385,8 @@ async def patch_data_sources( logger.warning(f"Could not parse {ulid.upper()} as a ULID.") logger.info(f"Found mounted data connectors: {[str(u) for u, _ in mounted_dcs]}.") async for dc in data_connectors_stream: + if not data_source_repo.is_patching_enabled(dc.data_connector): + continue dc_id = dc.data_connector.id mounted_dc = next(filter(lambda tup: tup[0] == dc_id, mounted_dcs), None) if mounted_dc is None: @@ -396,9 +399,21 @@ async def patch_data_sources( if k8s_secret is None: logger.warning(f"Could not read secret {secret_name} for patching, skipping!") continue - s_data = k8s_secret.to_v1_secret().data - s_string_data = k8s_secret.to_v1_secret().string_data - logger.info(f"Got secret: s_data={s_data}, s_string_data={s_string_data}") + secret_data: dict[str, str] = k8s_secret.to_v1_secret().data + logger.info(f"Got secret: secret_data={secret_data}") + config_data_raw = secret_data.get("configData") + if not config_data_raw: + logger.warning(f"Field 'configData' not found for data connector {str(dc_id)}, skipping!") + continue + logger.info(f"Check type config_data_raw = {type(config_data_raw)}") + existing_config_data: str = 
"" + try: + existing_config_data = base64.b64decode(config_data_raw).decode("utf-8") + except Exception as err: + logger.warning(f"Error decoding 'configData' for data connector {str(dc_id)}, skipping! {err}") + continue + logger.info(f"Got existing_config_data = {existing_config_data}") + return SessionExtraResources() @@ -1115,6 +1130,7 @@ async def patch_session( rp_repo: ResourcePoolRepository, session_repo: SessionRepository, image_check_repo: ImageCheckRepository, + data_source_repo: DataSourceRepository, metrics: MetricsService, ) -> AmaltheaSessionV1Alpha1: """Patch an Amalthea session.""" @@ -1239,6 +1255,7 @@ async def patch_session( cluster=cluster, nb_config=nb_config, data_connectors_stream=data_connectors_stream, + data_source_repo=data_source_repo, ) ) diff --git a/components/renku_data_services/notebooks/data_sources.py b/components/renku_data_services/notebooks/data_sources.py index 160b939d6..601914aea 100644 --- a/components/renku_data_services/notebooks/data_sources.py +++ b/components/renku_data_services/notebooks/data_sources.py @@ -36,7 +36,7 @@ async def handle_configuration( self, request: Request, user: APIUser, data_connector: DataConnector | GlobalDataConnector ) -> dict[str, Any] | None: """Ajusts the configuration of the input data connector if it requires an OAuth2 connection.""" - # NOTE: do not hanlde global data connectors + # NOTE: do not handle global data connectors if data_connector.namespace is None: return data_connector.storage.configuration @@ -97,3 +97,16 @@ async def handle_configuration( "oauth2_connections.post_token_endpoint", connection_id=connection.id ) return configuration + + def is_patching_enabled(self, data_connector: DataConnector | GlobalDataConnector) -> bool: + """Returns true iff the data connector can be patched.""" + # NOTE: do not handle global data connectors + if data_connector.namespace is None: + return False + match data_connector.storage.configuration["type"]: + case "drive": + return True + case 
"dropbox": + return True + case _: + return False From 9e9088a4f3685aac09b6a933ea445d5606f50712 Mon Sep 17 00:00:00 2001 From: Flora Thiebaut Date: Fri, 23 Jan 2026 11:19:57 +0000 Subject: [PATCH 41/54] wip: decode existing rclone config --- .../notebooks/core_sessions.py | 20 +------------------ .../notebooks/data_sources.py | 18 +++++++++++++++++ 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/components/renku_data_services/notebooks/core_sessions.py b/components/renku_data_services/notebooks/core_sessions.py index 73eb02382..655f01426 100644 --- a/components/renku_data_services/notebooks/core_sessions.py +++ b/components/renku_data_services/notebooks/core_sessions.py @@ -413,6 +413,7 @@ async def patch_data_sources( logger.warning(f"Error decoding 'configData' for data connector {str(dc_id)}, skipping! {err}") continue logger.info(f"Got existing_config_data = {existing_config_data}") + await data_source_repo.blah(existing_config_data) return SessionExtraResources() @@ -1259,25 +1260,6 @@ async def patch_session( ) ) - # Experimental here: - paused_session = await nb_config.k8s_v2_client.get_session(session_id, user.id) - if paused_session is None: - logger.error(f"Paused session {session_id} not found!") - else: - secret_prefix = f"{server_name}-ds-" - dss = paused_session.spec.dataSources or [] - dc_ids: list[ULID] = [] - for ds in dss: - if ds.secretRef is not None: - name = ds.secretRef.name - if name.startswith(secret_prefix): - ulid = name[len(secret_prefix) :] - try: - dc_ids.append(ULID.from_str(ulid.upper())) - except ValueError: - logger.warning(f"Could not parse {ulid.upper()} as a ULID.") - logger.info(f"Found mounted data connectors: {dc_ids}.") - # More init containers session_extras = session_extras.concat( await get_extra_init_containers( diff --git a/components/renku_data_services/notebooks/data_sources.py b/components/renku_data_services/notebooks/data_sources.py index 601914aea..5dfdecdbf 100644 --- 
a/components/renku_data_services/notebooks/data_sources.py +++ b/components/renku_data_services/notebooks/data_sources.py @@ -1,6 +1,7 @@ """Handling of data sources which require an OAuth2 connection.""" import json +from configparser import ConfigParser from typing import Any from sanic import Request @@ -110,3 +111,20 @@ def is_patching_enabled(self, data_connector: DataConnector | GlobalDataConnecto return True case _: return False + + async def blah(self, config_data: str) -> None: + """Handles patching...""" + parser = ConfigParser(interpolation=None) + try: + parser.read_string(config_data) + except Exception as err: + logger.error(f"Failed to parse existing data connector configuration: {err}") + return None + main_section = next(filter(lambda s: s, parser.sections()), "") + if not main_section: + logger.error("Failed to parse existing data connector configuration: no main section.") + return None + logger.info(f"Got main section: {main_section}.") + items = parser.items(main_section) + logger.info(f"Got items: {items}.") + pass From 59a506d5ed41af7c6285a32948e448df8cf53ae6 Mon Sep 17 00:00:00 2001 From: Flora Thiebaut Date: Fri, 23 Jan 2026 12:46:59 +0000 Subject: [PATCH 42/54] exp: try patching --- .../notebooks/blueprints.py | 3 +- .../notebooks/core_sessions.py | 19 ++++- .../notebooks/data_sources.py | 76 ++++++++++++++++++- 3 files changed, 93 insertions(+), 5 deletions(-) diff --git a/components/renku_data_services/notebooks/blueprints.py b/components/renku_data_services/notebooks/blueprints.py index a06236d55..66835ef78 100644 --- a/components/renku_data_services/notebooks/blueprints.py +++ b/components/renku_data_services/notebooks/blueprints.py @@ -290,13 +290,14 @@ def patch(self) -> BlueprintFactoryResponse: @authenticate_2(self.authenticator, self.internal_gitlab_authenticator) @validate(json=apispec.SessionPatchRequest) async def _handler( - _: Request, + request: Request, user: AuthenticatedAPIUser | AnonymousAPIUser, internal_gitlab_user: 
APIUser, session_id: str, body: apispec.SessionPatchRequest, ) -> HTTPResponse: new_session = await patch_session( + request=request, body=body, session_id=session_id, user=user, diff --git a/components/renku_data_services/notebooks/core_sessions.py b/components/renku_data_services/notebooks/core_sessions.py index 655f01426..940fd7000 100644 --- a/components/renku_data_services/notebooks/core_sessions.py +++ b/components/renku_data_services/notebooks/core_sessions.py @@ -353,6 +353,8 @@ async def get_data_sources( async def patch_data_sources( + request: Request, + user: AnonymousAPIUser | AuthenticatedAPIUser, session: AmaltheaSessionV1Alpha1, cluster: ClusterConnection, nb_config: NotebooksConfig, @@ -370,6 +372,7 @@ async def patch_data_sources( ) -> SessionExtraResources: """Handle updating data sources definitions when resuming a session.""" # Experimental here: + secrets: list[ExtraSecret] = [] server_name = session.metadata.name secret_prefix = f"{server_name}-ds-" dss = session.spec.dataSources or [] @@ -413,9 +416,18 @@ async def patch_data_sources( logger.warning(f"Error decoding 'configData' for data connector {str(dc_id)}, skipping! 
{err}") continue logger.info(f"Got existing_config_data = {existing_config_data}") - await data_source_repo.blah(existing_config_data) + new_config_data = await data_source_repo.handle_patching_configuration( + request=request, user=user, data_connector=dc.data_connector, config_data=existing_config_data + ) + if not new_config_data: + continue + new_secret = k8s_secret.to_v1_secret() + new_secret.data = new_secret.data or dict() + new_secret.data["configData"] = base64.b64encode(new_config_data.encode("utf-8")).decode("utf-8") + secrets.append(ExtraSecret(new_secret)) - return SessionExtraResources() + logger.info(f"Patching secrets: {secrets}") + return SessionExtraResources(secrets=secrets) async def request_dc_secret_creation( @@ -1119,6 +1131,7 @@ async def start_session( async def patch_session( + request: Request, body: apispec.SessionPatchRequest, session_id: str, user: AnonymousAPIUser | AuthenticatedAPIUser, @@ -1252,6 +1265,8 @@ async def patch_session( # TODO: If we did, we would lose the user's provided overrides (e.g. unsaved credentials). 
session_extras = session_extras.concat( await patch_data_sources( + request=request, + user=user, session=session, cluster=cluster, nb_config=nb_config, diff --git a/components/renku_data_services/notebooks/data_sources.py b/components/renku_data_services/notebooks/data_sources.py index 5dfdecdbf..42380be9a 100644 --- a/components/renku_data_services/notebooks/data_sources.py +++ b/components/renku_data_services/notebooks/data_sources.py @@ -2,6 +2,7 @@ import json from configparser import ConfigParser +from io import StringIO from typing import Any from sanic import Request @@ -112,7 +113,9 @@ def is_patching_enabled(self, data_connector: DataConnector | GlobalDataConnecto case _: return False - async def blah(self, config_data: str) -> None: + async def handle_patching_configuration( + self, request: Request, user: APIUser, data_connector: DataConnector | GlobalDataConnector, config_data: str + ) -> str | None: """Handles patching...""" parser = ConfigParser(interpolation=None) try: @@ -127,4 +130,73 @@ async def blah(self, config_data: str) -> None: logger.info(f"Got main section: {main_section}.") items = parser.items(main_section) logger.info(f"Got items: {items}.") - pass + configuration = dict(items) + if configuration.get("type") != data_connector.storage.configuration.get("type"): + logger.warning( + f"Data connector type changed to {data_connector.storage.configuration.get("type")}, skipping!" 
+ ) + return None + + provider_kind: ProviderKind | None = None + match data_connector.storage.configuration["type"]: + case "drive": + provider_kind = ProviderKind.google + case "dropbox": + provider_kind = ProviderKind.dropbox + case _: + return None + + provider = await self.connected_services_repo.get_provider_for_kind(user=user, provider_kind=provider_kind) + if provider is None: + logger.info( + f"Skipping data connector {str(data_connector.id)} of type " + f"{data_connector.storage.configuration["type"]} " + f"because no provider of kind {provider_kind.value} was found." + ) + return None + connection = provider.connected_user.connection if provider.connected_user else None + if connection is None: + logger.info( + f"Skipping data connector {str(data_connector.id)} of type " + f"{data_connector.storage.configuration["type"]} " + f"because no active connection was found; user needs to connect with {provider.provider.id}." + ) + return None + token_set = await self.connected_services_repo.get_token_set(user=user, connection_id=connection.id) + if not token_set or not token_set.access_token: + logger.info( + f"Skipping data connector {str(data_connector.id)} of type " + f"{data_connector.storage.configuration["type"]} " + f"because the connection is not active; user needs to re-connect with {provider.provider.id}." 
+ ) + return None + logger.info(f"Adjusting rclone configuration for data connector {str(data_connector.id)}.") + token_config = { + "access_token": token_set.access_token, + "token_type": "Bearer", + } + if user.access_token and user.refresh_token: + renku_tokens = RenkuTokens( + access_token=user.access_token, + refresh_token=user.refresh_token, + ) + token_config["refresh_token"] = renku_tokens.encode() + if token_set.expires_at_iso: + token_config["expiry"] = token_set.expires_at_iso + # configuration["token"] = json.dumps(token_config) + # configuration["token_url"] = request.url_for( + # "oauth2_connections.post_token_endpoint", connection_id=connection.id + # ) + # return configuration + + # for k, v in configuration.items(): + # parser.set(name, k, _stringify(v)) + parser.set(main_section, "token", json.dumps(token_config)) + parser.set( + main_section, + "token_url", + request.url_for("oauth2_connections.post_token_endpoint", connection_id=connection.id), + ) + stringio = StringIO() + parser.write(stringio) + return stringio.getvalue() From 2eb59aff13f98a626562d5b97a8b216a8ee6acd7 Mon Sep 17 00:00:00 2001 From: Flora Thiebaut Date: Fri, 23 Jan 2026 13:05:40 +0000 Subject: [PATCH 43/54] debug? 
--- components/renku_data_services/notebooks/core_sessions.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/components/renku_data_services/notebooks/core_sessions.py b/components/renku_data_services/notebooks/core_sessions.py index 940fd7000..9c5c3679c 100644 --- a/components/renku_data_services/notebooks/core_sessions.py +++ b/components/renku_data_services/notebooks/core_sessions.py @@ -424,6 +424,8 @@ async def patch_data_sources( new_secret = k8s_secret.to_v1_secret() new_secret.data = new_secret.data or dict() new_secret.data["configData"] = base64.b64encode(new_config_data.encode("utf-8")).decode("utf-8") + new_secret.string_data = None + logger.info(f"V1Secret = {new_secret}") secrets.append(ExtraSecret(new_secret)) logger.info(f"Patching secrets: {secrets}") From 9d48d732126d3047f34e3fb8a3e32021bc34643b Mon Sep 17 00:00:00 2001 From: Flora Thiebaut Date: Fri, 23 Jan 2026 13:25:03 +0000 Subject: [PATCH 44/54] try other new secret --- .../notebooks/core_sessions.py | 20 ++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/components/renku_data_services/notebooks/core_sessions.py b/components/renku_data_services/notebooks/core_sessions.py index 9c5c3679c..389bc5d0e 100644 --- a/components/renku_data_services/notebooks/core_sessions.py +++ b/components/renku_data_services/notebooks/core_sessions.py @@ -421,10 +421,24 @@ async def patch_data_sources( ) if not new_config_data: continue - new_secret = k8s_secret.to_v1_secret() - new_secret.data = new_secret.data or dict() + + new_secret = V1Secret( + api_version="v1", + kind="Secret", + metadata=V1ObjectMeta( + name=k8s_secret.to_v1_secret().metadata.name, + namespace=k8s_secret.to_v1_secret().metadata.namespace, + annotations=k8s_secret.to_v1_secret().metadata.annotations, + labels=k8s_secret.to_v1_secret().metadata.labels, + ), + data=k8s_secret.to_v1_secret().data or dict(), + ) new_secret.data["configData"] = base64.b64encode(new_config_data.encode("utf-8")).decode("utf-8") - 
new_secret.string_data = None + + # new_secret = k8s_secret.to_v1_secret() + # new_secret.data = new_secret.data or dict() + # new_secret.data["configData"] = base64.b64encode(new_config_data.encode("utf-8")).decode("utf-8") + # new_secret.string_data = None logger.info(f"V1Secret = {new_secret}") secrets.append(ExtraSecret(new_secret)) From b4573b5ebcc1f44375d026e16aad7ac1fed81ad5 Mon Sep 17 00:00:00 2001 From: Flora Thiebaut Date: Fri, 23 Jan 2026 14:26:51 +0000 Subject: [PATCH 45/54] fix? --- .../renku_data_services/notebooks/core_sessions.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/components/renku_data_services/notebooks/core_sessions.py b/components/renku_data_services/notebooks/core_sessions.py index 389bc5d0e..4a010fcde 100644 --- a/components/renku_data_services/notebooks/core_sessions.py +++ b/components/renku_data_services/notebooks/core_sessions.py @@ -402,7 +402,8 @@ async def patch_data_sources( if k8s_secret is None: logger.warning(f"Could not read secret {secret_name} for patching, skipping!") continue - secret_data: dict[str, str] = k8s_secret.to_v1_secret().data + v1_secret = k8s_secret.to_v1_secret() + secret_data: dict[str, str] = v1_secret.data logger.info(f"Got secret: secret_data={secret_data}") config_data_raw = secret_data.get("configData") if not config_data_raw: @@ -422,16 +423,15 @@ async def patch_data_sources( if not new_config_data: continue + metadata = v1_secret.metadata new_secret = V1Secret( api_version="v1", kind="Secret", metadata=V1ObjectMeta( - name=k8s_secret.to_v1_secret().metadata.name, - namespace=k8s_secret.to_v1_secret().metadata.namespace, - annotations=k8s_secret.to_v1_secret().metadata.annotations, - labels=k8s_secret.to_v1_secret().metadata.labels, + name=metadata.name, + namespace=metadata.namespace, ), - data=k8s_secret.to_v1_secret().data or dict(), + data=secret_data, ) new_secret.data["configData"] = base64.b64encode(new_config_data.encode("utf-8")).decode("utf-8") From 
5c1b15df878881b79d5dabc24ce21c261669bedd Mon Sep 17 00:00:00 2001 From: Flora Thiebaut Date: Tue, 27 Jan 2026 08:47:48 +0000 Subject: [PATCH 46/54] some code cleanup --- .../connected_services/api.spec.yaml | 3 -- .../connected_services/external_models.py | 11 ----- .../connected_services/models.py | 2 - .../connected_services/provider_adapters.py | 41 ------------------- .../notebooks/core_sessions.py | 15 ------- .../notebooks/data_sources.py | 9 +--- 6 files changed, 1 insertion(+), 80 deletions(-) diff --git a/components/renku_data_services/connected_services/api.spec.yaml b/components/renku_data_services/connected_services/api.spec.yaml index 0a996e7ad..ed4680449 100644 --- a/components/renku_data_services/connected_services/api.spec.yaml +++ b/components/renku_data_services/connected_services/api.spec.yaml @@ -431,9 +431,6 @@ components: - "github" - "gitlab" - "google" - # - "drive" - # - "onedrive" - # - "dopbox" example: "gitlab" ApplicationSlug: description: | diff --git a/components/renku_data_services/connected_services/external_models.py b/components/renku_data_services/connected_services/external_models.py index 0a238ac99..3cf81b74e 100644 --- a/components/renku_data_services/connected_services/external_models.py +++ b/components/renku_data_services/connected_services/external_models.py @@ -73,17 +73,6 @@ def to_connected_account(self) -> models.ConnectedAccount: return models.ConnectedAccount(username=self.email, web_url="") -# class GoogleDriveConnectedAccount(BaseModel): -# """OAuth2 connected account model for google drive.""" - -# name: str -# email: str - -# def to_connected_account(self) -> models.ConnectedAccount: -# """Returns the corresponding ConnectedAccount object.""" -# return models.ConnectedAccount(username=self.name, web_url=f"mailto:{self.email}") - - # class OneDriveConnectedAccount(BaseModel): # """OAuth2 connected account model for onedrive.""" diff --git a/components/renku_data_services/connected_services/models.py 
b/components/renku_data_services/connected_services/models.py index aedaacd38..72f0f6247 100644 --- a/components/renku_data_services/connected_services/models.py +++ b/components/renku_data_services/connected_services/models.py @@ -18,8 +18,6 @@ class ProviderKind(StrEnum): github = "github" gitlab = "gitlab" google = "google" - # drive = "drive" - # onedrive = "onedrive" class ConnectionStatus(StrEnum): diff --git a/components/renku_data_services/connected_services/provider_adapters.py b/components/renku_data_services/connected_services/provider_adapters.py index 6ef408120..43b6ce656 100644 --- a/components/renku_data_services/connected_services/provider_adapters.py +++ b/components/renku_data_services/connected_services/provider_adapters.py @@ -155,44 +155,6 @@ def api_validate_account_response(self, response: Response) -> models.ConnectedA return external_models.GoogleConnectedAccount.model_validate(response.json()).to_connected_account() -# class GoogleDriveAdapter(ProviderAdapter): -# """Adapter for Google Drive OAuth2 clients.""" - -# user_info_endpoint = "userinfo" - -# @property -# def authorization_url(self) -> str: -# """The authorization URL for the OAuth2 protocol.""" -# return "https://accounts.google.com/o/oauth2/auth" - -# @property -# def authorization_url_extra_params(self) -> dict[str, str]: -# """Extra parameters to add to the auth url.""" -# return {"access_type": "offline"} - -# @property -# def token_endpoint_url(self) -> str: -# """The token endpoint URL for the OAuth2 protocol.""" -# return "https://oauth2.googleapis.com/token" - -# @property -# def api_url(self) -> str: -# """The URL used for API calls on the Resource Server.""" -# return "https://www.googleapis.com/oauth2/v2/" - -# @property -# def api_common_headers(self) -> dict[str, str] | None: -# """The HTTP headers used for API calls on the Resource Server.""" -# return { -# "Accept": "application/json", -# "Content-Type": "application/json", -# } - -# def 
api_validate_account_response(self, response: Response) -> models.ConnectedAccount: -# """Validates and returns the connected account response from the Resource Server.""" -# return external_models.GoogleDriveConnectedAccount.model_validate(response.json()).to_connected_account() - - # class OneDriveAdapter(ProviderAdapter): # """Adapter for One Drive OAuth2 clients.""" @@ -348,15 +310,12 @@ def __get_httpx_client(cls) -> Client: return cls._httpx_client -# TODO: test that this map is exhaustive _adapter_map: dict[models.ProviderKind, type[ProviderAdapter]] = { models.ProviderKind.dropbox: DropboxAdapter, models.ProviderKind.generic_oidc: GenericOidcAdapter, models.ProviderKind.github: GitHubAdapter, models.ProviderKind.gitlab: GitLabAdapter, models.ProviderKind.google: GoogleAdapter, - # models.ProviderKind.drive: GoogleDriveAdapter, - # models.ProviderKind.onedrive: OneDriveAdapter, } diff --git a/components/renku_data_services/notebooks/core_sessions.py b/components/renku_data_services/notebooks/core_sessions.py index 4a010fcde..f0e16e8fc 100644 --- a/components/renku_data_services/notebooks/core_sessions.py +++ b/components/renku_data_services/notebooks/core_sessions.py @@ -360,18 +360,8 @@ async def patch_data_sources( nb_config: NotebooksConfig, data_connectors_stream: AsyncIterator[DataConnectorWithSecrets], data_source_repo: DataSourceRepository, - # request: Request, - # nb_config: NotebooksConfig, - # user: AnonymousAPIUser | AuthenticatedAPIUser, - # server_name: str, - # data_connectors_stream: AsyncIterator[DataConnectorWithSecrets], - # work_dir: PurePosixPath, - # data_connectors_overrides: list[SessionDataConnectorOverride], - # user_repo: UserRepo, - # data_source_repo: DataSourceRepository, ) -> SessionExtraResources: """Handle updating data sources definitions when resuming a session.""" - # Experimental here: secrets: list[ExtraSecret] = [] server_name = session.metadata.name secret_prefix = f"{server_name}-ds-" @@ -434,11 +424,6 @@ async def 
patch_data_sources( data=secret_data, ) new_secret.data["configData"] = base64.b64encode(new_config_data.encode("utf-8")).decode("utf-8") - - # new_secret = k8s_secret.to_v1_secret() - # new_secret.data = new_secret.data or dict() - # new_secret.data["configData"] = base64.b64encode(new_config_data.encode("utf-8")).decode("utf-8") - # new_secret.string_data = None logger.info(f"V1Secret = {new_secret}") secrets.append(ExtraSecret(new_secret)) diff --git a/components/renku_data_services/notebooks/data_sources.py b/components/renku_data_services/notebooks/data_sources.py index 42380be9a..52fe20b59 100644 --- a/components/renku_data_services/notebooks/data_sources.py +++ b/components/renku_data_services/notebooks/data_sources.py @@ -183,14 +183,7 @@ async def handle_patching_configuration( token_config["refresh_token"] = renku_tokens.encode() if token_set.expires_at_iso: token_config["expiry"] = token_set.expires_at_iso - # configuration["token"] = json.dumps(token_config) - # configuration["token_url"] = request.url_for( - # "oauth2_connections.post_token_endpoint", connection_id=connection.id - # ) - # return configuration - - # for k, v in configuration.items(): - # parser.set(name, k, _stringify(v)) + parser.set(main_section, "token", json.dumps(token_config)) parser.set( main_section, From 1d33d3e3c06c57d5b736e1c29e92c4e7f6715f4e Mon Sep 17 00:00:00 2001 From: Flora Thiebaut Date: Tue, 27 Jan 2026 09:43:37 +0000 Subject: [PATCH 47/54] refactor some code --- .../notebooks/core_sessions.py | 16 +-- .../notebooks/data_sources.py | 127 +++++++++--------- 2 files changed, 67 insertions(+), 76 deletions(-) diff --git a/components/renku_data_services/notebooks/core_sessions.py b/components/renku_data_services/notebooks/core_sessions.py index f0e16e8fc..f63127651 100644 --- a/components/renku_data_services/notebooks/core_sessions.py +++ b/components/renku_data_services/notebooks/core_sessions.py @@ -361,7 +361,11 @@ async def patch_data_sources( data_connectors_stream: 
AsyncIterator[DataConnectorWithSecrets], data_source_repo: DataSourceRepository, ) -> SessionExtraResources: - """Handle updating data sources definitions when resuming a session.""" + """Handle updating data sources definitions when resuming a session. + + This touches data connectors which use OAuth2 tokens for access. + Other data connectors are left untouched. + """ secrets: list[ExtraSecret] = [] server_name = session.metadata.name secret_prefix = f"{server_name}-ds-" @@ -376,7 +380,6 @@ async def patch_data_sources( mounted_dcs.append((ULID.from_str(ulid.upper()), name)) except ValueError: logger.warning(f"Could not parse {ulid.upper()} as a ULID.") - logger.info(f"Found mounted data connectors: {[str(u) for u, _ in mounted_dcs]}.") async for dc in data_connectors_stream: if not data_source_repo.is_patching_enabled(dc.data_connector): continue @@ -385,7 +388,7 @@ async def patch_data_sources( if mounted_dc is None: continue _, secret_name = mounted_dc - logger.info(f"Patching DC secret {secret_name}.") + logger.debug(f"Patching DC secret {secret_name} for data connector {str(dc_id)}.") k8s_secret = await nb_config.k8s_v2_client.get_secret( K8sSecret.from_v1_secret(V1Secret(metadata=V1ObjectMeta(name=secret_name)), cluster) ) @@ -394,25 +397,22 @@ async def patch_data_sources( continue v1_secret = k8s_secret.to_v1_secret() secret_data: dict[str, str] = v1_secret.data - logger.info(f"Got secret: secret_data={secret_data}") config_data_raw = secret_data.get("configData") if not config_data_raw: logger.warning(f"Field 'configData' not found for data connector {str(dc_id)}, skipping!") continue - logger.info(f"Check type config_data_raw = {type(config_data_raw)}") existing_config_data: str = "" try: existing_config_data = base64.b64decode(config_data_raw).decode("utf-8") except Exception as err: logger.warning(f"Error decoding 'configData' for data connector {str(dc_id)}, skipping! 
{err}") continue - logger.info(f"Got existing_config_data = {existing_config_data}") new_config_data = await data_source_repo.handle_patching_configuration( request=request, user=user, data_connector=dc.data_connector, config_data=existing_config_data ) if not new_config_data: continue - + # We re-create the secret for the data connector, with the updated configuration. metadata = v1_secret.metadata new_secret = V1Secret( api_version="v1", @@ -424,10 +424,8 @@ async def patch_data_sources( data=secret_data, ) new_secret.data["configData"] = base64.b64encode(new_config_data.encode("utf-8")).decode("utf-8") - logger.info(f"V1Secret = {new_secret}") secrets.append(ExtraSecret(new_secret)) - logger.info(f"Patching secrets: {secrets}") return SessionExtraResources(secrets=secrets) diff --git a/components/renku_data_services/notebooks/data_sources.py b/components/renku_data_services/notebooks/data_sources.py index 52fe20b59..d7893ae7a 100644 --- a/components/renku_data_services/notebooks/data_sources.py +++ b/components/renku_data_services/notebooks/data_sources.py @@ -2,6 +2,7 @@ import json from configparser import ConfigParser +from dataclasses import dataclass from io import StringIO from typing import Any @@ -21,6 +22,14 @@ logger = logging.getLogger(__name__) +@dataclass(frozen=True, eq=True, kw_only=True) +class _OAuth2ConfigPartial: + """Partial configuration; contains OAuth2 fields.""" + + token: str + token_url: str + + class DataSourceRepository: """Repository for checking session images with rich responses.""" @@ -37,67 +46,30 @@ def __init__( async def handle_configuration( self, request: Request, user: APIUser, data_connector: DataConnector | GlobalDataConnector ) -> dict[str, Any] | None: - """Ajusts the configuration of the input data connector if it requires an OAuth2 connection.""" + """Ajusts the configuration of the input data connector if it requires an OAuth2 connection. 
+ + Returns either an rclone configuration or None if the data connector should be skipped. + """ # NOTE: do not handle global data connectors if data_connector.namespace is None: return data_connector.storage.configuration - provider_kind: ProviderKind | None = None - match data_connector.storage.configuration["type"]: - case "drive": - provider_kind = ProviderKind.google - case "dropbox": - provider_kind = ProviderKind.dropbox - case _: - pass - + provider_kind = self._get_oauth2_provider_kind(data_connector=data_connector) if provider_kind is None: return data_connector.storage.configuration - configuration = data_connector.storage.configuration - provider = await self.connected_services_repo.get_provider_for_kind(user=user, provider_kind=provider_kind) - if provider is None: - logger.info( - f"Skipping data connector {str(data_connector.id)} of type " - f"{data_connector.storage.configuration["type"]} " - f"because no provider of kind {provider_kind.value} was found." - ) - return None - connection = provider.connected_user.connection if provider.connected_user else None - if connection is None: - logger.info( - f"Skipping data connector {str(data_connector.id)} of type " - f"{data_connector.storage.configuration["type"]} " - f"because no active connection was found; user needs to connect with {provider.provider.id}." - ) - return None - token_set = await self.connected_services_repo.get_token_set(user=user, connection_id=connection.id) - if not token_set or not token_set.access_token: - logger.info( - f"Skipping data connector {str(data_connector.id)} of type " - f"{data_connector.storage.configuration["type"]} " - f"because the connection is not active; user needs to re-connect with {provider.provider.id}." 
- ) + oauth2_part = await self._get_oauth2_configuration_part( + request=request, user=user, data_connector=data_connector + ) + if oauth2_part is None: return None + logger.info(f"Adjusting rclone configuration for data connector {str(data_connector.id)}.") + configuration = data_connector.storage.configuration if provider_kind == ProviderKind.google: configuration["scope"] = configuration.get("scope") or "drive" - token_config = { - "access_token": token_set.access_token, - "token_type": "Bearer", - } - if user.access_token and user.refresh_token: - renku_tokens = RenkuTokens( - access_token=user.access_token, - refresh_token=user.refresh_token, - ) - token_config["refresh_token"] = renku_tokens.encode() - if token_set.expires_at_iso: - token_config["expiry"] = token_set.expires_at_iso - configuration["token"] = json.dumps(token_config) - configuration["token_url"] = request.url_for( - "oauth2_connections.post_token_endpoint", connection_id=connection.id - ) + configuration["token"] = oauth2_part.token + configuration["token_url"] = oauth2_part.token_url return configuration def is_patching_enabled(self, data_connector: DataConnector | GlobalDataConnector) -> bool: @@ -116,7 +88,16 @@ def is_patching_enabled(self, data_connector: DataConnector | GlobalDataConnecto async def handle_patching_configuration( self, request: Request, user: APIUser, data_connector: DataConnector | GlobalDataConnector, config_data: str ) -> str | None: - """Handles patching...""" + """Handles patching the configuration of a data connector when a session is resumed. + + This method updates the "token" and the "token_url" fields and no other part of the configuration. + + Returns either a new configuration (INI form) or None if the configuration should be left untouched. 
+ """ + # NOTE: do not handle global data connectors + if data_connector.namespace is None: + return None + parser = ConfigParser(interpolation=None) try: parser.read_string(config_data) @@ -127,9 +108,7 @@ async def handle_patching_configuration( if not main_section: logger.error("Failed to parse existing data connector configuration: no main section.") return None - logger.info(f"Got main section: {main_section}.") items = parser.items(main_section) - logger.info(f"Got items: {items}.") configuration = dict(items) if configuration.get("type") != data_connector.storage.configuration.get("type"): logger.warning( @@ -137,15 +116,37 @@ async def handle_patching_configuration( ) return None - provider_kind: ProviderKind | None = None + oauth2_part = await self._get_oauth2_configuration_part( + request=request, user=user, data_connector=data_connector + ) + if oauth2_part is None: + return None + + logger.info(f"Patching rclone configuration for data connector {str(data_connector.id)}.") + parser.set(main_section, "token", oauth2_part.token) + parser.set(main_section, "token_url", oauth2_part.token_url) + stringio = StringIO() + parser.write(stringio) + return stringio.getvalue() + + def _get_oauth2_provider_kind(self, data_connector: DataConnector | GlobalDataConnector) -> ProviderKind | None: + """Returns the provider kind for data connectors which require an OAuth2 configuration.""" match data_connector.storage.configuration["type"]: case "drive": - provider_kind = ProviderKind.google + return ProviderKind.google case "dropbox": - provider_kind = ProviderKind.dropbox + return ProviderKind.dropbox case _: return None + async def _get_oauth2_configuration_part( + self, request: Request, user: APIUser, data_connector: DataConnector + ) -> _OAuth2ConfigPartial | None: + """Get the OAuth2 configuration fields.""" + provider_kind = self._get_oauth2_provider_kind(data_connector=data_connector) + if provider_kind is None: + return None + provider = await 
self.connected_services_repo.get_provider_for_kind(user=user, provider_kind=provider_kind) if provider is None: logger.info( @@ -170,7 +171,6 @@ async def handle_patching_configuration( f"because the connection is not active; user needs to re-connect with {provider.provider.id}." ) return None - logger.info(f"Adjusting rclone configuration for data connector {str(data_connector.id)}.") token_config = { "access_token": token_set.access_token, "token_type": "Bearer", @@ -183,13 +183,6 @@ async def handle_patching_configuration( token_config["refresh_token"] = renku_tokens.encode() if token_set.expires_at_iso: token_config["expiry"] = token_set.expires_at_iso - - parser.set(main_section, "token", json.dumps(token_config)) - parser.set( - main_section, - "token_url", - request.url_for("oauth2_connections.post_token_endpoint", connection_id=connection.id), - ) - stringio = StringIO() - parser.write(stringio) - return stringio.getvalue() + token = json.dumps(token_config) + token_url = request.url_for("oauth2_connections.post_token_endpoint", connection_id=connection.id) + return _OAuth2ConfigPartial(token=token, token_url=token_url) From 6f0722039c0c69ba8c9b9a206604462497c0ee02 Mon Sep 17 00:00:00 2001 From: Flora Thiebaut Date: Tue, 27 Jan 2026 09:57:33 +0000 Subject: [PATCH 48/54] simplify is_patching_enabled() --- components/renku_data_services/notebooks/data_sources.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/components/renku_data_services/notebooks/data_sources.py b/components/renku_data_services/notebooks/data_sources.py index d7893ae7a..547edf545 100644 --- a/components/renku_data_services/notebooks/data_sources.py +++ b/components/renku_data_services/notebooks/data_sources.py @@ -77,13 +77,8 @@ def is_patching_enabled(self, data_connector: DataConnector | GlobalDataConnecto # NOTE: do not handle global data connectors if data_connector.namespace is None: return False - match data_connector.storage.configuration["type"]: - case 
"drive": - return True - case "dropbox": - return True - case _: - return False + provider_kind = self._get_oauth2_provider_kind(data_connector=data_connector) + return provider_kind is not None async def handle_patching_configuration( self, request: Request, user: APIUser, data_connector: DataConnector | GlobalDataConnector, config_data: str From 2b32ef3b0b51b4a6c4bf54403953cac464aa3d67 Mon Sep 17 00:00:00 2001 From: Flora Thiebaut Date: Tue, 27 Jan 2026 10:31:42 +0000 Subject: [PATCH 49/54] fix urlPath --- components/renku_data_services/notebooks/core_sessions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/components/renku_data_services/notebooks/core_sessions.py b/components/renku_data_services/notebooks/core_sessions.py index 78ef198a5..3409421ba 100644 --- a/components/renku_data_services/notebooks/core_sessions.py +++ b/components/renku_data_services/notebooks/core_sessions.py @@ -947,7 +947,7 @@ async def start_session( storage_class = cluster_settings.get_storage_class() service_account_name = cluster_settings.service_account_name - ui_path = f"{ingress_config.url}/{environment.default_url.lstrip('/')}" + ui_path = f"{ingress_config.url_path}/{environment.default_url.lstrip('/')}" # Annotations annotations: dict[str, str] = { From 86559530e9e6626c38e29453322dc55e7b22175d Mon Sep 17 00:00:00 2001 From: Flora Thiebaut Date: Tue, 27 Jan 2026 12:40:18 +0000 Subject: [PATCH 50/54] cleanup post_token_endpoint() --- .../connected_services/blueprints.py | 93 ++++++++++--------- 1 file changed, 48 insertions(+), 45 deletions(-) diff --git a/components/renku_data_services/connected_services/blueprints.py b/components/renku_data_services/connected_services/blueprints.py index d6fe14d2f..74e244306 100644 --- a/components/renku_data_services/connected_services/blueprints.py +++ b/components/renku_data_services/connected_services/blueprints.py @@ -7,6 +7,7 @@ from urllib.parse import unquote, urlparse, urlunparse import httpx +import jwt from sanic 
import HTTPResponse, Request, empty, json, redirect from sanic.response import JSONResponse from sanic_ext import validate @@ -252,22 +253,23 @@ async def _get_installations( return "/oauth2/connections//installations", ["GET"], _get_installations def post_token_endpoint(self) -> BlueprintFactoryResponse: - """OAuth 2.0 token endpoint to support applications running in sessions.""" + """OAuth 2.0 token endpoint to support applications running in sessions. + + Details: + 1. Decode the refresh_token value into an instance of RenkuTokens + 2. Validate the access_token + -> if the access_token is invalid (expired), use the renku refresh_token + to get a fresh set of tokens + 3. Send back the refreshed OAuth 2.0 access token and the encoded value + of the current RenkuTokens + """ @validate(form=apispec_extras.PostTokenRequest) async def _post_token_endpoint( request: Request, body: apispec_extras.PostTokenRequest, connection_id: ULID ) -> JSONResponse: - logger.warning(f"post_token_endpoint: connection_id = {str(connection_id)}") - logger.warning(f"post_token_endpoint: request headers = {list(request.headers.keys())}") - logger.warning(f"post_token_endpoint: request content-type = {request.headers.get("content-type")}") - logger.warning(f"post_token_endpoint: request body = {request.body!r}") - - logger.warning(f"post_token_endpoint: request body grant_type = {body.grant_type.value}") - logger.warning(f"post_token_endpoint: request body refresh_token = {body.refresh_token}") - renku_tokens = apispec_extras.RenkuTokens.decode(body.refresh_token) - logger.warning(f"post_token_endpoint: renku_tokens = {renku_tokens}") + # NOTE: inject the access token in the headers so that we can use `self.authenticator` request.headers[self.authenticator.token_field] = renku_tokens.access_token user: base_models.APIUser | None = None @@ -284,8 +286,6 @@ async def _post_token_endpoint( logger.error(f"Got authenticate error: {err.__class__}.") raise - 
logger.warning(f"post_token_endpoint: user = {user}") - # Try to refresh the Renku access token if user is None and renku_tokens.refresh_token: renku_base_url = "https://" + self.nb_config.sessions.ingress.host @@ -303,8 +303,6 @@ async def _post_token_endpoint( "refresh_token": renku_tokens.refresh_token, } response = await http.post(renku_auth_token_uri, auth=auth, data=payload, follow_redirects=True) - logger.warning(f"Get refresh response from Keycloak: {response}") - logger.warning(f"Get refresh response from Keycloak: {response.json()}") if 200 <= response.status_code < 300: try: parsed_response = apispec_extras.PostTokenResponse.model_validate_json(response.content) @@ -327,43 +325,48 @@ async def _post_token_endpoint( logger.error(f"Got authenticate error: {err.__class__}.") raise else: - # Handle bad response: Get refresh response from Keycloak: - # Get refresh response from Keycloak: - # {'error': 'invalid_grant', 'error_description': 'Invalid refresh token'} logger.error( f"Got error from refreshing Renku tokens: HTTP {response.status_code}; {response.json()}." ) raise errors.UnauthorizedError() - logger.warning(f"post_token_endpoint: user = {user}") - - if user is not None and user.is_authenticated: - client = await self.oauth_client_factory.for_user_connection_raise(user, connection_id) - oauth_token = await client.get_token() - access_token = oauth_token.access_token - if access_token is None: - raise errors.ProgrammingError(message="Unexpected error: access token not present.") - result: dict[str, str | int] = { - "access_token": access_token, - "token_type": str(oauth_token.get("token_type")) or "Bearer", - "refresh_token": renku_tokens.encode(), - } - if oauth_token.get("scope"): - result["scope"] = oauth_token["scope"] - if oauth_token.expires_at: - # TODO: handle if parsed_response.refresh_expires_in < expires_in - # This should be rare, but we should use the lowest value to be safe. 
- exp = datetime.fromtimestamp(oauth_token.expires_at, UTC) + + if user is None or not user.is_authenticated: + raise errors.UnauthorizedError() + + client = await self.oauth_client_factory.for_user_connection_raise(user, connection_id) + oauth_token = await client.get_token() + access_token = oauth_token.access_token + if access_token is None: + raise errors.ProgrammingError(message="Unexpected error: access token not present.") + result: dict[str, str | int] = { + "access_token": access_token, + "token_type": str(oauth_token.get("token_type")) or "Bearer", + "refresh_token": renku_tokens.encode(), + } + if oauth_token.get("scope"): + result["scope"] = oauth_token["scope"] + # NOTE: Set "expires_in" according to whichever of the OAuth 2.0 access token or the Renku refresh + # token expires first. + try: + refresh_decoded: dict[str, Any] = jwt.decode( + renku_tokens.refresh_token, options={"verify_signature": False} + ) + refresh_exp: int | None = refresh_decoded.get("exp") + if refresh_exp is not None and refresh_exp > 0: + exp = datetime.fromtimestamp(refresh_exp, UTC) expires_in = exp - datetime.now(UTC) result["expires_in"] = math.ceil(expires_in.total_seconds()) - return validated_json(apispec_extras.PostTokenResponse, result) - - # TODO: - # 1. Decode the refresh_token value -> RenkuTokens - # 2. Validate the access_token -> if valid, send back the new OAuth 2.0 access token - # and the new encoded refresh_token - # 3. 
If access_token is expired, use the renku refresh_token -> if new tokens are valid, - # send back the new OAuth 2.0 access token and the new encoded refresh_token + except Exception as err: + logger.error(f"Could not parse Renku refresh token; cannot determine its expiration: {err.__class__}.") + if oauth_token.expires_at: + exp = datetime.fromtimestamp(oauth_token.expires_at, UTC) + expires_in = exp - datetime.now(UTC) + result_expires_in = result.get("expires_in") + if isinstance(result_expires_in, int) and result_expires_in > 0: + result["expires_in"] = min(result_expires_in, math.ceil(expires_in.total_seconds())) + else: + result["expires_in"] = math.ceil(expires_in.total_seconds()) - raise errors.UnauthorizedError() + return validated_json(apispec_extras.PostTokenResponse, result) return "/oauth2/connections//token_endpoint", ["POST"], _post_token_endpoint From b57a67a96168e1a707f5650e85851e14734020a1 Mon Sep 17 00:00:00 2001 From: Flora Thiebaut Date: Thu, 29 Jan 2026 13:32:37 +0000 Subject: [PATCH 51/54] feat: handle testing data connector with OAuth 2.0 --- bases/renku_data_services/data_api/app.py | 7 +++- .../notebooks/data_sources.py | 41 ++++++++++++++++++- .../renku_data_services/storage/blueprints.py | 14 ++++++- .../renku_data_services/storage/rclone.py | 16 +++++++- 4 files changed, 73 insertions(+), 5 deletions(-) diff --git a/bases/renku_data_services/data_api/app.py b/bases/renku_data_services/data_api/app.py index 8d615a5b4..451053a25 100644 --- a/bases/renku_data_services/data_api/app.py +++ b/bases/renku_data_services/data_api/app.py @@ -115,7 +115,12 @@ def register_all_handlers(app: Sanic, dm: DependencyManager) -> Sanic: storage_repo=dm.storage_repo, authenticator=dm.gitlab_authenticator, ) - storage_schema = StorageSchemaBP(name="storage_schema", url_prefix=url_prefix) + storage_schema = StorageSchemaBP( + name="storage_schema", + url_prefix=url_prefix, + data_source_repo=dm.data_source_repo, + authenticator=dm.authenticator, + ) 
user_preferences = UserPreferencesBP( name="user_preferences", url_prefix=url_prefix, diff --git a/components/renku_data_services/notebooks/data_sources.py b/components/renku_data_services/notebooks/data_sources.py index 547edf545..3ffb043fd 100644 --- a/components/renku_data_services/notebooks/data_sources.py +++ b/components/renku_data_services/notebooks/data_sources.py @@ -4,7 +4,7 @@ from configparser import ConfigParser from dataclasses import dataclass from io import StringIO -from typing import Any +from typing import TYPE_CHECKING, Any from sanic import Request @@ -19,6 +19,9 @@ from renku_data_services.data_connectors.models import DataConnector, GlobalDataConnector from renku_data_services.notebooks.config import NotebooksConfig +if TYPE_CHECKING: + from renku_data_services.storage.models import RCloneConfig + logger = logging.getLogger(__name__) @@ -124,6 +127,42 @@ async def handle_patching_configuration( parser.write(stringio) return stringio.getvalue() + async def handle_configuration_for_test( + self, user: APIUser, configuration: RCloneConfig | dict[str, Any] + ) -> RCloneConfig | dict[str, Any] | None: + """Ajusts the input configuration if it requires an OAuth2 connection. + + Returns either an rclone configuration or None if the data connector should be skipped. 
+ """ + provider_kind: ProviderKind | None = None + match configuration.get("type"): + case "drive": + provider_kind = ProviderKind.google + case "dropbox": + provider_kind = ProviderKind.dropbox + if provider_kind is None: + return configuration + + provider = await self.connected_services_repo.get_provider_for_kind(user=user, provider_kind=provider_kind) + if provider is None: + return None + connection = provider.connected_user.connection if provider.connected_user else None + if connection is None: + return None + token_set = await self.connected_services_repo.get_token_set(user=user, connection_id=connection.id) + if not token_set or not token_set.access_token: + return None + token_config = { + "access_token": token_set.access_token, + "token_type": "Bearer", + } + if provider_kind == ProviderKind.google: + configuration["scope"] = configuration.get("scope") or "drive" + if token_set.expires_at_iso: + token_config["expiry"] = token_set.expires_at_iso + configuration["token"] = json.dumps(token_config) + return configuration + def _get_oauth2_provider_kind(self, data_connector: DataConnector | GlobalDataConnector) -> ProviderKind | None: """Returns the provider kind for data connectors which require an OAuth2 configuration.""" match data_connector.storage.configuration["type"]: diff --git a/components/renku_data_services/storage/blueprints.py b/components/renku_data_services/storage/blueprints.py index 270adb602..d1539908b 100644 --- a/components/renku_data_services/storage/blueprints.py +++ b/components/renku_data_services/storage/blueprints.py @@ -15,6 +15,7 @@ from renku_data_services.base_api.blueprint import BlueprintFactoryResponse, CustomBlueprint from renku_data_services.base_api.misc import validate_query from renku_data_services.base_models.validation import validated_json +from renku_data_services.notebooks.data_sources import DataSourceRepository from renku_data_services.storage import apispec, models from renku_data_services.storage.db import 
StorageRepository from renku_data_services.storage.rclone import RCloneValidator @@ -193,6 +194,9 @@ async def _delete(request: Request, user: base_models.APIUser, storage_id: ULID) class StorageSchemaBP(CustomBlueprint): """Handler for getting RClone storage schema.""" + data_source_repo: DataSourceRepository + authenticator: base_models.Authenticator + def get(self) -> BlueprintFactoryResponse: """Get cloud storage for a repository.""" @@ -204,12 +208,18 @@ async def _get(_: Request, validator: RCloneValidator) -> JSONResponse: def test_connection(self) -> BlueprintFactoryResponse: """Validate an RClone config.""" + @authenticate(self.authenticator) @validate(json=apispec.StorageSchemaTestConnectionPostRequest) async def _test_connection( - request: Request, validator: RCloneValidator, body: apispec.StorageSchemaTestConnectionPostRequest + request: Request, + user: base_models.APIUser, + validator: RCloneValidator, + body: apispec.StorageSchemaTestConnectionPostRequest, ) -> HTTPResponse: validator.validate(body.configuration, keep_sensitive=True) - result = await validator.test_connection(body.configuration, body.source_path) + result = await validator.test_connection( + body.configuration, body.source_path, user=user, data_source_repo=self.data_source_repo + ) if not result.success: raise errors.ValidationError(message=result.error) return empty(204) diff --git a/components/renku_data_services/storage/rclone.py b/components/renku_data_services/storage/rclone.py index d6c6f2876..3a4086354 100644 --- a/components/renku_data_services/storage/rclone.py +++ b/components/renku_data_services/storage/rclone.py @@ -23,6 +23,8 @@ logger = logging.getLogger(__name__) if TYPE_CHECKING: + from renku_data_services import base_models + from renku_data_services.notebooks.data_sources import DataSourceRepository from renku_data_services.storage.models import RCloneConfig @@ -88,7 +90,11 @@ def get_real_configuration(self, configuration: Union[RCloneConfig, dict[str, An return 
real_config async def test_connection( - self, configuration: Union[RCloneConfig, dict[str, Any]], source_path: str + self, + configuration: Union[RCloneConfig, dict[str, Any]], + source_path: str, + user: base_models.APIUser | None = None, + data_source_repo: DataSourceRepository | None = None, ) -> ConnectionResult: """Tests connecting with an RClone config.""" try: @@ -101,6 +107,14 @@ async def test_connection( transformed_config = self.inject_default_values(self.transform_polybox_switchdriver_config(obscured_config)) transformed_config = self.transform_envidat_config(transformed_config) + # Handle testing with Renku integrations + if user is not None and data_source_repo is not None: + with_oauth2_config = await data_source_repo.handle_configuration_for_test( + user=user, configuration=transformed_config + ) + if with_oauth2_config is not None: + transformed_config = with_oauth2_config + with tempfile.NamedTemporaryFile(mode="w+", delete=False, encoding="utf-8") as f: config = "\n".join(f"{k}={v}" for k, v in transformed_config.items()) f.write(f"[temp]\n{config}") From 9119eef3cca7f5f9379825094a03fe3018097594 Mon Sep 17 00:00:00 2001 From: Flora Thiebaut Date: Thu, 29 Jan 2026 14:06:15 +0000 Subject: [PATCH 52/54] fix type annotation --- components/renku_data_services/notebooks/data_sources.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/components/renku_data_services/notebooks/data_sources.py b/components/renku_data_services/notebooks/data_sources.py index 3ffb043fd..347cd580b 100644 --- a/components/renku_data_services/notebooks/data_sources.py +++ b/components/renku_data_services/notebooks/data_sources.py @@ -128,8 +128,8 @@ async def handle_patching_configuration( return stringio.getvalue() async def handle_configuration_for_test( - self, user: APIUser, configuration: RCloneConfig | dict[str, Any] - ) -> RCloneConfig | dict[str, Any] | None: + self, user: APIUser, configuration: "RCloneConfig | dict[str, Any]" + ) -> "RCloneConfig 
| dict[str, Any] | None": """Ajusts the input configuration if it requires an OAuth2 connection. Returns either an rclone configuration or None if the data connector should be skipped. From f41b22bd0dc88984746d78b7ce02c50dc6c3db48 Mon Sep 17 00:00:00 2001 From: Flora Thiebaut Date: Mon, 2 Feb 2026 07:28:48 +0000 Subject: [PATCH 53/54] fix: add prompt=consent for Google --- .../renku_data_services/connected_services/provider_adapters.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/components/renku_data_services/connected_services/provider_adapters.py b/components/renku_data_services/connected_services/provider_adapters.py index 43b6ce656..6de3db9c4 100644 --- a/components/renku_data_services/connected_services/provider_adapters.py +++ b/components/renku_data_services/connected_services/provider_adapters.py @@ -130,7 +130,7 @@ def authorization_url(self) -> str: @property def authorization_url_extra_params(self) -> dict[str, str]: """Extra parameters to add to the auth url.""" - return {"access_type": "offline"} + return {"access_type": "offline", "prompt": "consent"} @property def token_endpoint_url(self) -> str: From 77d05a4094fc1d869c8a262dad3d9315610597c8 Mon Sep 17 00:00:00 2001 From: Flora Thiebaut Date: Mon, 16 Feb 2026 13:01:59 +0000 Subject: [PATCH 54/54] removeme: merge migrations heads --- .../versions/0bfc18c91b05_removeme.py | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 components/renku_data_services/migrations/versions/0bfc18c91b05_removeme.py diff --git a/components/renku_data_services/migrations/versions/0bfc18c91b05_removeme.py b/components/renku_data_services/migrations/versions/0bfc18c91b05_removeme.py new file mode 100644 index 000000000..24bf432dd --- /dev/null +++ b/components/renku_data_services/migrations/versions/0bfc18c91b05_removeme.py @@ -0,0 +1,21 @@ +"""removeme: merge migrations heads + +Revision ID: 0bfc18c91b05 +Revises: 287879848fb3, fddfe7960a8b +Create Date: 2026-02-16 
12:37:55.425957 + +""" + +# revision identifiers, used by Alembic. +revision = "0bfc18c91b05" +down_revision = ("287879848fb3", "fddfe7960a8b") +branch_labels = None +depends_on = None + + +def upgrade() -> None: + pass + + +def downgrade() -> None: + pass