diff --git a/deployments/charts/router/templates/service-account.yaml b/deployments/charts/router/templates/service-account.yaml index 4fe0a3710..6815d6c5c 100644 --- a/deployments/charts/router/templates/service-account.yaml +++ b/deployments/charts/router/templates/service-account.yaml @@ -13,7 +13,13 @@ # limitations under the License. # # SPDX-License-Identifier: Apache-2.0 +{{- if .Values.serviceAccount.create }} apiVersion: v1 kind: ServiceAccount metadata: - name: router + name: {{ .Values.serviceAccount.name | default "router" }} + {{- with .Values.serviceAccount.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +{{- end }} diff --git a/deployments/charts/router/values.yaml b/deployments/charts/router/values.yaml index 5b272d661..221a22c16 100644 --- a/deployments/charts/router/values.yaml +++ b/deployments/charts/router/values.yaml @@ -51,6 +51,23 @@ global: ## k8sLogLevel: WARNING +## Service account configuration for router service +## +serviceAccount: + ## Create the chart-managed ServiceAccount. Set to false to re-use + ## an existing ServiceAccount that already has workload identity bindings. + ## + create: true + + ## ServiceAccount name override. When empty, uses 'router'. + ## + name: "" + + ## Extra ServiceAccount annotations such as + ## `azure.workload.identity/client-id` for AKS workload identity. + ## + annotations: {} + ## Configuration for individual Osmo services ## services: diff --git a/deployments/charts/service/templates/agent-service.yaml b/deployments/charts/service/templates/agent-service.yaml index 605d9bf4b..1ccc02d36 100644 --- a/deployments/charts/service/templates/agent-service.yaml +++ b/deployments/charts/service/templates/agent-service.yaml @@ -31,6 +31,9 @@ spec: metadata: labels: app: {{ .Values.services.agent.serviceName }} + {{- with .Values.services.agent.extraPodLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} annotations: {{- include "osmo.extra-annotations" .Values.services.agent | nindent 8 }} {{- if .Values.sidecars.otel.enabled }} diff --git a/deployments/charts/service/templates/api-service.yaml b/deployments/charts/service/templates/api-service.yaml index 01f2bdba8..e83b3e623 100644 --- a/deployments/charts/service/templates/api-service.yaml +++ b/deployments/charts/service/templates/api-service.yaml @@ -30,6 +30,9 @@ spec: metadata: labels: app: {{ .Values.services.service.serviceName }} + {{- with .Values.services.service.extraPodLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} annotations: {{- include "osmo.extra-annotations" .Values.services.service | nindent 8 }} {{- if .Values.sidecars.otel.enabled }} diff --git a/deployments/charts/service/templates/delayed-job-monitor.yaml b/deployments/charts/service/templates/delayed-job-monitor.yaml index 04e6112d1..b8eb1f550 100644 --- a/deployments/charts/service/templates/delayed-job-monitor.yaml +++ b/deployments/charts/service/templates/delayed-job-monitor.yaml @@ -28,6 +28,9 @@ spec: metadata: labels: app: {{ .Values.services.delayedJobMonitor.serviceName }} + {{- with .Values.services.delayedJobMonitor.extraPodLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} annotations: {{- include "osmo.extra-annotations" .Values.services.delayedJobMonitor | nindent 8 }} {{- if .Values.sidecars.otel.enabled }} diff --git a/deployments/charts/service/templates/logger-service.yaml b/deployments/charts/service/templates/logger-service.yaml index 108110322..7244fa929 100644 --- a/deployments/charts/service/templates/logger-service.yaml +++ b/deployments/charts/service/templates/logger-service.yaml @@ -31,6 +31,9 @@ spec: metadata: labels: app: {{ .Values.services.logger.serviceName }} + {{- with .Values.services.logger.extraPodLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} annotations: {{- include "osmo.extra-annotations" .Values.services.logger | nindent 8 }} {{- if .Values.sidecars.otel.enabled }} diff --git a/deployments/charts/service/templates/service-account.yaml b/deployments/charts/service/templates/service-account.yaml index b2192b2c8..b61a8307f 100644 --- a/deployments/charts/service/templates/service-account.yaml +++ b/deployments/charts/service/templates/service-account.yaml @@ -13,11 +13,18 @@ # limitations under the License. # # SPDX-License-Identifier: Apache-2.0 +{{- if .Values.serviceAccount.create }} apiVersion: v1 kind: ServiceAccount metadata: - name: {{ .Values.global.serviceAccountName }} - {{- if and .Values.sidecars.logAgent.cloudwatch .Values.sidecars.logAgent.cloudwatch.enabled }} + name: {{ .Values.serviceAccount.name | default .Values.global.serviceAccountName }} + {{- if or (and .Values.sidecars.logAgent.cloudwatch .Values.sidecars.logAgent.cloudwatch.enabled) .Values.serviceAccount.annotations }} annotations: + {{- if and .Values.sidecars.logAgent.cloudwatch .Values.sidecars.logAgent.cloudwatch.enabled }} eks.amazonaws.com/role-arn: {{ .Values.sidecars.logAgent.cloudwatch.role }} + {{- end }} + {{- with .Values.serviceAccount.annotations }} + {{- toYaml . | nindent 4 }} + {{- end }} {{- end }} +{{- end }} diff --git a/deployments/charts/service/templates/worker.yaml b/deployments/charts/service/templates/worker.yaml index a8558e4c4..ecfe72e76 100644 --- a/deployments/charts/service/templates/worker.yaml +++ b/deployments/charts/service/templates/worker.yaml @@ -27,6 +27,9 @@ spec: metadata: labels: app: {{ .Values.services.worker.serviceName }} + {{- with .Values.services.worker.extraPodLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} annotations: {{- include "osmo.extra-annotations" .Values.services.worker | nindent 8 }} {{- if .Values.sidecars.otel.enabled }} diff --git a/deployments/charts/service/values.yaml b/deployments/charts/service/values.yaml index 52df58386..21228f360 100644 --- a/deployments/charts/service/values.yaml +++ b/deployments/charts/service/values.yaml @@ -55,6 +55,23 @@ global: ## k8sLogLevel: WARNING +## Service account configuration for core Osmo services +## +serviceAccount: + ## Create the chart-managed ServiceAccount. Set to false to re-use + ## an existing ServiceAccount that already has workload identity bindings. + ## + create: true + + ## ServiceAccount name override. When empty, uses global.serviceAccountName. + ## + name: "" + + ## Extra ServiceAccount annotations such as + ## `azure.workload.identity/client-id` for AKS workload identity. + ## + annotations: {} + ## Configuration for individual Osmo services ## services: @@ -335,6 +352,11 @@ services: ## extraPodAnnotations: {} + ## Extra pod labels for the delayed job monitor service pods. + ## Use for workload identity labels like `azure.workload.identity/use`. + ## + extraPodLabels: {} + ## Extra environment variables for the delayed job monitor service container ## extraEnv: [] @@ -439,6 +461,11 @@ services: ## extraPodAnnotations: {} + ## Extra pod labels for the worker service pods. + ## Use for workload identity labels like `azure.workload.identity/use`. + ## + extraPodLabels: {} + ## Extra environment variables for the worker service container ## extraEnv: [] @@ -663,6 +690,11 @@ services: ## extraPodAnnotations: {} + ## Extra pod labels for the API service pods. + ## Use for workload identity labels like `azure.workload.identity/use`. + ## + extraPodLabels: {} + ## Extra environment variables for the API service container ## extraEnv: [] @@ -775,6 +807,11 @@ services: ## extraPodAnnotations: {} + ## Extra pod labels for the logger service pods. + ## Use for workload identity labels like `azure.workload.identity/use`. + ## + extraPodLabels: {} + ## Extra environment variables for the logger service container ## extraEnv: [] @@ -884,6 +921,11 @@ services: ## extraPodAnnotations: {} + ## Extra pod labels for the agent service pods. + ## Use for workload identity labels like `azure.workload.identity/use`. + ## + extraPodLabels: {} + ## Extra environment variables for the agent service container ## extraEnv: [] diff --git a/src/lib/data/storage/backends/azure.py b/src/lib/data/storage/backends/azure.py index a0079a0be..ec5c86004 100644 --- a/src/lib/data/storage/backends/azure.py +++ b/src/lib/data/storage/backends/azure.py @@ -24,9 +24,11 @@ import os import re from typing import Any, Callable, Generator, Iterator, List, Tuple, Type + from typing_extensions import assert_never, override from azure.core import exceptions +from azure.identity import DefaultAzureCredential from azure.storage import blob from .. import credentials @@ -271,7 +273,22 @@ def __next__(self) -> bytes: return chunk -def create_client(data_cred: credentials.DataCredential) -> blob.BlobServiceClient: +def _extract_account_key_from_connection_string(connection_string: str) -> str: + """Extract AccountKey from Azure Storage connection string. + + Connection strings use semicolon-delimited key=value format: + DefaultEndpointsProtocol=https;AccountName=...;AccountKey=...;EndpointSuffix=... + """ + for part in connection_string.split(';'): + if part.startswith('AccountKey='): + return part[len('AccountKey='):] + raise ValueError('AccountKey not found in connection string') + + +def create_client( + data_cred: credentials.DataCredential, + account_url: str | None = None, +) -> blob.BlobServiceClient: """ Creates a new Azure Blob Storage client. """ @@ -281,8 +298,12 @@ def create_client(data_cred: credentials.DataCredential) -> blob.BlobServiceClie conn_str=data_cred.access_key.get_secret_value(), ) case credentials.DefaultDataCredential(): - raise NotImplementedError( - 'Default data credentials are not supported yet') + if account_url is None: + raise ValueError('account_url required for DefaultDataCredential') + return blob.BlobServiceClient( + account_url=account_url, + credential=DefaultAzureCredential(), + ) case _ as unreachable: assert_never(unreachable) @@ -292,15 +313,17 @@ class AzureBlobStorageClient(client.StorageClient): A concrete implementation of the StorageClient interface for Azure Blob Storage. """ _azure_client: blob.BlobServiceClient - + _data_cred: credentials.DataCredential _azure_error_handler: AzureErrorHandler def __init__( self, azure_client_factory: Callable[[], blob.BlobServiceClient], + data_cred: credentials.DataCredential, ): super().__init__() self._azure_client = azure_client_factory() + self._data_cred = data_cred self._azure_error_handler = AzureErrorHandler() @override @@ -715,22 +738,47 @@ def copy( Raises: src.lib.utils.data.client.OSMODataStorageClientError """ - def _get_sas_url_for_copy(source_blob_client: blob.BlobClient) -> str: + def _get_sas_url_for_copy( + source_blob_client: blob.BlobClient, + service_client: blob.BlobServiceClient, + ) -> str: """ Generate a SAS URL for the source blob that can be used for copy operations. - This is necessary to authorize the copy operation. + Uses account key for static credentials, user delegation key for token credentials. """ - assert hasattr(source_blob_client.credential, 'account_key') - - sas_token = blob.generate_blob_sas( - account_name=source_blob_client.account_name, - container_name=source_blob_client.container_name, - blob_name=source_blob_client.blob_name, - account_key=source_blob_client.credential.account_key, - permission=blob.BlobSasPermissions(read=True), - expiry=common.current_time() + _get_copy_sas_expiry_time(), - ) + key_start_time = common.current_time().replace(tzinfo=datetime.timezone.utc) + key_expiry_time = key_start_time + _get_copy_sas_expiry_time() + + match self._data_cred: + case credentials.StaticDataCredential(): + account_key = _extract_account_key_from_connection_string( + self._data_cred.access_key.get_secret_value(), + ) + sas_token = blob.generate_blob_sas( + account_name=source_blob_client.account_name, + container_name=source_blob_client.container_name, + blob_name=source_blob_client.blob_name, + account_key=account_key, + permission=blob.BlobSasPermissions(read=True), + expiry=key_expiry_time, + ) + case credentials.DefaultDataCredential(): + user_delegation_key = service_client.get_user_delegation_key( + key_start_time=key_start_time, + key_expiry_time=key_expiry_time, + ) + sas_token = blob.generate_blob_sas( + account_name=source_blob_client.account_name, + container_name=source_blob_client.container_name, + blob_name=source_blob_client.blob_name, + user_delegation_key=user_delegation_key, + permission=blob.BlobSasPermissions(read=True), + expiry=key_expiry_time, + ) + case _ as unreachable: + assert_never(unreachable) + return f'{source_blob_client.url}?{sas_token}' def _call_api() -> client.CopyResponse: @@ -745,7 +793,7 @@ def _call_api() -> client.CopyResponse: # Copy source blob to destination blob. destination_blob_client.upload_blob_from_url( - _get_sas_url_for_copy(source_blob_client), + _get_sas_url_for_copy(source_blob_client, self._azure_client), ) blob_properties = destination_blob_client.get_blob_properties() @@ -831,9 +879,14 @@ class AzureBlobStorageClientFactory(provider.StorageClientFactory): """ data_cred: credentials.DataCredential + account_url: str @override def create(self) -> AzureBlobStorageClient: return AzureBlobStorageClient( - lambda: create_client(self.data_cred), + azure_client_factory=lambda: create_client( + self.data_cred, + account_url=self.account_url, + ), + data_cred=self.data_cred, ) diff --git a/src/lib/data/storage/backends/backends.py b/src/lib/data/storage/backends/backends.py index e6abfd3f1..bee84fd14 100644 --- a/src/lib/data/storage/backends/backends.py +++ b/src/lib/data/storage/backends/backends.py @@ -908,7 +908,10 @@ def data_auth( data_cred = self.resolved_data_credential def _validate_auth(): - with azure.create_client(data_cred) as service_client: + with azure.create_client( + data_cred, + account_url=self.auth_endpoint, + ) as service_client: if self.container: with service_client.get_container_client(self.container) as container_client: container_client.get_container_properties() @@ -954,7 +957,10 @@ def client_factory( if data_cred is None: data_cred = self.resolved_data_credential - return azure.AzureBlobStorageClientFactory(data_cred=data_cred) + return azure.AzureBlobStorageClientFactory( + data_cred=data_cred, + account_url=self.auth_endpoint, + ) def construct_storage_backend( diff --git a/src/lib/data/storage/backends/tests/BUILD b/src/lib/data/storage/backends/tests/BUILD index 05e2ac1f5..c962ebccd 100644 --- a/src/lib/data/storage/backends/tests/BUILD +++ b/src/lib/data/storage/backends/tests/BUILD @@ -25,5 +25,6 @@ osmo_py_test( "//src/lib/data/storage/backends", "//src/lib/data/storage/core", "//src/lib/data/storage/credentials", + "//src/utils/connectors", ], ) diff --git a/src/lib/data/storage/backends/tests/test_backends.py b/src/lib/data/storage/backends/tests/test_backends.py index f0f4e65b2..545991bd2 100644 --- a/src/lib/data/storage/backends/tests/test_backends.py +++ b/src/lib/data/storage/backends/tests/test_backends.py @@ -22,10 +22,11 @@ from typing import cast from unittest import mock -from src.lib.data.storage.backends import backends, s3 +from src.lib.data.storage.backends import azure, backends, s3 from src.lib.data.storage.credentials import credentials from src.lib.data.storage.core import header from src.lib.utils import osmo_errors +from src.utils.connectors import postgres class TestBackends(unittest.TestCase): @@ -223,5 +224,108 @@ def test_environment_auth_support(self, mock_get_config): self.assertIn(expected_profile, str(context.exception)) +class AzureDefaultDataCredentialTest(unittest.TestCase): + """Tests for Azure DefaultDataCredential support.""" + + @mock.patch('src.lib.data.storage.backends.azure.DefaultAzureCredential') + @mock.patch('src.lib.data.storage.backends.azure.blob.BlobServiceClient') + def test_create_client_with_default_credential( + self, + mock_blob_client, + mock_azure_cred, + ): + """Test create_client uses DefaultAzureCredential.""" + # Arrange + mock_credential_instance = mock.Mock() + mock_azure_cred.return_value = mock_credential_instance + + data_cred = credentials.DefaultDataCredential( + endpoint='azure://mystorageaccount', + region=None, + ) + + # Act + azure.create_client( + data_cred, + account_url='https://mystorageaccount.blob.core.windows.net', + ) + + # Assert + mock_azure_cred.assert_called_once() + mock_blob_client.assert_called_once_with( + account_url='https://mystorageaccount.blob.core.windows.net', + credential=mock_credential_instance, + ) + + +class ExtractAccountKeyFromConnectionStringTest(unittest.TestCase): + """Tests for account key extraction from Azure connection strings.""" + + def test_standard_connection_string(self): + """Test extraction from standard Azure Storage connection string.""" + conn_str = ( + 'DefaultEndpointsProtocol=https;' + 'AccountName=mystorageaccount;' + 'AccountKey=abc123def456ghi789;' + 'EndpointSuffix=core.windows.net' + ) + # pylint: disable=protected-access + result = azure._extract_account_key_from_connection_string(conn_str) + self.assertEqual(result, 'abc123def456ghi789') + + def test_connection_string_with_base64_key(self): + """Test extraction when key contains base64 characters including equals.""" + conn_str = ( + 'DefaultEndpointsProtocol=https;' + 'AccountName=mystorageaccount;' + 'AccountKey=abc123+def/456==;' + 'EndpointSuffix=core.windows.net' + ) + # pylint: disable=protected-access + result = azure._extract_account_key_from_connection_string(conn_str) + self.assertEqual(result, 'abc123+def/456==') + + def test_missing_account_key_raises(self): + """Test that missing AccountKey raises ValueError.""" + conn_str = 'DefaultEndpointsProtocol=https;AccountName=mystorageaccount' + with self.assertRaises(ValueError) as context: + # pylint: disable=protected-access + azure._extract_account_key_from_connection_string(conn_str) + self.assertIn('AccountKey not found', str(context.exception)) + + +class WorkflowConfigCredentialTest(unittest.TestCase): + """Tests for WorkflowConfig credential type support.""" + + def test_workflow_config_with_static_credential(self): + """Test WorkflowConfig accepts StaticDataCredential.""" + static_cred = credentials.StaticDataCredential( + endpoint='s3://bucket.io/workflows', + access_key_id='mykey', + access_key='mysecret', + region='us-east-1', + ) + + # Act + config = postgres.WorkflowConfig( + workflow_data=postgres.DataConfig(credential=static_cred), + ) + + # Assert + self.assertIsInstance( + config.workflow_data.credential, + credentials.StaticDataCredential, + ) + + def test_workflow_config_with_null_credential(self): + """Test WorkflowConfig accepts None credential.""" + config = postgres.WorkflowConfig( + workflow_data=postgres.DataConfig(credential=None), + ) + + # Assert + self.assertIsNone(config.workflow_data.credential) + + if __name__ == '__main__': unittest.main() diff --git a/src/lib/data/storage/credentials/credentials.py b/src/lib/data/storage/credentials/credentials.py index 878688465..467decb56 100644 --- a/src/lib/data/storage/credentials/credentials.py +++ b/src/lib/data/storage/credentials/credentials.py @@ -88,12 +88,27 @@ class DefaultDataCredential(DataCredentialBase, extra=pydantic.Extra.forbid): Data credential that delegates resolution to the underlying SDK. Uses the SDK's default credential chain (e.g., Azure's DefaultAzureCredential, - boto3's credential resolution) which may include environment variables, + boto3's credential resolution) which may include environment variables, workload identity, instance metadata, and other provider-specific methods. Intentionally left empty as all credential resolution is handled by the SDK. """ - pass + + def to_decrypted_dict(self) -> dict[str, str]: + """Return credential dict for SDK-based authentication. + + For DefaultDataCredential, only endpoint and region are provided. + The actual credential resolution occurs at runtime via the SDK's + default credential chain (workload identity, managed identity, etc.). + """ + output = { + 'endpoint': self.endpoint, + } + + if self.region: + output['region'] = self.region + + return output DataCredential = Union[ diff --git a/src/lib/data/storage/extra_hooks/hook-azure.py b/src/lib/data/storage/extra_hooks/hook-azure.py index 66fb40bde..77f47bf0b 100644 --- a/src/lib/data/storage/extra_hooks/hook-azure.py +++ b/src/lib/data/storage/extra_hooks/hook-azure.py @@ -18,30 +18,43 @@ SPDX-License-Identifier: Apache-2.0 """ -from PyInstaller.utils import hooks # type: ignore +from PyInstaller.utils.hooks import collect_all, collect_submodules # type: ignore -# Collect entry points -datas_set = set() -hiddenimports_set = set() +warn_on_missing_hiddenimports = False -data_files = ( - 'azure', +datas = [] +binaries = [] +hiddenimports = [] + +packages_to_collect = [ + 'azure.core', + 'azure.identity', 'azure.storage', + 'azure.storage.blob', + 'msal', + 'msal_extensions', 'isodate', -) +] -for data_file in data_files: - datas_set.update(hooks.collect_data_files(data_file, include_py_files=True)) +for package in packages_to_collect: + try: + pkg_datas, pkg_binaries, pkg_hiddenimports = collect_all(package) + datas.extend(pkg_datas) + binaries.extend(pkg_binaries) + hiddenimports.extend(pkg_hiddenimports) + except Exception: # pylint: disable=broad-except + pass hiddenimports_files = ( 'cryptography.hazmat.primitives.ciphers.aead', 'cryptography.hazmat.primitives.padding', 'wsgiref', + 'wsgiref.handlers', ) -# Add hidden imports for hiddenimport_file in hiddenimports_files: - hiddenimports_set.update(hooks.collect_submodules(hiddenimport_file)) + hiddenimports.extend(collect_submodules(hiddenimport_file)) -datas = list(datas_set) -hiddenimports = list(hiddenimports_set) +datas = list(set(datas)) +binaries = list(set(binaries)) +hiddenimports = list(set(hiddenimports)) diff --git a/src/lib/utils/credentials.py b/src/lib/utils/credentials.py index 176adb972..672378837 100644 --- a/src/lib/utils/credentials.py +++ b/src/lib/utils/credentials.py @@ -20,6 +20,8 @@ from ..data.storage import credentials +DataCredential = credentials.DataCredential +DefaultDataCredential = credentials.DefaultDataCredential StaticDataCredential = credentials.StaticDataCredential get_static_data_credential_from_config = credentials.get_static_data_credential_from_config