From bf97b64a038039ec69282497215d6a816a7abc2b Mon Sep 17 00:00:00 2001 From: Ben Willoughby Date: Thu, 14 May 2026 18:01:43 +0100 Subject: [PATCH 1/2] Support IAM role / default credential chain for S3 auth Make s3_access_key and s3_secret_key Optional[str] (default None). When unset, boto3 falls through to its standard credential chain: env vars, ~/.aws/credentials, EC2/ECS instance metadata (IAM role). Extract S3Config.make_client() to centralise boto3 client construction. Update all four call sites to use make_client() and remove now-redundant direct boto3 imports. Fix test fixtures to patch make_client() directly. Existing behaviour with explicit keys (Garage, MinIO, etc.) is unchanged. --- src/config/__init__.py | 4 ++-- src/config/s3.py | 34 ++++++++++++++++++++++++++------- src/services/file.py | 9 +-------- src/services/health.py | 9 +-------- src/services/state_archival.py | 9 +-------- src/utils/config_validator.py | 9 +-------- tests/conftest.py | 3 +-- tests/unit/test_file_service.py | 3 +-- 8 files changed, 35 insertions(+), 45 deletions(-) diff --git a/src/config/__init__.py b/src/config/__init__.py index bd13e27..52f367c 100644 --- a/src/config/__init__.py +++ b/src/config/__init__.py @@ -145,8 +145,8 @@ class Settings(BaseSettings): # S3 Storage Configuration s3_endpoint: str = Field(default="localhost:3900") - s3_access_key: str = Field(default="test-access-key", min_length=3) - s3_secret_key: str = Field(default="test-secret-key", min_length=8) + s3_access_key: Optional[str] = Field(default=None) + s3_secret_key: Optional[str] = Field(default=None) s3_secure: bool = Field(default=False) s3_bucket: str = Field(default="code-interpreter-files") s3_region: str = Field(default="garage") diff --git a/src/config/s3.py b/src/config/s3.py index 0293279..a1fd85a 100644 --- a/src/config/s3.py +++ b/src/config/s3.py @@ -1,19 +1,25 @@ """S3-compatible object storage configuration.""" +from typing import Any, Optional + +import boto3 from pydantic import Field from pydantic_settings import BaseSettings class S3Config(BaseSettings): - """S3-compatible storage settings (Garage, AWS S3, etc.).""" + """S3-compatible storage settings (Garage, AWS S3, etc.). + + When ``access_key`` and ``secret_key`` are ``None`` (the default), boto3 + uses its standard credential chain — environment variables, + ``~/.aws/credentials``, and EC2/ECS instance metadata (IAM role). Set + them explicitly only when connecting to a non-AWS S3-compatible service + such as Garage or MinIO that requires static credentials. + """ endpoint: str = Field(default="localhost:3900", alias="s3_endpoint") - access_key: str = Field( - default="test-access-key", min_length=3, alias="s3_access_key" - ) - secret_key: str = Field( - default="test-secret-key", min_length=8, alias="s3_secret_key" - ) + access_key: Optional[str] = Field(default=None, alias="s3_access_key") + secret_key: Optional[str] = Field(default=None, alias="s3_secret_key") secure: bool = Field(default=False, alias="s3_secure") bucket: str = Field(default="code-interpreter-files", alias="s3_bucket") region: str = Field(default="garage", alias="s3_region") @@ -24,6 +30,20 @@ def endpoint_url(self) -> str: scheme = "https" if self.secure else "http" return f"{scheme}://{self.endpoint}" + def make_client(self) -> Any: + """Return a configured boto3 S3 client. + + Credentials are passed explicitly only when both ``access_key`` and + ``secret_key`` are set. When they are ``None``, boto3 falls through to + its default credential chain (env vars, ``~/.aws/credentials``, EC2/ECS + instance metadata). + """ + kwargs: dict[str, Any] = {"endpoint_url": self.endpoint_url, "region_name": self.region} + if self.access_key and self.secret_key: + kwargs["aws_access_key_id"] = self.access_key + kwargs["aws_secret_access_key"] = self.secret_key + return boto3.client("s3", **kwargs) + class Config: env_prefix = "" extra = "ignore" diff --git a/src/services/file.py b/src/services/file.py index 5d12c01..939bd9d 100644 --- a/src/services/file.py +++ b/src/services/file.py @@ -6,7 +6,6 @@ from typing import List, Optional, Tuple, Dict, Any # Third-party imports -import boto3 import redis.asyncio as redis import structlog from botocore.exceptions import ClientError @@ -25,13 +24,7 @@ class FileService(FileServiceInterface): def __init__(self): """Initialize the file service with S3 and Redis clients.""" - self.s3_client = boto3.client( - "s3", - endpoint_url=settings.s3.endpoint_url, - aws_access_key_id=settings.s3_access_key, - aws_secret_access_key=settings.s3_secret_key, - region_name=settings.s3_region, - ) + self.s3_client = settings.s3.make_client() # Initialize Redis client self.redis_client = redis.from_url( diff --git a/src/services/health.py b/src/services/health.py index 0027c51..f780ba1 100644 --- a/src/services/health.py +++ b/src/services/health.py @@ -11,7 +11,6 @@ from typing import Dict, Any, Optional # Third-party imports -import boto3 import redis.asyncio as redis import structlog from botocore.exceptions import ClientError @@ -224,13 +223,7 @@ async def check_s3(self) -> HealthCheckResult: try: if not self._s3_client: - self._s3_client = boto3.client( - "s3", - endpoint_url=settings.s3.endpoint_url, - aws_access_key_id=settings.s3_access_key, - aws_secret_access_key=settings.s3_secret_key, - region_name=settings.s3_region, - ) + self._s3_client = settings.s3.make_client() loop = asyncio.get_event_loop() buckets_resp = await loop.run_in_executor( diff --git a/src/services/state_archival.py b/src/services/state_archival.py index 2d05d12..915a9d1 100644 --- a/src/services/state_archival.py +++ b/src/services/state_archival.py @@ -22,7 +22,6 @@ from datetime import datetime, timezone from typing import Optional, Dict, Any -import boto3 import structlog from botocore.exceptions import ClientError @@ -58,13 +57,7 @@ def __init__( s3_client: Optional boto3 S3 client (creates new one if not provided) """ self.state_service = state_service or StateService() - self.s3_client = s3_client or boto3.client( - "s3", - endpoint_url=settings.s3.endpoint_url, - aws_access_key_id=settings.s3_access_key, - aws_secret_access_key=settings.s3_secret_key, - region_name=settings.s3_region, - ) + self.s3_client = s3_client or settings.s3.make_client() self.bucket_name = settings.s3_bucket self._bucket_checked = False diff --git a/src/utils/config_validator.py b/src/utils/config_validator.py index 237ed6a..4f24f8f 100644 --- a/src/utils/config_validator.py +++ b/src/utils/config_validator.py @@ -4,7 +4,6 @@ import shutil from typing import List, Dict, Any import redis -import boto3 from botocore.exceptions import ClientError from ..config import settings @@ -124,13 +123,7 @@ def _validate_redis_connection(self): def _validate_s3_connection(self): """Validate S3 storage connection.""" try: - client = boto3.client( - "s3", - endpoint_url=settings.s3.endpoint_url, - aws_access_key_id=settings.s3_access_key, - aws_secret_access_key=settings.s3_secret_key, - region_name=settings.s3_region, - ) + client = settings.s3.make_client() # Test connection by listing buckets response = client.list_buckets() diff --git a/tests/conftest.py b/tests/conftest.py index 37e432c..bad9dd1 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -115,10 +115,9 @@ def execution_service(mock_sandbox_manager): @pytest.fixture def file_service(mock_s3_client, mock_redis): """Create FileService instance with mocked dependencies.""" - with patch("src.services.file.boto3") as mock_boto3, patch( + with patch("src.config.s3.S3Config.make_client", return_value=mock_s3_client), patch( "src.services.file.redis.from_url", return_value=mock_redis ): - mock_boto3.client.return_value = mock_s3_client service = FileService() yield service diff --git a/tests/unit/test_file_service.py b/tests/unit/test_file_service.py index 58aebb6..f838171 100644 --- a/tests/unit/test_file_service.py +++ b/tests/unit/test_file_service.py @@ -39,8 +39,7 @@ def mock_redis_client(): @pytest.fixture def file_service(mock_s3_client, mock_redis_client): """Create FileService with mocked clients.""" - with patch("src.services.file.boto3") as mock_boto3: - mock_boto3.client.return_value = mock_s3_client + with patch("src.config.s3.S3Config.make_client", return_value=mock_s3_client): with patch("src.services.file.redis.from_url") as mock_redis_from_url: mock_redis_from_url.return_value = mock_redis_client service = FileService() From 64474ae825533758cf88dff8cc706617d3065bb5 Mon Sep 17 00:00:00 2001 From: Ben Willoughby Date: Fri, 15 May 2026 09:21:38 +0100 Subject: [PATCH 2/2] =?UTF-8?q?fix:=20address=20PR=20review=20=E2=80=94=20?= =?UTF-8?q?head=5Fbucket=20and=20partial-creds=20validation?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Replace list_buckets() with head_bucket() in config_validator and health check; least-privilege IAM roles need only s3:ListBucket on the configured bucket, not s3:ListAllMyBuckets - Raise ValueError in make_client() when exactly one of S3_ACCESS_KEY / S3_SECRET_KEY is set — partial static credentials are a misconfiguration - Remove total_buckets from health check details (no longer available) --- src/config/s3.py | 13 ++++++++++++- src/services/health.py | 7 +------ src/utils/config_validator.py | 22 ++++++++++------------ 3 files changed, 23 insertions(+), 19 deletions(-) diff --git a/src/config/s3.py b/src/config/s3.py index a1fd85a..c9e1d4a 100644 --- a/src/config/s3.py +++ b/src/config/s3.py @@ -37,8 +37,19 @@ def make_client(self) -> Any: ``secret_key`` are set. When they are ``None``, boto3 falls through to its default credential chain (env vars, ``~/.aws/credentials``, EC2/ECS instance metadata). + + Raises ``ValueError`` when exactly one of the pair is set — partial + static config is always a misconfiguration. """ - kwargs: dict[str, Any] = {"endpoint_url": self.endpoint_url, "region_name": self.region} + if bool(self.access_key) != bool(self.secret_key): + raise ValueError( + "S3_ACCESS_KEY and S3_SECRET_KEY must both be set or both be unset. " + "Partial static credentials are not supported." + ) + kwargs: dict[str, Any] = { + "endpoint_url": self.endpoint_url, + "region_name": self.region, + } if self.access_key and self.secret_key: kwargs["aws_access_key_id"] = self.access_key kwargs["aws_secret_access_key"] = self.secret_key diff --git a/src/services/health.py b/src/services/health.py index f780ba1..e86f5c4 100644 --- a/src/services/health.py +++ b/src/services/health.py @@ -226,12 +226,8 @@ async def check_s3(self) -> HealthCheckResult: self._s3_client = settings.s3.make_client() loop = asyncio.get_event_loop() - buckets_resp = await loop.run_in_executor( - None, self._s3_client.list_buckets - ) - buckets = buckets_resp.get("Buckets", []) - # Check if our bucket exists + # Check if our bucket exists; create it if not try: await loop.run_in_executor( None, @@ -293,7 +289,6 @@ async def check_s3(self) -> HealthCheckResult: "endpoint": settings.s3_endpoint, "bucket": settings.s3_bucket, "bucket_exists": bucket_exists, - "total_buckets": len(buckets), "secure": settings.s3_secure, } diff --git a/src/utils/config_validator.py b/src/utils/config_validator.py index 4f24f8f..36b8ae5 100644 --- a/src/utils/config_validator.py +++ b/src/utils/config_validator.py @@ -125,18 +125,16 @@ def _validate_s3_connection(self): try: client = settings.s3.make_client() - # Test connection by listing buckets - response = client.list_buckets() - buckets = response.get("Buckets", []) - - # Check if our bucket exists - bucket_exists = any( - bucket["Name"] == settings.s3_bucket for bucket in buckets - ) - if not bucket_exists: - self.warnings.append( - f"S3 bucket '{settings.s3_bucket}' does not exist - will be created" - ) + try: + client.head_bucket(Bucket=settings.s3_bucket) + except ClientError as e: + code = e.response["Error"]["Code"] + if code in ("404", "NoSuchBucket"): + self.warnings.append( + f"S3 bucket '{settings.s3_bucket}' does not exist" + ) + else: + raise except ClientError as e: if settings.api_debug: