diff --git a/server/src/api/lifecycle.py b/server/src/api/lifecycle.py index cd4e7ca4b..b5ac92eef 100644 --- a/server/src/api/lifecycle.py +++ b/server/src/api/lifecycle.py @@ -38,13 +38,15 @@ Sandbox, SandboxFilter, ) -from src.services.factory import create_sandbox_service +from src.services.factory import create_pool_service, create_sandbox_service +from src.services.k8s.pool_service import PoolService # Initialize router router = APIRouter(tags=["Sandboxes"]) # Initialize service based on configuration from config.toml (defaults to docker) sandbox_service = create_sandbox_service() +warm_pool_service: Optional[PoolService] = create_pool_service() # ============================================================================ diff --git a/server/src/api/pool.py b/server/src/api/pool.py index f2764bc78..a5b47cfce 100644 --- a/server/src/api/pool.py +++ b/server/src/api/pool.py @@ -19,9 +19,7 @@ These endpoints are only available when the runtime is configured as 'kubernetes'. """ -from typing import Optional - -from fastapi import APIRouter, Header, status +from fastapi import APIRouter, Path, status from fastapi.exceptions import HTTPException from fastapi.responses import Response @@ -32,42 +30,49 @@ PoolResponse, UpdatePoolRequest, ) -from src.config import get_config from src.services.constants import SandboxErrorCodes -router = APIRouter(tags=["Pools"]) +router = APIRouter(tags=["WarmPools"]) + +# Align path validation with OpenAPI `WarmPoolName` / Kubernetes DNS subdomain rules. +_POOL_NAME_PATH = Path( + ..., + pattern=r"^[a-z0-9]([-a-z0-9]*[a-z0-9])?$", + max_length=253, + description="Pool name (Kubernetes metadata.name).", +) _POOL_NOT_K8S_DETAIL = { "code": SandboxErrorCodes.K8S_POOL_NOT_SUPPORTED, "message": "Pool management is only available when runtime.type is 'kubernetes'.", } +_POOL_BATCHSANDBOX_ONLY_DETAIL = { + "code": SandboxErrorCodes.K8S_POOL_NOT_SUPPORTED, + "message": ( + "Pool management is only available when kubernetes.workload_provider is " + "'batchsandbox' (WarmPool is used with BatchSandbox workloads)." + ), +} + def _get_pool_service(): """ - Lazily create the PoolService, raising 501 if the runtime is not Kubernetes. + Return the process-wide ``PoolService`` (see ``lifecycle.warm_pool_service``). - This deferred approach means the pool router can be registered unconditionally - in main.py; non-k8s deployments simply receive a clear 501 on every call. + Resolved via ``lifecycle.warm_pool_service``, created by ``create_pool_service()`` + at import (Kubernetes + ``batchsandbox`` workload provider only). Raises 501 when + WarmPool is not available for the current configuration. """ - from src.services.k8s.client import K8sClient - from src.services.k8s.pool_service import PoolService - - config = get_config() - if config.runtime.type != "kubernetes": - raise HTTPException( - status_code=status.HTTP_501_NOT_IMPLEMENTED, - detail=_POOL_NOT_K8S_DETAIL, - ) + from src.api.lifecycle import warm_pool_service - if not config.kubernetes: + if warm_pool_service is None: raise HTTPException( status_code=status.HTTP_501_NOT_IMPLEMENTED, - detail=_POOL_NOT_K8S_DETAIL, + detail=_POOL_BATCHSANDBOX_ONLY_DETAIL, ) - k8s_client = K8sClient(config.kubernetes) - return PoolService(k8s_client, namespace=config.kubernetes.namespace) + return warm_pool_service # ============================================================================ @@ -75,7 +80,7 @@ def _get_pool_service(): # ============================================================================ @router.post( - "/pools", + "/warmpools", response_model=PoolResponse, response_model_exclude_none=True, status_code=status.HTTP_201_CREATED, @@ -83,6 +88,12 @@ def _get_pool_service(): 201: {"description": "Pool created successfully"}, 400: {"model": ErrorResponse, "description": "The request was invalid or malformed"}, 401: {"model": ErrorResponse, "description": "Authentication credentials are missing or invalid"}, + 422: { + "description": ( + "Request validation failed: JSON body did not match the schema " + "(FastAPI/Pydantic `detail` array)." + ), + }, 409: {"model": ErrorResponse, "description": "A pool with the same name already exists"}, 501: {"model": ErrorResponse, "description": "Pool management is not supported in this runtime"}, 500: {"model": ErrorResponse, "description": "An unexpected server error occurred"}, @@ -90,7 +101,6 @@ def _get_pool_service(): ) async def create_pool( request: CreatePoolRequest, - x_request_id: Optional[str] = Header(None, alias="X-Request-ID"), ) -> PoolResponse: """ Create a pre-warmed resource pool. @@ -101,7 +111,6 @@ async def create_pool( Args: request: Pool creation request including name, pod template, and capacity spec. - x_request_id: Optional request tracing identifier. Returns: PoolResponse: The newly created pool. @@ -111,27 +120,26 @@ async def create_pool( @router.get( - "/pools", + "/warmpools", response_model=ListPoolsResponse, response_model_exclude_none=True, responses={ 200: {"description": "List of pools"}, 401: {"model": ErrorResponse, "description": "Authentication credentials are missing or invalid"}, 501: {"model": ErrorResponse, "description": "Pool management is not supported in this runtime"}, + 503: { + "model": ErrorResponse, + "description": "Pool list API unavailable (e.g. CRD not installed or wrong API version)", + }, 500: {"model": ErrorResponse, "description": "An unexpected server error occurred"}, }, ) -async def list_pools( - x_request_id: Optional[str] = Header(None, alias="X-Request-ID"), -) -> ListPoolsResponse: +async def list_pools() -> ListPoolsResponse: """ List all pre-warmed resource pools. Returns all Pool resources in the configured namespace. - Args: - x_request_id: Optional request tracing identifier. - Returns: ListPoolsResponse: Collection of all pools. """ @@ -140,27 +148,30 @@ async def list_pools( @router.get( - "/pools/{pool_name}", + "/warmpools/{pool_name}", response_model=PoolResponse, response_model_exclude_none=True, responses={ 200: {"description": "Pool retrieved successfully"}, 401: {"model": ErrorResponse, "description": "Authentication credentials are missing or invalid"}, 404: {"model": ErrorResponse, "description": "The requested pool does not exist"}, + 422: { + "description": ( + "Path parameter `pool_name` failed validation (invalid Kubernetes resource name pattern)." + ), + }, 501: {"model": ErrorResponse, "description": "Pool management is not supported in this runtime"}, 500: {"model": ErrorResponse, "description": "An unexpected server error occurred"}, }, ) async def get_pool( - pool_name: str, - x_request_id: Optional[str] = Header(None, alias="X-Request-ID"), + pool_name: str = _POOL_NAME_PATH, ) -> PoolResponse: """ Retrieve a pool by name. Args: pool_name: Name of the pool to retrieve. - x_request_id: Optional request tracing identifier. Returns: PoolResponse: Current state of the pool including runtime status. @@ -170,7 +181,7 @@ async def get_pool( @router.put( - "/pools/{pool_name}", + "/warmpools/{pool_name}", response_model=PoolResponse, response_model_exclude_none=True, responses={ @@ -178,14 +189,18 @@ async def get_pool( 400: {"model": ErrorResponse, "description": "The request was invalid or malformed"}, 401: {"model": ErrorResponse, "description": "Authentication credentials are missing or invalid"}, 404: {"model": ErrorResponse, "description": "The requested pool does not exist"}, + 422: { + "description": ( + "Validation error: invalid `pool_name` path and/or request body did not match the schema." + ), + }, 501: {"model": ErrorResponse, "description": "Pool management is not supported in this runtime"}, 500: {"model": ErrorResponse, "description": "An unexpected server error occurred"}, }, ) async def update_pool( - pool_name: str, request: UpdatePoolRequest, - x_request_id: Optional[str] = Header(None, alias="X-Request-ID"), + pool_name: str = _POOL_NAME_PATH, ) -> PoolResponse: """ Update pool capacity configuration. @@ -195,9 +210,8 @@ async def update_pool( the pool. Args: - pool_name: Name of the pool to update. request: Update request with the new capacity spec. - x_request_id: Optional request tracing identifier. + pool_name: Name of the pool to update. Returns: PoolResponse: Updated pool state. @@ -207,19 +221,23 @@ async def update_pool( @router.delete( - "/pools/{pool_name}", + "/warmpools/{pool_name}", status_code=status.HTTP_204_NO_CONTENT, responses={ 204: {"description": "Pool deleted successfully"}, 401: {"model": ErrorResponse, "description": "Authentication credentials are missing or invalid"}, 404: {"model": ErrorResponse, "description": "The requested pool does not exist"}, + 422: { + "description": ( + "Path parameter `pool_name` failed validation (invalid Kubernetes resource name pattern)." + ), + }, 501: {"model": ErrorResponse, "description": "Pool management is not supported in this runtime"}, 500: {"model": ErrorResponse, "description": "An unexpected server error occurred"}, }, ) async def delete_pool( - pool_name: str, - x_request_id: Optional[str] = Header(None, alias="X-Request-ID"), + pool_name: str = _POOL_NAME_PATH, ) -> Response: """ Delete a pool. @@ -229,7 +247,6 @@ async def delete_pool( Args: pool_name: Name of the pool to delete. - x_request_id: Optional request tracing identifier. Returns: Response: 204 No Content. diff --git a/server/src/api/schema.py b/server/src/api/schema.py index 9daff48aa..2a243a500 100644 --- a/server/src/api/schema.py +++ b/server/src/api/schema.py @@ -22,7 +22,7 @@ from datetime import datetime from typing import Dict, List, Literal, Optional -from pydantic import BaseModel, Field, RootModel, model_validator +from pydantic import BaseModel, ConfigDict, Field, RootModel, model_validator # ============================================================================ @@ -582,8 +582,7 @@ class PoolCapacitySpec(BaseModel): description="Minimum total size of the pool.", ) - class Config: - populate_by_name = True + model_config = ConfigDict(populate_by_name=True) class CreatePoolRequest(BaseModel): @@ -612,8 +611,7 @@ class CreatePoolRequest(BaseModel): description="Capacity configuration controlling pool size and buffer behavior.", ) - class Config: - populate_by_name = True + model_config = ConfigDict(populate_by_name=True) class UpdatePoolRequest(BaseModel): @@ -629,8 +627,7 @@ class UpdatePoolRequest(BaseModel): description="New capacity configuration for the pool.", ) - class Config: - populate_by_name = True + model_config = ConfigDict(populate_by_name=True) class PoolStatus(BaseModel): @@ -663,8 +660,7 @@ class PoolResponse(BaseModel): description="Pool creation timestamp.", ) - class Config: - populate_by_name = True + model_config = ConfigDict(populate_by_name=True) class ListPoolsResponse(BaseModel): diff --git a/server/src/main.py b/server/src/main.py index be7b068d7..46bf0cff7 100644 --- a/server/src/main.py +++ b/server/src/main.py @@ -72,7 +72,7 @@ from src.api.lifecycle import router # noqa: E402 from src.api.pool import router as pool_router # noqa: E402 -from src.api.lifecycle import router, sandbox_service # noqa: E402 +from src.api.lifecycle import sandbox_service # noqa: E402 from src.api.proxy import router as proxy_router # noqa: E402 from src.integrations.renew_intent.proxy_renew import ProxyRenewCoordinator # noqa: E402 from src.middleware.auth import AuthMiddleware # noqa: E402 diff --git a/server/src/services/constants.py b/server/src/services/constants.py index c9dd9e06e..4810d1ccd 100644 --- a/server/src/services/constants.py +++ b/server/src/services/constants.py @@ -77,6 +77,8 @@ class SandboxErrorCodes: K8S_POOL_ALREADY_EXISTS = "KUBERNETES::POOL_ALREADY_EXISTS" K8S_POOL_API_ERROR = "KUBERNETES::POOL_API_ERROR" K8S_POOL_NOT_SUPPORTED = "KUBERNETES::POOL_NOT_SUPPORTED" + # List pools returned 404 (e.g. CRD/APIGroup not served, wrong version, or namespace API missing) + K8S_POOL_LIST_UNAVAILABLE = "KUBERNETES::POOL_LIST_UNAVAILABLE" # Volume error codes INVALID_VOLUME_NAME = "VOLUME::INVALID_NAME" diff --git a/server/src/services/factory.py b/server/src/services/factory.py index aa6991158..a26ed19fa 100644 --- a/server/src/services/factory.py +++ b/server/src/services/factory.py @@ -13,10 +13,10 @@ # limitations under the License. """ -Factory for creating sandbox service instances. +Factory for creating sandbox and WarmPool service instances. -This module provides a factory function to create sandbox service implementations -based on application configuration loaded from sandbox_server.config. +This module provides factory functions keyed off application configuration +(see ``get_config()`` / ``SANDBOX_CONFIG_PATH``). """ import logging @@ -25,6 +25,12 @@ from src.config import AppConfig, get_config from src.services.docker import DockerSandboxService from src.services.k8s import KubernetesSandboxService +from src.services.k8s.client import K8sClient +from src.services.k8s.pool_service import PoolService +from src.services.k8s.provider_factory import ( + PROVIDER_TYPE_BATCHSANDBOX, + resolve_workload_provider_type, +) from src.services.sandbox_service import SandboxService logger = logging.getLogger(__name__) @@ -50,7 +56,7 @@ def create_sandbox_service( active_config = config or get_config() selected_type = (service_type or active_config.runtime.type).lower() - logger.info("Creating sandbox service with type: %s", selected_type) + logger.info(f"Creating sandbox service with type: {selected_type}") # Service implementation registry # Add new implementations here as they are created @@ -70,3 +76,41 @@ def create_sandbox_service( implementation_class = implementations[selected_type] return implementation_class(config=active_config) + + +def create_pool_service(config: Optional[AppConfig] = None) -> Optional[PoolService]: + """ + Create the WarmPool CRUD service when the runtime is Kubernetes **and** + ``kubernetes.workload_provider`` resolves to ``batchsandbox`` (WarmPool is + only used with BatchSandbox workloads). + + Returns ``None`` for other runtimes, missing ``[kubernetes]``, or non-batchsandbox + providers (e.g. ``agent-sandbox``). Uses a dedicated ``K8sClient`` (not the + one held by ``KubernetesSandboxService``). + """ + active_config = config or get_config() + if active_config.runtime.type.lower() != "kubernetes": + logger.debug( + f"Pool service not created: runtime.type={active_config.runtime.type}" + ) + return None + if not active_config.kubernetes: + logger.debug("Pool service not created: kubernetes config is missing") + return None + + resolved_provider = resolve_workload_provider_type( + active_config.kubernetes.workload_provider + ) + if resolved_provider != PROVIDER_TYPE_BATCHSANDBOX: + logger.debug( + f"Pool service not created: workload_provider={resolved_provider!r} " + f"(WarmPool requires {PROVIDER_TYPE_BATCHSANDBOX!r})" + ) + return None + + logger.info("Creating pool service (Kubernetes WarmPool CRUD, batchsandbox)") + k8s_client = K8sClient(active_config.kubernetes) + return PoolService( + k8s_client, + namespace=active_config.kubernetes.namespace, + ) diff --git a/server/src/services/k8s/client.py b/server/src/services/k8s/client.py index ca341a953..5c9243ad7 100644 --- a/server/src/services/k8s/client.py +++ b/server/src/services/k8s/client.py @@ -195,8 +195,15 @@ def list_custom_objects( namespace: str, plural: str, label_selector: str = "", + *, + not_found_returns_empty: bool = True, ) -> List[Dict[str, Any]]: - """List namespaced custom resources, returning the items list.""" + """List namespaced custom resources, returning the items list. + + When ``not_found_returns_empty`` is True (default), a 404 from the API is treated as an + empty list (caller cannot distinguish CRD missing from zero resources). When False, 404 + is re-raised so callers can map it to a specific HTTP error (see PoolService.list_pools). + """ if self._read_limiter: self._read_limiter.acquire() try: @@ -209,7 +216,7 @@ def list_custom_objects( ) return resp.get("items", []) except ApiException as e: - if e.status == 404: + if e.status == 404 and not_found_returns_empty: return [] raise @@ -243,7 +250,11 @@ def patch_custom_object( name: str, body: Dict[str, Any], ) -> Dict[str, Any]: - """Patch a namespaced custom resource.""" + """Patch a namespaced custom resource. + + The kubernetes-client sets ``Content-Type: application/merge-patch+json`` for this call, + which matches Kubernetes CRD merge-patch semantics for partial ``spec`` updates. + """ if self._write_limiter: self._write_limiter.acquire() return self.get_custom_objects_api().patch_namespaced_custom_object( diff --git a/server/src/services/k8s/kubernetes_api_exception.py b/server/src/services/k8s/kubernetes_api_exception.py new file mode 100644 index 000000000..182d66b64 --- /dev/null +++ b/server/src/services/k8s/kubernetes_api_exception.py @@ -0,0 +1,74 @@ +# Copyright 2026 Alibaba Group Holding Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Helpers for mapping ``kubernetes.client.ApiException`` to HTTP-friendly status and message.""" + +from __future__ import annotations + +import json +from typing import Optional + +from fastapi import status +from kubernetes.client import ApiException + +__all__ = [ + "http_status_from_kubernetes_api_exception", + "kubernetes_api_exception_message", +] + + +def kubernetes_api_exception_message(exc: ApiException) -> Optional[str]: + """ + Best-effort human-readable message from the apiserver response. + + Parses JSON ``body`` for ``message``, ``reason``, or first ``details.causes`` + entry; non-JSON bodies return a trimmed string. Falls back to ``exc.reason``. + Returns ``None`` when nothing useful is present (callers may supply defaults). + """ + body = getattr(exc, "body", None) + if body: + if isinstance(body, bytes): + body = body.decode("utf-8", errors="replace") + if isinstance(body, str) and body.strip(): + try: + data = json.loads(body) + except json.JSONDecodeError: + return body.strip()[:4000] + if isinstance(data, dict): + msg = data.get("message") + if msg: + return str(msg) + msg = data.get("reason") + if msg: + return str(msg) + details = data.get("details") + if isinstance(details, dict): + causes = details.get("causes") + if isinstance(causes, list) and causes: + c0 = causes[0] + if isinstance(c0, dict) and c0.get("message"): + return str(c0["message"]) + return str(data)[:4000] + reason = getattr(exc, "reason", None) + if reason: + return str(reason) + return None + + +def http_status_from_kubernetes_api_exception(exc: ApiException) -> int: + """Return ``exc.status`` when it is a valid HTTP code, else 500.""" + st = getattr(exc, "status", None) + if isinstance(st, int) and 100 <= st <= 599: + return st + return status.HTTP_500_INTERNAL_SERVER_ERROR diff --git a/server/src/services/k8s/pool_service.py b/server/src/services/k8s/pool_service.py index e6cffb704..db9c0d3f3 100644 --- a/server/src/services/k8s/pool_service.py +++ b/server/src/services/k8s/pool_service.py @@ -1,4 +1,4 @@ -# Copyright 2025 Alibaba Group Holding Ltd. +# Copyright 2026 Alibaba Group Holding Ltd. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,18 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. -""" -Kubernetes Pool service for managing pre-warmed sandbox resource pools. - -This module provides CRUD operations for Pool CRD resources, which represent -pre-warmed sets of pods that reduce sandbox cold-start latency. -""" +"""Pool CRD CRUD via ``K8sClient`` (rate limits; get may use informer cache).""" import logging -from typing import Any, Dict, List, Optional +from typing import Any, Dict, Optional from fastapi import HTTPException, status from kubernetes.client import ApiException +from pydantic import ValidationError from src.api.schema import ( CreatePoolRequest, @@ -35,37 +31,140 @@ ) from src.services.constants import SandboxErrorCodes from src.services.k8s.client import K8sClient +from src.services.k8s.kubernetes_api_exception import ( + http_status_from_kubernetes_api_exception, + kubernetes_api_exception_message, +) logger = logging.getLogger(__name__) -# Pool CRD constants _GROUP = "sandbox.opensandbox.io" _VERSION = "v1alpha1" _PLURAL = "pools" -class PoolService: - """ - Service for managing Pool CRD resources in Kubernetes. +def _k8s_pool_detail(message: str) -> Dict[str, str]: + return {"code": SandboxErrorCodes.K8S_POOL_API_ERROR, "message": message} - Provides CRUD operations that mirror the Pool CRD schema defined in - kubernetes/apis/sandbox/v1alpha1/pool_types.go. - """ +class PoolService: def __init__(self, k8s_client: K8sClient, namespace: str) -> None: - """ - Initialize PoolService. - - Args: - k8s_client: Kubernetes client wrapper. - namespace: Kubernetes namespace where pools are managed. - """ - self._custom_api = k8s_client.get_custom_objects_api() + self._k8s = k8s_client self._namespace = namespace - # ------------------------------------------------------------------ - # Internal helpers - # ------------------------------------------------------------------ + def _raise_for_kubernetes_api_exception( + self, + exc: ApiException, + *, + operation: str, + pool_name: Optional[str], + ) -> None: + http_status = http_status_from_kubernetes_api_exception(exc) + message = kubernetes_api_exception_message(exc) or ( + f"Kubernetes API request failed (HTTP {http_status})" + ) + suffix = f" pool={pool_name}" if pool_name else "" + if http_status >= 500: + logger.error( + f"Kubernetes API error op={operation}{suffix} status={exc.status} " + f"message={message!r} exc={exc!r}" + ) + else: + logger.warning( + f"Kubernetes API rejection op={operation}{suffix} status={exc.status} message={message!r}" + ) + raise HTTPException(status_code=http_status, detail=_k8s_pool_detail(message)) from exc + + def _raise_pool_internal_error( + self, operation: str, pool_name: Optional[str], exc: Exception + ) -> None: + if operation == "list": + logger.error(f"Unexpected error listing pools: {exc}") + msg = f"Failed to list pools: {exc}" + else: + logger.error(f"Unexpected error {operation} pool {pool_name}: {exc}") + msg = f"Failed to {operation} pool: {exc}" + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail=_k8s_pool_detail(msg), + ) from exc + + def _named_pool_api_exception( + self, exc: ApiException, pool_name: str, operation: str + ) -> None: + if exc.status == 404: + msg = kubernetes_api_exception_message(exc) or f"Pool '{pool_name}' not found." + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail={ + "code": SandboxErrorCodes.K8S_POOL_NOT_FOUND, + "message": msg, + }, + ) from exc + self._raise_for_kubernetes_api_exception( + exc, operation=operation, pool_name=pool_name + ) + + def _capacity_spec_from_raw(self, pool_name: str, capacity: Any) -> PoolCapacitySpec: + if capacity is None: + logger.warning(f"Pool {pool_name or '?'}: missing capacitySpec, using zeros") + data: Dict[str, Any] = {} + elif not isinstance(capacity, dict): + logger.error(f"Pool {pool_name or '?'}: capacitySpec not an object") + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail=_k8s_pool_detail( + "Invalid Pool capacitySpec returned from Kubernetes (expected object)." + ), + ) + else: + data = capacity + if not data: + logger.warning(f"Pool {pool_name or '?'}: empty capacitySpec, using zeros") + + payload = { + "bufferMax": data.get("bufferMax", 0), + "bufferMin": data.get("bufferMin", 0), + "poolMax": data.get("poolMax", 0), + "poolMin": data.get("poolMin", 0), + } + try: + return PoolCapacitySpec.model_validate(payload) + except ValidationError as e: + logger.error(f"Pool {pool_name or '?'}: invalid capacitySpec: {e}") + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail=_k8s_pool_detail( + "Invalid Pool capacitySpec returned from Kubernetes (types or ranges)." + ), + ) from e + + def _pool_status_from_raw(self, pool_name: str, raw_status: Any) -> Optional[PoolStatus]: + if raw_status is None: + return None + if not isinstance(raw_status, dict): + logger.warning( + f"Pool {pool_name or '?'}: status not an object, ignoring" + ) + return None + if not raw_status: + return None + payload = { + "total": raw_status.get("total", 0), + "allocated": raw_status.get("allocated", 0), + "available": raw_status.get("available", 0), + "revision": raw_status.get("revision", ""), + } + try: + return PoolStatus.model_validate(payload) + except ValidationError as e: + logger.error(f"Pool {pool_name or '?'}: invalid status: {e}") + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail=_k8s_pool_detail( + "Invalid Pool status returned from Kubernetes (types or ranges)." + ), + ) from e def _build_pool_manifest( self, @@ -74,14 +173,10 @@ def _build_pool_manifest( template: Dict[str, Any], capacity_spec: PoolCapacitySpec, ) -> Dict[str, Any]: - """Build a Pool CRD manifest dict.""" return { "apiVersion": f"{_GROUP}/{_VERSION}", "kind": "Pool", - "metadata": { - "name": name, - "namespace": namespace, - }, + "metadata": {"name": name, "namespace": namespace}, "spec": { "template": template, "capacitySpec": { @@ -94,214 +189,119 @@ def _build_pool_manifest( } def _pool_from_raw(self, raw: Dict[str, Any]) -> PoolResponse: - """Convert a raw Pool CRD dict to a PoolResponse model.""" - metadata = raw.get("metadata", {}) - spec = raw.get("spec", {}) - raw_status = raw.get("status") - - capacity = spec.get("capacitySpec", {}) - capacity_spec = PoolCapacitySpec( - bufferMax=capacity.get("bufferMax", 0), - bufferMin=capacity.get("bufferMin", 0), - poolMax=capacity.get("poolMax", 0), - poolMin=capacity.get("poolMin", 0), - ) - - pool_status: Optional[PoolStatus] = None - if raw_status: - pool_status = PoolStatus( - total=raw_status.get("total", 0), - allocated=raw_status.get("allocated", 0), - available=raw_status.get("available", 0), - revision=raw_status.get("revision", ""), - ) + metadata = raw.get("metadata") + if not isinstance(metadata, dict): + if metadata is not None: + logger.warning(f"Pool metadata not a dict ({type(metadata).__name__}), using {{}}") + metadata = {} + pool_name = metadata.get("name") or "" + if not pool_name: + logger.warning("Pool missing metadata.name") + + spec = raw.get("spec") + if not isinstance(spec, dict): + if spec is not None: + logger.warning(f"Pool {pool_name or '?'}: spec not a dict, using {{}}") + spec = {} return PoolResponse( - name=metadata.get("name", ""), - capacitySpec=capacity_spec, - status=pool_status, + name=pool_name, + capacitySpec=self._capacity_spec_from_raw(pool_name, spec.get("capacitySpec")), + status=self._pool_status_from_raw(pool_name, raw.get("status")), createdAt=metadata.get("creationTimestamp"), ) - # ------------------------------------------------------------------ - # Public API - # ------------------------------------------------------------------ - def create_pool(self, request: CreatePoolRequest) -> PoolResponse: - """ - Create a new Pool resource. - - Args: - request: Pool creation request. - - Returns: - PoolResponse representing the newly created pool. - - Raises: - HTTPException 409: If a pool with the same name already exists. - HTTPException 500: On unexpected Kubernetes API errors. - """ manifest = self._build_pool_manifest( - name=request.name, - namespace=self._namespace, - template=request.template, - capacity_spec=request.capacity_spec, + request.name, self._namespace, request.template, request.capacity_spec ) - try: - created = self._custom_api.create_namespaced_custom_object( + created = self._k8s.create_custom_object( group=_GROUP, version=_VERSION, namespace=self._namespace, plural=_PLURAL, body=manifest, ) - logger.info("Created pool: name=%s, namespace=%s", request.name, self._namespace) + logger.info(f"Created pool name={request.name} namespace={self._namespace}") return self._pool_from_raw(created) - + except HTTPException: + raise except ApiException as e: if e.status == 409: + msg = kubernetes_api_exception_message(e) or f"Pool '{request.name}' already exists." raise HTTPException( status_code=status.HTTP_409_CONFLICT, detail={ "code": SandboxErrorCodes.K8S_POOL_ALREADY_EXISTS, - "message": f"Pool '{request.name}' already exists.", + "message": msg, }, ) from e - logger.error("Kubernetes API error creating pool %s: %s", request.name, e) - raise HTTPException( - status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, - detail={ - "code": SandboxErrorCodes.K8S_POOL_API_ERROR, - "message": f"Failed to create pool: {e.reason}", - }, - ) from e + self._raise_for_kubernetes_api_exception( + e, operation="create", pool_name=request.name + ) except Exception as e: - logger.error("Unexpected error creating pool %s: %s", request.name, e) - raise HTTPException( - status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, - detail={ - "code": SandboxErrorCodes.K8S_POOL_API_ERROR, - "message": f"Failed to create pool: {e}", - }, - ) from e + self._raise_pool_internal_error("create", request.name, e) def get_pool(self, pool_name: str) -> PoolResponse: - """ - Retrieve a Pool by name. - - Args: - pool_name: Name of the pool to retrieve. - - Returns: - PoolResponse for the requested pool. - - Raises: - HTTPException 404: If the pool does not exist. - HTTPException 500: On unexpected Kubernetes API errors. - """ try: - raw = self._custom_api.get_namespaced_custom_object( + raw = self._k8s.get_custom_object( group=_GROUP, version=_VERSION, namespace=self._namespace, plural=_PLURAL, name=pool_name, ) - return self._pool_from_raw(raw) - - except ApiException as e: - if e.status == 404: + if raw is None: raise HTTPException( status_code=status.HTTP_404_NOT_FOUND, detail={ "code": SandboxErrorCodes.K8S_POOL_NOT_FOUND, "message": f"Pool '{pool_name}' not found.", }, - ) from e - logger.error("Kubernetes API error getting pool %s: %s", pool_name, e) - raise HTTPException( - status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, - detail={ - "code": SandboxErrorCodes.K8S_POOL_API_ERROR, - "message": f"Failed to get pool: {e.reason}", - }, - ) from e + ) + return self._pool_from_raw(raw) except HTTPException: raise + except ApiException as e: + self._raise_for_kubernetes_api_exception(e, operation="get", pool_name=pool_name) except Exception as e: - logger.error("Unexpected error getting pool %s: %s", pool_name, e) - raise HTTPException( - status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, - detail={ - "code": SandboxErrorCodes.K8S_POOL_API_ERROR, - "message": f"Failed to get pool: {e}", - }, - ) from e + self._raise_pool_internal_error("get", pool_name, e) def list_pools(self) -> ListPoolsResponse: - """ - List all Pools in the configured namespace. - - Returns: - ListPoolsResponse containing all pools. - - Raises: - HTTPException 500: On unexpected Kubernetes API errors. - """ try: - result = self._custom_api.list_namespaced_custom_object( + raw_items = self._k8s.list_custom_objects( group=_GROUP, version=_VERSION, namespace=self._namespace, plural=_PLURAL, + not_found_returns_empty=False, ) - items: List[PoolResponse] = [ - self._pool_from_raw(item) for item in result.get("items", []) - ] - return ListPoolsResponse(items=items) - + return ListPoolsResponse( + items=[self._pool_from_raw(item) for item in raw_items] + ) + except HTTPException: + raise except ApiException as e: if e.status == 404: - # CRD not installed — return empty list gracefully - logger.warning("Pool CRD not found (404); returning empty list.") - return ListPoolsResponse(items=[]) - logger.error("Kubernetes API error listing pools: %s", e) - raise HTTPException( - status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, - detail={ - "code": SandboxErrorCodes.K8S_POOL_API_ERROR, - "message": f"Failed to list pools: {e.reason}", - }, - ) from e + logger.warning( + f"Pool list 404 (CRD/path?): {_GROUP}/{_VERSION} {_PLURAL} ns={self._namespace}" + ) + raise HTTPException( + status_code=status.HTTP_503_SERVICE_UNAVAILABLE, + detail={ + "code": SandboxErrorCodes.K8S_POOL_LIST_UNAVAILABLE, + "message": ( + "Cannot list Pool resources: Kubernetes returned 404 for the Pool " + f"API path. Check Pool CRD and group/version ({_GROUP}/{_VERSION})." + ), + }, + ) from e + self._raise_for_kubernetes_api_exception(e, operation="list", pool_name=None) except Exception as e: - logger.error("Unexpected error listing pools: %s", e) - raise HTTPException( - status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, - detail={ - "code": SandboxErrorCodes.K8S_POOL_API_ERROR, - "message": f"Failed to list pools: {e}", - }, - ) from e + self._raise_pool_internal_error("list", None, e) def update_pool(self, pool_name: str, request: UpdatePoolRequest) -> PoolResponse: - """ - Update the capacity configuration of an existing Pool. - - Only ``capacitySpec`` can be updated; pod template changes require - recreating the pool. - - Args: - pool_name: Name of the pool to update. - request: Update request containing the new capacity spec. - - Returns: - PoolResponse reflecting the updated state. - - Raises: - HTTPException 404: If the pool does not exist. - HTTPException 500: On unexpected Kubernetes API errors. - """ patch_body = { "spec": { "capacitySpec": { @@ -312,9 +312,8 @@ def update_pool(self, pool_name: str, request: UpdatePoolRequest) -> PoolRespons } } } - try: - updated = self._custom_api.patch_namespaced_custom_object( + updated = self._k8s.patch_custom_object( group=_GROUP, version=_VERSION, namespace=self._namespace, @@ -322,51 +321,18 @@ def update_pool(self, pool_name: str, request: UpdatePoolRequest) -> PoolRespons name=pool_name, body=patch_body, ) - logger.info("Updated pool capacity: name=%s", pool_name) + logger.info(f"Updated pool capacity name={pool_name}") return self._pool_from_raw(updated) - - except ApiException as e: - if e.status == 404: - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail={ - "code": SandboxErrorCodes.K8S_POOL_NOT_FOUND, - "message": f"Pool '{pool_name}' not found.", - }, - ) from e - logger.error("Kubernetes API error updating pool %s: %s", pool_name, e) - raise HTTPException( - status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, - detail={ - "code": SandboxErrorCodes.K8S_POOL_API_ERROR, - "message": f"Failed to update pool: {e.reason}", - }, - ) from e except HTTPException: raise + except ApiException as e: + self._named_pool_api_exception(e, pool_name, "update") except Exception as e: - logger.error("Unexpected error updating pool %s: %s", pool_name, e) - raise HTTPException( - status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, - detail={ - "code": SandboxErrorCodes.K8S_POOL_API_ERROR, - "message": f"Failed to update pool: {e}", - }, - ) from e + self._raise_pool_internal_error("update", pool_name, e) def delete_pool(self, pool_name: str) -> None: - """ - Delete a Pool resource. - - Args: - pool_name: Name of the pool to delete. - - Raises: - HTTPException 404: If the pool does not exist. - HTTPException 500: On unexpected Kubernetes API errors. - """ try: - self._custom_api.delete_namespaced_custom_object( + self._k8s.delete_custom_object( group=_GROUP, version=_VERSION, namespace=self._namespace, @@ -374,33 +340,10 @@ def delete_pool(self, pool_name: str) -> None: name=pool_name, grace_period_seconds=0, ) - logger.info("Deleted pool: name=%s, namespace=%s", pool_name, self._namespace) - - except ApiException as e: - if e.status == 404: - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail={ - "code": SandboxErrorCodes.K8S_POOL_NOT_FOUND, - "message": f"Pool '{pool_name}' not found.", - }, - ) from e - logger.error("Kubernetes API error deleting pool %s: %s", pool_name, e) - raise HTTPException( - status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, - detail={ - "code": SandboxErrorCodes.K8S_POOL_API_ERROR, - "message": f"Failed to delete pool: {e.reason}", - }, - ) from e + logger.info(f"Deleted pool name={pool_name} namespace={self._namespace}") except HTTPException: raise + except ApiException as e: + self._named_pool_api_exception(e, pool_name, "delete") except Exception as e: - logger.error("Unexpected error deleting pool %s: %s", pool_name, e) - raise HTTPException( - status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, - detail={ - "code": SandboxErrorCodes.K8S_POOL_API_ERROR, - "message": f"Failed to delete pool: {e}", - }, - ) from e + self._raise_pool_internal_error("delete", pool_name, e) diff --git a/server/src/services/k8s/provider_factory.py b/server/src/services/k8s/provider_factory.py index fc1ba2bc2..044f9facf 100644 --- a/server/src/services/k8s/provider_factory.py +++ b/server/src/services/k8s/provider_factory.py @@ -40,6 +40,23 @@ } +def resolve_workload_provider_type(provider_type: str | None) -> str: + """ + Normalize ``kubernetes.workload_provider`` to a registry key. + + If unset, matches ``create_workload_provider`` by using the first registered + provider (currently ``batchsandbox``). + """ + if provider_type is None: + if not _PROVIDER_REGISTRY: + raise ValueError( + "No workload providers are registered. " + "Cannot resolve default provider." + ) + return next(iter(_PROVIDER_REGISTRY.keys())) + return provider_type.lower() + + def create_workload_provider( provider_type: str | None, k8s_client: K8sClient, @@ -61,17 +78,9 @@ def create_workload_provider( Raises: ValueError: If provider_type is not supported or no providers are registered """ - # Use first registered provider if not specified + provider_type_lower = resolve_workload_provider_type(provider_type) if provider_type is None: - if not _PROVIDER_REGISTRY: - raise ValueError( - "No workload providers are registered. " - "Cannot create a default provider." - ) - provider_type = next(iter(_PROVIDER_REGISTRY.keys())) - logger.info(f"No provider specified, using default: {provider_type}") - - provider_type_lower = provider_type.lower() + logger.info(f"No provider specified, using default: {provider_type_lower}") if provider_type_lower not in _PROVIDER_REGISTRY: available = ", ".join(_PROVIDER_REGISTRY.keys()) diff --git a/server/tests/k8s/conftest.py b/server/tests/k8s/conftest.py index 30b72c9e8..237e8f607 100644 --- a/server/tests/k8s/conftest.py +++ b/server/tests/k8s/conftest.py @@ -25,7 +25,7 @@ def stub_workload_informer(monkeypatch): """ Prevent real informer threads in unit tests. - + Stubs the WorkloadInformer used inside K8sClient so that watch threads are not started during unit tests. Cache is always empty (has_synced=False), so get_custom_object falls through to the mocked API call. diff --git a/server/tests/k8s/fixtures/k8s_fixtures.py b/server/tests/k8s/fixtures/k8s_fixtures.py index 81dae4ed4..1e97270e7 100644 --- a/server/tests/k8s/fixtures/k8s_fixtures.py +++ b/server/tests/k8s/fixtures/k8s_fixtures.py @@ -294,7 +294,7 @@ def k8s_service(k8s_app_config): with patch('src.services.k8s.kubernetes_service.K8sClient') as mock_k8s_client_cls, \ patch('src.services.k8s.kubernetes_service.create_workload_provider') as mock_create_provider: - + # Mock K8sClient instance mock_k8s_client = MagicMock() mock_k8s_client_cls.return_value = mock_k8s_client diff --git a/server/tests/k8s/test_k8s_client.py b/server/tests/k8s/test_k8s_client.py index be926dccd..f4a0f818a 100644 --- a/server/tests/k8s/test_k8s_client.py +++ b/server/tests/k8s/test_k8s_client.py @@ -247,6 +247,14 @@ def test_list_custom_objects_reraises_non_404(self, k8s_runtime_config): with pytest.raises(ApiException): c.list_custom_objects("g", "v1", "ns", "foos") + def test_list_custom_objects_reraises_404_when_not_found_returns_empty_false(self, k8s_runtime_config): + """When not_found_returns_empty=False, 404 is propagated to the caller.""" + c = self._make_client(k8s_runtime_config) + c._custom_objects_api.list_namespaced_custom_object.side_effect = ApiException(status=404) + with pytest.raises(ApiException) as exc_info: + c.list_custom_objects("g", "v1", "ns", "foos", not_found_returns_empty=False) + assert exc_info.value.status == 404 + def test_delete_custom_object_delegates_to_api(self, k8s_runtime_config): """delete_custom_object forwards arguments to the raw API.""" c = self._make_client(k8s_runtime_config) diff --git a/server/tests/k8s/test_kubernetes_service.py b/server/tests/k8s/test_kubernetes_service.py index 8a50871a7..0e8bd68b2 100644 --- a/server/tests/k8s/test_kubernetes_service.py +++ b/server/tests/k8s/test_kubernetes_service.py @@ -44,12 +44,12 @@ def test_init_with_valid_config_succeeds(self, k8s_app_config): """ with patch('src.services.k8s.kubernetes_service.K8sClient') as mock_k8s_client, \ patch('src.services.k8s.kubernetes_service.create_workload_provider') as mock_create_provider: - + mock_provider = MagicMock() mock_create_provider.return_value = mock_provider - + service = KubernetesSandboxService(k8s_app_config) - + assert service.namespace == k8s_app_config.kubernetes.namespace assert service.execd_image == k8s_app_config.runtime.execd_image mock_k8s_client.assert_called_once_with(k8s_app_config.kubernetes) diff --git a/server/tests/k8s/test_pool_service.py b/server/tests/k8s/test_pool_service.py index 32f6e09df..a0d534bf5 100644 --- a/server/tests/k8s/test_pool_service.py +++ b/server/tests/k8s/test_pool_service.py @@ -15,11 +15,11 @@ """ Unit tests for PoolService (server/src/services/k8s/pool_service.py). -All tests mock the Kubernetes CustomObjectsApi so no cluster connection is needed. +All tests mock K8sClient CRUD helpers so no cluster connection is needed. """ import pytest -from unittest.mock import MagicMock, call +from unittest.mock import MagicMock from kubernetes.client import ApiException from src.api.schema import ( @@ -76,12 +76,10 @@ def _make_raw_pool( def _make_pool_service(namespace: str = "test-ns") -> tuple[PoolService, MagicMock]: - """Return a (PoolService, mock_custom_api) pair.""" + """Return a (PoolService, mock_k8s_client) pair.""" mock_client = MagicMock() - mock_api = MagicMock() - mock_client.get_custom_objects_api.return_value = mock_api service = PoolService(mock_client, namespace=namespace) - return service, mock_api + return service, mock_client def _capacity_spec( @@ -146,6 +144,34 @@ def test_pool_capacity_defaults_to_zero_on_missing_fields(self): assert result.capacity_spec.buffer_max == 0 assert result.capacity_spec.pool_max == 0 + def test_pool_missing_capacity_spec_logs_and_defaults(self): + svc, _ = _make_pool_service() + raw = { + "metadata": {"name": "no-cap"}, + "spec": {"template": {}}, + } + result = svc._pool_from_raw(raw) + assert result.capacity_spec.buffer_max == 0 + assert result.capacity_spec.pool_min == 0 + + def test_pool_non_object_capacity_spec_raises_500(self): + svc, _ = _make_pool_service() + raw = { + "metadata": {"name": "bad-cap"}, + "spec": {"capacitySpec": "not-a-dict"}, + } + with pytest.raises(HTTPException) as exc_info: + svc._pool_from_raw(raw) + assert exc_info.value.status_code == 500 + assert exc_info.value.detail["code"] == SandboxErrorCodes.K8S_POOL_API_ERROR + + def test_pool_non_object_status_treated_as_unreconciled(self): + svc, _ = _make_pool_service() + raw = _make_raw_pool() + raw["status"] = "broken" + result = svc._pool_from_raw(raw) + assert result.status is None + # --------------------------------------------------------------------------- # create_pool @@ -153,9 +179,9 @@ def test_pool_capacity_defaults_to_zero_on_missing_fields(self): class TestCreatePool: def test_create_pool_calls_k8s_api_with_correct_manifest(self): - svc, mock_api = _make_pool_service(namespace="opensandbox") + svc, mock_k8s = _make_pool_service(namespace="opensandbox") raw = _make_raw_pool(name="ci-pool", namespace="opensandbox") - mock_api.create_namespaced_custom_object.return_value = raw + mock_k8s.create_custom_object.return_value = raw request = CreatePoolRequest( name="ci-pool", @@ -164,8 +190,8 @@ def test_create_pool_calls_k8s_api_with_correct_manifest(self): ) result = svc.create_pool(request) - mock_api.create_namespaced_custom_object.assert_called_once() - call_kwargs = mock_api.create_namespaced_custom_object.call_args.kwargs + mock_k8s.create_custom_object.assert_called_once() + call_kwargs = mock_k8s.create_custom_object.call_args.kwargs assert call_kwargs["group"] == "sandbox.opensandbox.io" assert call_kwargs["version"] == "v1alpha1" assert call_kwargs["plural"] == "pools" @@ -180,9 +206,9 @@ def test_create_pool_calls_k8s_api_with_correct_manifest(self): assert result.name == "ci-pool" def test_create_pool_returns_pool_response(self): - svc, mock_api = _make_pool_service() + svc, mock_k8s = _make_pool_service() raw = _make_raw_pool() - mock_api.create_namespaced_custom_object.return_value = raw + mock_k8s.create_custom_object.return_value = raw request = CreatePoolRequest( name="my-pool", @@ -196,8 +222,8 @@ def test_create_pool_returns_pool_response(self): assert result.status.total == 2 def test_create_pool_409_raises_http_conflict(self): - svc, mock_api = _make_pool_service() - mock_api.create_namespaced_custom_object.side_effect = ApiException(status=409) + svc, mock_k8s = _make_pool_service() + mock_k8s.create_custom_object.side_effect = ApiException(status=409) request = CreatePoolRequest( name="dup-pool", @@ -211,10 +237,10 @@ def test_create_pool_409_raises_http_conflict(self): assert exc_info.value.detail["code"] == SandboxErrorCodes.K8S_POOL_ALREADY_EXISTS def test_create_pool_5xx_api_error_raises_http_500(self): - svc, mock_api = _make_pool_service() + svc, mock_k8s = _make_pool_service() err = ApiException(status=500) err.reason = "Internal Server Error" - mock_api.create_namespaced_custom_object.side_effect = err + mock_k8s.create_custom_object.side_effect = err request = CreatePoolRequest(name="p", template={}, capacitySpec=_capacity_spec()) with pytest.raises(HTTPException) as exc_info: @@ -223,9 +249,36 @@ def test_create_pool_5xx_api_error_raises_http_500(self): assert exc_info.value.status_code == 500 assert exc_info.value.detail["code"] == SandboxErrorCodes.K8S_POOL_API_ERROR + def test_create_pool_422_preserves_status_and_json_message(self): + svc, mock_k8s = _make_pool_service() + err = ApiException(status=422) + err.body = '{"message": "admission webhook denied the request"}' + mock_k8s.create_custom_object.side_effect = err + + request = CreatePoolRequest(name="p", template={}, capacitySpec=_capacity_spec()) + with pytest.raises(HTTPException) as exc_info: + svc.create_pool(request) + + assert exc_info.value.status_code == 422 + assert exc_info.value.detail["message"] == "admission webhook denied the request" + + def test_create_pool_409_prefers_kubernetes_message(self): + svc, mock_k8s = _make_pool_service() + err = ApiException(status=409) + err.body = '{"message": "pool.example.io already taken"}' + mock_k8s.create_custom_object.side_effect = err + + request = CreatePoolRequest(name="p", template={}, capacitySpec=_capacity_spec()) + with pytest.raises(HTTPException) as exc_info: + svc.create_pool(request) + + assert exc_info.value.status_code == 409 + assert exc_info.value.detail["code"] == SandboxErrorCodes.K8S_POOL_ALREADY_EXISTS + assert exc_info.value.detail["message"] == "pool.example.io already taken" + def test_create_pool_unexpected_exception_raises_http_500(self): - svc, mock_api = _make_pool_service() - mock_api.create_namespaced_custom_object.side_effect = RuntimeError("boom") + svc, mock_k8s = _make_pool_service() + mock_k8s.create_custom_object.side_effect = RuntimeError("boom") request = CreatePoolRequest(name="p", template={}, capacitySpec=_capacity_spec()) with pytest.raises(HTTPException) as exc_info: @@ -241,13 +294,13 @@ def test_create_pool_unexpected_exception_raises_http_500(self): class TestGetPool: def test_get_pool_returns_correct_pool(self): - svc, mock_api = _make_pool_service() + svc, mock_k8s = _make_pool_service() raw = _make_raw_pool(name="target-pool") - mock_api.get_namespaced_custom_object.return_value = raw + mock_k8s.get_custom_object.return_value = raw result = svc.get_pool("target-pool") - mock_api.get_namespaced_custom_object.assert_called_once_with( + mock_k8s.get_custom_object.assert_called_once_with( group="sandbox.opensandbox.io", version="v1alpha1", namespace="test-ns", @@ -257,8 +310,8 @@ def test_get_pool_returns_correct_pool(self): assert result.name == "target-pool" def test_get_pool_404_raises_http_not_found(self): - svc, mock_api = _make_pool_service() - mock_api.get_namespaced_custom_object.side_effect = ApiException(status=404) + svc, mock_k8s = _make_pool_service() + mock_k8s.get_custom_object.return_value = None with pytest.raises(HTTPException) as exc_info: svc.get_pool("ghost-pool") @@ -267,21 +320,22 @@ def test_get_pool_404_raises_http_not_found(self): assert exc_info.value.detail["code"] == SandboxErrorCodes.K8S_POOL_NOT_FOUND assert "ghost-pool" in exc_info.value.detail["message"] - def test_get_pool_5xx_raises_http_500(self): - svc, mock_api = _make_pool_service() + def test_get_pool_kubernetes_status_is_preserved(self): + svc, mock_k8s = _make_pool_service() err = ApiException(status=503) err.reason = "Service Unavailable" - mock_api.get_namespaced_custom_object.side_effect = err + mock_k8s.get_custom_object.side_effect = err with pytest.raises(HTTPException) as exc_info: svc.get_pool("p") - assert exc_info.value.status_code == 500 + assert exc_info.value.status_code == 503 assert exc_info.value.detail["code"] == SandboxErrorCodes.K8S_POOL_API_ERROR + assert "Unavailable" in exc_info.value.detail["message"] def test_get_pool_unexpected_exception_raises_http_500(self): - svc, mock_api = _make_pool_service() - mock_api.get_namespaced_custom_object.side_effect = ConnectionError("timeout") + svc, mock_k8s = _make_pool_service() + mock_k8s.get_custom_object.side_effect = ConnectionError("timeout") with pytest.raises(HTTPException) as exc_info: svc.get_pool("p") @@ -295,13 +349,11 @@ def test_get_pool_unexpected_exception_raises_http_500(self): class TestListPools: def test_list_pools_returns_all_items(self): - svc, mock_api = _make_pool_service() - mock_api.list_namespaced_custom_object.return_value = { - "items": [ - _make_raw_pool(name="pool-a"), - _make_raw_pool(name="pool-b"), - ] - } + svc, mock_k8s = _make_pool_service() + mock_k8s.list_custom_objects.return_value = [ + _make_raw_pool(name="pool-a"), + _make_raw_pool(name="pool-b"), + ] result = svc.list_pools() @@ -310,25 +362,28 @@ def test_list_pools_returns_all_items(self): assert names == {"pool-a", "pool-b"} def test_list_pools_empty_returns_empty_list(self): - svc, mock_api = _make_pool_service() - mock_api.list_namespaced_custom_object.return_value = {"items": []} + svc, mock_k8s = _make_pool_service() + mock_k8s.list_custom_objects.return_value = [] result = svc.list_pools() assert result.items == [] - def test_list_pools_404_crd_not_installed_returns_empty(self): - """If the CRD doesn't exist (404), list should return empty gracefully.""" - svc, mock_api = _make_pool_service() - mock_api.list_namespaced_custom_object.side_effect = ApiException(status=404) + def test_list_pools_404_from_cluster_returns_503(self): + """404 on list API means CRD/path unavailable — not an empty tenant.""" + svc, mock_k8s = _make_pool_service() + mock_k8s.list_custom_objects.side_effect = ApiException(status=404) - result = svc.list_pools() - assert result.items == [] + with pytest.raises(HTTPException) as exc_info: + svc.list_pools() + + assert exc_info.value.status_code == 503 + assert exc_info.value.detail["code"] == SandboxErrorCodes.K8S_POOL_LIST_UNAVAILABLE def test_list_pools_5xx_raises_http_500(self): - svc, mock_api = _make_pool_service() + svc, mock_k8s = _make_pool_service() err = ApiException(status=500) err.reason = "Internal" - mock_api.list_namespaced_custom_object.side_effect = err + mock_k8s.list_custom_objects.side_effect = err with pytest.raises(HTTPException) as exc_info: svc.list_pools() @@ -336,9 +391,21 @@ def test_list_pools_5xx_raises_http_500(self): assert exc_info.value.status_code == 500 assert exc_info.value.detail["code"] == SandboxErrorCodes.K8S_POOL_API_ERROR + def test_list_pools_403_passthrough(self): + svc, mock_k8s = _make_pool_service() + err = ApiException(status=403) + err.body = '{"message": "pods is forbidden: User cannot list resource"}' + mock_k8s.list_custom_objects.side_effect = err + + with pytest.raises(HTTPException) as exc_info: + svc.list_pools() + + assert exc_info.value.status_code == 403 + assert "forbidden" in exc_info.value.detail["message"].lower() + def test_list_pools_unexpected_exception_raises_http_500(self): - svc, mock_api = _make_pool_service() - mock_api.list_namespaced_custom_object.side_effect = RuntimeError("unexpected") + svc, mock_k8s = _make_pool_service() + mock_k8s.list_custom_objects.side_effect = RuntimeError("unexpected") with pytest.raises(HTTPException) as exc_info: svc.list_pools() @@ -352,15 +419,15 @@ def test_list_pools_unexpected_exception_raises_http_500(self): class TestUpdatePool: def test_update_pool_sends_correct_patch(self): - svc, mock_api = _make_pool_service() + svc, mock_k8s = _make_pool_service() updated_raw = _make_raw_pool(buffer_max=5, pool_max=20) - mock_api.patch_namespaced_custom_object.return_value = updated_raw + mock_k8s.patch_custom_object.return_value = updated_raw request = UpdatePoolRequest(capacitySpec=_capacity_spec(buffer_max=5, pool_max=20)) result = svc.update_pool("my-pool", request) - mock_api.patch_namespaced_custom_object.assert_called_once() - call_kwargs = mock_api.patch_namespaced_custom_object.call_args.kwargs + mock_k8s.patch_custom_object.assert_called_once() + call_kwargs = mock_k8s.patch_custom_object.call_args.kwargs assert call_kwargs["name"] == "my-pool" assert call_kwargs["namespace"] == "test-ns" patch_body = call_kwargs["body"] @@ -369,8 +436,8 @@ def test_update_pool_sends_correct_patch(self): assert result.capacity_spec.buffer_max == 5 def test_update_pool_404_raises_http_not_found(self): - svc, mock_api = _make_pool_service() - mock_api.patch_namespaced_custom_object.side_effect = ApiException(status=404) + svc, mock_k8s = _make_pool_service() + mock_k8s.patch_custom_object.side_effect = ApiException(status=404) with pytest.raises(HTTPException) as exc_info: svc.update_pool("missing", UpdatePoolRequest(capacitySpec=_capacity_spec())) @@ -379,10 +446,10 @@ def test_update_pool_404_raises_http_not_found(self): assert exc_info.value.detail["code"] == SandboxErrorCodes.K8S_POOL_NOT_FOUND def test_update_pool_5xx_raises_http_500(self): - svc, mock_api = _make_pool_service() + svc, mock_k8s = _make_pool_service() err = ApiException(status=500) err.reason = "Timeout" - mock_api.patch_namespaced_custom_object.side_effect = err + mock_k8s.patch_custom_object.side_effect = err with pytest.raises(HTTPException) as exc_info: svc.update_pool("p", UpdatePoolRequest(capacitySpec=_capacity_spec())) @@ -391,8 +458,8 @@ def test_update_pool_5xx_raises_http_500(self): assert exc_info.value.detail["code"] == SandboxErrorCodes.K8S_POOL_API_ERROR def test_update_pool_unexpected_exception_raises_http_500(self): - svc, mock_api = _make_pool_service() - mock_api.patch_namespaced_custom_object.side_effect = ValueError("bad") + svc, mock_k8s = _make_pool_service() + mock_k8s.patch_custom_object.side_effect = ValueError("bad") with pytest.raises(HTTPException) as exc_info: svc.update_pool("p", UpdatePoolRequest(capacitySpec=_capacity_spec())) @@ -406,12 +473,11 @@ def test_update_pool_unexpected_exception_raises_http_500(self): class TestDeletePool: def test_delete_pool_calls_k8s_delete(self): - svc, mock_api = _make_pool_service(namespace="opensandbox") - mock_api.delete_namespaced_custom_object.return_value = {} + svc, mock_k8s = _make_pool_service(namespace="opensandbox") svc.delete_pool("old-pool") - mock_api.delete_namespaced_custom_object.assert_called_once_with( + mock_k8s.delete_custom_object.assert_called_once_with( group="sandbox.opensandbox.io", version="v1alpha1", namespace="opensandbox", @@ -421,15 +487,14 @@ def test_delete_pool_calls_k8s_delete(self): ) def test_delete_pool_returns_none(self): - svc, mock_api = _make_pool_service() - mock_api.delete_namespaced_custom_object.return_value = {} + svc, mock_k8s = _make_pool_service() result = svc.delete_pool("p") assert result is None def test_delete_pool_404_raises_http_not_found(self): - svc, mock_api = _make_pool_service() - mock_api.delete_namespaced_custom_object.side_effect = ApiException(status=404) + svc, mock_k8s = _make_pool_service() + mock_k8s.delete_custom_object.side_effect = ApiException(status=404) with pytest.raises(HTTPException) as exc_info: svc.delete_pool("ghost") @@ -439,10 +504,10 @@ def test_delete_pool_404_raises_http_not_found(self): assert "ghost" in exc_info.value.detail["message"] def test_delete_pool_5xx_raises_http_500(self): - svc, mock_api = _make_pool_service() + svc, mock_k8s = _make_pool_service() err = ApiException(status=500) err.reason = "Internal" - mock_api.delete_namespaced_custom_object.side_effect = err + mock_k8s.delete_custom_object.side_effect = err with pytest.raises(HTTPException) as exc_info: svc.delete_pool("p") @@ -451,8 +516,8 @@ def test_delete_pool_5xx_raises_http_500(self): assert exc_info.value.detail["code"] == SandboxErrorCodes.K8S_POOL_API_ERROR def test_delete_pool_unexpected_exception_raises_http_500(self): - svc, mock_api = _make_pool_service() - mock_api.delete_namespaced_custom_object.side_effect = OSError("io") + svc, mock_k8s = _make_pool_service() + mock_k8s.delete_custom_object.side_effect = OSError("io") with pytest.raises(HTTPException) as exc_info: svc.delete_pool("p") diff --git a/server/tests/k8s/test_provider_factory.py b/server/tests/k8s/test_provider_factory.py index 20c938908..4385b288a 100644 --- a/server/tests/k8s/test_provider_factory.py +++ b/server/tests/k8s/test_provider_factory.py @@ -24,6 +24,7 @@ register_provider, create_workload_provider, list_available_providers, + resolve_workload_provider_type, PROVIDER_TYPE_BATCHSANDBOX, PROVIDER_TYPE_AGENT_SANDBOX, ) @@ -35,6 +36,15 @@ +class TestResolveWorkloadProviderType: + def test_none_returns_default_registry_key(self): + assert resolve_workload_provider_type(None) == PROVIDER_TYPE_BATCHSANDBOX + + def test_normalizes_case(self): + assert resolve_workload_provider_type("BatchSandbox") == PROVIDER_TYPE_BATCHSANDBOX + assert resolve_workload_provider_type("Agent-Sandbox") == PROVIDER_TYPE_AGENT_SANDBOX + + class TestProviderFactory: """provider_factory unit tests""" diff --git a/server/tests/test_factory.py b/server/tests/test_factory.py new file mode 100644 index 000000000..2d045a7fa --- /dev/null +++ b/server/tests/test_factory.py @@ -0,0 +1,82 @@ +# Copyright 2026 Alibaba Group Holding Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. + +"""Tests for src.services.factory (sandbox + pool service creation).""" + +from unittest.mock import MagicMock, patch + +from src.config import ( + AgentSandboxRuntimeConfig, + AppConfig, + KubernetesRuntimeConfig, + RuntimeConfig, + ServerConfig, +) +from src.services.factory import create_pool_service +from src.services.k8s.pool_service import PoolService +from src.services.k8s.provider_factory import PROVIDER_TYPE_BATCHSANDBOX + + +def _server() -> ServerConfig: + return ServerConfig(host="0.0.0.0", port=8080, log_level="DEBUG", api_key="k") + + +def _k8s_cfg(**kwargs) -> KubernetesRuntimeConfig: + base = dict( + kubeconfig_path="/tmp/test-kubeconfig", + namespace="ns", + service_account="sa", + workload_provider=PROVIDER_TYPE_BATCHSANDBOX, + ) + base.update(kwargs) + return KubernetesRuntimeConfig(**base) + + +def test_create_pool_service_none_for_docker_runtime(): + cfg = AppConfig( + server=_server(), + runtime=RuntimeConfig(type="docker", execd_image="img:test"), + kubernetes=None, + ) + assert create_pool_service(cfg) is None + + +def test_create_pool_service_none_for_agent_sandbox(): + k8s = _k8s_cfg(workload_provider="agent-sandbox") + app = AppConfig( + server=_server(), + runtime=RuntimeConfig(type="kubernetes", execd_image="img:test"), + kubernetes=k8s, + agent_sandbox=AgentSandboxRuntimeConfig(), + ) + assert create_pool_service(app) is None + + +def test_create_pool_service_returns_pool_for_batchsandbox(): + k8s = _k8s_cfg() + app = AppConfig( + server=_server(), + runtime=RuntimeConfig(type="kubernetes", execd_image="img:test"), + kubernetes=k8s, + ) + with patch("src.services.factory.K8sClient") as mock_cls: + mock_cls.return_value = MagicMock() + svc = create_pool_service(app) + assert isinstance(svc, PoolService) + mock_cls.assert_called_once_with(k8s) + + +def test_create_pool_service_default_workload_provider_is_batchsandbox(): + """Unset workload_provider resolves to default (batchsandbox) -> pool service created.""" + k8s = _k8s_cfg(workload_provider=None) + app = AppConfig( + server=_server(), + runtime=RuntimeConfig(type="kubernetes", execd_image="img:test"), + kubernetes=k8s, + ) + with patch("src.services.factory.K8sClient") as mock_cls: + mock_cls.return_value = MagicMock() + svc = create_pool_service(app) + assert isinstance(svc, PoolService) diff --git a/server/tests/test_pool_api.py b/server/tests/test_pool_api.py index 3917ee434..15c48f5a0 100644 --- a/server/tests/test_pool_api.py +++ b/server/tests/test_pool_api.py @@ -19,7 +19,6 @@ so no real cluster connection is needed. """ -import pytest from unittest.mock import MagicMock, patch from fastapi.testclient import TestClient from fastapi import HTTPException, status as http_status @@ -100,36 +99,36 @@ def _create_request_body(name: str = "test-pool") -> dict: class TestPoolAuthentication: def test_list_pools_without_api_key_returns_401(self, client: TestClient): - response = client.get("/pools") + response = client.get("/warmpools") assert response.status_code == 401 def test_create_pool_without_api_key_returns_401(self, client: TestClient): - response = client.post("/pools", json=_create_request_body()) + response = client.post("/warmpools", json=_create_request_body()) assert response.status_code == 401 def test_get_pool_without_api_key_returns_401(self, client: TestClient): - response = client.get("/pools/my-pool") + response = client.get("/warmpools/my-pool") assert response.status_code == 401 def test_update_pool_without_api_key_returns_401(self, client: TestClient): response = client.put( - "/pools/my-pool", + "/warmpools/my-pool", json={"capacitySpec": {"bufferMax": 5, "bufferMin": 1, "poolMax": 10, "poolMin": 0}}, ) assert response.status_code == 401 def test_delete_pool_without_api_key_returns_401(self, client: TestClient): - response = client.delete("/pools/my-pool") + response = client.delete("/warmpools/my-pool") assert response.status_code == 401 def test_pool_routes_exist_on_v1_prefix(self, client: TestClient, auth_headers: dict): - """Verify the /v1/pools routes are registered (even if they return 501 on docker runtime).""" + """Verify the /v1/warmpools routes are registered (even if they return 501 on docker runtime).""" with patch(_POOL_SERVICE_PATCH) as mock_svc_factory: mock_svc_factory.side_effect = HTTPException( status_code=http_status.HTTP_501_NOT_IMPLEMENTED, detail={"code": "X", "message": "y"}, ) - response = client.get("/v1/pools", headers=auth_headers) + response = client.get("/v1/warmpools", headers=auth_headers) assert response.status_code == 501 @@ -156,7 +155,7 @@ def test_list_pools_returns_501(self, client: TestClient, auth_headers: dict): status_code=501, detail={"code": SandboxErrorCodes.K8S_POOL_NOT_SUPPORTED, "message": "not k8s"}, )): - response = client.get("/pools", headers=auth_headers) + response = client.get("/warmpools", headers=auth_headers) assert response.status_code == 501 assert SandboxErrorCodes.K8S_POOL_NOT_SUPPORTED in response.json()["code"] @@ -165,12 +164,12 @@ def test_create_pool_returns_501(self, client: TestClient, auth_headers: dict): status_code=501, detail={"code": SandboxErrorCodes.K8S_POOL_NOT_SUPPORTED, "message": "not k8s"}, )): - response = client.post("/pools", json=_create_request_body(), headers=auth_headers) + response = client.post("/warmpools", json=_create_request_body(), headers=auth_headers) assert response.status_code == 501 # --------------------------------------------------------------------------- -# POST /pools +# POST /warmpools # --------------------------------------------------------------------------- class TestCreatePoolRoute: @@ -179,7 +178,7 @@ def test_create_pool_success_returns_201(self, client: TestClient, auth_headers: mock_svc.create_pool.return_value = _pool_response(name="new-pool") with patch(_POOL_SERVICE_PATCH, return_value=mock_svc): - response = client.post("/pools", json=_create_request_body("new-pool"), headers=auth_headers) + response = client.post("/warmpools", json=_create_request_body("new-pool"), headers=auth_headers) assert response.status_code == 201 body = response.json() @@ -191,35 +190,35 @@ def test_create_pool_missing_name_returns_422(self, client: TestClient, auth_hea body = _create_request_body() del body["name"] with patch(_POOL_SERVICE_PATCH, return_value=MagicMock()): - response = client.post("/pools", json=body, headers=auth_headers) + response = client.post("/warmpools", json=body, headers=auth_headers) assert response.status_code == 422 def test_create_pool_missing_template_returns_422(self, client: TestClient, auth_headers: dict): body = _create_request_body() del body["template"] with patch(_POOL_SERVICE_PATCH, return_value=MagicMock()): - response = client.post("/pools", json=body, headers=auth_headers) + response = client.post("/warmpools", json=body, headers=auth_headers) assert response.status_code == 422 def test_create_pool_missing_capacity_spec_returns_422(self, client: TestClient, auth_headers: dict): body = _create_request_body() del body["capacitySpec"] with patch(_POOL_SERVICE_PATCH, return_value=MagicMock()): - response = client.post("/pools", json=body, headers=auth_headers) + response = client.post("/warmpools", json=body, headers=auth_headers) assert response.status_code == 422 def test_create_pool_invalid_name_pattern_returns_422(self, client: TestClient, auth_headers: dict): """Pool name must be a valid k8s name (no uppercase, no spaces).""" body = _create_request_body("Invalid_Name") with patch(_POOL_SERVICE_PATCH, return_value=MagicMock()): - response = client.post("/pools", json=body, headers=auth_headers) + response = client.post("/warmpools", json=body, headers=auth_headers) assert response.status_code == 422 def test_create_pool_negative_buffer_max_returns_422(self, client: TestClient, auth_headers: dict): body = _create_request_body() body["capacitySpec"]["bufferMax"] = -1 with patch(_POOL_SERVICE_PATCH, return_value=MagicMock()): - response = client.post("/pools", json=body, headers=auth_headers) + response = client.post("/warmpools", json=body, headers=auth_headers) assert response.status_code == 422 def test_create_pool_duplicate_returns_409(self, client: TestClient, auth_headers: dict): @@ -232,7 +231,7 @@ def test_create_pool_duplicate_returns_409(self, client: TestClient, auth_header }, ) with patch(_POOL_SERVICE_PATCH, return_value=mock_svc): - response = client.post("/pools", json=_create_request_body("dup-pool"), headers=auth_headers) + response = client.post("/warmpools", json=_create_request_body("dup-pool"), headers=auth_headers) assert response.status_code == 409 assert SandboxErrorCodes.K8S_POOL_ALREADY_EXISTS in response.json()["code"] @@ -247,7 +246,7 @@ def test_create_pool_service_error_returns_500(self, client: TestClient, auth_he }, ) with patch(_POOL_SERVICE_PATCH, return_value=mock_svc): - response = client.post("/pools", json=_create_request_body(), headers=auth_headers) + response = client.post("/warmpools", json=_create_request_body(), headers=auth_headers) assert response.status_code == 500 @@ -256,7 +255,7 @@ def test_create_pool_passes_request_to_service(self, client: TestClient, auth_he mock_svc.create_pool.return_value = _pool_response() with patch(_POOL_SERVICE_PATCH, return_value=mock_svc): - client.post("/pools", json=_create_request_body("my-pool"), headers=auth_headers) + client.post("/warmpools", json=_create_request_body("my-pool"), headers=auth_headers) call_args = mock_svc.create_pool.call_args req: CreatePoolRequest = call_args.args[0] @@ -266,7 +265,7 @@ def test_create_pool_passes_request_to_service(self, client: TestClient, auth_he # --------------------------------------------------------------------------- -# GET /pools +# GET /warmpools # --------------------------------------------------------------------------- class TestListPoolsRoute: @@ -277,7 +276,7 @@ def test_list_pools_returns_200_and_items(self, client: TestClient, auth_headers ) with patch(_POOL_SERVICE_PATCH, return_value=mock_svc): - response = client.get("/pools", headers=auth_headers) + response = client.get("/warmpools", headers=auth_headers) assert response.status_code == 200 body = response.json() @@ -290,7 +289,7 @@ def test_list_pools_empty_returns_200_and_empty_list(self, client: TestClient, a mock_svc.list_pools.return_value = ListPoolsResponse(items=[]) with patch(_POOL_SERVICE_PATCH, return_value=mock_svc): - response = client.get("/pools", headers=auth_headers) + response = client.get("/warmpools", headers=auth_headers) assert response.status_code == 200 assert response.json()["items"] == [] @@ -302,7 +301,7 @@ def test_list_pools_service_error_returns_500(self, client: TestClient, auth_hea detail={"code": SandboxErrorCodes.K8S_POOL_API_ERROR, "message": "err"}, ) with patch(_POOL_SERVICE_PATCH, return_value=mock_svc): - response = client.get("/pools", headers=auth_headers) + response = client.get("/warmpools", headers=auth_headers) assert response.status_code == 500 @@ -312,7 +311,7 @@ def test_list_pools_response_has_status_fields(self, client: TestClient, auth_he items=[_pool_response("p", total=5, allocated=3, available=2)] ) with patch(_POOL_SERVICE_PATCH, return_value=mock_svc): - response = client.get("/pools", headers=auth_headers) + response = client.get("/warmpools", headers=auth_headers) pool = response.json()["items"][0] assert pool["status"]["total"] == 5 @@ -321,16 +320,22 @@ def test_list_pools_response_has_status_fields(self, client: TestClient, auth_he # --------------------------------------------------------------------------- -# GET /pools/{pool_name} +# GET /warmpoolss/{pool_name} # --------------------------------------------------------------------------- class TestGetPoolRoute: + def test_get_pool_invalid_path_name_returns_422(self, client: TestClient, auth_headers: dict): + with patch(_POOL_SERVICE_PATCH) as mock_get: + mock_get.side_effect = AssertionError("pool service must not run when path validation fails") + response = client.get("/warmpools/Invalid_Name", headers=auth_headers) + assert response.status_code == 422 + def test_get_pool_success_returns_200(self, client: TestClient, auth_headers: dict): mock_svc = MagicMock() mock_svc.get_pool.return_value = _pool_response(name="my-pool") with patch(_POOL_SERVICE_PATCH, return_value=mock_svc): - response = client.get("/pools/my-pool", headers=auth_headers) + response = client.get("/warmpools/my-pool", headers=auth_headers) assert response.status_code == 200 assert response.json()["name"] == "my-pool" @@ -340,7 +345,7 @@ def test_get_pool_calls_service_with_correct_name(self, client: TestClient, auth mock_svc.get_pool.return_value = _pool_response() with patch(_POOL_SERVICE_PATCH, return_value=mock_svc): - client.get("/pools/target-pool", headers=auth_headers) + client.get("/warmpools/target-pool", headers=auth_headers) mock_svc.get_pool.assert_called_once_with("target-pool") @@ -354,7 +359,7 @@ def test_get_pool_not_found_returns_404(self, client: TestClient, auth_headers: }, ) with patch(_POOL_SERVICE_PATCH, return_value=mock_svc): - response = client.get("/pools/ghost", headers=auth_headers) + response = client.get("/warmpools/ghost", headers=auth_headers) assert response.status_code == 404 assert SandboxErrorCodes.K8S_POOL_NOT_FOUND in response.json()["code"] @@ -364,7 +369,7 @@ def test_get_pool_response_includes_capacity_spec(self, client: TestClient, auth mock_svc.get_pool.return_value = _pool_response(buffer_max=7, pool_max=50) with patch(_POOL_SERVICE_PATCH, return_value=mock_svc): - response = client.get("/pools/p", headers=auth_headers) + response = client.get("/warmpools/p", headers=auth_headers) cap = response.json()["capacitySpec"] assert cap["bufferMax"] == 7 @@ -372,7 +377,7 @@ def test_get_pool_response_includes_capacity_spec(self, client: TestClient, auth # --------------------------------------------------------------------------- -# PUT /pools/{pool_name} +# PUT /warmpools/{pool_name} # --------------------------------------------------------------------------- class TestUpdatePoolRoute: @@ -391,7 +396,7 @@ def test_update_pool_success_returns_200(self, client: TestClient, auth_headers: mock_svc.update_pool.return_value = _pool_response(buffer_max=5, pool_max=20) with patch(_POOL_SERVICE_PATCH, return_value=mock_svc): - response = client.put("/pools/my-pool", json=self._update_body(), headers=auth_headers) + response = client.put("/warmpools/my-pool", json=self._update_body(), headers=auth_headers) assert response.status_code == 200 assert response.json()["capacitySpec"]["bufferMax"] == 5 @@ -403,7 +408,7 @@ def test_update_pool_calls_service_with_name_and_request( mock_svc.update_pool.return_value = _pool_response() with patch(_POOL_SERVICE_PATCH, return_value=mock_svc): - client.put("/pools/target", json=self._update_body(buffer_max=9), headers=auth_headers) + client.put("/warmpools/target", json=self._update_body(buffer_max=9), headers=auth_headers) call_args = mock_svc.update_pool.call_args assert call_args.args[0] == "target" @@ -420,7 +425,7 @@ def test_update_pool_not_found_returns_404(self, client: TestClient, auth_header }, ) with patch(_POOL_SERVICE_PATCH, return_value=mock_svc): - response = client.put("/pools/x", json=self._update_body(), headers=auth_headers) + response = client.put("/warmpools/x", json=self._update_body(), headers=auth_headers) assert response.status_code == 404 @@ -428,7 +433,7 @@ def test_update_pool_missing_capacity_spec_returns_422( self, client: TestClient, auth_headers: dict ): with patch(_POOL_SERVICE_PATCH, return_value=MagicMock()): - response = client.put("/pools/p", json={}, headers=auth_headers) + response = client.put("/warmpools/p", json={}, headers=auth_headers) assert response.status_code == 422 def test_update_pool_negative_pool_max_returns_422( @@ -436,7 +441,7 @@ def test_update_pool_negative_pool_max_returns_422( ): with patch(_POOL_SERVICE_PATCH, return_value=MagicMock()): response = client.put( - "/pools/p", + "/warmpools/p", json={"capacitySpec": {"bufferMax": 1, "bufferMin": 0, "poolMax": -5, "poolMin": 0}}, headers=auth_headers, ) @@ -444,7 +449,7 @@ def test_update_pool_negative_pool_max_returns_422( # --------------------------------------------------------------------------- -# DELETE /pools/{pool_name} +# DELETE /warmpools/{pool_name} # --------------------------------------------------------------------------- class TestDeletePoolRoute: @@ -453,7 +458,7 @@ def test_delete_pool_success_returns_204(self, client: TestClient, auth_headers: mock_svc.delete_pool.return_value = None with patch(_POOL_SERVICE_PATCH, return_value=mock_svc): - response = client.delete("/pools/my-pool", headers=auth_headers) + response = client.delete("/warmpools/my-pool", headers=auth_headers) assert response.status_code == 204 assert response.content == b"" @@ -465,7 +470,7 @@ def test_delete_pool_calls_service_with_correct_name( mock_svc.delete_pool.return_value = None with patch(_POOL_SERVICE_PATCH, return_value=mock_svc): - client.delete("/pools/to-remove", headers=auth_headers) + client.delete("/warmpools/to-remove", headers=auth_headers) mock_svc.delete_pool.assert_called_once_with("to-remove") @@ -479,7 +484,7 @@ def test_delete_pool_not_found_returns_404(self, client: TestClient, auth_header }, ) with patch(_POOL_SERVICE_PATCH, return_value=mock_svc): - response = client.delete("/pools/gone", headers=auth_headers) + response = client.delete("/warmpools/gone", headers=auth_headers) assert response.status_code == 404 assert SandboxErrorCodes.K8S_POOL_NOT_FOUND in response.json()["code"] @@ -491,6 +496,6 @@ def test_delete_pool_service_error_returns_500(self, client: TestClient, auth_he detail={"code": SandboxErrorCodes.K8S_POOL_API_ERROR, "message": "err"}, ) with patch(_POOL_SERVICE_PATCH, return_value=mock_svc): - response = client.delete("/pools/p", headers=auth_headers) + response = client.delete("/warmpools/p", headers=auth_headers) assert response.status_code == 500 diff --git a/specs/warm-pool-api.yaml b/specs/warm-pool-api.yaml new file mode 100644 index 000000000..a0fb03b9a --- /dev/null +++ b/specs/warm-pool-api.yaml @@ -0,0 +1,457 @@ +openapi: 3.1.0 +info: + title: OpenSandbox WarmPool API + version: 0.1.0 + description: | + The WarmPool API manages pre-warmed sandbox resource pools on Kubernetes (CRD kind `Pool`). + WarmPools reduce cold-start latency by maintaining a set of ready pods; sandboxes can reference + a pool via `extensions.poolRef` during sandbox creation. + + These endpoints are only available when the server is configured with `runtime.type: kubernetes` + **and** `kubernetes.workload_provider` resolves to `batchsandbox` (WarmPool is used with + BatchSandbox workloads). Other runtimes or workload providers (e.g. `agent-sandbox`) return + HTTP `501 Not Implemented` with a machine-readable error body (`KUBERNETES::POOL_NOT_SUPPORTED`). + + ## Authentication + + API Key authentication is required for all operations: + + 1. **HTTP Header** + ``` + OPEN-SANDBOX-API-KEY: your-api-key + ``` + + 2. **Environment Variable** (for SDK clients) + ``` + OPEN_SANDBOX_API_KEY=your-api-key + ``` + + SDK clients will automatically pick up this environment variable. +servers: + - url: http://localhost:8080/v1 + description: Local development +security: + - apiKeyAuth: [] +tags: + - name: WarmPools + description: Create, list, update, and delete pre-warmed Kubernetes WarmPool resources +paths: + /warmpools: + get: + tags: [WarmPools] + summary: List WarmPools + description: | + Returns all WarmPool resources in the configured Kubernetes namespace. + + If the cluster returns HTTP 404 for the Pool list API (CRD not installed, wrong + API version, or similar), the server responds with **503** and + `KUBERNETES::POOL_LIST_UNAVAILABLE`—this is distinct from **200** with an empty + `items` array when the API is healthy but no pools exist. + responses: + '200': + description: List of WarmPools + content: + application/json: + schema: + $ref: '#/components/schemas/ListWarmPoolsResponse' + headers: + X-Request-ID: + $ref: '#/components/headers/XRequestId' + '401': + $ref: '#/components/responses/Unauthorized' + '501': + $ref: '#/components/responses/NotImplemented' + '503': + $ref: '#/components/responses/ServiceUnavailable' + '500': + $ref: '#/components/responses/InternalServerError' + post: + tags: [WarmPools] + summary: Create a WarmPool + description: | + Creates a Pool CRD that manages a set of pre-warmed pods. After creation, + sandboxes can reference the pool via `extensions.poolRef` in the Sandbox Lifecycle + API to benefit from reduced cold-start latency. + + ## Authentication + + API Key authentication is required via: + - `OPEN-SANDBOX-API-KEY: ` header + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/CreateWarmPoolRequest' + examples: + minimal: + summary: Basic WarmPool with capacity and pod template + value: + name: my-warm-pool + template: + metadata: + labels: + app: sandbox-warm-pool + spec: + containers: + - name: worker + image: opensandbox/execd:latest + capacitySpec: + bufferMax: 2 + bufferMin: 1 + poolMax: 10 + poolMin: 2 + responses: + '201': + description: WarmPool created successfully + content: + application/json: + schema: + $ref: '#/components/schemas/WarmPool' + headers: + X-Request-ID: + $ref: '#/components/headers/XRequestId' + '400': + $ref: '#/components/responses/BadRequest' + '401': + $ref: '#/components/responses/Unauthorized' + '409': + $ref: '#/components/responses/Conflict' + '501': + $ref: '#/components/responses/NotImplemented' + '500': + $ref: '#/components/responses/InternalServerError' + '422': + $ref: '#/components/responses/UnprocessableEntity' + /warmpools/{warmPoolName}: + parameters: + - $ref: '#/components/parameters/WarmPoolName' + get: + tags: [WarmPools] + summary: Get a WarmPool by name + description: | + Returns the current WarmPool definition including capacity configuration and observed + runtime status (when reconciled). + responses: + '200': + description: WarmPool retrieved successfully + content: + application/json: + schema: + $ref: '#/components/schemas/WarmPool' + headers: + X-Request-ID: + $ref: '#/components/headers/XRequestId' + '401': + $ref: '#/components/responses/Unauthorized' + '404': + $ref: '#/components/responses/NotFound' + '501': + $ref: '#/components/responses/NotImplemented' + '500': + $ref: '#/components/responses/InternalServerError' + '422': + $ref: '#/components/responses/UnprocessableEntity' + put: + tags: [WarmPools] + summary: Update WarmPool capacity + description: | + Updates only the capacity configuration (`capacitySpec`: `bufferMax`, `bufferMin`, + `poolMax`, `poolMin`). To change the pod template, delete and recreate the WarmPool. + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/UpdateWarmPoolRequest' + examples: + scale: + summary: Adjust capacity bounds + value: + capacitySpec: + bufferMax: 3 + bufferMin: 1 + poolMax: 20 + poolMin: 4 + responses: + '200': + description: WarmPool capacity updated successfully + content: + application/json: + schema: + $ref: '#/components/schemas/WarmPool' + headers: + X-Request-ID: + $ref: '#/components/headers/XRequestId' + '400': + $ref: '#/components/responses/BadRequest' + '401': + $ref: '#/components/responses/Unauthorized' + '404': + $ref: '#/components/responses/NotFound' + '501': + $ref: '#/components/responses/NotImplemented' + '500': + $ref: '#/components/responses/InternalServerError' + '422': + $ref: '#/components/responses/UnprocessableEntity' + delete: + tags: [WarmPools] + summary: Delete a WarmPool + description: | + Deletes the Pool CRD. Pre-warmed pods managed by the warm pool are terminated by the + pool controller. + responses: + '204': + description: WarmPool deleted successfully + headers: + X-Request-ID: + $ref: '#/components/headers/XRequestId' + '401': + $ref: '#/components/responses/Unauthorized' + '404': + $ref: '#/components/responses/NotFound' + '501': + $ref: '#/components/responses/NotImplemented' + '500': + $ref: '#/components/responses/InternalServerError' + '422': + $ref: '#/components/responses/UnprocessableEntity' +components: + securitySchemes: + apiKeyAuth: + type: apiKey + in: header + name: OPEN-SANDBOX-API-KEY + description: | + API Key for authentication. Can be provided via: + 1. HTTP Header: OPEN-SANDBOX-API-KEY: your-api-key + 2. Environment variable: OPEN_SANDBOX_API_KEY (for SDK clients) + parameters: + WarmPoolName: + name: warmPoolName + in: path + required: true + description: Name of the WarmPool (Kubernetes resource name) + schema: + type: string + pattern: '^[a-z0-9]([-a-z0-9]*[a-z0-9])?$' + maxLength: 253 + headers: + XRequestId: + description: Unique request identifier for tracing + schema: + type: string + format: uuid + responses: + BadRequest: + description: The request was invalid or malformed + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + headers: + X-Request-ID: + $ref: '#/components/headers/XRequestId' + Unauthorized: + description: Authentication credentials are missing or invalid + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + headers: + X-Request-ID: + $ref: '#/components/headers/XRequestId' + NotFound: + description: The requested WarmPool does not exist + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + headers: + X-Request-ID: + $ref: '#/components/headers/XRequestId' + Conflict: + description: A WarmPool with the same name already exists + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + headers: + X-Request-ID: + $ref: '#/components/headers/XRequestId' + NotImplemented: + description: | + WarmPool management is not supported: e.g. `runtime.type` is not `kubernetes`, or + `kubernetes.workload_provider` is not `batchsandbox` (same error code in both cases). + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + headers: + X-Request-ID: + $ref: '#/components/headers/XRequestId' + ServiceUnavailable: + description: | + Pool list API returned 404 from Kubernetes (e.g. Pool CRD missing or API path wrong). + Error body uses code `KUBERNETES::POOL_LIST_UNAVAILABLE`. + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + headers: + X-Request-ID: + $ref: '#/components/headers/XRequestId' + InternalServerError: + description: An unexpected server error occurred + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + headers: + X-Request-ID: + $ref: '#/components/headers/XRequestId' + UnprocessableEntity: + description: | + Request validation failed. Typical cases: JSON body does not match the schema (POST/PUT), + or path parameter `warmPoolName` violates the Kubernetes resource name pattern. + The response follows FastAPI's default shape: JSON object with a `detail` array of + validation errors (each entry includes a location, message, and type). + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' + headers: + X-Request-ID: + $ref: '#/components/headers/XRequestId' + schemas: + ValidationErrorItem: + type: object + description: Single validation issue (FastAPI / Pydantic). + additionalProperties: true + HTTPValidationError: + type: object + description: FastAPI default 422 response body. + properties: + detail: + type: array + description: List of validation errors. + items: + $ref: '#/components/schemas/ValidationErrorItem' + required: [detail] + additionalProperties: false + ErrorResponse: + type: object + description: | + Standard error response for all non-2xx HTTP responses. + HTTP status code indicates the error category; code and message provide details. + properties: + code: + type: string + description: | + Machine-readable error code (e.g., INVALID_REQUEST, NOT_FOUND, INTERNAL_ERROR). + Use this for programmatic error handling. + message: + type: string + description: Human-readable error message describing what went wrong and how to fix it. + required: [code, message] + additionalProperties: false + WarmPoolCapacitySpec: + type: object + description: Capacity configuration that controls the size of the WarmPool. + properties: + bufferMax: + type: integer + minimum: 0 + description: Maximum number of nodes kept in the warm buffer. + bufferMin: + type: integer + minimum: 0 + description: Minimum number of nodes that must remain in the buffer. + poolMax: + type: integer + minimum: 0 + description: Maximum total number of nodes allowed in the entire pool. + poolMin: + type: integer + minimum: 0 + description: Minimum total size of the pool. + required: [bufferMax, bufferMin, poolMax, poolMin] + additionalProperties: false + CreateWarmPoolRequest: + type: object + description: | + Request to create a new WarmPool. A WarmPool manages a set of + pre-warmed pods that can be rapidly allocated to sandboxes. + properties: + name: + type: string + description: Unique name for the WarmPool (must be a valid Kubernetes resource name). + pattern: '^[a-z0-9]([-a-z0-9]*[a-z0-9])?$' + maxLength: 253 + template: + type: object + description: | + Kubernetes PodTemplateSpec defining the pod configuration for pre-warmed nodes. + Follows the same schema as `spec.template` in a Kubernetes Deployment. + capacitySpec: + $ref: '#/components/schemas/WarmPoolCapacitySpec' + required: [name, template, capacitySpec] + additionalProperties: false + UpdateWarmPoolRequest: + type: object + description: | + Request to update an existing WarmPool's capacity configuration. Only capacity settings + can be updated after creation; changing the pod template requires recreating the WarmPool. + properties: + capacitySpec: + $ref: '#/components/schemas/WarmPoolCapacitySpec' + required: [capacitySpec] + additionalProperties: false + WarmPoolStatus: + type: object + description: Observed runtime state of a WarmPool. + properties: + total: + type: integer + description: Total number of nodes in the warm pool. + allocated: + type: integer + description: Number of nodes currently allocated to sandboxes. + available: + type: integer + description: Number of nodes currently available in the warm pool. + revision: + type: string + description: Latest revision identifier of the WarmPool. + required: [total, allocated, available, revision] + additionalProperties: false + WarmPool: + type: object + description: Full representation of a WarmPool resource. + properties: + name: + type: string + description: Unique WarmPool name. + capacitySpec: + $ref: '#/components/schemas/WarmPoolCapacitySpec' + status: + $ref: '#/components/schemas/WarmPoolStatus' + description: Observed runtime state of the WarmPool. May be absent if not yet reconciled. + createdAt: + type: string + format: date-time + description: WarmPool creation timestamp. + required: [name, capacitySpec] + additionalProperties: false + ListWarmPoolsResponse: + type: object + description: Collection of WarmPools. + properties: + items: + type: array + description: List of WarmPools. + items: + $ref: '#/components/schemas/WarmPool' + required: [items] + additionalProperties: false