diff --git a/.github/workflows/real-e2e.yml b/.github/workflows/real-e2e.yml index 8ab7fe749..ce785e29d 100644 --- a/.github/workflows/real-e2e.yml +++ b/.github/workflows/real-e2e.yml @@ -67,6 +67,9 @@ jobs: network_mode = "bridge" [storage] allowed_host_paths = ["/tmp/opensandbox-e2e"] + [renew_intent] + enabled = true + min_interval_seconds = 60 EOF ./scripts/python-e2e.sh diff --git a/components/ingress/README.md b/components/ingress/README.md index 51c4b48d2..e516396ca 100644 --- a/components/ingress/README.md +++ b/components/ingress/README.md @@ -76,7 +76,7 @@ wss://ingress.opensandbox.io/my-sandbox/8080/ws When enabled, the ingress publishes **renew-intent** events to a Redis list on each proxied request (after resolving the sandbox). The OpenSandbox server consumes these events and may extend sandbox expiration for sandboxes that opted in at creation time. See [OSEP-0009](https://github.com/alibaba/opensandbox/blob/main/oseps/0009-auto-renew-sandbox-on-ingress-access.md) for the full design. -**Requirements:** The server must have auto-renew and Redis consumer enabled; the sandbox must be created with `extensions["auto_renew_on_access"]="true"`. This feature is best-effort and disabled by default. +**Requirements:** The server must have `renew_intent` (and Redis consumer for ingress mode) enabled; the sandbox must opt in via `extensions["access.renew.extend.seconds"]` (decimal integer string between **300** and **86400** seconds, see OSEP-0009). This feature is best-effort and disabled by default. 
| Flag | Default | Description | |------|---------|-------------| diff --git a/oseps/0009-auto-renew-sandbox-on-ingress-access.md b/oseps/0009-auto-renew-sandbox-on-ingress-access.md index dec632d60..1e669a828 100644 --- a/oseps/0009-auto-renew-sandbox-on-ingress-access.md +++ b/oseps/0009-auto-renew-sandbox-on-ingress-access.md @@ -3,8 +3,8 @@ title: Auto-Renew Sandbox on Ingress Access authors: - "@Pangjiping" creation-date: 2026-03-15 -last-updated: 2026-03-19 -status: implementing +last-updated: 2026-03-23 +status: implemented --- # OSEP-0009: Auto-Renew Sandbox on Ingress Access @@ -74,7 +74,7 @@ An access-driven renewal mechanism is needed, but it must be strongly rate-contr - The implementation must work with existing lifecycle API and runtime providers. - Reverse proxy traffic must be the only trigger source for this proposal. - Auto-renew must be disabled unless all three conditions are met: - - server supports and enables auto-renew-on-access, + - server supports and enables `renew_intent`, - ingress supports and enables renew-intent signaling (for ingress mode), - sandbox creation request explicitly opts in via `extensions`. - Renewal requests must be bounded by deduplication and throttling controls. @@ -88,7 +88,7 @@ Add an "access renew controller" that converts proxy access signals into control - In server proxy mode, the server path handling proxied traffic submits local renew intents and performs internal renewal calls. - In ingress gateway mode, ingress publishes renew intents into Redis; OpenSandbox server consumes and executes controlled renewals. -- Both modes share the same renewal gate logic: opt-in check, eligibility window, cooldown, and per-sandbox in-flight deduplication. +- Both modes share the same renewal gate logic: opt-in check, sandbox state, server-side validity for each renew attempt, cooldown, and per-sandbox in-flight deduplication. 
At a high level, access traffic indicates activity, but only eligible events produce actual `renew-expiration` operations. @@ -103,10 +103,10 @@ At a high level, access traffic indicates activity, but only eligible events pro | Risk | Mitigation | | --- | --- | -| Renewal storms under high ingress QPS | Multi-stage gating: renew-window check + cooldown + in-flight dedupe | +| Renewal storms under high ingress QPS | Multi-stage gating: validity checks + cooldown + in-flight dedupe | | Duplicate renewals across server replicas | Redis lock keys for distributed dedupe in ingress mode; local dedupe in server proxy path | | Redis backlog growth in traffic spikes | Queue TTL, bounded consumer concurrency, and drop-on-overload policy | -| False negatives (active sandbox not renewed) | Configurable renew window and cooldown; metrics/alerts for missed renew opportunities | +| False negatives (active sandbox not renewed) | Server-side eligibility rules and cooldown; metrics/alerts for missed renew opportunities | | Added operational complexity | Feature flag rollout, default-off mode, and explicit docs/runbooks | ## Design Details @@ -133,9 +133,9 @@ Explicitly unsupported: This feature uses explicit "three-party handshake" activation. 1. **Server-side capability switch** - - `server.auto_renew_on_access.enabled = true` must be set (stored under `ServerConfig`). + - `renew_intent.enabled = true` must be set (top-level TOML section `[renew_intent]`, model field on root `AppConfig`). 2. **Ingress-side capability switch** (ingress mode only) - - ingress must be configured to publish renew-intents (`server.auto_renew_on_access.redis.enabled = true` and ingress integration enabled). + - ingress must be configured to publish renew-intents (`renew_intent.redis.enabled = true` and ingress integration enabled). 3. **Sandbox-level opt-in and duration** - sandbox must declare in `CreateSandboxRequest.extensions` how long each automatic renewal extends expiration (see below). 
Presence of a valid value opts the sandbox in. @@ -143,23 +143,23 @@ If any condition is missing, access events are ignored for renewal. Given current API schema (`extensions: Dict[str, str]`), this OSEP proposes: -- `extensions["access.renew.extend.seconds"]` = positive integer string (e.g. `"1800"`) +- `extensions["access.renew.extend.seconds"]` = decimal integer **string** in the inclusive range **300–86400** seconds (**5 minutes** to **24 hours**), e.g. `"1800"`. **Meaning:** When auto-renew on access is triggered for this sandbox, each renewal extends expiration by this many seconds. The key thus both opts the sandbox in and defines the per-renewal extension duration. **Behavior rules:** -- Missing key or invalid value (non-positive integer string) means no auto-renew on access for that sandbox. -- Valid value (e.g. `"1800"`) enables auto-renew subject to policy gating; each successful renewal uses `new_expires_at = now + (value of access.renew.extend.seconds)`. -- Invalid values are rejected at sandbox creation time with 4xx validation error. +- Missing key means no renew-on-access for that sandbox. +- If the key is present, the value must parse as an integer in **300–86400**; otherwise the create request fails with **400** (validated in the HTTP API layer via `validate_extensions` in `src/extensions/validation.py` before the runtime service runs). +- Valid value enables auto-renew subject to policy gating; each successful renewal uses `new_expires_at = now + (value of access.renew.extend.seconds)`. ### Control Strategy to Prevent Renewal Storms Both modes share the same strict control policy. An access event triggers renewal only when all checks pass: -1. **Opt-in check**: sandbox has a valid positive `access.renew.extend.seconds` in extensions. +1. **Opt-in check**: sandbox has `access.renew.extend.seconds` in extensions within **300–86400** (validated at creation). 2. **Sandbox state check**: sandbox must be `Running`. -3. 
**Renew window check**: remaining TTL must be below `before_expiration_seconds`. +3. **Validity check**: server decides whether the renewal attempt should proceed (e.g. `new_expires_at` meaningfully extends current expiration, lifecycle rules). There is **no** separate configurable “remaining TTL must be below N seconds” knob in server config. 4. **Cooldown check**: no successful renewal for this sandbox within `min_interval_seconds`. 5. **In-flight dedupe**: at most one renewal task per sandbox at a time. @@ -167,7 +167,7 @@ If any check fails, the event is acknowledged and dropped without a renewal call Renew target time: -- `new_expires_at = now + (value of extensions["access.renew.extend.seconds"])`; server may enforce a cap or default. +- `new_expires_at = now + (value of extensions["access.renew.extend.seconds"])`; the extension duration is taken only from the sandbox `extensions` (no server-side override or default for this value). - must also satisfy `new_expires_at > current_expires_at` before calling renew API This guarantees bounded renewal frequency even for very hot sandboxes. @@ -270,46 +270,44 @@ Producer (ingress): Consumer (server): - One or more workers block with `BRPOP opensandbox:renew:intent `. -- On pop: parse payload; if `now - observed_at > event_ttl_seconds`, drop and continue. -- Acquire lock: `SET opensandbox:renew:lock:{sandbox_id} NX EX lock_ttl_seconds`. -- If lock acquired: run gate checks (opt-in, state, window, cooldown) and maybe renew; then lock expires by TTL. +- On pop: parse payload; if the intent is older than a short implementation-defined max age (vs `observed_at`), drop and continue. +- Acquire lock: `SET opensandbox:renew:lock:{sandbox_id} NX EX <ttl_seconds>` using a short implementation-defined lock TTL. +- If lock acquired: run gate checks (opt-in, state, validity, cooldown) and maybe renew; then lock expires by TTL. - If lock not acquired: treat as in-flight dedupe, drop. 
- No ack or requeue: if the worker crashes after pop, that intent is lost (best-effort). Notes: -- Lock TTL must be short and greater than the renew critical section. +- Lock TTL and intent staleness thresholds are fixed in code (not Redis config); lock TTL must be short and greater than the renew critical section. - Implementations must use Redis List; this LPUSH/BRPOP + lock flow is the only specified processing model. ### Configuration -Use `server` configuration namespace; no independent top-level config block is required: +Use the root config file: lifecycle API settings stay under `[server]`; renew-on-access is a **separate top-level section** `[renew_intent]` (not nested under `[server]`), alongside `[runtime]`, `[docker]`, etc. ```toml [server] -auto_renew_on_access.enabled = false -auto_renew_on_access.before_expiration_seconds = 300 -auto_renew_on_access.extension_seconds = 1800 -auto_renew_on_access.min_interval_seconds = 60 - -# auto-detected by request path: -# - server-proxy path uses local trigger -# - ingress path uses redis trigger - -auto_renew_on_access.redis.enabled = false -auto_renew_on_access.redis.url = "redis://127.0.0.1:6379/0" -auto_renew_on_access.redis.queue_key = "opensandbox:renew:intent" -auto_renew_on_access.redis.lock_ttl_seconds = 10 -auto_renew_on_access.redis.event_ttl_seconds = 30 -auto_renew_on_access.redis.consumer_concurrency = 8 +# ... host, port, etc. + +# Auto-detected by request path: +# - server-proxy path uses local trigger (no Redis required) +# - ingress path uses Redis consumer when renew_intent.redis is enabled + +[renew_intent] +enabled = false +min_interval_seconds = 60 +redis.enabled = false +redis.dsn = "redis://127.0.0.1:6379/0" +redis.queue_key = "opensandbox:renew:intent" +redis.consumer_concurrency = 8 ``` Configuration rules: -- `server.auto_renew_on_access.enabled=false` means feature fully disabled. +- `renew_intent.enabled=false` means feature fully disabled. 
- Ingress path renewal requires Redis block enabled and reachable on the server; the **ingress component** uses its own config (e.g. CLI flags: `--renew-intent-enabled`, `--renew-intent-redis-dsn`, `--renew-intent-queue-key`, `--renew-intent-queue-max-len`, `--renew-intent-min-interval`) to connect to Redis and publish intents. Queue key and default list name should match what the server consumer expects (e.g. `opensandbox:renew:intent`). - Server proxy path can run without Redis. -- Feature is applied per sandbox only when `extensions["access.renew.extend.seconds"]` is present and a valid positive integer string. +- Per-renewal extension duration is **not** a server setting: it comes only from sandbox `extensions["access.renew.extend.seconds"]` (set at creation to **300–86400** seconds or creation fails with **400**). Omit the key to disable renew-on-access for that sandbox. - Docker runtime direct mode remains unsupported regardless of this config. Create request example: @@ -329,7 +327,7 @@ Create request example: - **Unit Tests** - Extension validation for auto-renew opt-in keys and values - - Renew eligibility function (window/cooldown/state checks) + - Renew eligibility function (validity/cooldown/state checks) - In-flight dedupe behavior under concurrent signals - Renew target time calculation and monotonicity checks - **Integration Tests (Server Proxy)** @@ -369,5 +367,5 @@ Success criteria: 2. Enable in server proxy path for canary validation. 3. Enable ingress + Redis path progressively. - Rollback: - - Disable `server.auto_renew_on_access.enabled` (and `server.auto_renew_on_access.redis.enabled` for ingress mode). + - Disable `renew_intent.enabled` (and `renew_intent.redis.enabled` for ingress mode). - Existing manual renewal flow remains unchanged. 
diff --git a/oseps/README.md b/oseps/README.md index 702f4656a..1886bf89f 100644 --- a/oseps/README.md +++ b/oseps/README.md @@ -14,4 +14,4 @@ This is the complete list of OpenSandbox Enhancement Proposals: | [OSEP-0006](0006-developer-console.md) | Developer Console for Sandbox Operations | implementable | 2026-03-06 | | [OSEP-0007](0007-fast-sandbox-runtime-support.md) | Fast Sandbox Runtime Support | provisional | 2026-02-08 | | [OSEP-0008](0008-pause-resume-rootfs-snapshot.md) | Pause and Resume via Rootfs Snapshot | draft | 2026-03-13 | -| [OSEP-0009](0009-auto-renew-sandbox-on-ingress-access.md) | Auto-Renew Sandbox on Ingress Access | implementing | 2026-03-18 | \ No newline at end of file +| [OSEP-0009](0009-auto-renew-sandbox-on-ingress-access.md) | Auto-Renew Sandbox on Ingress Access | implemented | 2026-03-23 | \ No newline at end of file diff --git a/server/README.md b/server/README.md index 95ee18b20..13d3bb5b7 100644 --- a/server/README.md +++ b/server/README.md @@ -149,6 +149,8 @@ The returned endpoint is rewritten to the server proxy route: Reference runtime compose file: - `server/docker-compose.example.yaml` +For **experimental** lifecycle options (e.g. auto-renew on access), see [Experimental features](#experimental-features) (after [Configuration reference](#configuration-reference)). + **Sandbox TTL configuration** - `timeout` requests must be at least 60 seconds. @@ -564,6 +566,35 @@ curl -X DELETE \ | `DOCKER_HOST` | Docker daemon URL (e.g., `unix:///var/run/docker.sock`) | | `PENDING_FAILURE_TTL` | TTL for failed pending sandboxes in seconds (default: 3600) | +## Experimental features + +Optional **🧪 experimental** capabilities; **off by default** in `server/example.config.toml` and `example.config.*.toml`. Check release notes before production. + +### Auto-renew on access + +Extends sandbox TTL when access is observed (via the lifecycle **server proxy** and/or **ingress**). 
Architecture, data flow, and tuning are in **[OSEP-0009](../oseps/0009-auto-renew-sandbox-on-ingress-access.md)**. + +**Server on/off** + +| Goal | What to do | +|------|------------| +| **Off (default)** | Keep `[renew_intent] enabled = false` in `~/.sandbox.toml` (see `example.config.toml`). | +| **On** | Set `[renew_intent] enabled = true`. For **ingress + Redis** mode, set `redis.enabled = true` and `redis.dsn` in the same `[renew_intent]` table (see OSEP-0009). | +| **Other keys** | `min_interval_seconds`, `queue_key`, `consumer_concurrency` — see OSEP-0009 and `[renew_intent]` in `example.config.toml`. | + +**Per sandbox** + +On **create**, set `extensions["access.renew.extend.seconds"]` to a string integer between **300** and **86400** (seconds). Omit the key to opt that sandbox out of renew-on-access (or leave renew_intent disabled globally). + +**Clients (SDK / HTTP)** + +- **Use the lifecycle server as proxy** so traffic goes to `/v1/sandboxes/{id}/proxy/{port}/...`: + - **REST**: request endpoints with `use_server_proxy=true`, e.g. `GET /v1/sandboxes/{id}/endpoints/{port}?use_server_proxy=true`. + - **SDK**: `ConnectionConfig(use_server_proxy=True)` or `ConnectionConfigSync(use_server_proxy=True)` (see SDK docs for `use_server_proxy`). +- **Ingress / gateway** path: deploy and route per OSEP-0009; clients use the gateway as usual. + +**Further reading**: [OSEP-0009](../oseps/0009-auto-renew-sandbox-on-ingress-access.md); sample keys under `[renew_intent]` in `server/example.config.toml`. 
+ ## Development ### Code quality diff --git a/server/README_zh.md b/server/README_zh.md index 102e1c67b..a97d0ec5f 100644 --- a/server/README_zh.md +++ b/server/README_zh.md @@ -143,6 +143,8 @@ curl -H "OPEN-SANDBOX-API-KEY: your-secret-api-key" \ 可参考 Compose 运行示例: - `server/docker-compose.example.yaml` +**实验性**生命周期能力(例如按访问自动续期)见文末 [实验性功能](#实验性功能) 一节(位于 [配置参考](#配置参考) 之后)。 + **安全加固(适用于所有 Docker 模式)** ```toml [docker] @@ -539,6 +541,35 @@ curl -X DELETE \ | `DOCKER_HOST` | Docker 守护进程 URL(例如 `unix:///var/run/docker.sock`)| | `PENDING_FAILURE_TTL` | 失败的待处理沙箱的 TTL(秒,默认:3600)| +## 实验性功能 + +以下为**可选**的 **🧪 实验性**能力;在 `server/example.config.toml` 与各 `example.config.*.toml` 中**默认关闭**。生产启用前请阅读 **[OSEP-0009](../oseps/0009-auto-renew-sandbox-on-ingress-access.md)** 与发版说明。 + +### 按访问自动续期 + +在观测到访问时延长沙箱 TTL(经 Lifecycle **服务端代理** 和/或 **Ingress**)。设计、数据流与调参见 **[OSEP-0009](../oseps/0009-auto-renew-sandbox-on-ingress-access.md)**。 + +**服务端开关** + +| 目的 | 操作 | +|------|------| +| **关闭(默认)** | `~/.sandbox.toml` 中保持 `[renew_intent] enabled = false`(见 `example.config.zh.toml`)。 | +| **开启** | 设置 `[renew_intent] enabled = true`。若使用 **Ingress + Redis** 模式,在同一 `[renew_intent]` 表中设置 `redis.enabled = true` 与 `redis.dsn`(见 OSEP)。 | +| **其它配置项** | `min_interval_seconds`、`queue_key`、`consumer_concurrency` 等见 OSEP 与 `example.config.zh.toml` 的 `[renew_intent]`。 | + +**按沙箱接入** + +**创建**沙箱时在 `extensions` 中设置 `access.renew.extend.seconds`,值为 **300~86400** 的**字符串**整数(秒)。不设该键(或未开 renew_intent)则该沙箱不按访问续期。 + +**客户端(SDK / HTTP)** + +- **走 Lifecycle 服务端代理**,使请求经过 `/v1/sandboxes/{id}/proxy/{port}/...`: + - **REST**:获取端点时加 `use_server_proxy=true`,例如 `GET /v1/sandboxes/{id}/endpoints/{port}?use_server_proxy=true`。 + - **SDK**:`ConnectionConfig(use_server_proxy=True)` 或 `ConnectionConfigSync(use_server_proxy=True)`(详见 SDK 文档中的 `use_server_proxy`)。 +- **Ingress / 网关** 模式:按 OSEP 部署网关与路由,客户端按网关方式访问即可。 + +**延伸阅读**:[OSEP-0009](../oseps/0009-auto-renew-sandbox-on-ingress-access.md);配置样例见 
`server/example.config.zh.toml` → `[renew_intent]`。 + ## 开发 ### 代码质量 diff --git a/server/example.config.k8s.toml b/server/example.config.k8s.toml index f0a049085..20ecaa576 100644 --- a/server/example.config.k8s.toml +++ b/server/example.config.k8s.toml @@ -28,6 +28,15 @@ port = 8080 log_level = "INFO" # api_key = "your-secret-api-key" # Optional: Uncomment to enable API key authentication +# 🧪 [EXPERIMENTAL] Renew-on-access. Off by default — see server/README.md. +[renew_intent] +enabled = false +min_interval_seconds = 60 +redis.enabled = false +# redis.dsn = "redis://127.0.0.1:6379/0" +redis.queue_key = "opensandbox:renew:intent" +redis.consumer_concurrency = 8 + [runtime] type = "kubernetes" execd_image = "opensandbox/execd:v1.0.7" diff --git a/server/example.config.k8s.zh.toml b/server/example.config.k8s.zh.toml index a61d752c1..2ad65e350 100644 --- a/server/example.config.k8s.zh.toml +++ b/server/example.config.k8s.zh.toml @@ -28,6 +28,15 @@ port = 8080 log_level = "INFO" # api_key = "your-secret-api-key" # Optional: Uncomment to enable API key authentication +# 🧪 [EXPERIMENTAL] 按访问续期。默认关闭 — 见 server/README_zh.md。 +[renew_intent] +enabled = false +min_interval_seconds = 60 +redis.enabled = false +# redis.dsn = "redis://127.0.0.1:6379/0" +redis.queue_key = "opensandbox:renew:intent" +redis.consumer_concurrency = 8 + [runtime] type = "kubernetes" execd_image = "sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/execd:v1.0.7" diff --git a/server/example.config.toml b/server/example.config.toml index 0599a4d5d..c35dce6b7 100644 --- a/server/example.config.toml +++ b/server/example.config.toml @@ -27,6 +27,15 @@ log_level = "INFO" # Maximum TTL for sandboxes that specify timeout. Comment out this line to disable the upper bound. max_sandbox_timeout_seconds = 86400 +# 🧪 [EXPERIMENTAL] Renew-on-access (OSEP-0009). Off by default — see server/README.md. 
+[renew_intent] +enabled = false +min_interval_seconds = 60 +redis.enabled = false +# redis.dsn = "redis://127.0.0.1:6379/0" +redis.queue_key = "opensandbox:renew:intent" +redis.consumer_concurrency = 8 + [runtime] # Runtime selection (docker | kubernetes) # ----------------------------------------------------------------- diff --git a/server/example.config.zh.toml b/server/example.config.zh.toml index 8ba43df89..77848a878 100644 --- a/server/example.config.zh.toml +++ b/server/example.config.zh.toml @@ -24,6 +24,15 @@ port = 8080 log_level = "INFO" # api_key = "your-secret-api-key" # Optional: Uncomment to enable API key authentication +# 🧪 [EXPERIMENTAL] 按访问续期(OSEP-0009)。默认关闭 — 说明见 server/README_zh.md。 +[renew_intent] +enabled = false +min_interval_seconds = 60 +redis.enabled = false +# redis.dsn = "redis://127.0.0.1:6379/0" +redis.queue_key = "opensandbox:renew:intent" +redis.consumer_concurrency = 8 + [runtime] # Runtime selection (docker | kubernetes) # ----------------------------------------------------------------- diff --git a/server/pyproject.toml b/server/pyproject.toml index ca41b3305..b32b871b7 100644 --- a/server/pyproject.toml +++ b/server/pyproject.toml @@ -47,6 +47,7 @@ dependencies = [ "httpx[socks]", "kubernetes", "pydantic", + "redis>=5", "pydantic-settings", "pyyaml", "tomli; python_version < \"3.11\"", diff --git a/server/src/api/lifecycle.py b/server/src/api/lifecycle.py index cf08dd6a7..86c4b4a13 100644 --- a/server/src/api/lifecycle.py +++ b/server/src/api/lifecycle.py @@ -26,6 +26,7 @@ from fastapi.exceptions import HTTPException from fastapi.responses import Response, StreamingResponse +from src.extensions import validate_extensions from src.api.schema import ( CreateSandboxRequest, CreateSandboxResponse, @@ -103,7 +104,7 @@ async def create_sandbox( Raises: HTTPException: If sandbox creation scheduling fails """ - + validate_extensions(request.extensions) return await sandbox_service.create_sandbox(request) @@ -427,6 +428,10 @@ async def 
proxy_sandbox_endpoint_request(request: Request, sandbox_id: str, port endpoint = sandbox_service.get_endpoint(sandbox_id, port, resolve_internal=True) + proxy_renew = getattr(request.app.state, "proxy_renew_coordinator", None) + if proxy_renew is not None: + proxy_renew.schedule(sandbox_id) + target_host = endpoint.endpoint query_string = request.url.query diff --git a/server/src/cli.py b/server/src/cli.py index 23625f978..5d3232675 100644 --- a/server/src/cli.py +++ b/server/src/cli.py @@ -17,9 +17,12 @@ import argparse import os import shutil +import types from pathlib import Path +from typing import Any, FrozenSet, Union, get_args, get_origin import uvicorn +from pydantic import BaseModel from src.config import ( AgentSandboxRuntimeConfig, @@ -29,11 +32,30 @@ EgressConfig, IngressConfig, KubernetesRuntimeConfig, + RenewIntentConfig, RuntimeConfig, ServerConfig, StorageConfig, ) + +def _strip_optional(annotation: Any) -> Any: + """Unwrap Optional / Union[..., None] to the inner type.""" + if annotation is None: + return None + origin = get_origin(annotation) + args = get_args(annotation) + if origin is Union or origin is types.UnionType: + filtered = [a for a in args if a is not type(None)] + if len(filtered) == 1: + return filtered[0] + return annotation + + +def _is_basemodel_type(annotation: Any) -> bool: + inner = _strip_optional(annotation) + return isinstance(inner, type) and issubclass(inner, BaseModel) + EXAMPLE_FILE_MAP = { "docker": "example.config.toml", "docker-zh": "example.config.zh.toml", @@ -139,6 +161,7 @@ def _render_section( *, placeholders: dict[str, str] | None = None, extra_comments: list[str] | None = None, + dotted_nested: FrozenSet[str] | None = None, ) -> str: lines: list[str] = [] if extra_comments: @@ -146,8 +169,11 @@ def _render_section( lines.append(f"[{section}]") placeholders = placeholders or {} + dotted_nested = dotted_nested or frozenset() for field_name, field in model.model_fields.items(): + if 
_is_basemodel_type(field.annotation): + continue key = field.alias or field_name value = placeholders.get(key, _placeholder_for_field(field)) if field.description: @@ -155,6 +181,40 @@ def _render_section( lines.append(f"{key} = {value}") lines.append("") + for field_name, field in model.model_fields.items(): + if field_name not in dotted_nested or not _is_basemodel_type(field.annotation): + continue + inner = _strip_optional(field.annotation) + if not isinstance(inner, type) or not issubclass(inner, BaseModel): + continue + for sub_name, sub_field in inner.model_fields.items(): + sub_key = f"{field_name}.{sub_name}" + value = placeholders.get(sub_key, _placeholder_for_field(sub_field)) + if sub_field.description: + lines.append(f"# {sub_field.description}") + lines.append(f"{sub_key} = {value}") + lines.append("") + + nested_blocks: list[str] = [] + for field_name, field in model.model_fields.items(): + if not _is_basemodel_type(field.annotation): + continue + if field_name in dotted_nested: + continue + inner = _strip_optional(field.annotation) + if not isinstance(inner, type) or not issubclass(inner, BaseModel): + continue + nested_path = f"{section}.{field_name}" + nested_blocks.append( + _render_section(nested_path, inner, placeholders=None, extra_comments=None) + ) + + if nested_blocks: + if lines and lines[-1] == "": + lines.pop() + lines.append("") + lines.extend(nested_blocks) + if lines and lines[-1] == "": lines.pop() return "\n".join(lines) @@ -167,6 +227,15 @@ def _render_section( sections = [ "# Generated from OpenSandbox config schema. Remove sections you do not use.", _render_section("server", ServerConfig), + _render_section( + "renew_intent", + RenewIntentConfig, + extra_comments=[ + "Renew-intent: top-level section (not under [server]). " + "Redis options use dotted keys in this table (redis.enabled, redis.queue_key, …)." 
+ ], + dotted_nested=frozenset({"redis"}), + ), _render_section("runtime", RuntimeConfig), _render_section("docker", DockerConfig), _render_section( diff --git a/server/src/config.py b/server/src/config.py index 201e4ee9c..0abc012e7 100644 --- a/server/src/config.py +++ b/server/src/config.py @@ -87,6 +87,71 @@ def _is_wildcard_domain(host: str) -> bool: return bool(_WILDCARD_DOMAIN_RE.match(host)) +class RenewIntentRedisConfig(BaseModel): + """🧪 [EXPERIMENTAL] Redis list consumer for renew-intent queue (ingress gateway path).""" + + enabled: bool = Field( + default=False, + description=( + "🧪 [EXPERIMENTAL] When true, server workers consume renew intents from Redis " + "(ingress gateway path)." + ), + ) + dsn: Optional[str] = Field( + default=None, + description=( + '🧪 [EXPERIMENTAL] Redis DSN (e.g. "redis://127.0.0.1:6379/0"). ' + "Required when redis.enabled is true." + ), + ) + queue_key: str = Field( + default="opensandbox:renew:intent", + min_length=1, + description="🧪 [EXPERIMENTAL] Redis List key for LPUSH/BRPOP renew-intent JSON payloads.", + ) + consumer_concurrency: int = Field( + default=8, + ge=1, + description="🧪 [EXPERIMENTAL] Number of concurrent BRPOP worker tasks.", + ) + + @model_validator(mode="after") + def require_dsn_when_redis_enabled(self) -> "RenewIntentRedisConfig": + if self.enabled and (self.dsn is None or not str(self.dsn).strip()): + raise ValueError( + "[renew_intent] redis.dsn must be set when redis.enabled is true." + ) + return self + + +class RenewIntentConfig(BaseModel): + """🧪 [EXPERIMENTAL] Renew sandbox expiration when access is observed (proxy and/or Redis queue).""" + + enabled: bool = Field( + default=False, + description=( + "🧪 [EXPERIMENTAL] Master switch for auto-renew on reverse-proxy access and/or Redis " + "ingress intents. When false, renew-intent logic is off." 
+ ), + ) + min_interval_seconds: int = Field( + default=60, + ge=1, + description=( + "🧪 [EXPERIMENTAL] Minimum seconds between successful renewals for the same sandbox " + "(cooldown)." + ), + ) + redis: RenewIntentRedisConfig = Field( + default_factory=RenewIntentRedisConfig, + description=( + "🧪 [EXPERIMENTAL] Redis queue consumer for ingress gateway renew-intent mode. " + "In TOML, set keys under the same [renew_intent] table as redis.enabled, " + "redis.dsn, redis.queue_key, redis.consumer_concurrency (dotted keys)." + ), + ) + + class GatewayRouteModeConfig(BaseModel): """Routing strategy for gateway ingress exposure.""" @@ -494,6 +559,10 @@ class AppConfig(BaseModel): """Root application configuration model.""" server: ServerConfig = Field(default_factory=ServerConfig) + renew_intent: RenewIntentConfig = Field( + default_factory=RenewIntentConfig, + description="Auto-renew sandbox expiration when reverse-proxy access is observed.", + ) runtime: RuntimeConfig = Field(..., description="Sandbox runtime configuration.") kubernetes: Optional[KubernetesRuntimeConfig] = None agent_sandbox: Optional["AgentSandboxRuntimeConfig"] = None @@ -616,6 +685,8 @@ def get_config_path() -> Path: __all__ = [ "AppConfig", + "RenewIntentConfig", + "RenewIntentRedisConfig", "ServerConfig", "RuntimeConfig", "IngressConfig", diff --git a/server/src/extensions/__init__.py b/server/src/extensions/__init__.py new file mode 100644 index 000000000..36ac51719 --- /dev/null +++ b/server/src/extensions/__init__.py @@ -0,0 +1,37 @@ +# Copyright 2026 Alibaba Group Holding Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +CreateSandbox ``extensions`` shared logic: well-known keys, HTTP validation, workload storage codec. +""" + +from src.extensions.codec import apply_access_renew_extend_seconds_to_mapping +from src.extensions.keys import ( + ACCESS_RENEW_EXTEND_SECONDS_KEY, + ACCESS_RENEW_EXTEND_SECONDS_METADATA_KEY, +) +from src.extensions.validation import ( + ACCESS_RENEW_EXTEND_SECONDS_MAX, + ACCESS_RENEW_EXTEND_SECONDS_MIN, + validate_extensions, +) + +__all__ = [ + "ACCESS_RENEW_EXTEND_SECONDS_KEY", + "ACCESS_RENEW_EXTEND_SECONDS_METADATA_KEY", + "ACCESS_RENEW_EXTEND_SECONDS_MIN", + "ACCESS_RENEW_EXTEND_SECONDS_MAX", + "validate_extensions", + "apply_access_renew_extend_seconds_to_mapping", +] diff --git a/server/src/extensions/codec.py b/server/src/extensions/codec.py new file mode 100644 index 000000000..3f4ac8534 --- /dev/null +++ b/server/src/extensions/codec.py @@ -0,0 +1,44 @@ +# Copyright 2026 Alibaba Group Holding Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import annotations + +from typing import Dict, MutableMapping, Optional + +from src.extensions.keys import ( + ACCESS_RENEW_EXTEND_SECONDS_KEY, + ACCESS_RENEW_EXTEND_SECONDS_METADATA_KEY, +) + + +def apply_access_renew_extend_seconds_to_mapping( + mapping: MutableMapping[str, str], + extensions: Optional[Dict[str, str]], + *, + metadata_key: str = ACCESS_RENEW_EXTEND_SECONDS_METADATA_KEY, +) -> None: + """ + If ``extensions`` include ``access.renew.extend.seconds``, set ``mapping[metadata_key]``. + + ``mapping`` may be Kubernetes annotations or Docker container labels. + """ + if not extensions: + return + raw = extensions.get(ACCESS_RENEW_EXTEND_SECONDS_KEY) + if raw is None: + return + s = str(raw).strip() + if not s: + return + mapping[metadata_key] = s diff --git a/server/src/extensions/keys.py b/server/src/extensions/keys.py new file mode 100644 index 000000000..7c77d92d8 --- /dev/null +++ b/server/src/extensions/keys.py @@ -0,0 +1,19 @@ +# Copyright 2026 Alibaba Group Holding Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Well-known CreateSandboxRequest.extensions keys and workload storage keys.""" + +ACCESS_RENEW_EXTEND_SECONDS_KEY = "access.renew.extend.seconds" +# Kubernetes annotation or Docker label value (plain seconds string). 
+ACCESS_RENEW_EXTEND_SECONDS_METADATA_KEY = "opensandbox.io/access-renew-extend-seconds" diff --git a/server/src/extensions/validation.py b/server/src/extensions/validation.py new file mode 100644 index 000000000..2287a76b8 --- /dev/null +++ b/server/src/extensions/validation.py @@ -0,0 +1,106 @@ +# Copyright 2026 Alibaba Group Holding Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +from typing import Dict, Optional + +from fastapi import HTTPException, status + +from src.extensions.keys import ACCESS_RENEW_EXTEND_SECONDS_KEY +from src.services.constants import SandboxErrorCodes + +ACCESS_RENEW_EXTEND_SECONDS_MIN = 300 # 5 minutes +ACCESS_RENEW_EXTEND_SECONDS_MAX = 86400 # 24 hours + + +def _validate_access_renew_extend_seconds(extensions: Dict[str, str]) -> None: + """ + If ``access.renew.extend.seconds`` is set, require a base-10 integer in [MIN, MAX] seconds. + + Args: + extensions: Non-empty extension map (may omit this key). + + Raises: + HTTPException: 400 when the key is present but invalid. 
+ """ + key = ACCESS_RENEW_EXTEND_SECONDS_KEY + if key not in extensions: + return + raw = extensions[key] + if raw is None: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail={ + "code": SandboxErrorCodes.INVALID_PARAMETER, + "message": ( + f'Invalid extensions["{key}"]: expected a string of digits between ' + f"{ACCESS_RENEW_EXTEND_SECONDS_MIN} and {ACCESS_RENEW_EXTEND_SECONDS_MAX} " + "(5 minutes to 24 hours inclusive)." + ), + }, + ) + s = str(raw).strip() + if not s: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail={ + "code": SandboxErrorCodes.INVALID_PARAMETER, + "message": ( + f'Invalid extensions["{key}"]: empty value; omit the key to disable renew-on-access, ' + f"or use an integer between {ACCESS_RENEW_EXTEND_SECONDS_MIN} and " + f"{ACCESS_RENEW_EXTEND_SECONDS_MAX} seconds." + ), + }, + ) + try: + n = int(s) + except ValueError: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail={ + "code": SandboxErrorCodes.INVALID_PARAMETER, + "message": ( + f'Invalid extensions["{key}"]: must be a base-10 integer string ' + f"between {ACCESS_RENEW_EXTEND_SECONDS_MIN} and {ACCESS_RENEW_EXTEND_SECONDS_MAX}, got {raw!r}." + ), + }, + ) from None + if n < ACCESS_RENEW_EXTEND_SECONDS_MIN or n > ACCESS_RENEW_EXTEND_SECONDS_MAX: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail={ + "code": SandboxErrorCodes.INVALID_PARAMETER, + "message": ( + f'Invalid extensions["{key}"]: must be between {ACCESS_RENEW_EXTEND_SECONDS_MIN} and ' + f"{ACCESS_RENEW_EXTEND_SECONDS_MAX} seconds (5 minutes to 24 hours inclusive), got {n}." + ), + }, + ) + + +def validate_extensions(extensions: Optional[Dict[str, str]]) -> None: + """ + Validate well-known keys in ``extensions`` for sandbox creation. + + Args: + extensions: Optional opaque extension map from the create request. + + Raises: + HTTPException: 400 when a known key is present but invalid. 
+ """ + if not extensions: + return + + _validate_access_renew_extend_seconds(extensions) diff --git a/server/src/integrations/__init__.py b/server/src/integrations/__init__.py new file mode 100644 index 000000000..97ce475ff --- /dev/null +++ b/server/src/integrations/__init__.py @@ -0,0 +1,15 @@ +# Copyright 2026 Alibaba Group Holding Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""External system integrations (renew-intent Redis consumer, ...).""" diff --git a/server/src/integrations/renew_intent/__init__.py b/server/src/integrations/renew_intent/__init__.py new file mode 100644 index 000000000..3ed6ff7e4 --- /dev/null +++ b/server/src/integrations/renew_intent/__init__.py @@ -0,0 +1,23 @@ +# Copyright 2026 Alibaba Group Holding Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from src.integrations.renew_intent.consumer import RenewIntentConsumer, start_renew_intent_consumer +from src.integrations.renew_intent.runner import RenewIntentRunner, start_renew_intent_runner + +__all__ = [ + "RenewIntentConsumer", + "RenewIntentRunner", + "start_renew_intent_consumer", + "start_renew_intent_runner", +] diff --git a/server/src/integrations/renew_intent/constants.py b/server/src/integrations/renew_intent/constants.py new file mode 100644 index 000000000..6793869fe --- /dev/null +++ b/server/src/integrations/renew_intent/constants.py @@ -0,0 +1,22 @@ +# Copyright 2026 Alibaba Group Holding Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Drop intents whose observed_at is older than this (vs wall clock). +INTENT_MAX_AGE_SECONDS = 300 + +# BRPOP block timeout so workers periodically observe shutdown. +BRPOP_TIMEOUT_SECONDS = 5 + +# Server proxy renew: max sandbox_ids tracked (LRU); caps memory. +PROXY_RENEW_MAX_TRACKED_SANDBOXES = 8192 diff --git a/server/src/integrations/renew_intent/consumer.py b/server/src/integrations/renew_intent/consumer.py new file mode 100644 index 000000000..742381281 --- /dev/null +++ b/server/src/integrations/renew_intent/consumer.py @@ -0,0 +1,336 @@ +# Copyright 2026 Alibaba Group Holding Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Single renew-intent pipeline: Redis BRPOP feeders + proxy submits → one asyncio queue → processors.""" + +from __future__ import annotations + +import asyncio +import logging +import time +from collections import OrderedDict +from dataclasses import dataclass +from datetime import datetime, timezone +from functools import partial +from typing import TYPE_CHECKING, Optional + +from redis.exceptions import RedisError + +from src.config import AppConfig +from src.integrations.renew_intent.constants import ( + BRPOP_TIMEOUT_SECONDS, + INTENT_MAX_AGE_SECONDS, + PROXY_RENEW_MAX_TRACKED_SANDBOXES, +) +from src.integrations.renew_intent.controller import AccessRenewController +from src.integrations.renew_intent.intent import parse_renew_intent_json +from src.integrations.renew_intent.logutil import ( + RENEW_EVENT_WORKERS_NOT_STARTED, + RENEW_EVENT_WORKERS_STARTED, + RENEW_SOURCE_REDIS_QUEUE, + RENEW_SOURCE_SERVER_PROXY, + renew_bundle, +) +from src.integrations.renew_intent.redis_client import connect_renew_intent_redis_from_config +from src.services.extension_service import ExtensionService, require_extension_service +from src.services.factory import create_sandbox_service +from src.services.sandbox_service import SandboxService + +if TYPE_CHECKING: + from redis.asyncio import Redis + +logger = logging.getLogger(__name__) + + +@dataclass(frozen=True) +class RenewWorkItem: + """One unit of work for the shared renew pipeline.""" + + source: str + sandbox_id: str + observed_at: datetime + + +@dataclass +class _MemSandboxState: + lock: asyncio.Lock + 
last_success_monotonic: float | None = None + + +class RenewIntentConsumer: + """ + Feeds renew work from Redis BRPOP (optional) and server-proxy ``schedule`` into one queue. + Per-sandbox ``asyncio.Lock`` serializes work; without Redis, ``min_interval`` throttles proxy + renews (ingress throttling is producer-side). + """ + + def __init__( + self, + app_config: AppConfig, + sandbox_service: SandboxService, + extension_service: ExtensionService, + redis_client: Optional["Redis"], + ) -> None: + self._app_config = app_config + self._redis = redis_client + ri = app_config.renew_intent + self._queue_key = ri.redis.queue_key + self._feeder_count = ri.redis.consumer_concurrency if redis_client else 0 + self._processor_count = max(1, ri.redis.consumer_concurrency) + self._min_interval = float(ri.min_interval_seconds) + self._controller = AccessRenewController(sandbox_service, extension_service) + self._work_queue: asyncio.Queue[RenewWorkItem] = asyncio.Queue() + self._stop = asyncio.Event() + self._tasks: list[asyncio.Task[None]] = [] + self._mem_states: OrderedDict[str, _MemSandboxState] = OrderedDict() + self._max_tracked = PROXY_RENEW_MAX_TRACKED_SANDBOXES + + @classmethod + async def start( + cls, + app_config: AppConfig, + sandbox_service: SandboxService, + extension_service: ExtensionService, + ) -> Optional["RenewIntentConsumer"]: + if not app_config.renew_intent.enabled: + return None + + redis_client: Optional["Redis"] = None + if app_config.renew_intent.redis.enabled: + try: + redis_client = await connect_renew_intent_redis_from_config(app_config) + except (RedisError, OSError, TimeoutError) as exc: + line, ex = renew_bundle( + event=RENEW_EVENT_WORKERS_NOT_STARTED, + source=RENEW_SOURCE_REDIS_QUEUE, + skip_reason="redis_connect_failed", + error_type=type(exc).__name__, + ) + logger.error(f"renew_intent {line} error={exc!s}", extra=ex) + redis_client = None + if redis_client is None and app_config.renew_intent.redis.enabled: + line, ex = renew_bundle( + 
event=RENEW_EVENT_WORKERS_NOT_STARTED, + source=RENEW_SOURCE_REDIS_QUEUE, + skip_reason="redis_client_none", + ) + logger.warning( + f"renew_intent {line}; continuing with proxy-only renew pipeline", + extra=ex, + ) + + consumer = cls(app_config, sandbox_service, extension_service, redis_client) + consumer._spawn_tasks() + if redis_client is not None: + line, ex = renew_bundle( + event=RENEW_EVENT_WORKERS_STARTED, + source=RENEW_SOURCE_REDIS_QUEUE, + worker_count=consumer._feeder_count + consumer._processor_count, + queue_key=consumer._queue_key, + ) + logger.info( + f"🧪 [EXPERIMENTAL] renew_intent is enabled: Redis BRPOP feeders + " + f"unified processors started ({line})", + extra=ex, + ) + else: + logger.info( + "🧪 [EXPERIMENTAL] renew_intent is enabled: unified in-process renew pipeline " + "(proxy path only; no Redis BRPOP)" + ) + return consumer + + def submit_from_proxy(self, sandbox_id: str) -> None: + """Enqueue renew work from ``/sandboxes/.../proxy/...`` (non-blocking).""" + if not self._app_config.renew_intent.enabled: + return + asyncio.create_task( + self._enqueue_proxy(sandbox_id), + name=f"renew_intent_proxy_enqueue_{sandbox_id}", + ) + + async def _enqueue_proxy(self, sandbox_id: str) -> None: + await self._work_queue.put( + RenewWorkItem( + source=RENEW_SOURCE_SERVER_PROXY, + sandbox_id=sandbox_id, + observed_at=datetime.now(timezone.utc), + ) + ) + + def _spawn_tasks(self) -> None: + for i in range(self._processor_count): + self._tasks.append( + asyncio.create_task( + self._processor_loop(i), + name=f"renew_intent_processor_{i}", + ) + ) + for i in range(self._feeder_count): + self._tasks.append( + asyncio.create_task( + self._brpop_feeder_loop(i), + name=f"renew_intent_brpop_{i}", + ) + ) + + @staticmethod + def _is_stale(observed_at: datetime) -> bool: + now = datetime.now(timezone.utc) + age = (now - observed_at).total_seconds() + return age > INTENT_MAX_AGE_SECONDS + + def _ensure_mru_mem(self, sandbox_id: str) -> _MemSandboxState: + if 
sandbox_id in self._mem_states: + st = self._mem_states[sandbox_id] + self._mem_states.move_to_end(sandbox_id) + else: + st = _MemSandboxState(lock=asyncio.Lock()) + self._mem_states[sandbox_id] = st + self._mem_states.move_to_end(sandbox_id) + self._evict_mem_lru_unlocked() + return st + + def _evict_mem_lru_unlocked(self) -> None: + rotations = 0 + max_rotations = max(len(self._mem_states), 1) + while len(self._mem_states) > self._max_tracked and rotations < max_rotations: + k, st = self._mem_states.popitem(last=False) + if st.lock.locked(): + self._mem_states[k] = st + self._mem_states.move_to_end(k) + rotations += 1 + else: + rotations = 0 + + async def _brpop_feeder_loop(self, worker_id: int) -> None: + assert self._redis is not None + while not self._stop.is_set(): + try: + result = await self._redis.brpop( + self._queue_key, + BRPOP_TIMEOUT_SECONDS, + ) + except asyncio.CancelledError: + raise + except (RedisError, OSError) as exc: + line, ex = renew_bundle( + event="worker_redis_error", + source=RENEW_SOURCE_REDIS_QUEUE, + worker_id=worker_id, + error_type=type(exc).__name__, + ) + logger.warning(f"renew_intent {line} error={exc!s}", extra=ex) + await asyncio.sleep(1.0) + continue + + if result is None: + continue + _, payload = result + if not isinstance(payload, str): + continue + try: + intent = parse_renew_intent_json(payload) + if intent is None: + continue + if self._is_stale(intent.observed_at): + continue + await self._work_queue.put( + RenewWorkItem( + source=RENEW_SOURCE_REDIS_QUEUE, + sandbox_id=intent.sandbox_id, + observed_at=intent.observed_at, + ) + ) + except Exception as exc: + line, ex = renew_bundle( + event="worker_handle_error", + source=RENEW_SOURCE_REDIS_QUEUE, + worker_id=worker_id, + error_type=type(exc).__name__, + ) + logger.exception(f"renew_intent {line} error={exc!s}", extra=ex) + + async def _processor_loop(self, worker_id: int) -> None: + while not self._stop.is_set(): + try: + work = await 
asyncio.wait_for(self._work_queue.get(), timeout=1.0) + except asyncio.TimeoutError: + continue + except asyncio.CancelledError: + raise + try: + await self._process_work(work) + except Exception as exc: + line, ex = renew_bundle( + event="processor_error", + source=work.source, + sandbox_id=work.sandbox_id, + worker_id=worker_id, + error_type=type(exc).__name__, + ) + logger.exception(f"renew_intent {line} error={exc!s}", extra=ex) + finally: + self._work_queue.task_done() + + async def _process_work(self, work: RenewWorkItem) -> None: + if self._redis is None and work.source != RENEW_SOURCE_SERVER_PROXY: + return + + st = self._ensure_mru_mem(work.sandbox_id) + async with st.lock: + if self._redis is not None: + await self._controller.renew_after_gates(work.sandbox_id, source=work.source) + return + + now = time.monotonic() + if ( + st.last_success_monotonic is not None + and (now - st.last_success_monotonic) < self._min_interval + ): + return + ok = await asyncio.to_thread( + partial( + self._controller.attempt_renew_sync, + work.sandbox_id, + source=work.source, + ) + ) + if ok: + st.last_success_monotonic = time.monotonic() + + async def stop(self) -> None: + self._stop.set() + for t in self._tasks: + t.cancel() + await asyncio.gather(*self._tasks, return_exceptions=True) + self._tasks.clear() + if self._redis is not None: + try: + await self._redis.aclose() + except Exception as exc: + logger.debug(f"renew_intent redis_close error={exc!s}") + + +async def start_renew_intent_consumer( + app_config: AppConfig, + sandbox_service: SandboxService | None = None, + extension_service: ExtensionService | None = None, +) -> Optional[RenewIntentConsumer]: + """Start consumer or ``None`` when ``renew_intent.enabled`` is false.""" + if sandbox_service is None: + sandbox_service = create_sandbox_service(config=app_config) + if extension_service is None: + extension_service = require_extension_service(sandbox_service) + return await RenewIntentConsumer.start(app_config, 
sandbox_service, extension_service) diff --git a/server/src/integrations/renew_intent/controller.py b/server/src/integrations/renew_intent/controller.py new file mode 100644 index 000000000..d65bff53b --- /dev/null +++ b/server/src/integrations/renew_intent/controller.py @@ -0,0 +1,129 @@ +# Copyright 2026 Alibaba Group Holding Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Renew access path: eligibility checks then ``renew_expiration``.""" + +from __future__ import annotations + +import asyncio +import logging +from datetime import datetime, timedelta, timezone +from typing import TYPE_CHECKING + +from fastapi import HTTPException + +from src.api.schema import RenewSandboxExpirationRequest +from src.integrations.renew_intent.intent import RenewIntent +from src.integrations.renew_intent.logutil import ( + RENEW_EVENT_FAILED, + RENEW_EVENT_SUCCEEDED, + RENEW_SOURCE_REDIS_QUEUE, + RENEW_SOURCE_SERVER_PROXY, + renew_bundle, +) + +if TYPE_CHECKING: + from src.services.extension_service import ExtensionService + from src.services.sandbox_service import SandboxService + +logger = logging.getLogger(__name__) + + +def _http_detail_str(detail: object) -> str: + if isinstance(detail, dict): + return str(detail.get("message", detail)) + return str(detail) + + +class AccessRenewController: + """Eligibility gates and ``renew_expiration``; rate limiting is expected upstream (ingress / proxy).""" + + def __init__( + self, + sandbox_service: "SandboxService", + 
extension_service: "ExtensionService", + ) -> None: + self._sandbox_service = sandbox_service + self._extension_service = extension_service + + def _try_renew_sync(self, sandbox_id: str, *, source: str) -> bool: + try: + sandbox = self._sandbox_service.get_sandbox(sandbox_id) + except HTTPException: + return False + + if sandbox.status.state.lower() != "running": + return False + + if sandbox.expires_at is None: + return False + + extend = self._extension_service.get_access_renew_extend_seconds(sandbox_id) + if extend is None: + return False + + now = datetime.now(timezone.utc) + current = sandbox.expires_at + if current.tzinfo is None: + current = current.replace(tzinfo=timezone.utc) + + candidate = now + timedelta(seconds=extend) + new_expires = max(candidate, current) + + req = RenewSandboxExpirationRequest(expires_at=new_expires) + try: + self._sandbox_service.renew_expiration(sandbox_id, req) + except HTTPException as exc: + detail_s = _http_detail_str(exc.detail) + line, ex = renew_bundle( + event=RENEW_EVENT_FAILED, + source=source, + sandbox_id=sandbox_id, + skip_reason="renew_expiration_rejected", + http_detail=detail_s, + http_status=getattr(exc, "status_code", None), + ) + logger.warning(f"renew_intent {line} detail={detail_s}", extra=ex) + return False + except Exception as exc: + line, ex = renew_bundle( + event=RENEW_EVENT_FAILED, + source=source, + sandbox_id=sandbox_id, + skip_reason="renew_expiration_error", + error_type=type(exc).__name__, + ) + logger.exception(f"renew_intent {line}", extra=ex) + return False + + new_expires_iso = new_expires.isoformat() + line, ex = renew_bundle( + event=RENEW_EVENT_SUCCEEDED, + source=source, + sandbox_id=sandbox_id, + new_expires_at=new_expires_iso, + ) + logger.info(f"renew_intent {line}", extra=ex) + return True + + def attempt_renew_sync(self, sandbox_id: str, *, source: str = RENEW_SOURCE_SERVER_PROXY) -> bool: + """Run gates + renew (sync).""" + return self._try_renew_sync(sandbox_id, source=source) + + 
async def renew_after_gates(self, sandbox_id: str, *, source: str) -> None: + """Run renew in a worker thread (caller holds per-sandbox serialization).""" + await asyncio.to_thread(self._try_renew_sync, sandbox_id, source=source) + + async def process_intent_after_lock(self, intent: RenewIntent) -> None: + await self.renew_after_gates(intent.sandbox_id, source=RENEW_SOURCE_REDIS_QUEUE) diff --git a/server/src/integrations/renew_intent/intent.py b/server/src/integrations/renew_intent/intent.py new file mode 100644 index 000000000..633e63480 --- /dev/null +++ b/server/src/integrations/renew_intent/intent.py @@ -0,0 +1,86 @@ +# Copyright 2026 Alibaba Group Holding Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Renew-intent JSON (matches components/ingress/pkg/renewintent Intent).""" + +from __future__ import annotations + +import json +from dataclasses import dataclass +from datetime import datetime, timezone +from typing import Any, Optional + + +@dataclass(frozen=True) +class RenewIntent: + sandbox_id: str + observed_at: datetime + port: Optional[int] = None + request_uri: Optional[str] = None + + +def _parse_rfc3339_time(value: str) -> Optional[datetime]: + s = value.strip() + if s.endswith("Z"): + s = s[:-1] + "+00:00" + # Ingress uses Go RFC3339Nano (up to 9 fractional digits); CPython fromisoformat allows at most 6. 
+ dot = s.find(".") + if dot != -1: + end = dot + 1 + while end < len(s) and s[end].isdigit(): + end += 1 + frac = s[dot + 1 : end] + if len(frac) > 6: + s = s[: dot + 1] + frac[:6] + s[end:] + try: + dt = datetime.fromisoformat(s) + except ValueError: + return None + if dt.tzinfo is None: + dt = dt.replace(tzinfo=timezone.utc) + return dt + + +def parse_renew_intent_json(raw: str) -> Optional[RenewIntent]: + """Parse ingress LPUSH JSON payload; return ``None`` if invalid.""" + try: + data: dict[str, Any] = json.loads(raw) + except (json.JSONDecodeError, TypeError): + return None + sid = data.get("sandbox_id") + if not isinstance(sid, str) or not sid.strip(): + return None + obs_raw = data.get("observed_at") + if not isinstance(obs_raw, str) or not obs_raw.strip(): + return None + observed_at = _parse_rfc3339_time(obs_raw) + if observed_at is None: + return None + + port: Optional[int] = None + if "port" in data and data["port"] is not None: + try: + port = int(data["port"]) + except (TypeError, ValueError): + port = None + + uri = data.get("request_uri") + request_uri = uri if isinstance(uri, str) else None + + return RenewIntent( + sandbox_id=sid.strip(), + observed_at=observed_at, + port=port, + request_uri=request_uri, + ) diff --git a/server/src/integrations/renew_intent/logutil.py b/server/src/integrations/renew_intent/logutil.py new file mode 100644 index 000000000..94132da10 --- /dev/null +++ b/server/src/integrations/renew_intent/logutil.py @@ -0,0 +1,59 @@ +# Copyright 2026 Alibaba Group Holding Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +"""Structured ``renew_*`` keys for ``logging`` ``extra`` and message suffix lines.""" + +from __future__ import annotations + +from typing import Any + +RENEW_SOURCE_SERVER_PROXY = "server_proxy" +RENEW_SOURCE_REDIS_QUEUE = "redis_queue" + +RENEW_EVENT_SUCCEEDED = "renew_succeeded" +RENEW_EVENT_FAILED = "renew_failed" +RENEW_EVENT_TASK_FAILED = "renew_task_failed" +RENEW_EVENT_WORKERS_STARTED = "workers_started" +RENEW_EVENT_WORKERS_NOT_STARTED = "workers_not_started" +RENEW_EVENT_REDIS_CONNECTED = "redis_connected" + + +def _renew_extra( + *, + event: str, + source: str, + sandbox_id: str | None = None, + skip_reason: str | None = None, + **fields: Any, +) -> dict[str, Any]: + out: dict[str, Any] = { + "renew_event": event, + "renew_source": source, + } + if sandbox_id is not None: + out["renew_sandbox_id"] = sandbox_id + if skip_reason is not None: + out["renew_skip_reason"] = skip_reason + for k, v in fields.items(): + if v is not None: + key = k if k.startswith("renew_") else f"renew_{k}" + out[key] = v + return out + + +def renew_bundle(**kwargs: Any) -> tuple[str, dict[str, Any]]: + """``(k=v line, extra dict)`` for one log call.""" + extra = _renew_extra(**kwargs) + line = " ".join(f"{k}={extra[k]!s}" for k in sorted(extra.keys())) + return line, extra diff --git a/server/src/integrations/renew_intent/proxy_renew.py b/server/src/integrations/renew_intent/proxy_renew.py new file mode 100644 index 000000000..9204c679c --- /dev/null +++ b/server/src/integrations/renew_intent/proxy_renew.py @@ -0,0 +1,40 @@ +# Copyright 2026 Alibaba Group Holding Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Server proxy path: enqueue renew work into ``RenewIntentConsumer`` (non-blocking).""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Optional + +if TYPE_CHECKING: + from src.config import AppConfig + from src.integrations.renew_intent.consumer import RenewIntentConsumer + + +class ProxyRenewCoordinator: + """Forward ``/sandboxes/{id}/proxy/...`` hits into the unified renew consumer.""" + + def __init__( + self, + app_config: "AppConfig", + consumer: Optional["RenewIntentConsumer"], + ) -> None: + self._app_config = app_config + self._consumer = consumer + + def schedule(self, sandbox_id: str) -> None: + if not self._app_config.renew_intent.enabled or self._consumer is None: + return + self._consumer.submit_from_proxy(sandbox_id) diff --git a/server/src/integrations/renew_intent/redis_client.py b/server/src/integrations/renew_intent/redis_client.py new file mode 100644 index 000000000..e8e589a36 --- /dev/null +++ b/server/src/integrations/renew_intent/redis_client.py @@ -0,0 +1,66 @@ +# Copyright 2026 Alibaba Group Holding Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +"""Async Redis client for renew-intent queue consumers.""" + +from __future__ import annotations + +import logging +from typing import Optional + +import redis.asyncio as redis_async +from redis.asyncio import Redis + +from src.config import AppConfig +from src.integrations.renew_intent.logutil import ( + RENEW_EVENT_REDIS_CONNECTED, + RENEW_SOURCE_REDIS_QUEUE, + renew_bundle, +) + +logger = logging.getLogger(__name__) + + +async def connect_renew_intent_redis_from_config( + app_config: AppConfig, +) -> Optional[Redis]: + """Connect (with ``PING``) or ``None`` if renew-intent Redis is disabled.""" + ri = app_config.renew_intent + if not ri.enabled or not ri.redis.enabled: + return None + + dsn = ri.redis.dsn + if dsn is None or not str(dsn).strip(): + return None + + client = redis_async.from_url( + str(dsn).strip(), + decode_responses=True, + ) + await client.ping() + line, ex = renew_bundle( + event=RENEW_EVENT_REDIS_CONNECTED, + source=RENEW_SOURCE_REDIS_QUEUE, + queue_key=ri.redis.queue_key, + consumer_concurrency=ri.redis.consumer_concurrency, + ) + logger.info(f"renew_intent {line}", extra=ex) + return client + + +async def close_renew_intent_redis_client(client: Optional[Redis]) -> None: + """Close client; no-op for ``None``.""" + if client is None: + return + await client.aclose() diff --git a/server/src/integrations/renew_intent/runner.py b/server/src/integrations/renew_intent/runner.py new file mode 100644 index 000000000..b72b4de48 --- /dev/null +++ b/server/src/integrations/renew_intent/runner.py @@ -0,0 +1,39 @@ +# Copyright 2026 Alibaba Group Holding Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Backward-compatible aliases for the unified renew-intent consumer.""" + +from __future__ import annotations + +from typing import Optional + +from src.config import AppConfig +from src.integrations.renew_intent.consumer import ( + RenewIntentConsumer, + start_renew_intent_consumer, +) +from src.services.extension_service import ExtensionService +from src.services.sandbox_service import SandboxService + +RenewIntentRunner = RenewIntentConsumer + + +async def start_renew_intent_runner( + app_config: AppConfig, + sandbox_service: SandboxService | None = None, + extension_service: ExtensionService | None = None, +) -> Optional[RenewIntentConsumer]: + return await start_renew_intent_consumer( + app_config, sandbox_service, extension_service + ) diff --git a/server/src/main.py b/server/src/main.py index ee6a89b96..0b18f7738 100644 --- a/server/src/main.py +++ b/server/src/main.py @@ -31,6 +31,7 @@ from fastapi.responses import JSONResponse from src.config import load_config +from src.integrations.renew_intent import start_renew_intent_consumer from uvicorn.config import LOGGING_CONFIG as UVICORN_LOGGING_CONFIG # Load configuration before initializing routers/middleware @@ -69,9 +70,11 @@ getattr(logging, app_config.server.log_level.upper(), logging.INFO) ) -from src.api.lifecycle import router # noqa: E402 +from src.api.lifecycle import router, sandbox_service # noqa: E402 +from src.integrations.renew_intent.proxy_renew import ProxyRenewCoordinator # noqa: E402 from src.middleware.auth import AuthMiddleware # noqa: E402 from src.middleware.request_id import 
RequestIdMiddleware # noqa: E402 +from src.services.extension_service import require_extension_service # noqa: E402 from src.services.runtime_resolver import ( # noqa: E402 validate_secure_runtime_on_startup, ) @@ -110,7 +113,24 @@ async def lifespan(app: FastAPI): logger.error("Secure runtime validation failed: %s", exc) raise + ext = require_extension_service(sandbox_service) + app.state.renew_intent_consumer = await start_renew_intent_consumer( + app_config, + sandbox_service, + ext, + ) + app.state.renew_intent_runner = app.state.renew_intent_consumer + + app.state.proxy_renew_coordinator = ProxyRenewCoordinator( + app_config, + app.state.renew_intent_consumer, + ) + yield + + consumer = getattr(app.state, "renew_intent_consumer", None) + if consumer is not None: + await consumer.stop() await app.state.http_client.aclose() diff --git a/server/src/services/__init__.py b/server/src/services/__init__.py index 0ec10d341..3fa510a3c 100644 --- a/server/src/services/__init__.py +++ b/server/src/services/__init__.py @@ -15,12 +15,15 @@ """Sandbox service implementations.""" from src.services.docker import DockerSandboxService +from src.services.extension_service import ExtensionService, require_extension_service from src.services.k8s.kubernetes_service import KubernetesSandboxService from src.services.factory import create_sandbox_service from src.services.sandbox_service import SandboxService __all__ = [ "SandboxService", + "ExtensionService", + "require_extension_service", "DockerSandboxService", "KubernetesSandboxService", "create_sandbox_service", diff --git a/server/src/services/docker.py b/server/src/services/docker.py index a08004d75..1b1d9e01e 100644 --- a/server/src/services/docker.py +++ b/server/src/services/docker.py @@ -43,6 +43,10 @@ from docker.errors import DockerException, ImageNotFound, NotFound as DockerNotFound from fastapi import HTTPException, status +from src.extensions import ( + ACCESS_RENEW_EXTEND_SECONDS_METADATA_KEY, + 
apply_access_renew_extend_seconds_to_mapping, +) from src.api.schema import ( CreateSandboxRequest, CreateSandboxResponse, @@ -58,6 +62,7 @@ SandboxStatus, ) from src.config import AppConfig, get_config +from src.services.extension_service import ExtensionService from src.services.constants import ( EGRESS_MODE_ENV, EGRESS_RULES_ENV, @@ -123,7 +128,7 @@ class PendingSandbox: status: SandboxStatus -class DockerSandboxService(OSSFSMixin, SandboxService): +class DockerSandboxService(OSSFSMixin, SandboxService, ExtensionService): """ Docker-based implementation of SandboxService. @@ -610,7 +615,13 @@ def _container_to_sandbox(self, container, sandbox_id: Optional[str] = None) -> metadata = { key: value for key, value in labels.items() - if key not in {SANDBOX_ID_LABEL, SANDBOX_EXPIRES_AT_LABEL, SANDBOX_MANUAL_CLEANUP_LABEL} + if key + not in { + SANDBOX_ID_LABEL, + SANDBOX_EXPIRES_AT_LABEL, + SANDBOX_MANUAL_CLEANUP_LABEL, + ACCESS_RENEW_EXTEND_SECONDS_METADATA_KEY, + } } or None entrypoint = container.attrs.get("Config", {}).get("Cmd") or [] if isinstance(entrypoint, str): @@ -1639,6 +1650,20 @@ def resume_sandbox(self, sandbox_id: str) -> None: }, ) from exc + def get_access_renew_extend_seconds(self, sandbox_id: str) -> Optional[int]: + try: + container = self._get_container_by_sandbox_id(sandbox_id) + except HTTPException: + return None + labels = container.attrs.get("Config", {}).get("Labels") or {} + raw = labels.get(ACCESS_RENEW_EXTEND_SECONDS_METADATA_KEY) + if raw is None or not str(raw).strip(): + return None + try: + return int(str(raw).strip()) + except ValueError: + return None + def renew_expiration( self, sandbox_id: str, @@ -1860,6 +1885,8 @@ def _build_labels_and_env( else: labels[SANDBOX_EXPIRES_AT_LABEL] = expires_at.isoformat() + apply_access_renew_extend_seconds_to_mapping(labels, request.extensions) + env_dict = request.env or {} environment = [] for key, value in env_dict.items(): diff --git a/server/src/services/extension_service.py 
b/server/src/services/extension_service.py new file mode 100644 index 000000000..1e7d493ec --- /dev/null +++ b/server/src/services/extension_service.py @@ -0,0 +1,41 @@ +# Copyright 2026 Alibaba Group Holding Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Extension-related reads from persisted workload metadata (separate from core lifecycle API).""" + +from __future__ import annotations + +from abc import ABC, abstractmethod +from typing import TYPE_CHECKING, Optional + +if TYPE_CHECKING: + from src.services.sandbox_service import SandboxService + + +class ExtensionService(ABC): + """Workload extension metadata reads; kept off ``SandboxService`` lifecycle surface.""" + + @abstractmethod + def get_access_renew_extend_seconds(self, sandbox_id: str) -> Optional[int]: + """Seconds from workload metadata for access-driven renew, or None if missing/invalid.""" + pass + + +def require_extension_service(sandbox_service: "SandboxService") -> ExtensionService: + """Assert the runtime sandbox service also implements ExtensionService (Docker/K8s do).""" + if isinstance(sandbox_service, ExtensionService): + return sandbox_service + raise TypeError( + f"{type(sandbox_service).__name__} must implement ExtensionService for renew-intent" + ) diff --git a/server/src/services/k8s/batchsandbox_provider.py b/server/src/services/k8s/batchsandbox_provider.py index b9571b987..eab1837f5 100644 --- a/server/src/services/k8s/batchsandbox_provider.py +++ 
b/server/src/services/k8s/batchsandbox_provider.py @@ -178,6 +178,7 @@ def create_workload( expires_at=expires_at, entrypoint=entrypoint, env=env, + annotations=annotations, ) # Extract extra pod spec fragments from template (volumes/volumeMounts only). @@ -315,6 +316,7 @@ def _create_workload_from_pool( expires_at: Optional[datetime], entrypoint: List[str], env: Dict[str, str], + annotations: Optional[Dict[str, str]] = None, ) -> Dict[str, Any]: """ Create BatchSandbox workload from a pre-warmed resource pool. @@ -355,6 +357,8 @@ def _create_workload_from_pool( }, "spec": spec, } + if annotations: + runtime_manifest["metadata"]["annotations"] = annotations # Pool-based creation does not need template merging # Create BatchSandbox directly diff --git a/server/src/services/k8s/kubernetes_service.py b/server/src/services/k8s/kubernetes_service.py index 66b9b4130..1ee417c41 100644 --- a/server/src/services/k8s/kubernetes_service.py +++ b/server/src/services/k8s/kubernetes_service.py @@ -27,6 +27,8 @@ from fastapi import HTTPException, status +from src.extensions import apply_access_renew_extend_seconds_to_mapping +from src.extensions.keys import ACCESS_RENEW_EXTEND_SECONDS_METADATA_KEY from src.api.schema import ( CreateSandboxRequest, CreateSandboxResponse, @@ -50,6 +52,7 @@ from src.services.endpoint_auth import generate_egress_token from src.services.endpoint_auth import build_egress_auth_headers, merge_endpoint_headers from src.services.helpers import matches_filter +from src.services.extension_service import ExtensionService from src.services.sandbox_service import SandboxService from src.services.validators import ( calculate_expiration_or_raise, @@ -66,7 +69,7 @@ logger = logging.getLogger(__name__) -class KubernetesSandboxService(SandboxService): +class KubernetesSandboxService(SandboxService, ExtensionService): """ Kubernetes-based implementation of SandboxService. 
@@ -315,7 +318,9 @@ async def create_sandbox(self, request: CreateSandboxRequest) -> CreateSandboxRe egress_image = self.app_config.egress.image if self.app_config.egress else None egress_auth_token = generate_egress_token() annotations[SANDBOX_EGRESS_AUTH_TOKEN_METADATA_KEY] = egress_auth_token - + + apply_access_renew_extend_seconds_to_mapping(annotations, request.extensions) + # Validate volumes before creating workload ensure_volumes_valid( request.volumes, @@ -583,7 +588,28 @@ def resume_sandbox(self, sandbox_id: str) -> None: "message": "Resume operation is not supported in Kubernetes runtime", }, ) - + + def get_access_renew_extend_seconds(self, sandbox_id: str) -> Optional[int]: + workload = self.workload_provider.get_workload( + sandbox_id=sandbox_id, + namespace=self.namespace, + ) + if not workload: + return None + if isinstance(workload, dict): + annotations = workload.get("metadata", {}).get("annotations") or {} + else: + md = getattr(workload, "metadata", None) + raw_ann = getattr(md, "annotations", None) if md else None + annotations = raw_ann if isinstance(raw_ann, dict) else {} + raw = annotations.get(ACCESS_RENEW_EXTEND_SECONDS_METADATA_KEY) + if raw is None or not str(raw).strip(): + return None + try: + return int(str(raw).strip()) + except ValueError: + return None + def renew_expiration( self, sandbox_id: str, @@ -764,7 +790,7 @@ def _build_sandbox_from_workload(self, workload: Any) -> Sandbox: spec = workload.spec labels = metadata.labels or {} creation_timestamp = metadata.creation_timestamp - + sandbox_id = labels.get(SANDBOX_ID_LABEL, "") # Get expiration from provider diff --git a/server/tests/test_config.py b/server/tests/test_config.py index 33b69ad76..7d46638c5 100644 --- a/server/tests/test_config.py +++ b/server/tests/test_config.py @@ -15,10 +15,12 @@ import textwrap import pytest +from pydantic import ValidationError from src import config as config_module from src.config import ( AppConfig, + RenewIntentRedisConfig, 
EGRESS_MODE_DNS, EGRESS_MODE_DNS_NFT, EgressConfig, @@ -89,6 +91,89 @@ def test_server_config_defaults_include_max_sandbox_timeout(): assert server_cfg.max_sandbox_timeout_seconds is None +def test_renew_intent_defaults(): + cfg = AppConfig(runtime=RuntimeConfig(type="docker", execd_image="opensandbox/execd:latest")) + ar = cfg.renew_intent + assert ar.enabled is False + assert ar.min_interval_seconds == 60 + assert ar.redis.enabled is False + assert ar.redis.dsn is None + assert ar.redis.queue_key == "opensandbox:renew:intent" + assert ar.redis.consumer_concurrency == 8 + + +def test_renew_intent_redis_requires_dsn_when_enabled(): + with pytest.raises(ValidationError): + RenewIntentRedisConfig(enabled=True, dsn=None) + with pytest.raises(ValidationError): + RenewIntentRedisConfig(enabled=True, dsn=" ") + cfg = RenewIntentRedisConfig(enabled=True, dsn="redis://127.0.0.1:6379/0") + assert cfg.dsn == "redis://127.0.0.1:6379/0" + + +def test_load_config_renew_intent_dotted_redis_keys(tmp_path, monkeypatch): + _reset_config(monkeypatch) + toml = textwrap.dedent( + """ + [server] + host = "127.0.0.1" + port = 9000 + + [renew_intent] + enabled = true + min_interval_seconds = 30 + redis.enabled = true + redis.dsn = "redis://example:6379/1" + redis.queue_key = "custom:renew" + redis.consumer_concurrency = 4 + + [runtime] + type = "docker" + execd_image = "opensandbox/execd:test" + """ + ) + config_path = tmp_path / "config.toml" + config_path.write_text(toml) + + loaded = config_module.load_config(config_path) + ar = loaded.renew_intent + assert ar.enabled is True + assert ar.min_interval_seconds == 30 + assert ar.redis.enabled is True + assert ar.redis.dsn == "redis://example:6379/1" + assert ar.redis.queue_key == "custom:renew" + assert ar.redis.consumer_concurrency == 4 + + +def test_load_config_renew_intent_legacy_redis_subtable(tmp_path, monkeypatch): + """[renew_intent.redis] remains accepted (same parsed shape as dotted keys).""" + _reset_config(monkeypatch) + toml 
= textwrap.dedent( + """ + [server] + host = "127.0.0.1" + port = 9000 + + [renew_intent] + enabled = true + + [renew_intent.redis] + enabled = true + dsn = "redis://legacy:6379/0" + + [runtime] + type = "docker" + execd_image = "opensandbox/execd:test" + """ + ) + config_path = tmp_path / "config.toml" + config_path.write_text(toml) + + loaded = config_module.load_config(config_path) + assert loaded.renew_intent.redis.enabled is True + assert loaded.renew_intent.redis.dsn == "redis://legacy:6379/0" + + def test_kubernetes_runtime_fills_missing_block(): server_cfg = ServerConfig() runtime_cfg = RuntimeConfig(type="kubernetes", execd_image="opensandbox/execd:latest") diff --git a/server/tests/test_docker_service.py b/server/tests/test_docker_service.py index b6d3205c8..57b9deb1c 100644 --- a/server/tests/test_docker_service.py +++ b/server/tests/test_docker_service.py @@ -31,6 +31,7 @@ StorageConfig, IngressConfig, ) +from src.extensions import ACCESS_RENEW_EXTEND_SECONDS_METADATA_KEY from src.services.constants import EGRESS_MODE_ENV, OPENSANDBOX_EGRESS_TOKEN from src.services.constants import ( SANDBOX_EGRESS_AUTH_TOKEN_METADATA_KEY, @@ -736,6 +737,21 @@ def test_build_labels_marks_manual_cleanup_without_expiration(): assert "opensandbox.io/expires-at" not in labels +def test_build_labels_stores_extensions_json(): + service = DockerSandboxService(config=_app_config()) + request = CreateSandboxRequest( + image=ImageSpec(uri="python:3.11"), + resourceLimits=ResourceLimits(root={}), + env={}, + entrypoint=["python"], + extensions={"access.renew.extend.seconds": "3600"}, + ) + + labels, _ = service._build_labels_and_env("sandbox-ext", request, None) + + assert labels[ACCESS_RENEW_EXTEND_SECONDS_METADATA_KEY] == "3600" + + @pytest.mark.asyncio @patch("src.services.docker.docker") async def test_create_sandbox_with_manual_cleanup_completes_full_create_path(mock_docker): diff --git a/server/tests/test_extensions.py b/server/tests/test_extensions.py new file mode 100644 
index 000000000..b1703c92b --- /dev/null +++ b/server/tests/test_extensions.py @@ -0,0 +1,97 @@ +# Copyright 2026 Alibaba Group Holding Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest +from fastapi import HTTPException + +from src.extensions import ( + ACCESS_RENEW_EXTEND_SECONDS_KEY, + ACCESS_RENEW_EXTEND_SECONDS_MAX, + ACCESS_RENEW_EXTEND_SECONDS_METADATA_KEY, + ACCESS_RENEW_EXTEND_SECONDS_MIN, + apply_access_renew_extend_seconds_to_mapping, + validate_extensions, +) + + +class TestValidateCreateSandboxExtensionsAccessRenewExtendSeconds: + """access.renew.extend.seconds in [300, 86400] when present.""" + + def test_omitted_extensions_ok(self): + validate_extensions(None) + + def test_extensions_without_key_ok(self): + validate_extensions({"other": "x"}) + + def test_boundary_min_ok(self): + validate_extensions( + {ACCESS_RENEW_EXTEND_SECONDS_KEY: str(ACCESS_RENEW_EXTEND_SECONDS_MIN)} + ) + + def test_boundary_max_ok(self): + validate_extensions( + {ACCESS_RENEW_EXTEND_SECONDS_KEY: str(ACCESS_RENEW_EXTEND_SECONDS_MAX)} + ) + + def test_typical_value_ok(self): + validate_extensions({ACCESS_RENEW_EXTEND_SECONDS_KEY: "1800"}) + + def test_whitespace_trimmed_ok(self): + validate_extensions({ACCESS_RENEW_EXTEND_SECONDS_KEY: " 1800 "}) + + def test_below_min_rejected(self): + with pytest.raises(HTTPException) as exc: + validate_extensions( + {ACCESS_RENEW_EXTEND_SECONDS_KEY: str(ACCESS_RENEW_EXTEND_SECONDS_MIN - 1)} + ) + assert exc.value.status_code 
== 400 + + def test_above_max_rejected(self): + with pytest.raises(HTTPException) as exc: + validate_extensions( + {ACCESS_RENEW_EXTEND_SECONDS_KEY: str(ACCESS_RENEW_EXTEND_SECONDS_MAX + 1)} + ) + assert exc.value.status_code == 400 + + def test_non_integer_string_rejected(self): + with pytest.raises(HTTPException) as exc: + validate_extensions({ACCESS_RENEW_EXTEND_SECONDS_KEY: "abc"}) + assert exc.value.status_code == 400 + + def test_empty_string_rejected(self): + with pytest.raises(HTTPException) as exc: + validate_extensions({ACCESS_RENEW_EXTEND_SECONDS_KEY: ""}) + assert exc.value.status_code == 400 + + +class TestAccessRenewExtendSecondsStorage: + def test_apply_to_mapping_with_mixed_extension_keys(self): + m: dict[str, str] = {} + apply_access_renew_extend_seconds_to_mapping( + m, + {"other": "x", ACCESS_RENEW_EXTEND_SECONDS_KEY: "3600"}, + ) + assert m == {ACCESS_RENEW_EXTEND_SECONDS_METADATA_KEY: "3600"} + + def test_apply_to_mapping_sets_default_key(self): + m: dict[str, str] = {} + apply_access_renew_extend_seconds_to_mapping( + m, {ACCESS_RENEW_EXTEND_SECONDS_KEY: "1200"} + ) + assert m == {ACCESS_RENEW_EXTEND_SECONDS_METADATA_KEY: "1200"} + + def test_apply_to_mapping_noop_when_key_absent(self): + m: dict[str, str] = {"x": "1"} + apply_access_renew_extend_seconds_to_mapping(m, {"poolRef": "p"}) + assert m == {"x": "1"} diff --git a/server/tests/test_integrations_redis.py b/server/tests/test_integrations_redis.py new file mode 100644 index 000000000..b970ad1b5 --- /dev/null +++ b/server/tests/test_integrations_redis.py @@ -0,0 +1,89 @@ +# Copyright 2026 Alibaba Group Holding Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from unittest.mock import AsyncMock, patch + +import pytest + +from src.config import AppConfig, RenewIntentConfig, RenewIntentRedisConfig, RuntimeConfig, ServerConfig +from src.integrations.renew_intent.redis_client import ( + close_renew_intent_redis_client, + connect_renew_intent_redis_from_config, +) + + +def _minimal_app_config(roa: RenewIntentConfig) -> AppConfig: + return AppConfig( + server=ServerConfig(), + runtime=RuntimeConfig(type="docker", execd_image="execd:latest"), + renew_intent=roa, + ) + + +@pytest.mark.asyncio +async def test_connect_returns_none_when_renew_intent_disabled(): + roa = RenewIntentConfig( + enabled=False, + redis=RenewIntentRedisConfig(enabled=True, dsn="redis://x"), + ) + cfg = _minimal_app_config(roa) + assert await connect_renew_intent_redis_from_config(cfg) is None + + +@pytest.mark.asyncio +async def test_connect_returns_none_when_redis_disabled(): + roa = RenewIntentConfig( + enabled=True, + redis=RenewIntentRedisConfig(enabled=False), + ) + cfg = _minimal_app_config(roa) + assert await connect_renew_intent_redis_from_config(cfg) is None + + +@pytest.mark.asyncio +@patch("src.integrations.renew_intent.redis_client.redis_async") +async def test_connect_pings_when_enabled(mock_redis_mod): + mock_client = AsyncMock() + mock_redis_mod.from_url.return_value = mock_client + roa = RenewIntentConfig( + enabled=True, + redis=RenewIntentRedisConfig( + enabled=True, + dsn="redis://127.0.0.1:6379/0", + queue_key="q", + consumer_concurrency=2, + ), + ) + cfg = _minimal_app_config(roa) + + client = await 
connect_renew_intent_redis_from_config(cfg) + + assert client is mock_client + mock_redis_mod.from_url.assert_called_once() + call_kw = mock_redis_mod.from_url.call_args + assert call_kw[0][0] == "redis://127.0.0.1:6379/0" + assert call_kw[1].get("decode_responses") is True + mock_client.ping.assert_awaited_once() + + +@pytest.mark.asyncio +async def test_close_none_is_safe(): + await close_renew_intent_redis_client(None) + + +@pytest.mark.asyncio +async def test_close_client(): + client = AsyncMock() + await close_renew_intent_redis_client(client) + client.aclose.assert_awaited_once() diff --git a/server/tests/test_proxy_renew_coordinator.py b/server/tests/test_proxy_renew_coordinator.py new file mode 100644 index 000000000..e7306e35d --- /dev/null +++ b/server/tests/test_proxy_renew_coordinator.py @@ -0,0 +1,139 @@ +# Copyright 2026 Alibaba Group Holding Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import asyncio +from unittest.mock import MagicMock + +import pytest + +from src.config import AppConfig, RenewIntentConfig, RuntimeConfig, ServerConfig +from src.integrations.renew_intent.consumer import RenewIntentConsumer, RenewWorkItem +from src.integrations.renew_intent.logutil import RENEW_SOURCE_SERVER_PROXY +from src.integrations.renew_intent.proxy_renew import ProxyRenewCoordinator + + +def _app_config(*, renew_enabled: bool = True, min_interval: int = 60) -> AppConfig: + return AppConfig( + server=ServerConfig(), + renew_intent=RenewIntentConfig( + enabled=renew_enabled, + min_interval_seconds=min_interval, + ), + runtime=RuntimeConfig(type="docker", execd_image="opensandbox/execd:latest"), + ) + + +def _consumer(cfg: AppConfig) -> RenewIntentConsumer: + return RenewIntentConsumer(cfg, MagicMock(), MagicMock(), redis_client=None) + + +@pytest.mark.asyncio +async def test_proxy_schedule_noop_when_disabled(monkeypatch): + cfg = _app_config(renew_enabled=False) + coord = ProxyRenewCoordinator(cfg, _consumer(cfg)) + created: list[asyncio.Task[None]] = [] + + def capture_task(coro, *, name=None): + t = asyncio.get_event_loop().create_task(coro, name=name) + created.append(t) + return t + + monkeypatch.setattr(asyncio, "create_task", capture_task) + coord.schedule("sbx-1") + await asyncio.sleep(0) + assert created == [] + + +@pytest.mark.asyncio +async def test_proxy_schedule_noop_when_consumer_none(): + cfg = _app_config(renew_enabled=True) + coord = ProxyRenewCoordinator(cfg, None) + coord.schedule("sbx-1") + + +@pytest.mark.asyncio +async def test_proxy_min_interval_skips_second_attempt(monkeypatch): + cfg = _app_config(renew_enabled=True, min_interval=60) + consumer = _consumer(cfg) + attempts = {"n": 0} + + def attempt(_sid: str, *, source: str) -> bool: + attempts["n"] += 1 + return True + + consumer._controller.attempt_renew_sync = attempt # type: ignore[method-assign] + + seq = iter([100.0, 100.0, 100.5]) + + def mono(): + return next(seq, 999.0) + + 
monkeypatch.setattr( + "src.integrations.renew_intent.consumer.time.monotonic", + mono, + ) + + work = RenewWorkItem( + source=RENEW_SOURCE_SERVER_PROXY, + sandbox_id="sbx-1", + observed_at=MagicMock(), + ) + await consumer._process_work(work) + await consumer._process_work(work) + assert attempts["n"] == 1 + + +@pytest.mark.asyncio +async def test_proxy_second_attempt_after_cooldown_window(monkeypatch): + cfg = _app_config(renew_enabled=True, min_interval=60) + consumer = _consumer(cfg) + attempts = {"n": 0} + + def attempt(_sid: str, *, source: str) -> bool: + attempts["n"] += 1 + return True + + consumer._controller.attempt_renew_sync = attempt # type: ignore[method-assign] + + seq = iter([100.0, 100.0, 200.0, 200.0]) + + def mono(): + return next(seq, 999.0) + + monkeypatch.setattr( + "src.integrations.renew_intent.consumer.time.monotonic", + mono, + ) + + work = RenewWorkItem( + source=RENEW_SOURCE_SERVER_PROXY, + sandbox_id="sbx-1", + observed_at=MagicMock(), + ) + await consumer._process_work(work) + await consumer._process_work(work) + assert attempts["n"] == 2 + + +def test_consumer_mem_lru_drops_oldest_unlocked_entries(): + cfg = _app_config(renew_enabled=True) + consumer = _consumer(cfg) + consumer._max_tracked = 2 + + consumer._ensure_mru_mem("a") + consumer._ensure_mru_mem("b") + assert set(consumer._mem_states) == {"a", "b"} + + consumer._ensure_mru_mem("c") + assert set(consumer._mem_states) == {"b", "c"} diff --git a/server/tests/test_renew_intent.py b/server/tests/test_renew_intent.py new file mode 100644 index 000000000..64f617b68 --- /dev/null +++ b/server/tests/test_renew_intent.py @@ -0,0 +1,50 @@ +# Copyright 2026 Alibaba Group Holding Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Renew-intent JSON parsing tests.""" + +from datetime import datetime, timedelta, timezone + +import pytest + +from src.integrations.renew_intent.intent import parse_renew_intent_json +from src.integrations.renew_intent.consumer import RenewIntentConsumer + + +def test_parse_matches_ingress_intent_shape(): + raw = ( + '{"sandbox_id":"abc","observed_at":"2026-03-22T12:00:00.123456789Z",' + '"port":8080,"request_uri":"/x"}' + ) + intent = parse_renew_intent_json(raw) + assert intent is not None + assert intent.sandbox_id == "abc" + assert intent.port == 8080 + assert intent.request_uri == "/x" + assert intent.observed_at.tzinfo is not None + + +def test_parse_rejects_bad_json(): + assert parse_renew_intent_json("not json") is None + + +@pytest.mark.parametrize( + "observed_at,expect_stale", + [ + (datetime.now(timezone.utc) - timedelta(seconds=400), True), + (datetime.now(timezone.utc) - timedelta(seconds=10), False), + ], +) +def test_stale_gate(observed_at, expect_stale): + assert RenewIntentConsumer._is_stale(observed_at) is expect_stale diff --git a/server/uv.lock b/server/uv.lock index 075539b86..8511263fb 100644 --- a/server/uv.lock +++ b/server/uv.lock @@ -34,6 +34,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/7f/9c/36c5c37947ebfb8c7f22e0eb6e4d188ee2d53aa3880f3f2744fb894f0cb1/anyio-4.12.0-py3-none-any.whl", hash = "sha256:dad2376a628f98eeca4881fc56cd06affd18f659b17a747d3ff0307ced94b1bb", size = 113362, upload-time = "2025-11-28T23:36:57.897Z" }, ] +[[package]] +name = "async-timeout" +version = "5.0.1" +source = { registry = 
"https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a5/ae/136395dfbfe00dfc94da3f3e136d0b13f394cba8f4841120e34226265780/async_timeout-5.0.1.tar.gz", hash = "sha256:d9321a7a3d5a6a5e187e824d2fa0793ce379a202935782d555d6e9d2735677d3", size = 9274, upload-time = "2024-11-06T16:41:39.6Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fe/ba/e2081de779ca30d473f21f5b30e0e737c438205440784c7dfc81efc2b029/async_timeout-5.0.1-py3-none-any.whl", hash = "sha256:39e3809566ff85354557ec2398b55e096c8364bacac9405a7a1fa429e77fe76c", size = 6233, upload-time = "2024-11-06T16:41:37.9Z" }, +] + [[package]] name = "backports-asyncio-runner" version = "1.2.0" @@ -425,6 +434,7 @@ dependencies = [ { name = "pydantic" }, { name = "pydantic-settings" }, { name = "pyyaml" }, + { name = "redis" }, { name = "tomli", marker = "python_full_version < '3.11'" }, { name = "uvicorn" }, ] @@ -447,6 +457,7 @@ requires-dist = [ { name = "pydantic" }, { name = "pydantic-settings" }, { name = "pyyaml" }, + { name = "redis", specifier = ">=5" }, { name = "tomli", marker = "python_full_version < '3.11'" }, { name = "uvicorn" }, ] @@ -800,6 +811,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f1/12/de94a39c2ef588c7e6455cfbe7343d3b2dc9d6b6b2f40c4c6565744c873d/pyyaml-6.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:ebc55a14a21cb14062aa4162f906cd962b28e2e9ea38f9b4391244cd8de4ae0b", size = 149341, upload-time = "2025-09-25T21:32:56.828Z" }, ] +[[package]] +name = "redis" +version = "7.3.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "async-timeout", marker = "python_full_version < '3.11.3'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/da/82/4d1a5279f6c1251d3d2a603a798a1137c657de9b12cfc1fba4858232c4d2/redis-7.3.0.tar.gz", hash = "sha256:4d1b768aafcf41b01022410b3cc4f15a07d9b3d6fe0c66fc967da2c88e551034", size = 4928081, upload-time = "2026-03-06T18:18:16.287Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/f0/28/84e57fce7819e81ec5aa1bd31c42b89607241f4fb1a3ea5b0d2dbeaea26c/redis-7.3.0-py3-none-any.whl", hash = "sha256:9d4fcb002a12a5e3c3fbe005d59c48a2cc231f87fbb2f6b70c2d89bb64fec364", size = 404379, upload-time = "2026-03-06T18:18:14.583Z" }, +] + [[package]] name = "requests" version = "2.32.5" diff --git a/specs/sandbox-lifecycle.yml b/specs/sandbox-lifecycle.yml index db09f1b39..bbf4f63ae 100644 --- a/specs/sandbox-lifecycle.yml +++ b/specs/sandbox-lifecycle.yml @@ -790,6 +790,9 @@ components: **Best Practices**: - **Namespacing**: Use prefixed keys (e.g., `storage.id`) to prevent collisions. - **Pass-through**: SDKs and middleware must treat this object as opaque and pass it through transparently. + + **Well-known keys**: + - `access.renew.extend.seconds` (optional): Decimal integer string from **300** to **86400** (5 minutes to 24 hours inclusive). Opts the sandbox into OSEP-0009 renew-on-access and sets per-renewal extension seconds. Omit to disable. Invalid values are rejected at creation with HTTP 400 (validated on the lifecycle create endpoint via `validate_extensions` in server `src/extensions/validation.py`). 
ResourceLimits: type: object description: | diff --git a/tests/python/tests/test_sandbox_e2e.py b/tests/python/tests/test_sandbox_e2e.py index c1e8e7259..a0ffa19f8 100644 --- a/tests/python/tests/test_sandbox_e2e.py +++ b/tests/python/tests/test_sandbox_e2e.py @@ -67,6 +67,9 @@ logger = logging.getLogger(__name__) +# Keep in sync with server ``src/extensions/keys.py`` +ACCESS_RENEW_EXTEND_SECONDS_KEY = "access.renew.extend.seconds" + def _now_ms() -> int: return int(time.time() * 1000) @@ -426,9 +429,10 @@ async def test_01aa_network_policy_get_and_patch(self): pass await sandbox.close() - @pytest.mark.timeout(180) + @pytest.mark.timeout(240) @pytest.mark.order(1) async def test_01ab_network_policy_get_and_patch_with_server_proxy(self): + """Also covers access renew on proxy traffic (needs ``[renew_intent] enabled = true``).""" if is_kubernetes_runtime(): pytest.skip("Network policy is not covered in the Kubernetes runtime suite") @@ -437,17 +441,25 @@ async def test_01ab_network_policy_get_and_patch_with_server_proxy(self): logger.info("=" * 80) cfg = create_connection_config_server_proxy() + assert cfg.use_server_proxy is True + sandbox_ttl = timedelta(minutes=4) sandbox = await Sandbox.create( image=SandboxImageSpec(get_sandbox_image()), connection_config=cfg, - timeout=timedelta(minutes=5), - ready_timeout=timedelta(seconds=30), + timeout=sandbox_ttl, + ready_timeout=timedelta(seconds=90), + extensions={ACCESS_RENEW_EXTEND_SECONDS_KEY: "300"}, network_policy=NetworkPolicy( defaultAction="deny", egress=[NetworkRule(action="allow", target="pypi.org")], ), ) try: + boot = await sandbox.get_info() + assert boot.expires_at is not None + # Baseline from create contract only: ready/ping may already move expires_at. 
+ nominal_expires_at = boot.created_at + sandbox_ttl + await asyncio.sleep(5) egress_endpoint = await sandbox.get_endpoint(DEFAULT_EGRESS_PORT) @@ -481,6 +493,27 @@ async def test_01ab_network_policy_get_and_patch_with_server_proxy(self): rule.target == "pypi.org" and rule.action == "deny" for rule in patched_policy.egress ) + + assert await sandbox.is_healthy() + + deadline = time.monotonic() + 30.0 + min_delta = timedelta(seconds=30) + bumped = False + while time.monotonic() < deadline: + info = await sandbox.get_info() + if info.expires_at is not None and info.expires_at > nominal_expires_at + min_delta: + bumped = True + logger.info( + "Access renew: expires_at=%s above nominal (created_at+timeout)=%s", + info.expires_at, + nominal_expires_at, + ) + break + await asyncio.sleep(2.0) + assert bumped, ( + "expires_at did not exceed created_at + create timeout + slack after proxied traffic; " + "set [renew_intent] enabled = true on the lifecycle server." + ) finally: try: await sandbox.kill()