diff --git a/.github/workflows/real-k8s-e2e.yml b/.github/workflows/real-k8s-e2e.yml
index 7d0ed1e24..f512a263e 100644
--- a/.github/workflows/real-k8s-e2e.yml
+++ b/.github/workflows/real-k8s-e2e.yml
@@ -1,36 +1,26 @@
-name: Real K8s E2E Tests
+name: Real Kubernetes E2E Tests (nightly build)
permissions:
contents: read
on:
+ workflow_dispatch:
+ schedule:
+ - cron: "0 20 * * *"
pull_request:
branches: [ main ]
paths:
- - 'server/src/**'
- - 'server/Dockerfile'
- - 'server/pyproject.toml'
- - 'server/uv.lock'
- - 'server/example.config.toml'
- - 'server/example.config.k8s.toml'
- - 'server/example.batchsandbox-template.yaml'
- - 'components/execd/**'
- - 'components/egress/**'
- - 'sdks/sandbox/python/**'
- - 'sdks/code-interpreter/python/**'
- - 'tests/python/**'
+ - '.github/workflows/real-k8s-e2e.yml'
- 'scripts/python-k8s-e2e.sh'
- - 'kubernetes/**'
- push:
- branches: [ main ]
+ - 'kubernetes/charts/**'
concurrency:
- group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+ group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
jobs:
python-k8s-e2e:
- name: Python E2E (kind + kubernetes runtime)
+ name: Python E2E
runs-on: ubuntu-latest
env:
KIND_CLUSTER: opensandbox-e2e
diff --git a/README.md b/README.md
index 50fb5db45..97ba47a10 100644
--- a/README.md
+++ b/README.md
@@ -34,6 +34,9 @@
+
+
+
diff --git a/docs/README_zh.md b/docs/README_zh.md
index a35426c6e..9cca9d20d 100644
--- a/docs/README_zh.md
+++ b/docs/README_zh.md
@@ -28,6 +28,9 @@
+
+
+
diff --git a/scripts/python-k8s-e2e.sh b/scripts/python-k8s-e2e.sh
index 4c2aaccb8..33d4a2ae9 100644
--- a/scripts/python-k8s-e2e.sh
+++ b/scripts/python-k8s-e2e.sh
@@ -28,10 +28,10 @@ CONTROLLER_IMG="${CONTROLLER_IMG:-opensandbox/controller:e2e-local}"
SERVER_IMG="${SERVER_IMG:-opensandbox/server:e2e-local}"
EXECD_IMG="${EXECD_IMG:-opensandbox/execd:e2e-local}"
EGRESS_IMG="${EGRESS_IMG:-opensandbox/egress:e2e-local}"
-CODE_INTERPRETER_IMG="${CODE_INTERPRETER_IMG:-opensandbox/code-interpreter:latest}"
SERVER_RELEASE="${SERVER_RELEASE:-opensandbox-server}"
SERVER_VALUES_FILE="${SERVER_VALUES_FILE:-/tmp/opensandbox-server-values.yaml}"
PORT_FORWARD_LOG="${PORT_FORWARD_LOG:-/tmp/opensandbox-server-port-forward.log}"
+SANDBOX_TEST_IMAGE="${SANDBOX_TEST_IMAGE:-ubuntu:latest}"
SERVER_IMG_REPOSITORY="${SERVER_IMG%:*}"
SERVER_IMG_TAG="${SERVER_IMG##*:}"
@@ -57,12 +57,12 @@ cd "${REPO_ROOT}"
docker build -f server/Dockerfile -t "${SERVER_IMG}" server
docker build -f components/execd/Dockerfile -t "${EXECD_IMG}" "${REPO_ROOT}"
docker build -f components/egress/Dockerfile -t "${EGRESS_IMG}" "${REPO_ROOT}"
-docker pull "${CODE_INTERPRETER_IMG}"
+docker pull "${SANDBOX_TEST_IMAGE}"
kind load docker-image --name "${KIND_CLUSTER}" "${SERVER_IMG}"
kind load docker-image --name "${KIND_CLUSTER}" "${EXECD_IMG}"
kind load docker-image --name "${KIND_CLUSTER}" "${EGRESS_IMG}"
-kind load docker-image --name "${KIND_CLUSTER}" "${CODE_INTERPRETER_IMG}"
+kind load docker-image --name "${KIND_CLUSTER}" "${SANDBOX_TEST_IMAGE}"
kubectl get namespace "${E2E_NAMESPACE}" >/dev/null 2>&1 || kubectl create namespace "${E2E_NAMESPACE}"
@@ -244,11 +244,11 @@ cd ../../..
export OPENSANDBOX_TEST_DOMAIN="localhost:8080"
export OPENSANDBOX_TEST_PROTOCOL="http"
export OPENSANDBOX_TEST_API_KEY=""
-export OPENSANDBOX_SANDBOX_DEFAULT_IMAGE="${CODE_INTERPRETER_IMG}"
+export OPENSANDBOX_SANDBOX_DEFAULT_IMAGE="${SANDBOX_TEST_IMAGE}"
export OPENSANDBOX_E2E_RUNTIME="kubernetes"
export OPENSANDBOX_TEST_USE_SERVER_PROXY="true"
export OPENSANDBOX_TEST_PVC_NAME="${PVC_NAME}"
cd tests/python
uv sync --all-extras --refresh
-make test
+make test-kubernetes-mini
diff --git a/server/src/services/k8s/kubernetes_service.py b/server/src/services/k8s/kubernetes_service.py
index f84afc350..66b9b4130 100644
--- a/server/src/services/k8s/kubernetes_service.py
+++ b/server/src/services/k8s/kubernetes_service.py
@@ -375,10 +375,9 @@ async def create_sandbox(self, request: CreateSandboxRequest) -> CreateSandboxRe
entrypoint=request.entrypoint,
)
- except HTTPException:
- # Clean up on failure
+ except HTTPException as e:
try:
- logger.warning(f"Creation failed, cleaning up sandbox: {sandbox_id}")
+ logger.error(f"Creation failed, cleaning up sandbox {sandbox_id}: {e}")
self.workload_provider.delete_workload(sandbox_id, self.namespace)
except Exception as cleanup_ex:
logger.error(f"Failed to cleanup sandbox {sandbox_id}", exc_info=cleanup_ex)
diff --git a/tests/python/Makefile b/tests/python/Makefile
index 268787974..81ab9eb72 100644
--- a/tests/python/Makefile
+++ b/tests/python/Makefile
@@ -1,4 +1,4 @@
-.PHONY: sync sync-dev test test-sandbox test-manager test-code lint fmt
+.PHONY: sync sync-dev test test-kubernetes-mini test-sandbox test-manager test-code lint fmt
sync:
uv sync
@@ -9,6 +9,11 @@ sync-dev:
test:
uv run pytest
+test-kubernetes-mini:
+ uv run pytest \
+ --ignore=tests/test_code_interpreter_e2e.py \
+ --ignore=tests/test_code_interpreter_e2e_sync.py
+
test-sandbox:
uv run pytest tests/test_sandbox_e2e.py
diff --git a/tests/python/tests/test_sandbox_e2e.py b/tests/python/tests/test_sandbox_e2e.py
index aa9afa79e..c1e8e7259 100644
--- a/tests/python/tests/test_sandbox_e2e.py
+++ b/tests/python/tests/test_sandbox_e2e.py
@@ -207,13 +207,16 @@ async def test_01_sandbox_lifecycle_and_health(self):
assert info.created_at is not None
assert info.expires_at is not None
assert info.expires_at > info.created_at
- assert info.entrypoint == ["tail", "-f", "/dev/null"]
+ # Docker runtime reports the SDK default as-is; Kubernetes may prefix bootstrap.sh.
+ assert info.entrypoint[-3:] == ["tail", "-f", "/dev/null"], info.entrypoint
duration = info.expires_at - info.created_at
+ # Matches Sandbox.create(..., timeout=timedelta(minutes=5)); allow skew across runtimes.
min_duration = timedelta(minutes=1)
- max_duration = timedelta(minutes=3)
- assert min_duration <= duration <= max_duration, \
- f"Duration {duration} should be between 1 and 3 minutes"
+ max_duration = timedelta(minutes=6)
+ assert min_duration <= duration <= max_duration, (
+ f"Duration {duration} should be between {min_duration} and {max_duration}"
+ )
assert info.metadata is not None
assert info.metadata.get("tag") == "e2e-test"
@@ -324,6 +327,9 @@ async def test_01b_manual_cleanup(self):
@pytest.mark.timeout(120)
@pytest.mark.order(1)
async def test_01a_network_policy_create(self):
+ if is_kubernetes_runtime():
+ pytest.skip("Network policy is not covered in the Kubernetes runtime suite")
+
logger.info("=" * 80)
logger.info("TEST 1a: Creating sandbox with networkPolicy (async)")
logger.info("=" * 80)
@@ -355,6 +361,9 @@ async def test_01a_network_policy_create(self):
@pytest.mark.timeout(180)
@pytest.mark.order(1)
async def test_01aa_network_policy_get_and_patch(self):
+ if is_kubernetes_runtime():
+ pytest.skip("Network policy is not covered in the Kubernetes runtime suite")
+
logger.info("=" * 80)
logger.info("TEST 1aa: networkPolicy get/patch (async)")
logger.info("=" * 80)
@@ -420,6 +429,9 @@ async def test_01aa_network_policy_get_and_patch(self):
@pytest.mark.timeout(180)
@pytest.mark.order(1)
async def test_01ab_network_policy_get_and_patch_with_server_proxy(self):
+ if is_kubernetes_runtime():
+ pytest.skip("Network policy is not covered in the Kubernetes runtime suite")
+
logger.info("=" * 80)
logger.info("TEST 1ab: networkPolicy get/patch with server proxy (async)")
logger.info("=" * 80)
diff --git a/tests/python/tests/test_sandbox_e2e_sync.py b/tests/python/tests/test_sandbox_e2e_sync.py
index 16ae9063e..51f553b1b 100644
--- a/tests/python/tests/test_sandbox_e2e_sync.py
+++ b/tests/python/tests/test_sandbox_e2e_sync.py
@@ -205,13 +205,15 @@ def test_01_sandbox_lifecycle_and_health(self) -> None:
assert info.created_at is not None
assert info.expires_at is not None
assert info.expires_at > info.created_at
- assert info.entrypoint == ["tail", "-f", "/dev/null"]
+ # Docker runtime reports the SDK default as-is; Kubernetes may prefix bootstrap.sh.
+ assert info.entrypoint[-3:] == ["tail", "-f", "/dev/null"], info.entrypoint
duration = info.expires_at - info.created_at
+ # Matches SandboxSync.create(..., timeout=timedelta(minutes=5)); allow skew across runtimes.
min_duration = timedelta(minutes=1)
- max_duration = timedelta(minutes=3)
+ max_duration = timedelta(minutes=6)
assert min_duration <= duration <= max_duration, (
- f"Duration {duration} should be between 1 and 3 minutes"
+ f"Duration {duration} should be between {min_duration} and {max_duration}"
)
assert info.metadata is not None
@@ -287,6 +289,9 @@ def test_01b_manual_cleanup(self) -> None:
@pytest.mark.timeout(120)
@pytest.mark.order(1)
def test_01a_network_policy_create(self) -> None:
+ if is_kubernetes_runtime():
+ pytest.skip("Network policy is not covered in the Kubernetes runtime suite")
+
logger.info("=" * 80)
logger.info("TEST 1a: Creating sandbox with networkPolicy (sync)")
logger.info("=" * 80)
@@ -322,6 +327,9 @@ def test_01a_network_policy_create(self) -> None:
@pytest.mark.timeout(180)
@pytest.mark.order(1)
def test_01aa_network_policy_get_and_patch(self) -> None:
+ if is_kubernetes_runtime():
+ pytest.skip("Network policy is not covered in the Kubernetes runtime suite")
+
logger.info("=" * 80)
logger.info("TEST 1aa: networkPolicy get/patch (sync)")
logger.info("=" * 80)
diff --git a/tests/python/tests/test_sandbox_manager_e2e.py b/tests/python/tests/test_sandbox_manager_e2e.py
index 08a41301f..c8737b50e 100644
--- a/tests/python/tests/test_sandbox_manager_e2e.py
+++ b/tests/python/tests/test_sandbox_manager_e2e.py
@@ -32,6 +32,7 @@
import pytest
from opensandbox import Sandbox, SandboxManager
from opensandbox.config import ConnectionConfig
+from opensandbox.exceptions import SandboxApiException
from opensandbox.models.sandboxes import (
SandboxFilter,
SandboxImageSpec,
@@ -41,6 +42,10 @@
logger = logging.getLogger(__name__)
+# Kubernetes may use Pending / Allocated during lifecycle; narrow filters omit them and list E2E flakes.
+_STATES_OR_BROAD = ["Pending", "Allocated", "Running", "Paused"]
+_STATES_NOT_PAUSED = ["Pending", "Allocated", "Running"]
+
async def _create_sandbox(
*,
@@ -54,7 +59,7 @@ async def _create_sandbox(
return await Sandbox.create(
image=SandboxImageSpec(image),
connection_config=connection_config,
- resource={"cpu": "1", "memory": "2Gi"},
+ resource={"cpu": "100m", "memory": "64Mi"},
timeout=timeout,
ready_timeout=ready_timeout,
metadata=metadata,
@@ -91,6 +96,8 @@ class TestSandboxManagerE2E:
s1: Sandbox | None = None
s2: Sandbox | None = None
s3: Sandbox | None = None
+ #: True if s3 was paused successfully (Docker); False if pause is unsupported (e.g. Kubernetes HTTP 501).
+ s3_paused: bool = False
@pytest.fixture(scope="class", autouse=True)
async def _manager_setup(self, request):
@@ -134,9 +141,20 @@ async def _manager_setup(self, request):
assert await cls.s2.is_healthy() is True
assert await cls.s3.is_healthy() is True
- # Pause s3 to create a deterministic non-Running state for OR-state tests.
- await cls.manager.pause_sandbox(cls.s3.id)
- await _wait_for_state(manager=cls.manager, sandbox_id=cls.s3.id, expected_state="Paused")
+ cls.s3_paused = False
+ try:
+ await cls.manager.pause_sandbox(cls.s3.id)
+ await _wait_for_state(manager=cls.manager, sandbox_id=cls.s3.id, expected_state="Paused")
+ cls.s3_paused = True
+ except SandboxApiException as exc:
+ # Kubernetes runtime returns 501 for pause; keep all sandboxes Running and relax state-filter asserts.
+ if exc.status_code == 501:
+ logger.warning(
+ "pause_sandbox not supported (HTTP %s); manager state-filter E2E uses all-Running sandboxes",
+ exc.status_code,
+ )
+ else:
+ raise
try:
yield
@@ -175,7 +193,11 @@ async def test_01_states_filter_or_logic(self):
# states filter is OR: should return sandboxes in ANY of the requested states.
result = await manager.list_sandbox_infos(
- SandboxFilter(states=["Running", "Paused"], metadata={"tag": TestSandboxManagerE2E.tag}, page_size=50)
+ SandboxFilter(
+ states=_STATES_OR_BROAD,
+ metadata={"tag": TestSandboxManagerE2E.tag},
+ page_size=50,
+ )
)
ids = {info.id for info in result.sandbox_infos}
assert {TestSandboxManagerE2E.s1.id, TestSandboxManagerE2E.s2.id, TestSandboxManagerE2E.s3.id}.issubset(ids)
@@ -184,17 +206,29 @@ async def test_01_states_filter_or_logic(self):
SandboxFilter(states=["Paused"], metadata={"tag": TestSandboxManagerE2E.tag}, page_size=50)
)
paused_ids = {info.id for info in paused_only.sandbox_infos}
- assert TestSandboxManagerE2E.s3.id in paused_ids
- assert TestSandboxManagerE2E.s1.id not in paused_ids
- assert TestSandboxManagerE2E.s2.id not in paused_ids
-
running_only = await manager.list_sandbox_infos(
- SandboxFilter(states=["Running"], metadata={"tag": TestSandboxManagerE2E.tag}, page_size=50)
+ SandboxFilter(
+ states=_STATES_NOT_PAUSED,
+ metadata={"tag": TestSandboxManagerE2E.tag},
+ page_size=50,
+ )
)
running_ids = {info.id for info in running_only.sandbox_infos}
- assert TestSandboxManagerE2E.s1.id in running_ids
- assert TestSandboxManagerE2E.s2.id in running_ids
- assert TestSandboxManagerE2E.s3.id not in running_ids
+
+ if TestSandboxManagerE2E.s3_paused:
+ assert TestSandboxManagerE2E.s3.id in paused_ids
+ assert TestSandboxManagerE2E.s1.id not in paused_ids
+ assert TestSandboxManagerE2E.s2.id not in paused_ids
+ assert TestSandboxManagerE2E.s1.id in running_ids
+ assert TestSandboxManagerE2E.s2.id in running_ids
+ assert TestSandboxManagerE2E.s3.id not in running_ids
+ else:
+ assert TestSandboxManagerE2E.s3.id not in paused_ids
+ assert TestSandboxManagerE2E.s1.id not in paused_ids
+ assert TestSandboxManagerE2E.s2.id not in paused_ids
+ assert TestSandboxManagerE2E.s1.id in running_ids
+ assert TestSandboxManagerE2E.s2.id in running_ids
+ assert TestSandboxManagerE2E.s3.id in running_ids
@pytest.mark.timeout(600)
async def test_02_metadata_filter_and_logic(self):
diff --git a/tests/python/tests/test_sandbox_manager_e2e_sync.py b/tests/python/tests/test_sandbox_manager_e2e_sync.py
index efb206cc2..b5a5336c6 100644
--- a/tests/python/tests/test_sandbox_manager_e2e_sync.py
+++ b/tests/python/tests/test_sandbox_manager_e2e_sync.py
@@ -27,8 +27,11 @@
from datetime import timedelta
from uuid import uuid4
+import logging
+
import pytest
from opensandbox import SandboxManagerSync, SandboxSync
+from opensandbox.exceptions import SandboxApiException
from opensandbox.models.sandboxes import (
SandboxFilter,
SandboxImageSpec,
@@ -36,6 +39,12 @@
from tests.base_e2e_test import create_connection_config_sync, get_sandbox_image
+logger = logging.getLogger(__name__)
+
+# Kubernetes may use Pending / Allocated during lifecycle; narrow state filters omit those states, making the list E2E tests flaky.
+_STATES_OR_BROAD = ["Pending", "Allocated", "Running", "Paused"]
+_STATES_NOT_PAUSED = ["Pending", "Allocated", "Running"]
+
class TestSandboxManagerE2ESync:
@pytest.mark.timeout(600)
@@ -50,7 +59,7 @@ def test_01_states_filter_or_logic(self):
s1 = SandboxSync.create(
image=SandboxImageSpec(get_sandbox_image()),
connection_config=cfg,
- resource={"cpu": "1", "memory": "2Gi"},
+ resource={"cpu": "100m", "memory": "64Mi"},
timeout=timedelta(minutes=5),
ready_timeout=timedelta(seconds=60),
metadata={"tag": tag, "team": "t1", "env": "prod"},
@@ -60,7 +69,7 @@ def test_01_states_filter_or_logic(self):
s2 = SandboxSync.create(
image=SandboxImageSpec(get_sandbox_image()),
connection_config=cfg,
- resource={"cpu": "1", "memory": "2Gi"},
+ resource={"cpu": "100m", "memory": "64Mi"},
timeout=timedelta(minutes=5),
ready_timeout=timedelta(seconds=60),
metadata={"tag": tag, "team": "t1", "env": "dev"},
@@ -70,7 +79,7 @@ def test_01_states_filter_or_logic(self):
s3 = SandboxSync.create(
image=SandboxImageSpec(get_sandbox_image()),
connection_config=cfg,
- resource={"cpu": "1", "memory": "2Gi"},
+ resource={"cpu": "100m", "memory": "64Mi"},
timeout=timedelta(minutes=5),
ready_timeout=timedelta(seconds=60),
metadata={"tag": tag, "env": "prod"},
@@ -82,19 +91,29 @@ def test_01_states_filter_or_logic(self):
assert s2.is_healthy() is True
assert s3.is_healthy() is True
- # Pause s3 and wait for state transition
- manager.pause_sandbox(s3.id)
- deadline = time.time() + 180
- while time.time() < deadline:
- info = manager.get_sandbox_info(s3.id)
- if info.status.state == "Paused":
- break
- time.sleep(1)
- assert manager.get_sandbox_info(s3.id).status.state == "Paused"
-
- # OR states
+ s3_paused = False
+ try:
+ manager.pause_sandbox(s3.id)
+ deadline = time.time() + 180
+ while time.time() < deadline:
+ info = manager.get_sandbox_info(s3.id)
+ if info.status.state == "Paused":
+ break
+ time.sleep(1)
+ assert manager.get_sandbox_info(s3.id).status.state == "Paused"
+ s3_paused = True
+ except SandboxApiException as exc:
+ if exc.status_code == 501:
+ logger.warning(
+ "pause_sandbox not supported (HTTP %s); manager state-filter E2E uses all-Running sandboxes",
+ exc.status_code,
+ )
+ else:
+ raise
+
+ # OR states (broad: K8s lifecycle is not only Running/Paused)
both = manager.list_sandbox_infos(
- SandboxFilter(states=["Running", "Paused"], metadata={"tag": tag}, page_size=50)
+ SandboxFilter(states=_STATES_OR_BROAD, metadata={"tag": tag}, page_size=50)
)
ids = {info.id for info in both.sandbox_infos}
assert {s1.id, s2.id, s3.id}.issubset(ids)
@@ -103,17 +122,25 @@ def test_01_states_filter_or_logic(self):
SandboxFilter(states=["Paused"], metadata={"tag": tag}, page_size=50)
)
paused_ids = {info.id for info in paused_only.sandbox_infos}
- assert s3.id in paused_ids
- assert s1.id not in paused_ids
- assert s2.id not in paused_ids
-
running_only = manager.list_sandbox_infos(
- SandboxFilter(states=["Running"], metadata={"tag": tag}, page_size=50)
+ SandboxFilter(states=_STATES_NOT_PAUSED, metadata={"tag": tag}, page_size=50)
)
running_ids = {info.id for info in running_only.sandbox_infos}
- assert s1.id in running_ids
- assert s2.id in running_ids
- assert s3.id not in running_ids
+
+ if s3_paused:
+ assert s3.id in paused_ids
+ assert s1.id not in paused_ids
+ assert s2.id not in paused_ids
+ assert s1.id in running_ids
+ assert s2.id in running_ids
+ assert s3.id not in running_ids
+ else:
+ assert s3.id not in paused_ids
+ assert s1.id not in paused_ids
+ assert s2.id not in paused_ids
+ assert s1.id in running_ids
+ assert s2.id in running_ids
+ assert s3.id in running_ids
finally:
for s in [s1, s2, s3]:
if s is None:
@@ -140,7 +167,7 @@ def test_02_metadata_filter_and_logic(self):
s1 = SandboxSync.create(
image=SandboxImageSpec(get_sandbox_image()),
connection_config=cfg,
- resource={"cpu": "1", "memory": "2Gi"},
+ resource={"cpu": "100m", "memory": "64Mi"},
timeout=timedelta(minutes=5),
ready_timeout=timedelta(seconds=60),
metadata={"tag": tag, "team": "t1", "env": "prod"},
@@ -150,7 +177,7 @@ def test_02_metadata_filter_and_logic(self):
s2 = SandboxSync.create(
image=SandboxImageSpec(get_sandbox_image()),
connection_config=cfg,
- resource={"cpu": "1", "memory": "2Gi"},
+ resource={"cpu": "100m", "memory": "64Mi"},
timeout=timedelta(minutes=5),
ready_timeout=timedelta(seconds=60),
metadata={"tag": tag, "team": "t1", "env": "dev"},
@@ -160,7 +187,7 @@ def test_02_metadata_filter_and_logic(self):
s3 = SandboxSync.create(
image=SandboxImageSpec(get_sandbox_image()),
connection_config=cfg,
- resource={"cpu": "1", "memory": "2Gi"},
+ resource={"cpu": "100m", "memory": "64Mi"},
timeout=timedelta(minutes=5),
ready_timeout=timedelta(seconds=60),
metadata={"tag": tag, "env": "prod"},