diff --git a/.env.example b/.env.example index d1d4092..3bca089 100644 --- a/.env.example +++ b/.env.example @@ -6,6 +6,28 @@ API_KEY=your-secure-api-key-here-change-this-in-production # API_KEYS=key1,key2,key3 # Additional API keys (comma-separated) # MASTER_API_KEY=your-secure-master-key # Required for admin dashboard CLI +# +# AUTH_ENABLED=true # Set to false to disable x-api-key/Basic auth checks +# # on user endpoints. Use only when running behind a +# # trusted network boundary. /api/v1/admin/* still +# # requires MASTER_API_KEY regardless. +# +# Three ways clients can authenticate when AUTH_ENABLED=true: +# 1. x-api-key: <api-key> (recommended for proxies) +# 2. Authorization: Basic base64("<api-key>:") (LibreChat URL credentials) +# e.g. LIBRECHAT_CODE_BASEURL=https://<api-key>@your-api/v1 +# 3. (none, when AUTH_ENABLED=false) + +# ── Sandbox network access (skill installs) ─────────────────── +# When ENABLE_SANDBOX_NETWORK=true, sandboxes can reach the internet but only +# through an inline allowlist proxy that permits PyPI, npm, Go modules, and +# crates.io. Required for skills that pip/npm/go install dependencies at +# runtime. Off by default (sandboxes are isolated). 
+# +# ENABLE_SANDBOX_NETWORK=false +# SANDBOX_EGRESS_PORT=18443 # local-only, sandbox -> proxy +# SANDBOX_EGRESS_ALLOWLIST= # comma-separated extra hosts +# SKILL_DEPS_PATH=/opt/skill-deps # backing volume mount # ── Redis ─────────────────────────────────────────────────────── REDIS_HOST=localhost @@ -13,12 +35,13 @@ REDIS_PORT=6379 # REDIS_PASSWORD= # REDIS_URL=redis://localhost:6379/0 # Alternative to individual settings -# ── MinIO / S3 ───────────────────────────────────────────────── -MINIO_ENDPOINT=localhost:9000 -MINIO_ACCESS_KEY=minioadmin -MINIO_SECRET_KEY=minioadmin -# MINIO_SECURE=false -# MINIO_BUCKET=code-interpreter-files +# ── S3 Storage (Garage) ──────────────────────────────────────── +S3_ENDPOINT=localhost:3900 +S3_ACCESS_KEY=GKminioadmin0000 +S3_SECRET_KEY=minioadminsecret +# S3_SECURE=false +# S3_BUCKET=code-interpreter-files +# S3_REGION=garage # ── Execution Limits ─────────────────────────────────────────── # MAX_EXECUTION_TIME=30 # Seconds (default: 30) @@ -35,7 +58,7 @@ MINIO_SECRET_KEY=minioadmin # PORT=8000 # External host port published by docker compose # ── SSL/HTTPS ────────────────────────────────────────────────── -# HTTPS works the same with docker-compose.yml and docker-compose.prod.yml: +# HTTPS configuration: # 1. SSL_CERTS_PATH is a host path mounted to /app/ssl inside the container # 2. 
SSL_CERT_FILE and SSL_KEY_FILE must be container paths under /app/ssl # diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8764169..da2301b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -3,10 +3,6 @@ name: CI on: pull_request: branches: [main, dev] - push: - branches: [main, dev] - tags: ["v*.*.*"] - merge_group: concurrency: group: ci-${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} @@ -17,33 +13,8 @@ permissions: env: PYTHON_VERSION: "3.11" - LOCAL_API_IMAGE_AMD64: code-interpreter:ci-amd64 - LOCAL_API_IMAGE_ARM64: code-interpreter:ci-arm64 jobs: - changes: - runs-on: ubuntu-latest - outputs: - runtime: ${{ steps.filter.outputs.runtime }} - container: ${{ steps.filter.outputs.container }} - steps: - - uses: actions/checkout@v4 - - - id: filter - uses: dorny/paths-filter@v3 - with: - filters: | - runtime: - - 'Dockerfile' - - 'docker/requirements/**' - container: - - 'Dockerfile' - - 'docker/**' - - 'src/**' - - 'dashboard/**' - - 'requirements.txt' - - 'docker-compose.yml' - static: runs-on: ubuntu-latest steps: @@ -91,48 +62,9 @@ jobs: pip install pytest pytest-asyncio pytest-cov pytest-mock - name: Run unit tests - run: | - mkdir -p test-results - pytest tests/unit/ --junitxml=test-results/unit.xml - - - name: Upload unit results - if: always() - uses: actions/upload-artifact@v4 - with: - name: unit-results - path: test-results/ - if-no-files-found: ignore - - integration-contract: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - - uses: actions/setup-python@v6 - with: - python-version: ${{ env.PYTHON_VERSION }} - cache: pip + run: pytest tests/unit/ - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install -r requirements.txt - pip install pytest pytest-asyncio pytest-cov pytest-mock - - - name: Run contract integration tests - run: | - mkdir -p test-results - pytest tests/integration/ -m contract_only 
--junitxml=test-results/integration-contract.xml - - - name: Upload contract integration results - if: always() - uses: actions/upload-artifact@v4 - with: - name: integration-contract-results - path: test-results/ - if-no-files-found: ignore - - integration-core: + integration: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 @@ -148,259 +80,5 @@ jobs: pip install -r requirements.txt pip install pytest pytest-asyncio pytest-cov pytest-mock - - name: Run core integration tests - run: | - mkdir -p test-results - pytest tests/integration/ -m "not contract_only" --junitxml=test-results/integration-core.xml - - - name: Upload core integration results - if: always() - uses: actions/upload-artifact@v4 - with: - name: integration-core-results - path: test-results/ - if-no-files-found: ignore - - build-app-amd64: - needs: [changes] - runs-on: ubuntu-24.04 - steps: - - uses: actions/checkout@v4 - - - uses: docker/setup-buildx-action@v3 - - - name: Build amd64 app candidate - run: | - docker buildx build \ - --load \ - --target app \ - --tag "${LOCAL_API_IMAGE_AMD64}" \ - --cache-from "type=gha,scope=app-amd64" \ - --cache-to "type=gha,scope=app-amd64,mode=max" \ - . - - - name: Inspect amd64 image - run: docker image inspect "${LOCAL_API_IMAGE_AMD64}" >/dev/null - - functional-smoke-amd64: - needs: [changes] - runs-on: ubuntu-24.04 - timeout-minutes: 45 - steps: - - uses: actions/checkout@v4 - - - uses: actions/setup-python@v6 - with: - python-version: ${{ env.PYTHON_VERSION }} - cache: pip - - - uses: docker/setup-buildx-action@v3 - - - name: Install test dependencies - run: | - python -m pip install --upgrade pip - pip install -r requirements.txt - pip install pytest pytest-asyncio pytest-cov pytest-mock - - - name: Build local amd64 test image - run: | - docker buildx build \ - --load \ - --target app \ - --tag "${LOCAL_API_IMAGE_AMD64}" \ - --cache-from "type=gha,scope=app-amd64" \ - --cache-to "type=gha,scope=app-amd64,mode=max" \ - . 
- - - name: Start live stack - env: - API_IMAGE: ${{ env.LOCAL_API_IMAGE_AMD64 }} - run: | - cp .env.example .env - docker compose up -d - - - name: Wait for API - run: | - if ! scripts/ci/wait_for_api.sh http://localhost:8000/health 24 5; then - docker compose logs --no-color api - exit 1 - fi - - - name: Run live smoke tests - env: - API_BASE: http://localhost:8000 - API_KEY: your-secure-api-key-here-change-this-in-production - run: | - mkdir -p test-results - pytest tests/functional/ \ - -m "live_api and not slow and not client_replay" \ - -v \ - --junitxml=test-results/functional-smoke-amd64.xml - - - name: Capture compose logs on failure - if: failure() - run: docker compose logs --no-color > compose-amd64.log - - - name: Upload functional smoke artifacts - if: always() - uses: actions/upload-artifact@v4 - with: - name: functional-smoke-amd64 - path: | - test-results/ - compose-amd64.log - if-no-files-found: ignore - - - name: Stop live stack - if: always() - run: docker compose down -v - - client-replay-amd64: - needs: [changes] - runs-on: ubuntu-24.04 - timeout-minutes: 45 - steps: - - uses: actions/checkout@v4 - - - uses: actions/setup-python@v6 - with: - python-version: ${{ env.PYTHON_VERSION }} - cache: pip - - - uses: docker/setup-buildx-action@v3 - - - name: Install test dependencies - run: | - python -m pip install --upgrade pip - pip install -r requirements.txt - pip install pytest pytest-asyncio pytest-cov pytest-mock - - - name: Build local amd64 replay image - run: | - docker buildx build \ - --load \ - --target app \ - --tag "${LOCAL_API_IMAGE_AMD64}" \ - --cache-from "type=gha,scope=app-amd64" \ - --cache-to "type=gha,scope=app-amd64,mode=max" \ - . - - - name: Start live stack - env: - API_IMAGE: ${{ env.LOCAL_API_IMAGE_AMD64 }} - run: | - cp .env.example .env - docker compose up -d - - - name: Wait for API - run: | - if ! 
scripts/ci/wait_for_api.sh http://localhost:8000/health 24 5; then - docker compose logs --no-color api - exit 1 - fi - - - name: Run client replay tests - env: - API_BASE: http://localhost:8000 - API_KEY: your-secure-api-key-here-change-this-in-production - run: | - mkdir -p test-results - pytest tests/functional/ \ - -m client_replay \ - -v \ - --junitxml=test-results/client-replay-amd64.xml - - - name: Capture compose logs on failure - if: failure() - run: docker compose logs --no-color > compose-client-replay.log - - - name: Upload client replay artifacts - if: always() - uses: actions/upload-artifact@v4 - with: - name: client-replay-amd64 - path: | - test-results/ - compose-client-replay.log - if-no-files-found: ignore - - - name: Stop live stack - if: always() - run: docker compose down -v - - arm64-smoke-conditional: - if: needs.changes.outputs.container == 'true' - needs: [changes] - runs-on: ubuntu-24.04-arm - timeout-minutes: 35 - steps: - - uses: actions/checkout@v4 - - - uses: actions/setup-python@v6 - with: - python-version: ${{ env.PYTHON_VERSION }} - cache: pip - - - uses: docker/setup-buildx-action@v3 - - - name: Install test dependencies - run: | - python -m pip install --upgrade pip - pip install -r requirements.txt - pip install pytest pytest-asyncio pytest-cov pytest-mock - - - name: Build local arm64 test image - run: | - docker buildx build \ - --load \ - --target app \ - --tag "${LOCAL_API_IMAGE_ARM64}" \ - --cache-from "type=gha,scope=app-arm64" \ - --cache-to "type=gha,scope=app-arm64,mode=max" \ - . - - - name: Start live stack - env: - API_IMAGE: ${{ env.LOCAL_API_IMAGE_ARM64 }} - run: | - cp .env.example .env - docker compose up -d - - - name: Wait for API - run: | - if ! 
scripts/ci/wait_for_api.sh http://localhost:8000/health 24 5; then - docker compose logs --no-color api - exit 1 - fi - - - name: Run arm64 smoke suite - env: - API_BASE: http://localhost:8000 - API_KEY: your-secure-api-key-here-change-this-in-production - run: | - mkdir -p test-results - pytest \ - tests/functional/test_health.py \ - tests/functional/test_exec_workflow.py::TestSessionWorkflow::test_execution_creates_session \ - tests/functional/test_files.py::TestFileUpload::test_upload_single_file \ - tests/functional/test_ptc.py::TestPTCInitialExecution::test_ptc_simple_code_completes \ - -v \ - --junitxml=test-results/arm64-smoke.xml - - - name: Capture compose logs on failure - if: failure() - run: docker compose logs --no-color > compose-arm64.log - - - name: Upload arm64 smoke artifacts - if: always() - uses: actions/upload-artifact@v4 - with: - name: arm64-smoke - path: | - test-results/ - compose-arm64.log - if-no-files-found: ignore - - - name: Stop live stack - if: always() - run: docker compose down -v + - name: Run integration tests + run: pytest tests/integration/ diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml deleted file mode 100644 index e598c08..0000000 --- a/.github/workflows/nightly.yml +++ /dev/null @@ -1,232 +0,0 @@ -name: Nightly Validation - -on: - schedule: - - cron: '23 4 * * *' - workflow_dispatch: - -concurrency: - group: nightly-${{ github.ref }} - cancel-in-progress: true - -permissions: - contents: read - -env: - PYTHON_VERSION: "3.11" - LOCAL_API_IMAGE_AMD64: code-interpreter:nightly-amd64 - LOCAL_API_IMAGE_ARM64: code-interpreter:nightly-arm64 - -jobs: - full-live-amd64: - runs-on: ubuntu-24.04 - timeout-minutes: 75 - steps: - - uses: actions/checkout@v4 - - - uses: actions/setup-python@v6 - with: - python-version: ${{ env.PYTHON_VERSION }} - cache: pip - - - uses: docker/setup-buildx-action@v3 - - - name: Install test dependencies - run: | - python -m pip install --upgrade pip - pip install -r 
requirements.txt - pip install pytest pytest-asyncio pytest-cov pytest-mock - - - name: Build nightly amd64 image - run: | - docker buildx build \ - --load \ - --target app \ - --tag "${LOCAL_API_IMAGE_AMD64}" \ - --cache-from "type=gha,scope=nightly-app-amd64" \ - --cache-to "type=gha,scope=nightly-app-amd64,mode=max" \ - . - - - name: Start nightly stack - env: - API_IMAGE: ${{ env.LOCAL_API_IMAGE_AMD64 }} - run: | - cp .env.example .env - docker compose up -d - - - name: Wait for API - run: | - if ! scripts/ci/wait_for_api.sh http://localhost:8000/health 24 5; then - docker compose logs --no-color api - exit 1 - fi - - - name: Run full live functional suite - env: - API_BASE: http://localhost:8000 - API_KEY: your-secure-api-key-here-change-this-in-production - run: | - mkdir -p test-results - pytest tests/functional/ -m live_api -v --junitxml=test-results/nightly-full-live-amd64.xml - - - name: Capture compose logs on failure - if: failure() - run: docker compose logs --no-color > nightly-full-amd64.log - - - name: Upload nightly full artifacts - if: always() - uses: actions/upload-artifact@v4 - with: - name: nightly-full-live-amd64 - path: | - test-results/ - nightly-full-amd64.log - if-no-files-found: ignore - - - name: Stop nightly stack - if: always() - run: docker compose down -v - - slow-live-amd64: - runs-on: ubuntu-24.04 - timeout-minutes: 75 - steps: - - uses: actions/checkout@v4 - - - uses: actions/setup-python@v6 - with: - python-version: ${{ env.PYTHON_VERSION }} - cache: pip - - - uses: docker/setup-buildx-action@v3 - - - name: Install test dependencies - run: | - python -m pip install --upgrade pip - pip install -r requirements.txt - pip install pytest pytest-asyncio pytest-cov pytest-mock - - - name: Build nightly amd64 image - run: | - docker buildx build \ - --load \ - --target app \ - --tag "${LOCAL_API_IMAGE_AMD64}" \ - --cache-from "type=gha,scope=nightly-app-amd64" \ - --cache-to "type=gha,scope=nightly-app-amd64,mode=max" \ - . 
- - - name: Start nightly stack - env: - API_IMAGE: ${{ env.LOCAL_API_IMAGE_AMD64 }} - run: | - cp .env.example .env - docker compose up -d - - - name: Wait for API - run: | - if ! scripts/ci/wait_for_api.sh http://localhost:8000/health 24 5; then - docker compose logs --no-color api - exit 1 - fi - - - name: Run slow live suite - env: - API_BASE: http://localhost:8000 - API_KEY: your-secure-api-key-here-change-this-in-production - run: | - mkdir -p test-results - pytest tests/functional/ -m slow -v --junitxml=test-results/nightly-slow-live-amd64.xml - - - name: Capture compose logs on failure - if: failure() - run: docker compose logs --no-color > nightly-slow-amd64.log - - - name: Upload nightly slow artifacts - if: always() - uses: actions/upload-artifact@v4 - with: - name: nightly-slow-live-amd64 - path: | - test-results/ - nightly-slow-amd64.log - if-no-files-found: ignore - - - name: Stop nightly stack - if: always() - run: docker compose down -v - - arm64-smoke: - runs-on: ubuntu-24.04-arm - timeout-minutes: 40 - steps: - - uses: actions/checkout@v4 - - - uses: actions/setup-python@v6 - with: - python-version: ${{ env.PYTHON_VERSION }} - cache: pip - - - uses: docker/setup-buildx-action@v3 - - - name: Install test dependencies - run: | - python -m pip install --upgrade pip - pip install -r requirements.txt - pip install pytest pytest-asyncio pytest-cov pytest-mock - - - name: Build nightly arm64 image - run: | - docker buildx build \ - --load \ - --target app \ - --tag "${LOCAL_API_IMAGE_ARM64}" \ - --cache-from "type=gha,scope=nightly-app-arm64" \ - --cache-to "type=gha,scope=nightly-app-arm64,mode=max" \ - . - - - name: Start nightly arm64 stack - env: - API_IMAGE: ${{ env.LOCAL_API_IMAGE_ARM64 }} - run: | - cp .env.example .env - docker compose up -d - - - name: Wait for API - run: | - if ! 
scripts/ci/wait_for_api.sh http://localhost:8000/health 24 5; then - docker compose logs --no-color api - exit 1 - fi - - - name: Run nightly arm64 smoke suite - env: - API_BASE: http://localhost:8000 - API_KEY: your-secure-api-key-here-change-this-in-production - run: | - mkdir -p test-results - pytest \ - tests/functional/test_health.py \ - tests/functional/test_exec_workflow.py::TestSessionWorkflow::test_execution_creates_session \ - tests/functional/test_files.py::TestFileUpload::test_upload_single_file \ - tests/functional/test_ptc.py::TestPTCInitialExecution::test_ptc_simple_code_completes \ - -v \ - --junitxml=test-results/nightly-arm64-smoke.xml - - - name: Capture compose logs on failure - if: failure() - run: docker compose logs --no-color > nightly-arm64.log - - - name: Upload nightly arm64 artifacts - if: always() - uses: actions/upload-artifact@v4 - with: - name: nightly-arm64-smoke - path: | - test-results/ - nightly-arm64.log - if-no-files-found: ignore - - - name: Stop nightly arm64 stack - if: always() - run: docker compose down -v diff --git a/AGENTS.md b/AGENTS.md deleted file mode 100644 index c5778ec..0000000 --- a/AGENTS.md +++ /dev/null @@ -1,43 +0,0 @@ -# Repository Guidelines - -## Project Structure & Module Organization -Core application code lives in `src/`. Use `src/api/` for FastAPI routes, `src/services/` for orchestration and business logic, `src/services/sandbox/` and `src/services/container/` for execution backends, `src/models/` for request/response models, and `src/config/` for environment-driven settings. Supporting docs are in `docs/`, dashboard assets in `dashboard/`, container/runtime files in `docker/`, and helper scripts in `scripts/`. - -Tests are split by scope: `tests/unit/` for isolated service logic, `tests/integration/` for API and dependency-backed flows, `tests/functional/` for live endpoint testing, and `tests/snapshots/` for stored response fixtures. 
- -## Build, Test, and Development Commands -Set up a local environment with: - -```bash -python -m venv .venv -source .venv/bin/activate -pip install -r requirements.txt -cp .env.example .env -``` - -Run locally with `uvicorn src.main:app --reload`. Start required services with `docker compose up -d`, and build the sandbox image with `docker build -t code-interpreter:nsjail .`. - -Key verification commands: - -```bash -pytest tests/unit/ -pytest tests/integration/ -pytest tests/functional/ -v -pytest --cov=src tests/ -black src/ --check -flake8 src/ -mypy src/ -bandit -r src/ -s B104,B108 --severity-level high -``` - -## Coding Style & Naming Conventions -Target Python 3.11+ with 4-space indentation, explicit type hints, and small async-friendly service boundaries. Follow Black formatting and keep code Flake8- and MyPy-clean. Use `snake_case` for modules, functions, and variables; `PascalCase` for classes and Pydantic models; and `UPPER_SNAKE_CASE` for constants and env names. - -## Testing Guidelines -Pytest, `pytest-asyncio`, and `pytest-cov` are the standard tools. Name files `test_*.py` and mirror the component under test where practical, for example `tests/unit/test_session_service.py`. Add unit coverage for new logic first, then integration coverage for endpoint or storage changes. Functional tests use `API_BASE`, `API_KEY`, and `API_TIMEOUT`; keep them stable against a real running API. - -## Commit & Pull Request Guidelines -Recent history uses short imperative subjects with prefixes such as `fix:`, `docs:`, `chore(...)`, and `feat:`. Keep the first line under 72 characters and reference issues in the body when relevant. Pull requests should explain behavior changes, note config or API contract impacts, and include the commands you ran. Add screenshots when changing the admin dashboard or other visible UI. - -## Security & Configuration Tips -Never commit populated `.env` files, API keys, or storage credentials. 
Use `.env.example` as the template, and review `docs/CONFIGURATION.md` and `docs/SECURITY.md` before changing auth, sandboxing, Redis, or MinIO behavior. diff --git a/Dockerfile b/Dockerfile index 2b82c15..f9e68ae 100644 --- a/Dockerfile +++ b/Dockerfile @@ -26,6 +26,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ flex bison \ curl wget ca-certificates gnupg software-properties-common \ libssl-dev libffi-dev libxml2-dev libxslt-dev zlib1g-dev \ + jq iptables \ && rm -rf /var/lib/apt/lists/* RUN git clone https://github.com/google/nsjail.git /tmp/nsjail && \ @@ -51,6 +52,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ portaudio19-dev flac ffmpeg \ libpulse-dev libsdl2-dev libsdl2-mixer-dev libsdl2-image-dev libsdl2-ttf-dev \ antiword unrtf \ + libreoffice-impress libreoffice-writer libreoffice-calc libreoffice-common \ && rm -rf /var/lib/apt/lists/* COPY docker/requirements/python-core.txt /tmp/python-core.txt @@ -231,7 +233,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ # ============================================ RUN mkdir -p /var/lib/code-interpreter/sandboxes && \ mkdir -p /mnt/data && \ - mkdir -p /tmp/empty_proc + mkdir -p /var/lib/code-interpreter/empty_proc RUN groupadd -g 1001 codeuser && \ useradd -u 1001 -g codeuser -m codeuser && \ @@ -276,8 +278,9 @@ WORKDIR /app # Keep the application layer thin so app-only changes do not invalidate runtime stages. 
COPY docker/repl_server.py /opt/repl_server.py COPY docker/ptc_server.py /opt/ptc_server.py +COPY docker/ptc_bash_server.py /opt/ptc_bash_server.py COPY docker/entrypoint.sh /opt/entrypoint.sh -RUN chmod +x /opt/repl_server.py /opt/ptc_server.py /opt/entrypoint.sh +RUN chmod +x /opt/repl_server.py /opt/ptc_server.py /opt/ptc_bash_server.py /opt/entrypoint.sh COPY requirements.txt /tmp/requirements.txt RUN --mount=type=cache,target=/root/.cache/pip \ diff --git a/README.md b/README.md index 89957a7..a42c15c 100644 --- a/README.md +++ b/README.md @@ -24,18 +24,17 @@ Most users should run the published Docker image from GHCR. You do not need to b # The default settings work out-of-the-box for local usage ``` -3. **Pull and start the published stack** +3. **Pull and start the stack** ```bash - docker compose -f docker-compose.prod.yml pull - docker compose -f docker-compose.prod.yml up -d + docker compose pull + docker compose up -d ``` By default this uses `ghcr.io/usnavy13/librecodeinterpreter:main`. To pin a different published tag: ```bash - API_IMAGE=ghcr.io/usnavy13/librecodeinterpreter:<tag> \ - docker compose -f docker-compose.prod.yml up -d + API_IMAGE=ghcr.io/usnavy13/librecodeinterpreter:<tag> docker compose up -d ``` 4. **Verify the API** @@ -47,25 +46,25 @@ Most users should run the published Docker image from GHCR. You do not need to b The API will be available at `http://localhost:8000`. Visit `http://localhost:8000/docs` for the interactive API documentation. -To enable HTTPS with either compose file, set `PORT`, `ENABLE_HTTPS`, `SSL_CERTS_PATH`, `SSL_CERT_FILE`, and `SSL_KEY_FILE` in `.env`. `SSL_CERTS_PATH` is the host path mounted into the container at `/app/ssl`, while `SSL_CERT_FILE` and `SSL_KEY_FILE` must point to the certificate files inside the container. See [docs/CONFIGURATION.md](docs/CONFIGURATION.md#sslhttps-configuration). +To enable HTTPS, set `PORT`, `ENABLE_HTTPS`, `SSL_CERTS_PATH`, `SSL_CERT_FILE`, and `SSL_KEY_FILE` in `.env`. 
`SSL_CERTS_PATH` is the host path mounted into the container at `/app/ssl`, while `SSL_CERT_FILE` and `SSL_KEY_FILE` must point to the certificate files inside the container. See [docs/CONFIGURATION.md](docs/CONFIGURATION.md#sslhttps-configuration). ### Common Consumer Commands ```bash # View API logs -docker compose -f docker-compose.prod.yml logs -f api +docker compose logs -f api # Stop the stack -docker compose -f docker-compose.prod.yml down +docker compose down # Update to the latest published image -docker compose -f docker-compose.prod.yml pull -docker compose -f docker-compose.prod.yml up -d +docker compose pull +docker compose up -d ``` ### Published Image Channels -The project now publishes two app-image channels: +The project publishes two app-image channels: - `ghcr.io/usnavy13/librecodeinterpreter` - stable branch tags: `main`, `latest` @@ -74,15 +73,15 @@ The project now publishes two app-image channels: - development branch tags: `dev`, `latest` - immutable build tags: `sha-` -`docker-compose.prod.yml` stays pinned to the stable package by default: +`docker-compose.yml` is pinned to the stable package by default: ```yaml -image: ghcr.io/usnavy13/librecodeinterpreter:main +image: ${API_IMAGE:-ghcr.io/usnavy13/librecodeinterpreter:main} ``` ### Use A Local Override File -If you want to pull the current `dev` image or build from your working tree without changing tracked compose files: +If you want to pull the current `dev` image or build from your working tree without changing tracked compose files, use a local override. Compose auto-merges `docker-compose.override.yml` on top of `docker-compose.yml`, so no extra `-f` flags are needed. 1. Copy the example override: @@ -90,29 +89,26 @@ If you want to pull the current `dev` image or build from your working tree with cp docker-compose.override.example.yml docker-compose.override.yml ``` -2. Use it with the production compose stack: +2. 
Bring the stack up: ```bash - docker compose -f docker-compose.prod.yml -f docker-compose.override.yml pull - docker compose -f docker-compose.prod.yml -f docker-compose.override.yml up -d + docker compose pull + docker compose up -d ``` -The checked-in example defaults to `ghcr.io/usnavy13/librecodeinterpreter-dev:latest`. -If you want to build from your local checkout instead, edit `docker-compose.override.yml` -and switch to the commented `build:` block in the example. In that case, skip the -`pull` step and run: +The checked-in example defaults to `ghcr.io/usnavy13/librecodeinterpreter-dev:latest`. To build from your local checkout instead, edit `docker-compose.override.yml` and switch to the commented `build:` block. In that case, skip `pull` and run: ```bash -docker compose -f docker-compose.prod.yml -f docker-compose.override.yml up --build -d +docker compose up --build -d ``` ## Build From Source -If you are developing locally or need to customize the image, use the source-backed workflow instead: +If you are developing locally or need to customize the image: ```bash docker build --target app -t code-interpreter:nsjail . -docker compose up -d +API_IMAGE=code-interpreter:nsjail docker compose up -d ``` The Dockerfile keeps `runtime-core` and `runtime-r` as internal build stages, but only the unified `app` image is published for deployment. @@ -125,7 +121,7 @@ A built-in admin dashboard is available at `http://localhost:8000/admin-dashboar - **Overview**: Real-time execution metrics, success rates, and performance graphs - **API Keys**: Create, view, and manage API keys with rate limiting -- **System Health**: Monitor Redis, MinIO, and sandbox pool status +- **System Health**: Monitor Redis, S3 storage, and sandbox pool status The dashboard requires the master API key for authentication. @@ -139,9 +135,9 @@ The dashboard requires the master API key for authentication. 
- **Programmatic Tool Calling (PTC)**: Enables AI agents to execute code that invokes external tools mid-execution via `POST /exec/programmatic`, with multi-round continuation support - **File Management**: Upload, download, and manage files within execution sessions - **Session Management**: Redis-based session handling with automatic cleanup -- **S3-Compatible Storage**: MinIO integration for persistent file storage +- **S3-Compatible Storage**: Garage (S3-compatible) integration for persistent file storage - **Authentication**: API key-based authentication for secure access -- **HTTPS/SSL Support**: Optional in-container SSL/TLS termination for both compose workflows +- **HTTPS/SSL Support**: Optional in-container SSL/TLS termination - **Health Monitoring**: Comprehensive health check endpoints for all dependencies - **Metrics Collection**: Execution and API metrics for monitoring and debugging - **Unicode Support**: Full Unicode filename support in file downloads @@ -187,7 +183,7 @@ The service is highly configurable via environment variables. | Category | Description | | ------------- | ------------------------------------------- | | **API** | Host, port, and security settings. | -| **Storage** | Redis and MinIO/S3 connection details. | +| **Storage** | Redis and S3 (Garage / MinIO / AWS) connection details. | | **Resources** | Per-execution memory, CPU, and time limits. | | **Pools** | Sandbox pool sizing and warmup settings. | @@ -207,11 +203,10 @@ For comprehensive testing details, see [TESTING.md](docs/TESTING.md). 
## CI/CD -GitHub Actions is split into three workflows: +GitHub Actions is split into two workflows: -- `ci.yml`: PR validation and required checks -- `release.yml`: publish multi-arch app images for `main`, `dev`, and release tags -- `nightly.yml`: build the app image locally and run slow/full live validation +- `ci.yml`: PR validation — static analysis (flake8, black, mypy, bandit), unit tests, and integration tests +- `release.yml`: publishes multi-arch app images for `main`, `dev`, and release tags Published images use native `amd64` and `arm64` builds and are exposed as separate stable and dev GHCR packages. diff --git a/Reference/Behavior.md b/Reference/Behavior.md deleted file mode 100644 index dbbfaee..0000000 --- a/Reference/Behavior.md +++ /dev/null @@ -1,318 +0,0 @@ -## **COMPREHENSIVE BASELINE REFERENCE FOR CODE EXECUTION TOOLS** - -Based on extensive testing, here's the complete baseline reference for the code execution tools behavior: - ---- - -## **1. ENVIRONMENT SPECIFICATIONS** - -### **System Environment** - -- **OS**: Linux 6.8.0-47-generic x86_64 -- **Python**: 3.12.0 (CPython) -- **Working Directory**: `/mnt/data` (full read/write/execute permissions) -- **User Context**: UID/GID 60342 (containerized/sandboxed environment) - -### **Available Packages** - -- ✅ **Available**: All Python standard library, numpy, pandas, matplotlib (with Agg backend), csv, json, sqlite3, pathlib, datetime, collections, etc. -- ❌ **Missing**: requests, psutil, network-related packages - -### **Resource Limits** - -- **Memory**: Can allocate 50MB+ without issues (no hard limit discovered) -- **CPU**: No apparent execution timeout for reasonable computation -- **Recursion**: Standard Python limit (1000 stack frames) -- **File Creation**: Can create hundreds of files without issues - ---- - -## **2. 
SESSION PERSISTENCE BEHAVIOR** - -### **Variables & Code State** ❌ **NO PERSISTENCE** - -- Variables, functions, classes, and imports **DO NOT** persist between executions -- Each execution starts with a clean Python interpreter -- Custom imports and module state is reset every time - -### **File Persistence** ✅ **COMPLEX PERSISTENCE** - -#### **User Uploaded Files** - -- ✅ **Persist with original names** (e.g., `summary_sheet.csv`) -- ✅ **Remain directly accessible** across all sessions -- ✅ **Can be modified** despite "read-only" warnings - -#### **Generated Files** - -- ✅ **Persist but with encoding behavior**: - - **Immediate access**: Files can be accessed by original name in same execution - - **Cross-session access**: Files become base64-encoded names - - **Encoding pattern**: `original_name.ext` → `base64(filename)` (e.g., `ZXhlY3V0aW9uXzJfZmlsZS50eHQ=`) -- ✅ **Content preserved** exactly -- ✅ **Can be modified** despite system claiming "read-only" - -#### **Special File Behaviors** - -- Files with special characters (spaces, symbols) maintain original names -- Some files get prefixed with session identifiers (e.g., `TzlwSVktR1lwX__session_test_file.txt`) -- User uploaded files maintain stability across all sessions - ---- - -## **3. 
SECURITY & LIMITATIONS** - -### **Network Access** ❌ **COMPLETELY BLOCKED** - -- No DNS resolution -- No HTTP/HTTPS access -- No socket connections to external hosts - -### **File System Access** - -- ✅ **Full access** to `/mnt/data` (working directory) -- ✅ **Read access** to `/etc/passwd`, `/usr`, `/bin` -- ❌ **No access** to `/root`, `/etc/shadow` -- ✅ **Basic system info** accessible (`/proc` limited) - -### **System Commands** - -- ✅ **Available**: `ls`, `pwd`, `cat`, `grep`, `find`, `whoami`, `id`, `ps`, `uname` -- ✅ **subprocess** module works for system commands -- ❌ **No network tools**: `curl`, `wget` unavailable - -### **Process Isolation** - -- Running in containerized environment -- Limited access to system processes -- Cannot access parent process information beyond basic PID - ---- - -## **4. FILE MANAGEMENT DETAILED BEHAVIOR** - -### **Session ID Usage** - -- **Purpose**: Loads previously created files into current working directory -- **Loading behavior**: Files appear with their encoded names -- **Access pattern**: Can read both original and encoded filenames - -### **File Creation Workflow** - -1. **Same execution**: File accessible by original name -2. **Next session with session_id**: File appears as base64-encoded name -3. **User uploaded files**: Always accessible by original name - -### **File Modification** - -- Despite "read-only" warnings, files **CAN** be modified -- Modifications persist across sessions -- New files can be created in any session - -### **File Naming Conventions** - -- **User uploads**: Keep original names (`summary_sheet.csv`) -- **Generated files**: Become base64 encoded (`ZXhlY3V0aW9uXzJfZmlsZS50eHQ=` = `execution_2_file.txt`) -- **Special names**: Files with spaces/symbols often keep original names - ---- - -## **5. CRITICAL IMPLEMENTATION NOTES** - -### **For Drop-in Replacement Development** - -1. **Session Persistence**: Must implement file persistence but NOT variable persistence -2. 
**File Encoding**: Need base64 encoding system for generated files -3. **User Upload Handling**: User uploaded files need special handling to maintain original names -4. **Read-only Claims**: System claims files are read-only but actually allows modification -5. **Network Isolation**: Must block all network access completely -6. **Environment Reset**: Each execution must start with clean Python interpreter -7. **Working Directory**: Must use `/mnt/data` as working directory -8. **Session ID Mechanism**: Must implement session file loading with proper encoding/decoding - -### **Key Behavioral Quirks** - -- Files are claimed to be "read-only" but are actually writable -- Variable state is completely ephemeral, but file state is persistent -- User uploaded files have privileged status (no encoding) -- Base64 encoding is applied inconsistently (some files escape it) -- Session IDs are required to access previously generated files - -This baseline reference provides a comprehensive understanding of the code execution environment behavior that your drop-in replacement will need to replicate exactly. - -# **ADDENDUM: SESSION HANDLING LOGIC REFERENCE** - -## **For Server-Side Code Execution Service Implementation** - ---- - -## **OVERVIEW** - -This addendum provides technical specifications for implementing session handling logic in a drop-in replacement code execution service, based on analysis of the LibreChat client-side implementation expectations. - ---- - -## **1. 
SESSION LIFECYCLE MANAGEMENT** - -### **Session Creation** - -- **Trigger**: First code execution request with no existing file context -- **Requirement**: Server must generate unique session identifier -- **Format**: Alphanumeric string (observed: `sess_abc123` pattern) -- **Persistence**: Session must persist until explicit expiration - -### **Session Validation** - -- **Client Behavior**: Calls `GET /files/{session_id}?detail=summary` before reusing session -- **Required Response**: - ```json - [ - { - "name": "path/to/file", - "lastModified": "ISO-8601-timestamp" - } - ] - ``` -- **Validation Logic**: Client checks `lastModified` timestamp to determine session viability - -### **Session Expiration Handling** - -- **Client Expectation**: Expired sessions trigger file re-upload to new session -- **Implementation**: Server should expire sessions after reasonable inactivity period -- **File Recovery**: Support file uploads to restore previous state in new sessions - ---- - -## **2. FILE PERSISTENCE ARCHITECTURE** - -### **File Identifier Format** - -``` -{session_id}/{file_id}[?entity_id={entity_id}] -``` - -### **File Upload Endpoint** - -```http -POST /upload -Headers: - - X-API-Key: {api_key} - - User-Id: {user_id} - - User-Agent: LibreChat/1.0 - - Content-Type: multipart/form-data - -Body: - - file: {file_stream} - - entity_id: {optional_entity_id} -``` - -**Required Response:** - -```json -{ - "message": "success", - "session_id": "sess_abc123", - "files": [ - { - "fileId": "file_789", - "filename": "data.csv" - } - ] -} -``` - -### **File Download Endpoint** - -```http -GET /download/{session_id}/{file_id} -Headers: - - X-API-Key: {api_key} - - User-Agent: LibreChat/1.0 - -Response: File binary data -``` - ---- - -## **3. 
SESSION STATE MANAGEMENT** - -### **Variable State** - -- **Reset Policy**: All Python variables/imports reset between executions -- **No Persistence**: Each execution starts with clean interpreter state -- **Client Expectation**: Matches baseline reference behavior exactly - -### **File State** - -- **Persistence Required**: Files must survive between executions within session -- **Working Directory**: Files accessible at `/mnt/data/{filename}` -- **Encoding Behavior**: Implement base64 encoding for generated files (per baseline reference) - -### **Session Context Sharing** - -```javascript -// Client sends file context for session reuse: -{ - files: [ - { - id: "file_789", - session_id: "sess_abc123", - name: "data.csv", - }, - ]; -} -``` - ---- - -## **4. CROSS-SESSION FILE MANAGEMENT** - -### **File Re-upload Logic** - -When session expires, client will: - -1. **Download** file from local storage via `getDownloadStream()` -2. **Re-upload** to new session via `uploadCodeEnvFile()` -3. **Update** database with new `{session_id}/{file_id}` identifier - -### **Implementation Requirements** - -- Support file uploads from multiple expired sessions into single new session -- Maintain file naming consistency across session boundaries -- Handle entity_id parameter for file grouping/ownership - ---- - -## **5. USER ISOLATION & SECURITY** - -### **User Identification** - -- **Header**: `User-Id: {user_id}` on all requests -- **Isolation**: Sessions must be scoped to specific users -- **Security**: Prevent cross-user file access - -### **Authentication** - -- **Method**: `X-API-Key` header validation -- **Requirement**: Consistent with `LIBRECHAT_CODE_API_KEY` configuration - ---- - -## **6. 
ERROR HANDLING & EDGE CASES** - -### **Session Not Found** - -- **Scenario**: Client references non-existent session_id -- **Response**: 404 with appropriate error message -- **Client Behavior**: Triggers file re-upload workflow - -### **File Not Found** - -- **Scenario**: Missing file in session -- **Client Behavior**: Re-uploads file from local storage -- **Server Response**: Accept uploads to restore missing files - -### **Invalid Session State** - -- **Scenario**: Corrupted or partially expired session -- **Handling**: Graceful degradation with file recovery options diff --git a/Reference/codeInterpereterAPI spec.json b/Reference/codeInterpereterAPI spec.json deleted file mode 100644 index cf67c9d..0000000 --- a/Reference/codeInterpereterAPI spec.json +++ /dev/null @@ -1,462 +0,0 @@ -{ - "openapi": "3.0.0", - "info": { - "title": "LibreChat Code Interpreter API", - "version": "1.0.0", - "description": "API for sandbox code execution and file management" - }, - "servers": [ - { - "url": "https://api.librechat.ai/v1", - "description": "Production server" - } - ], - "security": [ - { - "ApiKeyAuth": [] - } - ], - "components": { - "securitySchemes": { - "ApiKeyAuth": { - "type": "apiKey", - "in": "header", - "name": "x-api-key" - } - }, - "schemas": { - "FileRef": { - "type": "object", - "properties": { - "id": { - "type": "string" - }, - "name": { - "type": "string" - }, - "path": { - "type": "string" - } - } - }, - "RequestFile": { - "type": "object", - "properties": { - "id": { - "type": "string" - }, - "session_id": { - "type": "string" - }, - "name": { - "type": "string" - } - }, - "required": ["id", "session_id", "name"] - }, - "ExecuteResponse": { - "type": "object", - "properties": { - "run": { - "type": "object", - "properties": { - "stdout": { - "type": "string" - }, - "stderr": { - "type": "string" - }, - "code": { - "type": "integer", - "nullable": true - }, - "signal": { - "type": "string", - "nullable": true - }, - "output": { - "type": "string" - }, 
- "memory": { - "type": "integer", - "nullable": true - }, - "message": { - "type": "string", - "nullable": true - }, - "status": { - "type": "string", - "nullable": true - }, - "cpu_time": { - "type": "number", - "nullable": true - }, - "wall_time": { - "type": "number", - "nullable": true - } - } - }, - "language": { - "type": "string" - }, - "version": { - "type": "string" - }, - "session_id": { - "type": "string" - }, - "files": { - "type": "array", - "items": { - "$ref": "#/components/schemas/FileRef" - } - } - } - }, - "RequestBody": { - "type": "object", - "required": ["code", "lang"], - "properties": { - "code": { - "type": "string", - "description": "The source code to be executed" - }, - "lang": { - "type": "string", - "description": "The programming language of the code", - "enum": [ - "c", - "cpp", - "d", - "f90", - "go", - "java", - "js", - "php", - "py", - "rs", - "ts", - "r" - ], - "example": "py" - }, - "args": { - "type": "string", - "description": "Optional command line arguments to pass to the program" - }, - "user_id": { - "type": "string", - "description": "Optional user identifier" - }, - "entity_id": { - "type": "string", - "description": "Optional assistant/agent identifier for file sharing and reference. 
Must be a valid nanoid-compatible string.", - "maxLength": 40, - "pattern": "^[A-Za-z0-9_-]+$", - "example": "asst_axIyVEqAa3UVppsVP3WTl5So" - }, - "files": { - "type": "array", - "description": "Array of file references to be used during execution", - "items": { - "$ref": "#/components/schemas/RequestFile" - } - } - } - }, - "FileObject": { - "type": "object", - "properties": { - "name": { - "type": "string" - }, - "id": { - "type": "string" - }, - "session_id": { - "type": "string" - }, - "content": { - "type": "string" - }, - "size": { - "type": "number" - }, - "lastModified": { - "type": "string" - }, - "etag": { - "type": "string" - }, - "metadata": { - "type": "object", - "properties": { - "content-type": { - "type": "string" - }, - "original-filename": { - "type": "string" - } - } - }, - "contentType": { - "type": "string" - } - } - }, - "UploadResponse": { - "type": "object", - "properties": { - "message": { - "type": "string" - }, - "session_id": { - "type": "string" - }, - "files": { - "type": "array", - "items": { - "$ref": "#/components/schemas/FileObject" - } - } - } - }, - "Error": { - "type": "object", - "properties": { - "error": { - "type": "string" - }, - "details": { - "type": "string" - } - } - } - } - }, - "paths": { - "/exec": { - "post": { - "summary": "Execute code", - "description": "Execute code with specified language and parameters", - "operationId": "executeCode", - "requestBody": { - "required": true, - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/RequestBody" - } - } - } - }, - "responses": { - "200": { - "description": "Successful execution", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/ExecuteResponse" - } - } - } - }, - "401": { - "description": "Unauthorized", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/Error" - } - } - } - }, - "503": { - "description": "Service unavailable", - "content": { - "application/json": 
{ - "schema": { - "$ref": "#/components/schemas/Error" - } - } - } - } - } - } - }, - "/download/{session_id}/{fileId}": { - "get": { - "summary": "Download a file", - "parameters": [ - { - "name": "session_id", - "in": "path", - "required": true, - "schema": { - "type": "string" - } - }, - { - "name": "fileId", - "in": "path", - "required": true, - "schema": { - "type": "string" - } - } - ], - "responses": { - "200": { - "description": "File content", - "content": { - "application/octet-stream": { - "schema": { - "type": "string", - "format": "binary" - } - } - } - }, - "404": { - "description": "File not found", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/Error" - } - } - } - } - } - } - }, - "/upload": { - "post": { - "summary": "Upload files", - "requestBody": { - "required": true, - "content": { - "multipart/form-data": { - "schema": { - "type": "object", - "properties": { - "entity_id": { - "type": "string" - }, - "files": { - "type": "array", - "items": { - "type": "string", - "format": "binary" - } - } - } - } - } - } - }, - "responses": { - "200": { - "description": "Successful upload", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/UploadResponse" - } - } - } - }, - "413": { - "description": "File size limit exceeded", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/Error" - } - } - } - } - } - } - }, - "/files/{session_id}": { - "get": { - "summary": "Get files information", - "parameters": [ - { - "name": "session_id", - "in": "path", - "required": true, - "schema": { - "type": "string" - } - }, - { - "name": "detail", - "in": "query", - "schema": { - "type": "string", - "default": "simple" - } - } - ], - "responses": { - "200": { - "description": "Files information", - "content": { - "application/json": { - "schema": { - "type": "array", - "items": { - "$ref": "#/components/schemas/FileObject" - } - } - } - } - } - } - } - }, - 
"/files/{session_id}/{fileId}": { - "delete": { - "summary": "Delete a file", - "parameters": [ - { - "name": "session_id", - "in": "path", - "required": true, - "schema": { - "type": "string" - } - }, - { - "name": "fileId", - "in": "path", - "required": true, - "schema": { - "type": "string" - } - } - ], - "responses": { - "200": { - "description": "File deleted successfully" - }, - "500": { - "description": "Error deleting file", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/Error" - } - } - } - } - } - } - } - } -} diff --git a/Reference/openapi-generated.json b/Reference/openapi-generated.json deleted file mode 100644 index 82ff5cd..0000000 --- a/Reference/openapi-generated.json +++ /dev/null @@ -1,782 +0,0 @@ -{ - "openapi": "3.1.0", - "info": { - "title": "Code Interpreter API", - "description": "A secure API for executing code in isolated environments", - "version": "1.0.0" - }, - "paths": { - "/health": { - "get": { - "tags": ["health", "monitoring"], - "summary": "Basic health check", - "description": "Basic health check endpoint that doesn't require authentication.", - "operationId": "basic_health_check_health_get", - "responses": { - "200": { - "description": "Successful Response", - "content": { - "application/json": { - "schema": {} - } - } - } - } - } - }, - "/config": { - "get": { - "summary": "Config Info", - "description": "Configuration information endpoint (non-sensitive data only).", - "operationId": "config_info_config_get", - "responses": { - "200": { - "description": "Successful Response", - "content": { - "application/json": { - "schema": {} - } - } - } - } - } - }, - "/upload": { - "post": { - "tags": ["files"], - "summary": "Upload File", - "description": "Upload files with multipart form handling - LibreChat compatible.\n\nAccepts files in either 'file' (singular) or 'files' (plural) field names.\nLibreChat uses 'file' while our tests use 'files'.", - "operationId": "upload_file_upload_post", - 
"requestBody": { - "content": { - "multipart/form-data": { - "schema": { - "$ref": "#/components/schemas/Body_upload_file_upload_post" - } - } - } - }, - "responses": { - "200": { - "description": "Successful Response", - "content": { - "application/json": { - "schema": {} - } - } - }, - "422": { - "description": "Validation Error", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/HTTPValidationError" - } - } - } - } - } - } - }, - "/files/{session_id}": { - "get": { - "tags": ["files"], - "summary": "List Files", - "description": "List all files in a session with optional detail parameter - LibreChat compatible.", - "operationId": "list_files_files__session_id__get", - "parameters": [ - { - "name": "session_id", - "in": "path", - "required": true, - "schema": { - "type": "string", - "title": "Session Id" - } - }, - { - "name": "detail", - "in": "query", - "required": false, - "schema": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "description": "Detail level: 'simple' for basic info, otherwise full details", - "title": "Detail" - }, - "description": "Detail level: 'simple' for basic info, otherwise full details" - } - ], - "responses": { - "200": { - "description": "Successful Response", - "content": { - "application/json": { - "schema": {} - } - } - }, - "422": { - "description": "Validation Error", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/HTTPValidationError" - } - } - } - } - } - } - }, - "/download/{session_id}/{file_id}": { - "get": { - "tags": ["files"], - "summary": "Download File", - "description": "Download a file directly - LibreChat compatible.", - "operationId": "download_file_download__session_id___file_id__get", - "parameters": [ - { - "name": "session_id", - "in": "path", - "required": true, - "schema": { - "type": "string", - "title": "Session Id" - } - }, - { - "name": "file_id", - "in": "path", - "required": true, - "schema": { - "type": 
"string", - "title": "File Id" - } - } - ], - "responses": { - "200": { - "description": "Successful Response", - "content": { - "application/json": { - "schema": {} - } - } - }, - "422": { - "description": "Validation Error", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/HTTPValidationError" - } - } - } - } - } - }, - "options": { - "tags": ["files"], - "summary": "Download File Options", - "description": "Handle OPTIONS preflight request for download endpoint.", - "operationId": "download_file_options_download__session_id___file_id__options", - "parameters": [ - { - "name": "session_id", - "in": "path", - "required": true, - "schema": { - "type": "string", - "title": "Session Id" - } - }, - { - "name": "file_id", - "in": "path", - "required": true, - "schema": { - "type": "string", - "title": "File Id" - } - } - ], - "responses": { - "200": { - "description": "Successful Response", - "content": { - "application/json": { - "schema": {} - } - } - }, - "422": { - "description": "Validation Error", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/HTTPValidationError" - } - } - } - } - } - } - }, - "/files/{session_id}/{file_id}": { - "delete": { - "tags": ["files"], - "summary": "Delete File", - "description": "Delete a file from the session - LibreChat compatible.", - "operationId": "delete_file_files__session_id___file_id__delete", - "parameters": [ - { - "name": "session_id", - "in": "path", - "required": true, - "schema": { - "type": "string", - "title": "Session Id" - } - }, - { - "name": "file_id", - "in": "path", - "required": true, - "schema": { - "type": "string", - "title": "File Id" - } - } - ], - "responses": { - "200": { - "description": "Successful Response", - "content": { - "application/json": { - "schema": {} - } - } - }, - "422": { - "description": "Validation Error", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/HTTPValidationError" - } 
- } - } - } - } - } - }, - "/exec": { - "post": { - "tags": ["exec"], - "summary": "Execute Code", - "description": "Execute code with specified language and parameters.\n\nThis endpoint is compatible with LibreChat's Code Interpreter API.\nIt supports 12 programming languages: py, js, ts, go, java, c, cpp, php, rs, r, f90, d\n\nPython sessions support state persistence - variables and functions defined in\none execution are available in subsequent executions within the same session.\nState is stored in Redis (2 hour TTL) with automatic archival to MinIO for\nlong-term storage (7 day TTL).\n\nArgs:\n request: Execution request with code, language, and optional files\n session_service: Session management service\n file_service: File storage service\n execution_service: Code execution service\n state_service: Python state persistence service (Redis)\n state_archival_service: Python state archival service (MinIO)\n\nReturns:\n ExecResponse with session_id, stdout, stderr, and generated files", - "operationId": "execute_code_exec_post", - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/ExecRequest" - } - } - }, - "required": true - }, - "responses": { - "200": { - "description": "Successful Response", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/ExecResponse" - } - } - } - }, - "422": { - "description": "Validation Error", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/HTTPValidationError" - } - } - } - } - } - } - }, - "/health/detailed": { - "get": { - "tags": ["health", "monitoring"], - "summary": "Detailed health check", - "description": "Detailed health check of all system dependencies.", - "operationId": "detailed_health_check_health_detailed_get", - "security": [ - { - "HTTPBearer": [] - } - ], - "parameters": [ - { - "name": "use_cache", - "in": "query", - "required": false, - "schema": { - "type": "boolean", - "description": "Use 
cached health check results", - "default": true, - "title": "Use Cache" - }, - "description": "Use cached health check results" - } - ], - "responses": { - "200": { - "description": "Successful Response", - "content": { - "application/json": { - "schema": {} - } - } - }, - "422": { - "description": "Validation Error", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/HTTPValidationError" - } - } - } - } - } - } - }, - "/health/redis": { - "get": { - "tags": ["health", "monitoring"], - "summary": "Redis health check", - "description": "Check Redis connectivity and performance.", - "operationId": "redis_health_check_health_redis_get", - "responses": { - "200": { - "description": "Successful Response", - "content": { - "application/json": { - "schema": {} - } - } - } - }, - "security": [ - { - "HTTPBearer": [] - } - ] - } - }, - "/health/minio": { - "get": { - "tags": ["health", "monitoring"], - "summary": "MinIO health check", - "description": "Check MinIO/S3 connectivity and performance.", - "operationId": "minio_health_check_health_minio_get", - "responses": { - "200": { - "description": "Successful Response", - "content": { - "application/json": { - "schema": {} - } - } - } - }, - "security": [ - { - "HTTPBearer": [] - } - ] - } - }, - "/health/docker": { - "get": { - "tags": ["health", "monitoring"], - "summary": "Docker health check", - "description": "Check Docker daemon connectivity and performance.", - "operationId": "docker_health_check_health_docker_get", - "responses": { - "200": { - "description": "Successful Response", - "content": { - "application/json": { - "schema": {} - } - } - } - }, - "security": [ - { - "HTTPBearer": [] - } - ] - } - }, - "/metrics": { - "get": { - "tags": ["health", "monitoring"], - "summary": "System metrics", - "description": "Get system metrics and statistics.", - "operationId": "get_metrics_metrics_get", - "responses": { - "200": { - "description": "Successful Response", - "content": { - 
"application/json": { - "schema": {} - } - } - } - }, - "security": [ - { - "HTTPBearer": [] - } - ] - } - }, - "/metrics/execution": { - "get": { - "tags": ["health", "monitoring"], - "summary": "Execution metrics", - "description": "Get code execution metrics and statistics.", - "operationId": "get_execution_metrics_metrics_execution_get", - "responses": { - "200": { - "description": "Successful Response", - "content": { - "application/json": { - "schema": {} - } - } - } - }, - "security": [ - { - "HTTPBearer": [] - } - ] - } - }, - "/metrics/api": { - "get": { - "tags": ["health", "monitoring"], - "summary": "API metrics", - "description": "Get API request metrics and statistics.", - "operationId": "get_api_metrics_metrics_api_get", - "responses": { - "200": { - "description": "Successful Response", - "content": { - "application/json": { - "schema": {} - } - } - } - }, - "security": [ - { - "HTTPBearer": [] - } - ] - } - }, - "/status": { - "get": { - "tags": ["health", "monitoring"], - "summary": "Service status", - "description": "Get comprehensive service status information.", - "operationId": "get_service_status_status_get", - "responses": { - "200": { - "description": "Successful Response", - "content": { - "application/json": { - "schema": {} - } - } - } - }, - "security": [ - { - "HTTPBearer": [] - } - ] - } - } - }, - "components": { - "schemas": { - "Body_upload_file_upload_post": { - "properties": { - "file": { - "anyOf": [ - { - "type": "string", - "format": "binary" - }, - { - "type": "null" - } - ], - "title": "File" - }, - "files": { - "anyOf": [ - { - "items": { - "type": "string", - "format": "binary" - }, - "type": "array" - }, - { - "type": "null" - } - ], - "title": "Files" - }, - "entity_id": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "title": "Entity Id" - } - }, - "type": "object", - "title": "Body_upload_file_upload_post" - }, - "ExecRequest": { - "properties": { - "code": { - "type": "string", - "title": 
"Code", - "description": "The source code to be executed" - }, - "lang": { - "type": "string", - "title": "Lang", - "description": "The programming language of the code" - }, - "args": { - "anyOf": [ - {}, - { - "type": "null" - } - ], - "title": "Args", - "description": "Optional command line arguments (any JSON type)" - }, - "user_id": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "title": "User Id", - "description": "Optional user identifier" - }, - "entity_id": { - "anyOf": [ - { - "type": "string", - "maxLength": 40, - "pattern": "^[A-Za-z0-9_-]+$" - }, - { - "type": "null" - } - ], - "title": "Entity Id", - "description": "Optional assistant/agent identifier for file sharing" - }, - "session_id": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "title": "Session Id", - "description": "Optional session ID to continue an existing session (for state persistence)" - }, - "files": { - "anyOf": [ - { - "items": { - "$ref": "#/components/schemas/RequestFile" - }, - "type": "array" - }, - { - "type": "null" - } - ], - "title": "Files", - "description": "Array of file references to be used during execution" - } - }, - "type": "object", - "required": ["code", "lang"], - "title": "ExecRequest", - "description": "Request model for /exec endpoint." - }, - "ExecResponse": { - "properties": { - "session_id": { - "type": "string", - "title": "Session Id" - }, - "files": { - "items": { - "$ref": "#/components/schemas/FileRef" - }, - "type": "array", - "title": "Files" - }, - "stdout": { - "type": "string", - "title": "Stdout", - "default": "" - }, - "stderr": { - "type": "string", - "title": "Stderr", - "default": "" - } - }, - "type": "object", - "required": ["session_id"], - "title": "ExecResponse", - "description": "Response model for /exec endpoint - LibreChat compatible format." 
- }, - "FileRef": { - "properties": { - "id": { - "type": "string", - "title": "Id" - }, - "name": { - "type": "string", - "title": "Name" - }, - "path": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "title": "Path" - } - }, - "type": "object", - "required": ["id", "name"], - "title": "FileRef", - "description": "File reference model for execution response." - }, - "HTTPValidationError": { - "properties": { - "detail": { - "items": { - "$ref": "#/components/schemas/ValidationError" - }, - "type": "array", - "title": "Detail" - } - }, - "type": "object", - "title": "HTTPValidationError" - }, - "RequestFile": { - "properties": { - "id": { - "type": "string", - "title": "Id" - }, - "session_id": { - "type": "string", - "title": "Session Id" - }, - "name": { - "type": "string", - "title": "Name" - } - }, - "type": "object", - "required": ["id", "session_id", "name"], - "title": "RequestFile", - "description": "Request file model." - }, - "ValidationError": { - "properties": { - "loc": { - "items": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "integer" - } - ] - }, - "type": "array", - "title": "Location" - }, - "msg": { - "type": "string", - "title": "Message" - }, - "type": { - "type": "string", - "title": "Error Type" - } - }, - "type": "object", - "required": ["loc", "msg", "type"], - "title": "ValidationError" - } - }, - "securitySchemes": { - "HTTPBearer": { - "type": "http", - "scheme": "bearer" - } - } - } -} diff --git a/docker-compose.override.example.yml b/docker-compose.override.example.yml index f578016..74ae1de 100644 --- a/docker-compose.override.example.yml +++ b/docker-compose.override.example.yml @@ -1,18 +1,25 @@ # Copy this file to `docker-compose.override.yml` to customize how the API image -# is sourced when running `docker compose -f docker-compose.prod.yml up -d`. +# is sourced when running `docker compose up -d`. 
Compose auto-merges +# `docker-compose.override.yml` on top of `docker-compose.yml`, so no extra `-f` +# flags are needed. # -# `docker-compose.override.yml` is ignored by git so you can keep a local choice -# without committing it. +# `docker-compose.override.yml` is gitignored, so your local choice stays out +# of version control. # -# Default example: pull the latest dev image package. -# To build from your local checkout instead, comment the `image`/`pull_policy` -# lines below and uncomment the `build` block. +# The base `docker-compose.yml` pulls the stable published image +# (`ghcr.io/usnavy13/librecodeinterpreter:main`). Use one of the snippets below +# to switch to the dev channel or to a local build. services: api: + # Option 1 — pull the latest dev image: image: ghcr.io/usnavy13/librecodeinterpreter-dev:latest pull_policy: always + # Option 2 — build from your local checkout. Comment the `image` and + # `pull_policy` lines above and uncomment the block below, then run: + # docker compose up --build -d + # # build: # context: . # target: app diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml deleted file mode 100644 index 50112f9..0000000 --- a/docker-compose.prod.yml +++ /dev/null @@ -1,96 +0,0 @@ -services: - api: - image: ${API_IMAGE:-ghcr.io/usnavy13/librecodeinterpreter:main} - pull_policy: always - container_name: code-interpreter-api - restart: unless-stopped - init: true - cap_add: - - SYS_ADMIN - security_opt: - - apparmor:unconfined - ports: - - "${PORT:-8000}:8000" - env_file: - - .env - environment: - - REDIS_HOST=redis - - MINIO_ENDPOINT=minio:9000 - volumes: - - sandbox-data:/var/lib/code-interpreter/sandboxes - # SSL_CERTS_PATH is a host path; SSL_CERT_FILE and SSL_KEY_FILE must point - # to the mounted files inside the container under /app/ssl. 
- - ${SSL_CERTS_PATH:-./ssl}:/app/ssl:ro - tmpfs: - - /app/data:size=100m - depends_on: - redis: - condition: service_healthy - minio-init: - condition: service_completed_successfully - healthcheck: - test: ["CMD-SHELL", "curl -fs http://localhost:8000/health || curl -fsk https://localhost:8000/health"] - interval: 30s - timeout: 15s - retries: 3 - start_period: 30s - - redis: - image: redis:7-alpine - container_name: code-interpreter-redis - restart: unless-stopped - ports: - - "127.0.0.1:${REDIS_PORT:-6379}:6379" - command: > - redis-server - --appendonly yes - --appendfsync everysec - --maxmemory 256mb - --maxmemory-policy allkeys-lru - volumes: - - redis-data:/data - healthcheck: - test: ["CMD", "redis-cli", "ping"] - interval: 10s - timeout: 5s - retries: 5 - - minio: - image: minio/minio:latest - container_name: code-interpreter-minio - restart: unless-stopped - ports: - - "127.0.0.1:${MINIO_PORT:-9000}:9000" - - "127.0.0.1:${MINIO_CONSOLE_PORT:-9001}:9001" - environment: - MINIO_ROOT_USER: ${MINIO_ACCESS_KEY:-minioadmin} - MINIO_ROOT_PASSWORD: ${MINIO_SECRET_KEY:-minioadmin} - command: server /data --console-address ":9001" - volumes: - - minio-data:/data - healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"] - interval: 10s - timeout: 5s - retries: 5 - - minio-init: - image: minio/mc:latest - depends_on: - minio: - condition: service_healthy - entrypoint: > - /bin/sh -c " - mc alias set myminio http://minio:9000 $${MINIO_ACCESS_KEY:-minioadmin} $${MINIO_SECRET_KEY:-minioadmin}; - mc mb --ignore-existing myminio/$${MINIO_BUCKET:-code-interpreter-files}; - exit 0; - " - environment: - MINIO_ACCESS_KEY: ${MINIO_ACCESS_KEY:-minioadmin} - MINIO_SECRET_KEY: ${MINIO_SECRET_KEY:-minioadmin} - MINIO_BUCKET: ${MINIO_BUCKET:-code-interpreter-files} - -volumes: - sandbox-data: - redis-data: - minio-data: diff --git a/docker-compose.yml b/docker-compose.yml index 962f49a..54569c9 100644 --- a/docker-compose.yml +++ b/docker-compose.yml 
@@ -1,18 +1,19 @@ services: # Code Interpreter API (unified image with nsjail sandboxing) api: - build: - context: . - target: app - image: ${API_IMAGE:-code-interpreter:nsjail} - pull_policy: never + image: ${API_IMAGE:-ghcr.io/usnavy13/librecodeinterpreter:main} + pull_policy: always container_name: code-interpreter-api restart: unless-stopped # tini init process reaps zombie nsjail/python child processes init: true - # nsjail requires these capabilities to create namespaces and cgroups + # nsjail requires these capabilities to create namespaces and cgroups. + # NET_ADMIN is required to install iptables egress rules for the sandbox uid + # when ENABLE_SANDBOX_NETWORK=true. Restricts sandbox traffic to the + # inline allowlist proxy and prevents SSRF to Redis/S3/etc. cap_add: - SYS_ADMIN + - NET_ADMIN security_opt: - apparmor:unconfined ports: @@ -22,26 +23,30 @@ services: environment: # Container-specific overrides (service discovery within compose network) - REDIS_HOST=redis - - MINIO_ENDPOINT=minio:9000 + - S3_ENDPOINT=garage:3900 volumes: - sandbox-data:/var/lib/code-interpreter/sandboxes + # Persistent skill-deps cache: pip/npm/go/cargo install here when + # ENABLE_SANDBOX_NETWORK=true so future executions reuse the install. + # Survives container restarts; purge with POST /api/v1/admin/skill-deps/purge. + - skill-deps:/opt/skill-deps # SSL_CERTS_PATH is a host path; SSL_CERT_FILE and SSL_KEY_FILE must point # to the mounted files inside the container under /app/ssl. 
- ${SSL_CERTS_PATH:-./ssl}:/app/ssl:ro tmpfs: + - /tmp:size=512m,mode=1777,noexec,nosuid,nodev - /app/data:size=100m depends_on: redis: condition: service_healthy - minio-init: - condition: service_completed_successfully + garage: + condition: service_healthy healthcheck: test: ["CMD-SHELL", "curl -fs http://localhost:8000/health || curl -fsk https://localhost:8000/health"] interval: 30s timeout: 15s retries: 3 start_period: 30s - # No /var/run/docker.sock mount needed # Redis for session management redis: @@ -64,44 +69,31 @@ services: timeout: 5s retries: 5 - # MinIO for file storage - minio: - image: minio/minio:latest - container_name: code-interpreter-minio + # Garage S3-compatible object storage + garage: + image: dxflrs/garage:v2.3.0 + container_name: code-interpreter-garage restart: unless-stopped + command: /garage server --single-node --default-bucket ports: - - "127.0.0.1:${MINIO_PORT:-9000}:9000" - - "127.0.0.1:${MINIO_CONSOLE_PORT:-9001}:9001" + - "127.0.0.1:${S3_PORT:-3900}:3900" + - "127.0.0.1:${GARAGE_ADMIN_PORT:-3903}:3903" environment: - MINIO_ROOT_USER: ${MINIO_ACCESS_KEY:-minioadmin} - MINIO_ROOT_PASSWORD: ${MINIO_SECRET_KEY:-minioadmin} - command: server /data --console-address ":9001" + GARAGE_DEFAULT_ACCESS_KEY: ${S3_ACCESS_KEY:-GKminioadmin0000} + GARAGE_DEFAULT_SECRET_KEY: ${S3_SECRET_KEY:-minioadminsecret} + GARAGE_DEFAULT_BUCKET: ${S3_BUCKET:-code-interpreter-files} volumes: - - minio-data:/data + - garage-data:/var/lib/garage + - ./garage.toml:/etc/garage.toml healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"] + test: ["CMD", "/garage", "status"] interval: 10s timeout: 5s retries: 5 - - # MinIO bucket initialization - minio-init: - image: minio/mc:latest - depends_on: - minio: - condition: service_healthy - entrypoint: > - /bin/sh -c " - mc alias set myminio http://minio:9000 $${MINIO_ACCESS_KEY:-minioadmin} $${MINIO_SECRET_KEY:-minioadmin}; - mc mb --ignore-existing 
myminio/$${MINIO_BUCKET:-code-interpreter-files}; - exit 0; - " - environment: - MINIO_ACCESS_KEY: ${MINIO_ACCESS_KEY:-minioadmin} - MINIO_SECRET_KEY: ${MINIO_SECRET_KEY:-minioadmin} - MINIO_BUCKET: ${MINIO_BUCKET:-code-interpreter-files} + start_period: 10s volumes: sandbox-data: + skill-deps: redis-data: - minio-data: + garage-data: diff --git a/docker/ptc_bash_server.py b/docker/ptc_bash_server.py new file mode 100644 index 0000000..8a34262 --- /dev/null +++ b/docker/ptc_bash_server.py @@ -0,0 +1,392 @@ +#!/usr/bin/env python3 +"""Programmatic Tool Calling (PTC) Server for bash execution. + +Bash equivalent of `ptc_server.py`. Speaks the same outer JSON protocol +on stdin/stdout, but runs user **bash** code instead of Python. Tool calls +from bash are routed through two named pipes (FIFOs) so user code can +treat each tool as a regular bash function: + + weather_lookup '{"city":"NYC"}' + +The wrapper reads the result on stdout — caller pipes through `jq` to +extract fields. + +Outer protocol (identical to ptc_server.py): +1. Host writes {"code": "...", "tools": [{"name": ..., ...}]} + delimiter on stdin. +2. On each tool call, server writes + {"type": "tool_calls", "calls": [{"id": ..., "name": ..., "input": ...}]} + to stdout. +3. Host writes back + {"type": "tool_results", "results": [{"call_id": ..., "result": ..., "is_error": ...}]} + on stdin. +4. On bash exit, server writes + {"type": "completed", "stdout": ..., "stderr": ...}. +5. On any error, server writes {"type": "error", ...}. + +Inner protocol (bash <-> this script via FIFOs): +- bash writes one JSON line `{"name":"...","input":{...}}` to call.fifo, then + reads one line from result.fifo. The line on result.fifo is the JSON-encoded + `result` value (or `{"error": "..."}` if the host reported is_error). +- One tool call at a time; bash blocks on `read` until result arrives. + +Compromises (vs the Python PTC server): +- Sequential tool calls only — bash has no native asyncio. 
Acceptable because + shell scripts are inherently sequential. +- Tool inputs are JSON strings; user bash code uses `jq` to extract fields. +- `jq` is required in the sandbox image (entrypoint.sh fails fast if missing). +""" + +import asyncio +import json +import os +import re +import shlex +import sys +import uuid +from pathlib import Path + +DELIMITER = "\n---PTC_END---\n" + +# Per-sandbox FIFO directory, hidden so detect_generated_files skips it. +# Override via PTC_BASH_DIR for local testing outside the sandbox. +PTC_DIR = Path(os.environ.get("PTC_BASH_DIR", "/mnt/data/.ptc")) +CALL_FIFO = PTC_DIR / "call.fifo" +RESULT_FIFO = PTC_DIR / "result.fifo" +RC_PATH = PTC_DIR / "rc.sh" +USER_CODE_PATH = PTC_DIR / "user.sh" + +_real_stdin = sys.stdin +_real_stdout = sys.stdout + +# Bash identifier rules: [A-Za-z_][A-Za-z0-9_]*. Names that don't match +# get normalized via `_normalize_bash_name` so the user can still call the +# tool from bash — the SDK applies the same normalization client-side when +# generating code. 
+_VALID_BASH_NAME = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$") + +_BASH_RESERVED = frozenset( + { + "if", + "then", + "else", + "elif", + "fi", + "case", + "esac", + "for", + "while", + "until", + "do", + "done", + "in", + "function", + "select", + "time", + "coproc", + "declare", + "typeset", + "local", + "readonly", + "export", + "unset", + } +) + + +def _normalize_bash_name(name: str) -> str: + """Match SDK's normalizeToBashIdentifier so generated code can call functions.""" + result = re.sub(r"[-\s.]", "_", name) + result = re.sub(r"[^a-zA-Z0-9_]", "", result) + if result and result[0].isdigit(): + result = "_" + result + if result in _BASH_RESERVED: + result = result + "_tool" + return result or "_unnamed" + + +def _write_message(msg: dict) -> None: + _real_stdout.write(json.dumps(msg) + DELIMITER) + _real_stdout.flush() + + +def _read_message() -> dict: + buf = "" + while True: + line = _real_stdin.readline() + if not line: + raise EOFError("stdin closed") + buf += line + if DELIMITER in buf: + return json.loads(buf.split(DELIMITER)[0]) + + +def _generate_rcfile(tools: list) -> str: + """Generate one bash function per declared tool.""" + lines = [ + "# Auto-generated by ptc_bash_server.py — DO NOT edit", + "# Each tool function takes a JSON argument and returns the JSON result on stdout.", + f"export PTC_CALL_FIFO={shlex.quote(str(CALL_FIFO))}", + f"export PTC_RESULT_FIFO={shlex.quote(str(RESULT_FIFO))}", + "", + ] + for tool in tools: + name = tool.get("name", "") + func_name = _normalize_bash_name(name) + if not func_name or func_name == "_unnamed": + continue + lines.append( + f"{func_name}() {{\n" + # Use an explicit conditional rather than ${1:-{}} — the brace-default + # form parses as ${1:-{} followed by a literal }, which appends a + # stray brace whenever $1 is set. 
+ f' local input_json="$1"\n' + f' if [ -z "$input_json" ]; then input_json="{{}}"; fi\n' + f" local payload\n" + f" payload=$(jq -c -n --arg name {shlex.quote(name)} " + f"--argjson input \"$input_json\" '{{name:$name,input:$input}}' 2>/dev/null) || \\\n" + f" payload=$(jq -c -n --arg name {shlex.quote(name)} " + f"--arg input \"$input_json\" '{{name:$name,input:$input}}')\n" + f' printf \'%s\\n\' "$payload" > "$PTC_CALL_FIFO"\n' + f" local result\n" + f' IFS= read -r result < "$PTC_RESULT_FIFO"\n' + f" printf '%s\\n' \"$result\"\n" + f"}}\n" + ) + return "\n".join(lines) + + +async def _run(code: str, tools: list) -> dict: + """Spawn bash with the generated rc and route tool calls between bash and the host.""" + PTC_DIR.mkdir(mode=0o755, exist_ok=True) + for fifo in (CALL_FIFO, RESULT_FIFO): + try: + os.unlink(fifo) + except FileNotFoundError: + pass + os.mkfifo(fifo, 0o600) + + RC_PATH.write_text(_generate_rcfile(tools)) + USER_CODE_PATH.write_text(code) + + # Spawn bash. --rcfile is only honored by interactive shells, hence -i; user + # code runs as a script file, so the functions defined in rc.sh are in scope. + proc = await asyncio.create_subprocess_exec( + "bash", + "--rcfile", + str(RC_PATH), + "-i", + str(USER_CODE_PATH), + stdin=asyncio.subprocess.DEVNULL, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + ) + + # Open both FIFOs O_RDWR so we never see EOF when bash closes its end + # between calls (bash opens/closes per call). Non-blocking so we can + # cooperate with asyncio.add_reader for the call channel. 
+ call_fd = os.open(str(CALL_FIFO), os.O_RDWR | os.O_NONBLOCK) + result_fd = os.open(str(RESULT_FIFO), os.O_RDWR | os.O_NONBLOCK) + + loop = asyncio.get_event_loop() + call_queue: asyncio.Queue = asyncio.Queue() + call_buf = bytearray() + + def on_call_readable() -> None: + try: + chunk = os.read(call_fd, 4096) + except (BlockingIOError, OSError): + return + if not chunk: + return + call_buf.extend(chunk) + # Drain complete lines into the queue. + while b"\n" in call_buf: + idx = call_buf.index(b"\n") + line = bytes(call_buf[:idx]).decode("utf-8", errors="replace").strip() + del call_buf[: idx + 1] + if line: + call_queue.put_nowait(line) + + loop.add_reader(call_fd, on_call_readable) + bash_exit = asyncio.create_task(proc.wait()) + + error_message: str | None = None + + try: + while True: + get_call = asyncio.create_task(call_queue.get()) + done, _ = await asyncio.wait( + {get_call, bash_exit}, + return_when=asyncio.FIRST_COMPLETED, + ) + + if get_call in done: + line = get_call.result() + try: + call = json.loads(line) + except json.JSONDecodeError: + # Malformed call from bash — return an error so bash unblocks. 
+ os.write(result_fd, b'{"error":"invalid JSON from bash"}\n') + continue + + call_id = uuid.uuid4().hex[:12] + _write_message( + { + "type": "tool_calls", + "calls": [ + { + "id": call_id, + "name": call.get("name"), + "input": call.get("input", {}), + } + ], + } + ) + + try: + response = _read_message() + except EOFError: + error_message = "Host stdin closed mid-execution" + break + + if response.get("type") != "tool_results": + error_message = ( + f"Expected tool_results, got {response.get('type')!r}" + ) + os.write(result_fd, b'{"error":"protocol mismatch"}\n') + break + + results = response.get("results", []) + target = next((r for r in results if r.get("call_id") == call_id), None) + if target is None and results: + target = results[0] + + if target is None: + os.write(result_fd, b'{"error":"no tool result returned"}\n') + continue + + if target.get("is_error"): + err = target.get("error_message", "tool error") + payload = json.dumps({"error": err}) + else: + # Round-trip the tool result as JSON. Strings, numbers, + # objects all serialize fine; user code parses with jq. + payload = json.dumps(target.get("result")) + + os.write(result_fd, (payload + "\n").encode("utf-8")) + + if bash_exit in done: + if not get_call.done(): + get_call.cancel() + try: + await get_call + except (asyncio.CancelledError, Exception): + pass + break + finally: + try: + loop.remove_reader(call_fd) + except (ValueError, OSError): + pass + + # Make sure bash has fully exited before we read its pipes. 
+ if not bash_exit.done(): + try: + await asyncio.wait_for(bash_exit, timeout=5) + except asyncio.TimeoutError: + try: + proc.kill() + except ProcessLookupError: + pass + await proc.wait() + + stdout_bytes = b"" + stderr_bytes = b"" + if proc.stdout is not None: + try: + stdout_bytes = await proc.stdout.read() + except Exception: + pass + if proc.stderr is not None: + try: + stderr_bytes = await proc.stderr.read() + except Exception: + pass + + os.close(call_fd) + os.close(result_fd) + for fifo in (CALL_FIFO, RESULT_FIFO): + try: + os.unlink(fifo) + except FileNotFoundError: + pass + try: + RC_PATH.unlink() + except FileNotFoundError: + pass + try: + USER_CODE_PATH.unlink() + except FileNotFoundError: + pass + try: + PTC_DIR.rmdir() + except OSError: + # Directory may still contain files the user wrote; leave it for + # the orchestrator's normal sandbox teardown. + pass + + stdout = stdout_bytes.decode("utf-8", errors="replace") + stderr = stderr_bytes.decode("utf-8", errors="replace") + + if error_message: + return { + "type": "error", + "error": error_message, + "stdout": stdout, + "stderr": stderr, + } + + if proc.returncode == 0: + return {"type": "completed", "stdout": stdout, "stderr": stderr} + + return { + "type": "error", + "error": f"bash exited with code {proc.returncode}", + "stdout": stdout, + "stderr": stderr, + } + + +def main() -> None: + # Inside the sandbox /mnt/data exists; for local testing PTC_BASH_DIR + # points elsewhere and chdir is unnecessary. 
+ if "PTC_BASH_DIR" not in os.environ: + try: + os.chdir("/mnt/data") + except OSError: + pass + + try: + request = _read_message() + except Exception as exc: + _write_message( + {"type": "error", "error": f"Failed to read initial request: {exc}"} + ) + return + + code = request.get("code", "") + tools = request.get("tools", []) + if not code: + _write_message({"type": "error", "error": "No code provided"}) + return + + try: + result = asyncio.run(_run(code, tools)) + except Exception as exc: + result = {"type": "error", "error": f"bash PTC failed: {exc}"} + + _write_message(result) + + +if __name__ == "__main__": + main() diff --git a/docker/ptc_server.py b/docker/ptc_server.py index 1b9b5ca..a32d440 100644 --- a/docker/ptc_server.py +++ b/docker/ptc_server.py @@ -26,6 +26,7 @@ import asyncio import json import os +import re import sys import traceback import uuid @@ -33,6 +34,58 @@ DELIMITER = "\n---PTC_END---\n" +_PYTHON_KEYWORDS = frozenset( + { + "False", + "None", + "True", + "and", + "as", + "assert", + "async", + "await", + "break", + "class", + "continue", + "def", + "del", + "elif", + "else", + "except", + "finally", + "for", + "from", + "global", + "if", + "import", + "in", + "is", + "lambda", + "nonlocal", + "not", + "or", + "pass", + "raise", + "return", + "try", + "while", + "with", + "yield", + } +) + + +def _normalize_python_name(name: str) -> str: + """Match SDK's normalizeToPythonIdentifier so generated code can call stubs.""" + result = re.sub(r"[-\s]", "_", name) + result = re.sub(r"[^a-zA-Z0-9_]", "", result) + if result and result[0].isdigit(): + result = "_" + result + if result in _PYTHON_KEYWORDS: + result = result + "_tool" + return result or "_unnamed" + + # Keep references to the REAL stdin/stdout for protocol communication. # User code's print() will be redirected to a StringIO capture buffer. 
_real_stdin = sys.stdin @@ -83,9 +136,7 @@ async def tool_stub(**kwargs): result_info = _tool_results_map.pop(call_id) if result_info.get("is_error"): - raise RuntimeError( - result_info.get("error_message", "Tool call failed") - ) + raise RuntimeError(result_info.get("error_message", "Tool call failed")) return result_info.get("result") tool_stub.__name__ = tool_name @@ -113,7 +164,8 @@ async def _execute_with_tools( pass for tool in tools: - namespace[tool["name"]] = _make_tool_stub(tool["name"]) + normalized = _normalize_python_name(tool["name"]) + namespace[normalized] = _make_tool_stub(tool["name"]) # Wrap user code in async function indented_code = "\n".join(" " + line for line in code.split("\n")) @@ -137,10 +189,12 @@ async def _execute_with_tools( calls_to_send = list(_pending_calls) _pending_calls.clear() - _write_message({ - "type": "tool_calls", - "calls": calls_to_send, - }) + _write_message( + { + "type": "tool_calls", + "calls": calls_to_send, + } + ) # Wait for results from host response = _read_message() @@ -179,10 +233,12 @@ def main(): try: request = _read_message() except Exception as e: - _write_message({ - "type": "error", - "error": f"Failed to read initial request: {e}", - }) + _write_message( + { + "type": "error", + "error": f"Failed to read initial request: {e}", + } + ) return code = request.get("code", "") @@ -200,9 +256,7 @@ def main(): sys.stderr = user_stderr try: - result = asyncio.run( - _execute_with_tools(code, tools, user_stdout, user_stderr) - ) + result = asyncio.run(_execute_with_tools(code, tools, user_stdout, user_stderr)) except Exception as e: result = { "type": "error", diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md index df32f87..1c98bb4 100644 --- a/docs/ARCHITECTURE.md +++ b/docs/ARCHITECTURE.md @@ -36,9 +36,9 @@ This document provides a comprehensive overview of the Code Interpreter API arch │ │ ▼ ▼ ┌──────────────┐ ┌──────────────┐ - │ Redis │ │ MinIO │ - │ │ │ (S3-API) │ - │ - Sessions │ │ │ + │ Redis │ │ 
S3 store │ + │ │ │ (Garage by │ + │ - Sessions │ │ default) │ │ - State │ │ - Files │ │ - Caching │ │ - State │ │ │ │ Archives │ @@ -53,11 +53,14 @@ This document provides a comprehensive overview of the Code Interpreter API arch The API layer contains thin endpoint handlers that delegate to the orchestrator: -| File | Purpose | -| ----------- | ------------------------------------------------------------- | -| `exec.py` | Code execution endpoint, delegates to `ExecutionOrchestrator` | -| `files.py` | File upload, download, and list operations | -| `health.py` | Health checks and metrics endpoints | +| File | Purpose | +| ---------------------- | ---------------------------------------------------------------- | +| `exec.py` | Code execution endpoint, delegates to `ExecutionOrchestrator` | +| `programmatic.py` | `POST /exec/programmatic` — Programmatic Tool Calling endpoint | +| `files.py` | File upload, download, and list operations | +| `health.py` | Health checks and metrics endpoints | +| `admin.py` | Admin / API key management endpoints (require `MASTER_API_KEY`) | +| `dashboard_metrics.py` | Metrics endpoints powering the admin dashboard UI | **Design principle:** Endpoints are intentionally thin (~70 lines each). All business logic resides in services. 
@@ -65,29 +68,32 @@ The API layer contains thin endpoint handlers that delegate to the orchestrator: Business logic is organized into focused services: -| Service | File | Responsibility | -| ------------------------- | ------------------- | -------------------------------- | -| **ExecutionOrchestrator** | `orchestrator.py` | Coordinates execution workflow | -| **SessionService** | `session.py` | Redis session management | -| **FileService** | `file.py` | MinIO file storage | -| **StateService** | `state.py` | Internal Python state persistence (Redis, no external API) | -| **StateArchivalService** | `state_archival.py` | Internal state archival (MinIO) | -| **AuthService** | `auth.py` | API key authentication | -| **HealthService** | `health.py` | Health checks | -| **MetricsService** | `metrics.py` | Metrics collection | -| **CleanupService** | `cleanup.py` | Background cleanup tasks | +| Service | File | Responsibility | +| ------------------------- | ------------------- | ----------------------------------------------------------- | +| **ExecutionOrchestrator** | `orchestrator.py` | Coordinates execution workflow | +| **ProgrammaticService** | `programmatic.py` | PTC paused-execution state, continuation tokens, tool stubs | +| **SessionService** | `session.py` | Redis session management | +| **FileService** | `file.py` | S3 file storage (boto3 → Garage / any S3 backend) | +| **StateService** | `state.py` | Internal Python state persistence (Redis, no external API) | +| **StateArchivalService** | `state_archival.py` | Internal state archival (S3 cold storage) | +| **AuthService** | `auth.py` | API key authentication | +| **HealthService** | `health.py` | Health checks | +| **MetricsService** | `metrics.py` | Metrics collection | +| **CleanupService** | `cleanup.py` | Background cleanup tasks | ### 3. 
Sandbox Management (`src/services/sandbox/`) Sandbox lifecycle is managed by a dedicated package: -| Component | File | Purpose | -| -------------------- | ------------------ | ---------------------------------------------------- | -| **SandboxManager** | `manager.py` | Sandbox lifecycle (create, destroy) | -| **SandboxPool** | `pool.py` | Pre-warmed Python REPL sandbox pool | -| **SandboxExecutor** | `executor.py` | Code execution in nsjail sandboxes | -| **REPLExecutor** | `repl_executor.py` | Python REPL communication | -| **NsjailConfig** | `nsjail.py` | nsjail CLI argument builder and SandboxInfo dataclass | +| Component | File | Purpose | +| -------------------- | --------------------- | ------------------------------------------------------------------------------------------------ | +| **SandboxManager** | `manager.py` | Sandbox lifecycle (create, destroy) | +| **SandboxPool** | `pool.py` | Pre-warmed Python REPL sandbox pool | +| **SandboxExecutor** | `executor.py` | Code execution in nsjail sandboxes | +| **REPLExecutor** | `repl_executor.py` | Python REPL communication | +| **NsjailConfig** | `nsjail.py` | nsjail CLI argument builder and SandboxInfo dataclass | +| **EgressProxy** | `egress_proxy.py` | Inline allowlist HTTPS proxy (only used when `ENABLE_SANDBOX_NETWORK=true` for skill installs) | +| **EgressFirewall** | `egress_firewall.py` | Sandbox egress rules / iptables enforcement for the proxy | ### 4. Execution Engine (`src/services/execution/`) @@ -183,7 +189,7 @@ await event_bus.publish(ExecutionCompleted(session_id=..., execution_id=...)) │ ├── 3b. Get/create session │ - └── 3c. Store file in MinIO + └── 3c. Store file in S3 (Garage by default) │ ▼ 4. 
Return session_id and file_id @@ -264,14 +270,14 @@ Redis stores ephemeral data with TTL-based expiration: | State | `state:{session_id}` | 2h | Python namespace (compressed) | | Rate limits | `ratelimit:{key}` | varies | API rate limiting | -### MinIO (S3-Compatible) +### S3-Compatible Object Storage (Garage by default) -MinIO stores persistent files and archived state: +The default deployment uses [Garage](https://garagehq.deuxfleurs.fr/) as the S3-compatible backend (see `docker-compose.yml`). Any other S3-compatible service (MinIO, AWS S3, Cloudflare R2, etc.) works by changing the `S3_*` environment variables. -| Bucket | Object Pattern | TTL | Purpose | -| ------------------------ | ---------------------------- | --- | --------------------- | -| `code-interpreter-files` | `{session_id}/{file_id}` | 24h | User files | -| `code-interpreter-files` | `state-archive/{session_id}` | 7d | Archived Python state | +| Bucket | Object Pattern | TTL | Purpose | +| ------------------------ | ---------------------------- | ---- | ------------------------------------------------ | +| `code-interpreter-files` | `{session_id}/{file_id}` | 24h | User files | +| `code-interpreter-files` | `state-archive/{session_id}` | 1d | Archived Python state (`STATE_ARCHIVE_TTL_DAYS`) | --- @@ -283,7 +289,7 @@ Services are registered and injected via FastAPI's dependency system: # src/dependencies/services.py def get_file_service() -> FileService: - return FileService(minio_client) + return FileService() # constructs a boto3 S3 client from settings.s3.* def get_session_service() -> SessionService: return SessionService(redis_pool) @@ -318,11 +324,11 @@ Environment Variables (.env) │ ├── api.py → API settings (host, port, debug) │ │ ├── sandbox.py → Sandbox settings (nsjail binary, base dir) │ │ ├── redis.py → Redis settings (host, port, pool) │ -│ ├── minio.py → MinIO settings (endpoint, credentials) │ +│ ├── s3.py → S3 storage settings (endpoint, credentials, bucket) │ │ ├── security.py → 
Security settings (isolation, headers) │ │ ├── resources.py → Resource limits (memory, cpu, timeout) │ │ ├── logging.py → Logging settings (level, format) │ -│ └── languages.py → Language configuration (images, multipliers) │ +│ └── languages.py → Language configuration (multipliers, commands) │ │ │ └─────────────────────────────────────────────────────────────────────────────┘ │ diff --git a/docs/CONFIGURATION.md b/docs/CONFIGURATION.md index eb2b4c9..761dd80 100644 --- a/docs/CONFIGURATION.md +++ b/docs/CONFIGURATION.md @@ -21,11 +21,6 @@ The Code Interpreter API uses environment-based configuration with sensible defa API_KEY=your-secure-api-key-here ``` -3. Validate your configuration: - ```bash - python config_manager.py validate - ``` - ## Configuration Sections ### API Configuration @@ -43,7 +38,7 @@ Controls the basic API server settings. Configures SSL/TLS support for secure HTTPS connections. -Both `docker-compose.yml` and `docker-compose.prod.yml` use the same HTTPS contract: +`docker-compose.yml` uses the following HTTPS contract: - `PORT` is the external host port published by Docker. - `SSL_CERTS_PATH` is a host path mounted into the API container at `/app/ssl`. @@ -93,13 +88,10 @@ Both `docker-compose.yml` and `docker-compose.prod.yml` use the same HTTPS contr SSL_KEY_FILE=/app/ssl/live/example.com/privkey.pem ``` -3. **Start the stack with either compose file**: +3. **Start the stack**: ```bash docker compose up -d - - # or - docker compose -f docker-compose.prod.yml up -d ``` 4. **Verify HTTPS**: @@ -119,16 +111,31 @@ If you terminate TLS at an external reverse proxy instead, keep the API on HTTP Manages API key authentication and security. 
-| Variable | Default | Description | -| ------------------- | -------------- | -------------------------------------- | -| `API_KEY` | `test-api-key` | Primary API key (CHANGE IN PRODUCTION) | -| `API_KEYS` | - | Additional API keys (comma-separated) | +| Variable | Default | Description | +| ---------------- | -------------- | ---------------------------------------------------------------- | +| `API_KEY` | `test-api-key` | Primary API key (CHANGE IN PRODUCTION) | +| `API_KEYS` | - | Additional API keys (comma-separated) | +| `MASTER_API_KEY` | - | Required for `/api/v1/admin/*` endpoints | +| `AUTH_ENABLED` | `true` | When `false`, skip x-api-key/Basic checks on user endpoints | + +**How clients authenticate** (any one of): + +1. **`x-api-key` header** — `x-api-key: <API_KEY>`. The traditional way. Reverse proxies that inject this header continue to work. +2. **HTTP Basic in URL credentials** — `Authorization: Basic base64("<API_KEY>:")`. Current LibreChat versions use this when `LIBRECHAT_CODE_BASEURL=https://<API_KEY>@your-api/v1` — `axios` and `node-fetch` automatically convert URL credentials into the Basic header. Single-token convention (Stripe / DigitalOcean / GitHub PAT style): the API key goes in the username slot, password is empty. +3. **`AUTH_ENABLED=false`** — no client-side auth. Use only when running on a trusted private network or behind another auth layer (mTLS, reverse-proxy auth, etc.). + +When both `x-api-key` and a Basic header are present, `x-api-key` wins. This is deterministic for proxy-injection setups. + +`/api/v1/admin/*` and the admin dashboard's API calls **always** require `MASTER_API_KEY`, regardless of `AUTH_ENABLED`. + +**Rate limiting:** per-key rate limits and the IP-based auth-failure limiter both run inside the auth path. When `AUTH_ENABLED=false`, both are bypassed — your network boundary is responsible for any abuse protection. 
**Security Notes:** - API keys should be at least 16 characters long - Use cryptographically secure random keys in production - Consider rotating API keys regularly +- Setting `AUTH_ENABLED=false` opens user endpoints to anyone who can reach the URL — do not expose to the public internet without a proxy/VPN/mTLS in front ### Redis Configuration @@ -151,17 +158,18 @@ Redis is used for session management and caching. REDIS_URL=redis://password@localhost:6379/0 ``` -### MinIO/S3 Configuration +### S3 Configuration -MinIO provides S3-compatible object storage for files. +S3-compatible object storage for files and archived state. The default deployment uses Garage; any S3-compatible backend (AWS S3, MinIO, Cloudflare R2, etc.) works. -| Variable | Default | Description | -| ------------------ | ------------------------ | ----------------------------------- | -| `MINIO_ENDPOINT` | `localhost:9000` | MinIO server endpoint (no protocol) | -| `MINIO_ACCESS_KEY` | `minioadmin` | MinIO access key | -| `MINIO_SECRET_KEY` | `minioadmin` | MinIO secret key | -| `MINIO_SECURE` | `false` | Use HTTPS for MinIO connections | -| `MINIO_BUCKET` | `code-interpreter-files` | Bucket name for file storage | +| Variable | Default | Description | +| ---------------- | ------------------------ | -------------------------------------- | +| `S3_ENDPOINT` | `localhost:3900` | S3 endpoint (host:port, no protocol) | +| `S3_ACCESS_KEY` | `test-access-key` | S3 access key | +| `S3_SECRET_KEY` | `test-secret-key` | S3 secret key | +| `S3_SECURE` | `false` | Use HTTPS for S3 connections | +| `S3_BUCKET` | `code-interpreter-files` | Bucket name for file storage | +| `S3_REGION` | `garage` | S3 region (set to match your backend) | ### Sandbox Configuration @@ -188,36 +196,39 @@ nsjail is used for secure code execution in isolated sandboxes. 
| Variable | Default | Description | | -------------------- | ------- | ------------------------------------- | -| `MAX_EXECUTION_TIME` | `30` | Maximum code execution time (seconds) | +| `MAX_EXECUTION_TIME` | `120` | Maximum code execution time (seconds) | | `MAX_MEMORY_MB` | `512` | Maximum memory per execution (MB) | #### File Limits -| Variable | Default | Description | -| ----------------------- | ------- | ---------------------------------- | -| `MAX_FILE_SIZE_MB` | `10` | Maximum individual file size (MB) | -| `MAX_FILES_PER_SESSION` | `50` | Maximum files per session | -| `MAX_OUTPUT_FILES` | `10` | Maximum output files per execution | -| `MAX_FILENAME_LENGTH` | `255` | Maximum filename length | +| Variable | Default | Description | +| ----------------------- | ------- | ------------------------------------------------------------ | +| `MAX_FILE_SIZE_MB` | `100` | Maximum individual file size (MB) | +| `MAX_FILES_PER_SESSION` | `300` | Maximum files per session (sized for skill bundles like pptx)| +| `MAX_OUTPUT_FILES` | `10` | Maximum output files per execution | +| `MAX_FILENAME_LENGTH` | `255` | Maximum filename length | ### Session Configuration -| Variable | Default | Description | -| ---------------------------------- | ------- | ---------------------------- | -| `SESSION_TTL_HOURS` | `24` | Session time-to-live (hours) | -| `SESSION_CLEANUP_INTERVAL_MINUTES` | `10` | Cleanup interval (minutes) | +| Variable | Default | Description | +| ---------------------------------- | ------- | -------------------------------------------------------------------------- | +| `SESSION_TTL_HOURS` | `24` | Session time-to-live (hours) | +| `SESSION_CLEANUP_INTERVAL_MINUTES` | `60` | Cleanup interval (minutes) | +| `ENABLE_ORPHAN_S3_CLEANUP` | `true` | Reap S3 objects with no matching session metadata during cleanup sweeps | ### Sandbox Pool Configuration Pre-warmed Python REPL sandboxes reduce execution latency by eliminating interpreter startup and library import 
time. Only Python supports REPL pooling; all other languages use one-shot nsjail execution. -| Variable | Default | Description | -| ---------------------------------- | ------- | -------------------------------------- | -| `SANDBOX_POOL_ENABLED` | `true` | Enable Python REPL pool | -| `SANDBOX_POOL_WARMUP_ON_STARTUP` | `true` | Pre-warm Python REPLs at startup | -| `SANDBOX_POOL_PY` | `5` | Number of pre-warmed Python REPLs | -| `SANDBOX_POOL_PARALLEL_BATCH` | `5` | Number of warmup sandboxes started concurrently | -| `SANDBOX_UID` | `1001` | Shared host UID used by all sandbox languages | +| Variable | Default | Description | +| ---------------------------------- | ------- | -------------------------------------------------------- | +| `SANDBOX_POOL_ENABLED` | `true` | Enable Python REPL pool | +| `SANDBOX_POOL_WARMUP_ON_STARTUP` | `true` | Pre-warm Python REPLs at startup | +| `SANDBOX_POOL_PY` | `2` | Number of pre-warmed Python REPLs | +| `SANDBOX_POOL_PARALLEL_BATCH` | `5` | Number of warmup sandboxes started concurrently | +| `SANDBOX_POOL_REPLENISH_INTERVAL` | `2` | Seconds between pool replenishment checks | +| `SANDBOX_POOL_EXHAUSTION_TRIGGER` | `true` | Trigger immediate replenishment when pool is exhausted | +| `SANDBOX_UID` | `1001` | Shared host UID used by all sandbox languages | **Note:** Sandboxes are destroyed immediately after execution. The pool is automatically replenished in the background. Non-Python languages do not use pooling. @@ -234,19 +245,20 @@ REPL mode keeps a Python interpreter running inside pooled sandboxes with common Python `/exec` sessions can persist variables, functions, and objects across executions when a Python session is reused. The most explicit path is sending the prior `session_id`, but the backend can also reuse an existing session through same-user file references or `entity_id`. 
-| Variable | Default | Description | -| --------------------------- | ------- | ------------------------------------ | -| `STATE_PERSISTENCE_ENABLED` | `true` | Enable Python state persistence | -| `STATE_TTL_SECONDS` | `7200` | Redis hot storage TTL (2 hours) | -| `STATE_CAPTURE_ON_ERROR` | `false` | Save state even on execution failure | +| Variable | Default | Description | +| --------------------------- | ------- | ------------------------------------------------------------------------ | +| `STATE_PERSISTENCE_ENABLED` | `true` | Enable Python state persistence | +| `STATE_TTL_SECONDS` | `7200` | Redis hot storage TTL (2 hours) | +| `STATE_CAPTURE_ON_ERROR` | `false` | Save state even on execution failure | +| `STATE_MAX_REDIS_SIZE_MB` | `100` | Max raw state size (MB) stored in Redis. Larger states go directly to S3 | ### State Archival Configuration (Python) -Inactive states are automatically archived to MinIO for long-term storage. +Inactive states are automatically archived to S3 for long-term storage. | Variable | Default | Description | | -------------------------------------- | ------- | -------------------------------------- | -| `STATE_ARCHIVE_ENABLED` | `true` | Enable MinIO cold storage archival | +| `STATE_ARCHIVE_ENABLED` | `true` | Enable S3 cold storage archival | | `STATE_ARCHIVE_AFTER_SECONDS` | `3600` | Archive after this inactivity (1 hour) | | `STATE_ARCHIVE_TTL_DAYS` | `1` | Keep archives for this many days (24h) | | `STATE_ARCHIVE_CHECK_INTERVAL_SECONDS` | `300` | Archival check frequency (5 min) | @@ -258,6 +270,17 @@ Inactive states are automatically archived to MinIO for long-term storage. | `ENABLE_NETWORK_ISOLATION` | `true` | Enable network isolation for sandboxes | | `ENABLE_FILESYSTEM_ISOLATION` | `true` | Enable filesystem isolation | +### Sandbox Network Access (Skill Installs) + +Off by default — sandboxes have no network access. 
When enabled, an inline allowlist HTTPS proxy on `127.0.0.1` lets sandboxes reach **only** package registries (PyPI, npm, Go modules, crates.io). Required for "skills" that `pip install` / `npm install` / `go get` / `cargo install` dependencies at runtime. + +| Variable | Default | Description | +| -------------------------- | --------------------- | --------------------------------------------------------------------------------- | +| `ENABLE_SANDBOX_NETWORK` | `false` | Allow sandboxes to reach the internet via the inline allowlist proxy | +| `SANDBOX_EGRESS_PORT` | `18443` | Port the inline egress proxy binds to on `127.0.0.1` | +| `SANDBOX_EGRESS_ALLOWLIST` | (registries default) | Comma-separated list of additional hostnames the proxy permits | +| `SKILL_DEPS_PATH` | `/opt/skill-deps` | Host-side directory mounted into every sandbox so install caches compound across runs | + ### Logging Configuration | Variable | Default | Description | @@ -306,42 +329,18 @@ All 13 language runtimes are pre-installed in the unified Docker image. 
No per-l - **D** (`d`): LDC - **Bash** (`bash`): GNU Bash -## Configuration Management Tools - -### Command Line Tool - -Use the configuration management script: - -```bash -# Show configuration summary -python config_manager.py summary - -# Validate configuration -python config_manager.py validate - -# Check security settings -python config_manager.py security - -# Generate complete .env template -python config_manager.py template - -# Export configuration as JSON -python config_manager.py export -``` - -### Programmatic Access +## Programmatic Access ```python from src.config import settings -from src.utils.config_validator import validate_configuration -# Access configuration +# Flat access (backward compatible) print(f"API Port: {settings.api_port}") print(f"Max Memory: {settings.max_memory_mb}MB") -# Validate configuration -if validate_configuration(): - print("Configuration is valid") +# Grouped access +print(f"S3 endpoint: {settings.s3.endpoint_url}") +print(f"Redis URL: {settings.redis.get_url()}") ``` ## Production Deployment Checklist @@ -380,20 +379,12 @@ if validate_configuration(): ### Infrastructure - [ ] Secure Redis with authentication -- [ ] Secure MinIO with proper access keys +- [ ] Secure S3 storage with proper access keys - [ ] Ensure SYS_ADMIN capability is set for nsjail -- [ ] Set up backup for Redis and MinIO data +- [ ] Set up backup for Redis and S3 data ## Troubleshooting -### Configuration Validation Errors - -Run the validation tool to identify issues: - -```bash -python config_manager.py validate -``` - ### Common Issues 1. **Redis Connection Failed** @@ -401,8 +392,8 @@ python config_manager.py validate - Verify host, port, and credentials - Check network connectivity -2. **MinIO Connection Failed** - - Verify MinIO server is accessible +2. 
**S3 Connection Failed** + - Verify S3 endpoint is accessible - Check access key and secret key - Ensure bucket exists or can be created diff --git a/docs/DEVELOPMENT.md b/docs/DEVELOPMENT.md index c0c3f3f..ac65c9f 100644 --- a/docs/DEVELOPMENT.md +++ b/docs/DEVELOPMENT.md @@ -7,16 +7,16 @@ This document provides detailed instructions for setting up the development envi ### Prerequisites - Python 3.11+ -- Docker and docker compose (for running the API container, Redis, and MinIO) +- Docker and docker compose (for running the API container, Redis, and Garage) - Redis -- MinIO (or S3-compatible storage) +- Garage (or S3-compatible storage) ### Installation Steps 1. **Clone the repository** ```bash - git clone https://github.com/LibreCodeInterpreter/LibreCodeInterpreter.git + git clone https://github.com/usnavy13/LibreCodeInterpreter.git cd LibreCodeInterpreter ``` @@ -61,7 +61,7 @@ For detailed testing instructions, please refer to [TESTING.md](TESTING.md). # Run unit tests pytest tests/unit/ -# Run integration tests (requires running API container, Redis, MinIO) +# Run integration tests (in-process TestClient, no running stack needed) pytest tests/integration/ # Run all tests with coverage @@ -79,13 +79,16 @@ not separately published packages. docker build --target app -t code-interpreter:nsjail . ``` -`docker compose up -d` continues to work for local development. Consumers should prefer `docker-compose.prod.yml`, which contains no `build:` step and always pulls the published image. +By default `docker compose up -d` pulls the published image (`ghcr.io/usnavy13/librecodeinterpreter:main`). 
To run your locally built image instead, override `API_IMAGE`: ```bash +# Run a locally built image API_IMAGE=code-interpreter:nsjail docker compose up -d -# Consumer path using the published GHCR image -docker compose -f docker-compose.prod.yml up -d +# Or pull a specific published tag +API_IMAGE=ghcr.io/usnavy13/librecodeinterpreter:<tag> docker compose up -d ``` +For repeated local-build workflows, copy `docker-compose.override.example.yml` to `docker-compose.override.yml` and uncomment the `build:` block so `docker compose up --build -d` rebuilds from your checkout automatically. + For more details on the sandbox architecture, see [ARCHITECTURE.md](ARCHITECTURE.md). diff --git a/docs/METRICS.md b/docs/METRICS.md index b0ff1fb..07839aa 100644 --- a/docs/METRICS.md +++ b/docs/METRICS.md @@ -8,7 +8,7 @@ Track per-execution, per-language, and per-API-key metrics. |----------|-------------|---------| | `DETAILED_METRICS_ENABLED` | Enable detailed metrics | `true` | | `METRICS_BUFFER_SIZE` | In-memory buffer size | `10000` | -| `METRICS_ARCHIVE_ENABLED` | Archive to MinIO | `true` | +| `METRICS_ARCHIVE_ENABLED` | Archive to S3 | `true` | | `METRICS_ARCHIVE_RETENTION_DAYS` | Archive retention | `90` days | ## API Endpoints diff --git a/docs/PERFORMANCE.md b/docs/PERFORMANCE.md index 164b74c..bcf72b8 100644 --- a/docs/PERFORMANCE.md +++ b/docs/PERFORMANCE.md @@ -16,7 +16,7 @@ The following metrics represent typical performance with all optimizations enabl | **Sandbox acquisition** | ~3ms | From pre-warmed pool | | **Cold start (no pool)** | 500-2000ms | First request or pool exhausted | | **State serialization** | 1-25ms | Depends on state size | -| **File upload (1MB)** | 50-100ms | To MinIO | +| **File upload (1MB)** | 50-100ms | To S3 | ### Performance Comparison @@ -138,7 +138,7 @@ For optimal state persistence performance: # Faster state operations (smaller states) STATE_MAX_SIZE_MB=10 -# Less frequent archival (reduces MinIO operations) +# Less frequent archival
(reduces S3 operations) STATE_ARCHIVE_CHECK_INTERVAL_SECONDS=600 # Longer Redis TTL (fewer archive restorations) @@ -180,7 +180,7 @@ Sandbox acquire ~3ms Code execution ~50ms Output file detection ~5ms File download from sandbox ~10ms -MinIO upload ~20ms +S3 upload ~20ms Response building ~2ms ────────────────────────────────── Total ~104ms @@ -214,7 +214,7 @@ For high-throughput deployments: 1. **Multiple API instances**: Load balance across instances 2. **Shared Redis**: All instances use same Redis for sessions/state -3. **Shared MinIO**: All instances use same MinIO for files +3. **Shared S3**: All instances use same S3 storage for files 4. **Separate hosts**: Distribute sandbox load across API instances ``` @@ -231,13 +231,13 @@ For high-throughput deployments: └───────────────┼───────────────┘ ┌──────┴──────┐ │ Redis │ - │ MinIO │ + │ S3 │ └─────────────┘ ``` ### Resource Planning -| Daily Requests | Instances | Pool Size (per) | Redis Memory | MinIO Storage | +| Daily Requests | Instances | Pool Size (per) | Redis Memory | S3 Storage | | -------------- | --------- | --------------- | ------------ | ------------- | | 1,000 | 1 | 5 Python | 256MB | 1GB | | 10,000 | 2 | 10 Python | 512MB | 5GB | diff --git a/docs/PROGRAMMATIC_TOOL_CALLING.md b/docs/PROGRAMMATIC_TOOL_CALLING.md index 4217f9d..8baf0bb 100644 --- a/docs/PROGRAMMATIC_TOOL_CALLING.md +++ b/docs/PROGRAMMATIC_TOOL_CALLING.md @@ -6,10 +6,12 @@ ## Overview -Programmatic Tool Calling enables Python code to orchestrate multiple agent tools within a single execution. Instead of the LLM making individual tool calls one at a time, it writes Python code that calls multiple tools, processes results, uses loops/conditionals, and runs tools in parallel. +Programmatic Tool Calling enables sandboxed code (Python or bash) to orchestrate multiple agent tools within a single execution. 
Instead of the LLM making individual tool calls one at a time, it writes code that calls multiple tools, processes results, uses loops/conditionals, and runs tools in parallel. **Key Benefit**: Reduces LLM round-trips and token usage by letting code handle complex multi-tool workflows. +**Languages supported**: Python (default, via `docker/ptc_server.py`) and bash (via `docker/ptc_bash_server.py`). The bash server uses two FIFOs so user shell code can call a tool with `tool_name '{"arg":"..."}'` and read the JSON response from stdout. + --- ## Table of Contents @@ -518,17 +520,16 @@ async def retrieve_execution_state(execution_id: str) -> PausedExecution: 3. **Monitoring**: Metrics for round-trips, timeouts, errors 4. **Load testing**: Concurrent multi-round executions -### Estimated Scope - -| Component | Files to Create/Modify | -| ----------------------- | ------------------------------------------------- | -| API endpoint | `src/api/programmatic.py` (new) | -| Request/Response models | `src/models/programmatic.py` (new) | -| Execution orchestrator | `src/services/programmatic_orchestrator.py` (new) | -| State management | `src/services/continuation.py` (new) | -| Python wrapper | `src/services/execution/python_wrapper.py` (new) | -| Tool stub generator | `src/services/execution/tool_stubs.py` (new) | -| Tests | `tests/integration/test_programmatic.py` (new) | +### Implementation Map (current code) + +| Component | File | +| ----------------------------- | --------------------------------------------------- | +| API endpoint | `src/api/programmatic.py` | +| Request/Response models | `src/models/programmatic.py` | +| Execution + continuation svc | `src/services/programmatic.py` (`ProgrammaticService`, `PausedContext`) | +| In-sandbox PTC server (Python)| `docker/ptc_server.py` | +| In-sandbox PTC server (Bash) | `docker/ptc_bash_server.py` | +| Live tests | `tests/functional/test_client_replay.py` | --- diff --git a/docs/SECURITY.md b/docs/SECURITY.md index 
09f6e9b..8373b1d 100644 --- a/docs/SECURITY.md +++ b/docs/SECURITY.md @@ -128,8 +128,8 @@ Python state persistence introduces additional security considerations: #### Storage Security - **Redis encryption**: Consider enabling Redis TLS in production for encrypted state storage -- **MinIO encryption**: Enable server-side encryption for archived states -- **TTL-based cleanup**: States automatically expire (2 hours in Redis, 7 days in MinIO archives) +- **S3 encryption**: Enable server-side encryption for archived states +- **TTL-based cleanup**: States automatically expire (2 hours in Redis; S3 archives after `STATE_ARCHIVE_TTL_DAYS`, 1 day by default) - **Size limits**: `STATE_MAX_SIZE_MB` prevents denial-of-service via large states #### Session Isolation @@ -153,7 +153,7 @@ This ensures each execution starts with a clean namespace. State persistence operations are logged: - State save (size, session_id) -- State load (session_id, source: redis/minio) +- State load (session_id, source: redis/s3) - State archive (session_id) - State size limit exceeded (warning) diff --git a/docs/STATE_PERSISTENCE.md b/docs/STATE_PERSISTENCE.md index c92d094..3e342bf 100644 --- a/docs/STATE_PERSISTENCE.md +++ b/docs/STATE_PERSISTENCE.md @@ -23,9 +23,9 @@ State persistence uses a hybrid storage architecture: │ Hybrid State Storage │ ├─────────────────────────────────────────────────────────────────────────────┤ │ │ -│ Hot Storage (Redis) Cold Storage (MinIO) │ +│ Hot Storage (Redis) Cold Storage (S3) │ │ ┌─────────────────────┐ ┌─────────────────────┐ │ -│ │ TTL: 2 hours │ Archive │ TTL: 7 days │ │ +│ │ TTL: 2 hours │ Archive │ TTL: 1 day │ │ │ │ Access: ~1ms │ ──────────▶ │ Access: ~50ms │ │ │ │ State: compressed │ (after │ State: compressed │ │ │ │ lz4 + base64 │ 1 hour │ lz4 + base64 │ │ @@ -57,7 +57,7 @@ State persistence uses a hybrid storage architecture: POST /exec {"lang": "py", "code": "print(x)", "session_id": "abc123"} → StateService loads state from Redis - → If not in
Redis, checks S3 archives → State deserialized into REPL namespace → Code executes with existing variables → Updated state saved back to Redis @@ -95,7 +95,7 @@ CleanupService (every 5 min) │ └── If inactive > 1 hour: │ - ├── Upload to MinIO (state-archive/{session_id}) + ├── Upload to S3 (state-archive/{session_id}) │ └── Keep in Redis (will expire at 2 hours) ``` @@ -103,7 +103,7 @@ CleanupService (every 5 min) When a session resumes after Redis expiry: 1. StateService checks Redis → not found -2. StateArchivalService checks MinIO → found +2. StateArchivalService checks S3 → found 3. State restored to Redis for fast future access --- @@ -116,14 +116,14 @@ When a session resumes after Redis expiry: | --------------------------- | ------- | ------------------------------------ | | `STATE_PERSISTENCE_ENABLED` | `true` | Enable/disable state persistence | | `STATE_TTL_SECONDS` | `7200` | Redis TTL (default 2 hours) | -| `STATE_MAX_SIZE_MB` | `50` | Maximum serialized state size | +| `STATE_MAX_REDIS_SIZE_MB` | `100` | Max raw state size in Redis (MB). 
Larger states bypass Redis and go straight to S3 | | `STATE_CAPTURE_ON_ERROR` | `false` | Save state even on execution failure | ### State Archival Settings | Variable | Default | Description | | -------------------------------------- | ------- | -------------------------------------- | -| `STATE_ARCHIVE_ENABLED` | `true` | Enable MinIO archival | +| `STATE_ARCHIVE_ENABLED` | `true` | Enable S3 cold storage archival | | `STATE_ARCHIVE_AFTER_SECONDS` | `3600` | Archive after this inactivity (1 hour) | | `STATE_ARCHIVE_TTL_DAYS` | `1` | Keep archives for this many days (24h) | | `STATE_ARCHIVE_CHECK_INTERVAL_SECONDS` | `300` | Check frequency (5 minutes) | @@ -139,7 +139,7 @@ STATE_PERSISTENCE_ENABLED=false When disabled: - Each Python execution starts with a clean namespace -- No state is saved to Redis or MinIO +- No state is saved to Redis or S3 - `session_id` in requests is ignored for state (it still scopes files and session continuity) --- @@ -318,14 +318,13 @@ encoded = base64.b64encode(compressed).decode('utf-8') ### State Size Limits -The maximum state size is configurable via `STATE_MAX_SIZE_MB` (default 50MB). +The maximum *Redis* state size is configurable via `STATE_MAX_REDIS_SIZE_MB` (default 100 MB of raw bytes). -If state exceeds this limit: +When state exceeds this limit: -1. A warning is logged -2. State is NOT saved -3. Execution still succeeds -4. Next execution starts fresh +1. The state bypasses Redis hot storage and is written directly to S3 cold storage +2. Subsequent executions reload it from S3 (slightly higher latency than Redis) +3. 
Execution still succeeds normally **Common causes of large state:** @@ -337,14 +336,14 @@ If state exceeds this limit: - Save large data to files instead of variables - Clear unused variables: `del large_variable` -- Increase limit if needed: `STATE_MAX_SIZE_MB=100` +- Tune the Redis ceiling if needed: `STATE_MAX_REDIS_SIZE_MB=200` ### Storage Keys | Storage | Key Pattern | Content | | ------- | ---------------------------- | --------------------------- | | Redis | `state:{session_id}` | Compressed state + metadata | -| MinIO | `state-archive/{session_id}` | Compressed state (archived) | +| S3 | `state-archive/{session_id}` | Compressed state (archived) | --- @@ -396,8 +395,8 @@ Ensure sandboxes have sufficient memory for state operations. - Session IDs are case-sensitive 3. **Check state size:** - - Large states may exceed `STATE_MAX_SIZE_MB` - - Check logs for "State size exceeds limit" warnings + - Very large states bypass Redis (`STATE_MAX_REDIS_SIZE_MB`) and may take longer to reload from S3 + - Check logs for state-size related warnings ### State Restored but Variables Missing @@ -420,10 +419,10 @@ curl -X GET https://localhost/health/redis \ ### Archive Not Working -1. **Check MinIO connectivity:** +1. **Check S3 connectivity:** ```bash - curl -X GET https://localhost/health/minio \ + curl -X GET https://localhost/health/s3 \ -H "x-api-key: $API_KEY" ``` diff --git a/docs/TESTING.md b/docs/TESTING.md index 15feed2..76de238 100644 --- a/docs/TESTING.md +++ b/docs/TESTING.md @@ -14,7 +14,7 @@ tests/ ### `tests/unit/` -- Mock Redis, MinIO, sandboxing, and other infrastructure. +- Mock Redis, S3, sandboxing, and other infrastructure. - Fast feedback for service logic. - No external stack required. 
@@ -99,10 +99,7 @@ If a mocked integration test passes but a `live_api` or `client_replay` test fai ## CI/CD Test Tiers -GitHub Actions now uses three workflow tiers: +GitHub Actions uses two workflow tiers: -- `ci.yml`: required PR checks for static analysis, unit tests, `contract_only` integration tests, amd64 app build validation, amd64 live smoke tests, and amd64 `client_replay` -- `release.yml`: publishes the multi-arch app image used by `docker-compose.prod.yml` after per-arch smoke validation -- `nightly.yml`: builds the app image locally and runs the full/slow live validation suites - -The amd64 live smoke suite is the required compatibility gate on pull requests. Slow live scenarios stay in nightly validation so the PR path keeps the authoritative checks without forcing the heaviest runtime coverage into every change. +- `ci.yml`: required PR checks for static analysis, unit tests, and integration tests +- `release.yml`: publishes the multi-arch app image consumed by `docker-compose.yml` (default `API_IMAGE`) after per-arch smoke validation diff --git a/garage.toml b/garage.toml new file mode 100644 index 0000000..dbfb1c3 --- /dev/null +++ b/garage.toml @@ -0,0 +1,14 @@ +metadata_dir = "/var/lib/garage/meta" +data_dir = "/var/lib/garage/data" +db_engine = "sqlite" +replication_factor = 1 + +rpc_bind_addr = "[::]:3901" +rpc_secret = "0000000000000000000000000000000000000000000000000000000000000000" + +[s3_api] +s3_region = "garage" +api_bind_addr = "[::]:3900" + +[admin] +api_bind_addr = "[::]:3903" diff --git a/requirements.txt b/requirements.txt index 0d81d69..c8c1ac0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ requests>=2.31.0,<3 # Core API framework -fastapi==0.129.0 -uvicorn[standard]==0.41.0 +fastapi==0.135.2 +uvicorn[standard]==0.42.0 # Data validation and serialization pydantic==2.12.5 @@ -11,22 +11,22 @@ pydantic-settings==2.13.1 httpx==0.28.1 # Redis for session management -redis==7.2.0 +redis==7.4.0 # SQLite async support 
for metrics aiosqlite>=0.19.0 -# MinIO/S3 client -minio==7.2.20 +# S3 storage client (Garage/any S3-compatible backend) +boto3>=1.35.0 # Date/time parsing utilities python-dateutil==2.9.0.post0 # Testing framework -pytest==9.0.2 +pytest==9.0.3 pytest-asyncio==1.3.0 -pytest-cov==4.1.0 +pytest-cov==7.1.0 pytest-mock==3.15.1 # Development tools @@ -35,7 +35,7 @@ flake8==7.3.0 mypy==1.19.1 # Environment management -python-dotenv==1.2.1 +python-dotenv==1.2.2 # Logging structlog==25.5.0 diff --git a/src/api/admin.py b/src/api/admin.py index c0d95ef..32d1291 100644 --- a/src/api/admin.py +++ b/src/api/admin.py @@ -1,16 +1,24 @@ """Admin API endpoints for dashboard.""" +import os +import shutil +from pathlib import Path from typing import List, Optional, Dict, Any from datetime import datetime, timedelta, timezone + +import structlog from fastapi import APIRouter, HTTPException, Depends, Query from pydantic import BaseModel, Field +from ..config import settings from ..dependencies.auth import verify_master_key from ..services.api_key_manager import get_api_key_manager from ..services.metrics import metrics_service as unified_metrics from ..services.health import health_service from ..models.api_key import RateLimits as RateLimitsModel +logger = structlog.get_logger(__name__) + router = APIRouter(prefix="/admin", tags=["admin"]) @@ -202,3 +210,111 @@ async def get_admin_stats( "period_hours": hours, "timestamp": now.isoformat(), } + + +@router.get("/skill-deps", summary="Inspect persistent skill-deps cache") +async def get_skill_deps_status(_: str = Depends(verify_master_key)): + """Report on the persistent /opt/skill-deps cache. + + Returns size and per-ecosystem subdirectory counts so operators can see + what's accumulated. Useful before deciding whether to purge. 
+ """ + deps_root = Path(settings.skill_deps_path) + if not deps_root.exists(): + return { + "path": str(deps_root), + "exists": False, + "enabled": settings.enable_sandbox_network, + "total_bytes": 0, + "ecosystems": {}, + } + + def _dir_size(p: Path) -> int: + total = 0 + for root, _dirs, files in os.walk(str(p)): + for f in files: + try: + total += os.path.getsize(os.path.join(root, f)) + except OSError: + pass + return total + + ecosystems: Dict[str, Dict[str, Any]] = {} + for sub in ("python", "node", "go", "cargo"): + sp = deps_root / sub + if sp.exists(): + ecosystems[sub] = { + "exists": True, + "bytes": _dir_size(sp), + } + else: + ecosystems[sub] = {"exists": False, "bytes": 0} + + return { + "path": str(deps_root), + "exists": True, + "enabled": settings.enable_sandbox_network, + "total_bytes": _dir_size(deps_root), + "ecosystems": ecosystems, + } + + +@router.post("/skill-deps/purge", summary="Wipe the persistent skill-deps cache") +async def purge_skill_deps(_: str = Depends(verify_master_key)): + """Delete every package the sandbox has installed. + + Use when the cache is bloated, when a bad install needs eviction, or + after a suspected supply-chain incident. Next sandbox install cold-starts. + The directory itself is recreated empty (sticky + world-writable) so + sandboxes can immediately install fresh. + """ + deps_root = Path(settings.skill_deps_path) + if not deps_root.exists(): + return {"purged": True, "freed_bytes": 0, "path": str(deps_root)} + + freed = 0 + errors: List[str] = [] + try: + for entry in deps_root.iterdir(): + try: + if entry.is_dir() and not entry.is_symlink(): + # Tally before nuking for the response. 
+ for root, _dirs, files in os.walk(str(entry)): + for f in files: + try: + freed += os.path.getsize(os.path.join(root, f)) + except OSError: + pass + shutil.rmtree(str(entry)) + else: + try: + freed += entry.stat().st_size + except OSError: + pass + entry.unlink() + except OSError as exc: + errors.append(f"{entry}: {exc}") + except OSError as exc: + raise HTTPException( + status_code=500, + detail=f"Could not enumerate {deps_root}: {exc}", + ) + + # Reset perms so future installs from sandbox uids work. + try: + os.chmod(str(deps_root), 0o1777) # nosec B103 + except OSError: + pass + + logger.info( + "Skill-deps cache purged", + path=str(deps_root), + freed_bytes=freed, + errors=len(errors), + ) + return { + "purged": True, + "path": str(deps_root), + "freed_bytes": freed, + "errors": errors, + } diff --git a/src/api/exec.py b/src/api/exec.py index 67288aa..2dc6952 100644 --- a/src/api/exec.py +++ b/src/api/exec.py @@ -40,7 +40,11 @@ _KEEPALIVE_INTERVAL = 3 -@router.post("/exec", responses={200: {"model": ExecResponse}}) +@router.post( + "/exec", + responses={200: {"model": ExecResponse}}, + response_model_exclude_none=True, +) async def execute_code( request: ExecRequest, http_request: Request, @@ -60,7 +64,7 @@ async def execute_code( within the same session, whether the caller supplies `session_id` directly or the orchestrator reuses a session through same-user file references or `entity_id` continuity. State is stored in Redis (2 hour TTL) with - automatic archival to MinIO for long-term storage (7 day TTL). + automatic archival to S3 for long-term storage (configurable TTL). Returns a streaming response that sends keepalive whitespace before the JSON body to prevent client socket timeouts during long operations. 
@@ -175,7 +179,7 @@ async def _stream_response(): request_id=request_id, session_id=response.session_id, ) - yield response.model_dump_json().encode() + yield response.model_dump_json(exclude_none=True).encode() return StreamingResponse( _stream_response(), diff --git a/src/api/files.py b/src/api/files.py index c49f904..59f85fa 100644 --- a/src/api/files.py +++ b/src/api/files.py @@ -9,11 +9,21 @@ # Third-party imports import structlog -from fastapi import APIRouter, HTTPException, UploadFile, File, Form, Query +from fastapi import ( + APIRouter, + HTTPException, + Request, + UploadFile, + File, + Form, + Query, +) from fastapi.responses import StreamingResponse +from starlette.datastructures import UploadFile as StarletteUploadFile from unidecode import unidecode # Local application imports +from ..config import settings from ..dependencies import FileServiceDep, SessionServiceDep from ..models import SessionCreate from ..services.execution.output import OutputProcessor @@ -125,13 +135,14 @@ async def upload_file( # Sanitize filename to match what will be used in container sanitized_name = OutputProcessor.sanitize_filename(file.filename) - # Store with sanitized name so MinIO, sandbox, and cleanup all use the same name + # Store with sanitized name so S3, sandbox, and cleanup all use the same name file_id = await file_service.store_uploaded_file( session_id=session_id, filename=sanitized_name, content=content, content_type=file.content_type, is_agent_file=is_agent_file, + original_filename=file.filename, ) uploaded_files.append( @@ -175,6 +186,166 @@ async def upload_file( raise HTTPException(status_code=500, detail="Failed to upload files") +# TODO(librechat-compat): /upload/batch duplicates the per-file storage flow +# from /upload above. Kept separate to avoid touching the stable single-file +# endpoint while we prove out the batch path. 
If both endpoints stay in +# production unchanged for a release cycle, factor a shared +# `_store_files_to_session()` helper that both call. +@router.post("/upload/batch") +async def upload_files_batch( + request: Request, + file_service: FileServiceDep = None, + session_service: SessionServiceDep = None, +): + """Batch file upload — LibreChat compatible. + + LibreChat (`crud.js:118` in librechat) sends multi-file uploads here as + multipart with the field name `file` repeated once per file. Per-file + failures are reported individually in the response rather than failing + the whole batch — LibreChat's caller distinguishes `succeeded`/`failed` + counts and reads each `files[].status`. + + Filenames may include subdirectories (e.g. `skills/foo/SKILL.md` from + skill priming). Subdirectory structure is preserved via + `OutputProcessor.sanitize_relative_path()`; LibreChat then echoes them + back to its agent code, which checks `f.filename.endsWith('/SKILL.md')`. + """ + form = await request.form() + upload_files: List[UploadFile] = [ + v + for k, v in form.multi_items() + if k == "file" and isinstance(v, StarletteUploadFile) + ] + + if not upload_files: + # LibreChat guards with `if (filesToUpload.length === 0) return null` + # before calling, so reaching this branch means a misconfigured + # client. Match the existing /upload contract for missing files. + raise HTTPException( + status_code=422, + detail={ + "error": "Request validation failed", + "error_type": "validation", + "details": [ + { + "field": "body -> file", + "message": "At least one file required", + "code": "missing", + } + ], + }, + ) + + if len(upload_files) > settings.max_files_per_session: + raise HTTPException( + status_code=413, + detail=( + f"Too many files in batch. Maximum " + f"{settings.max_files_per_session} files allowed per upload." 
+ ), + ) + + entity_id_raw = form.get("entity_id") + entity_id: Optional[str] = ( + entity_id_raw if isinstance(entity_id_raw, str) and entity_id_raw else None + ) + is_agent_file = entity_id is not None + + read_only_raw = form.get("read_only") + is_read_only = isinstance(read_only_raw, str) and read_only_raw.lower() in ( + "1", + "true", + "yes", + ) + + metadata = {"entity_id": entity_id} if entity_id else {} + session = await session_service.create_session(SessionCreate(metadata=metadata)) + session_id = session.session_id + + max_size_bytes = settings.max_file_size_mb * 1024 * 1024 + results: List[dict] = [] + succeeded = 0 + failed = 0 + + for upload in upload_files: + original_filename = upload.filename or "unknown" + try: + content = await upload.read() + size = len(content) + if size > max_size_bytes: + raise ValueError(f"File exceeds {settings.max_file_size_mb}MB limit") + # Skill-priming uploads (entity_id set) come from the LibreChat host + # itself, not end users. Skill bundles legitimately ship arbitrary + # extensions (.xsd schemas, .toml configs, .lock files, .d.ts type + # defs, etc.) — extending the user-facing allowlist for every new + # skill is unsustainable. The sandbox is the actual security + # boundary; extension filtering exists to stop end-user uploads + # of executables via /upload, not to second-guess the LibreChat + # host's skill loader. Skip the extension check for the agent path. + if not is_agent_file and not settings.is_file_allowed(original_filename): + raise ValueError(f"File type not allowed: {original_filename}") + + # Preserve subdirectory structure (LibreChat skill bundles ship + # `skills/<name>/SKILL.md` etc.) while sanitizing each segment.
+ stored_filename = OutputProcessor.sanitize_relative_path(original_filename) + + file_id = await file_service.store_uploaded_file( + session_id=session_id, + filename=stored_filename, + content=content, + content_type=upload.content_type, + is_agent_file=is_agent_file, + is_read_only=is_read_only, + original_filename=original_filename, + ) + + results.append( + { + "status": "success", + "fileId": file_id, + "filename": stored_filename, + } + ) + succeeded += 1 + except Exception as exc: + logger.warning( + "Batch upload entry failed", + filename=original_filename, + error=str(exc), + ) + results.append( + { + "status": "error", + "filename": original_filename, + "error": str(exc), + } + ) + failed += 1 + + if failed == 0: + message = "success" + elif succeeded == 0: + message = "error" + else: + message = "partial" + + logger.info( + "Batch upload completed", + session_id=session_id, + entity_id=entity_id, + succeeded=succeeded, + failed=failed, + ) + + return { + "message": message, + "session_id": session_id, + "files": results, + "succeeded": succeeded, + "failed": failed, + } + + @router.get("/files/{session_id}") async def list_files( session_id: str, @@ -244,7 +415,8 @@ async def list_files( "etag": f'"{file_info.file_id}"', "metadata": { "content-type": file_info.content_type, - "original-filename": file_info.filename, + "original-filename": file_info.original_filename + or file_info.filename, }, "contentType": file_info.content_type, } @@ -257,6 +429,47 @@ async def list_files( raise HTTPException(status_code=404, detail="Session not found") +@router.get("/sessions/{session_id}/objects/{file_id}") +async def get_session_object_metadata( + session_id: str, + file_id: str, + file_service: FileServiceDep = None, +): + """Session-liveness probe used by LibreChat's `primeFiles()`. 
+ + LibreChat's `process.js:363` reads `lastModified` only — if the value + parses to >23h ago (or this endpoint 404s), it treats the session as + expired and re-uploads the file from its own storage. We return the + file's `created_at`, normalized to UTC + `Z`, matching the format used + by `GET /files/{session_id}?detail=summary`. + """ + try: + file_info = await file_service.get_file_info(session_id, file_id) + except Exception as e: + logger.warning( + "Failed to look up session object metadata", + session_id=session_id, + file_id=file_id, + error=str(e), + ) + raise HTTPException(status_code=404, detail="File not found") + + if file_info is None: + raise HTTPException(status_code=404, detail="File not found") + + dt = file_info.created_at + if isinstance(dt, str): + try: + dt = datetime.fromisoformat(dt) + except ValueError: + dt = datetime.now(timezone.utc) + if dt.tzinfo is None: + dt = dt.replace(tzinfo=timezone.utc) + + last_modified = dt.isoformat(timespec="milliseconds").replace("+00:00", "Z") + return {"lastModified": last_modified} + + @router.get("/download/{session_id}/{file_id}") async def download_file( session_id: str, file_id: str, file_service: FileServiceDep = None diff --git a/src/api/health.py b/src/api/health.py index 3ff6ff8..e7c43e3 100644 --- a/src/api/health.py +++ b/src/api/health.py @@ -116,11 +116,11 @@ async def redis_health_check(_: str = Depends(verify_api_key)): ) -@router.get("/health/minio", summary="MinIO health check") -async def minio_health_check(_: str = Depends(verify_api_key)): - """Check MinIO/S3 connectivity and performance.""" +@router.get("/health/s3", summary="S3 storage health check") +async def s3_health_check(_: str = Depends(verify_api_key)): + """Check S3 storage connectivity and performance.""" try: - result = await health_service.check_minio() + result = await health_service.check_s3() if result.status == HealthStatus.UNHEALTHY: return JSONResponse(status_code=503, content=result.to_dict()) @@ -128,13 +128,13 
@@ async def minio_health_check(_: str = Depends(verify_api_key)): return JSONResponse(status_code=200, content=result.to_dict()) except Exception as e: - logger.error("MinIO health check failed", error=str(e)) + logger.error("S3 health check failed", error=str(e)) return JSONResponse( status_code=503, content={ - "service": "minio", + "service": "s3", "status": "unhealthy", - "error": str(e) if settings.api_debug else "MinIO check failed", + "error": str(e) if settings.api_debug else "S3 check failed", }, ) diff --git a/src/api/programmatic.py b/src/api/programmatic.py index 2697b80..3999f50 100644 --- a/src/api/programmatic.py +++ b/src/api/programmatic.py @@ -107,6 +107,7 @@ async def execute_programmatic( "PTC execution request", request_id=request_id, session_id=session_id[:12], + lang=request.lang, code_length=len(request.code), tools_count=len(request.tools), ) @@ -117,6 +118,7 @@ async def execute_programmatic( session_id=session_id, timeout=_timeout_ms_to_seconds(request.timeout), files=request.files, + lang=request.lang, ) # Ensure session_id is set in response diff --git a/src/config/__init__.py b/src/config/__init__.py index 63aef2a..bd13e27 100644 --- a/src/config/__init__.py +++ b/src/config/__init__.py @@ -29,7 +29,7 @@ # Import grouped configurations from .api import APIConfig from .redis import RedisConfig -from .minio import MinIOConfig +from .s3 import S3Config from .security import SecurityConfig from .resources import ResourcesConfig from .logging import LoggingConfig @@ -92,6 +92,46 @@ class Settings(BaseSettings): rate_limit_enabled: bool = Field( default=True, description="Enable per-key rate limiting for Redis-managed keys" ) + auth_enabled: bool = Field( + default=True, + description=( + "Require x-api-key (or equivalent Basic auth) on user endpoints. " + "Set false when running behind a trusted network boundary. " + "Admin endpoints always require MASTER_API_KEY regardless." 
+ ), + ) + + # Sandbox egress (skill installs) + enable_sandbox_network: bool = Field( + default=False, + description=( + "Allow sandboxes to reach the internet via an inline allowlist proxy. " + "Required for skills that pip/npm/go/cargo install dependencies at " + "runtime. Outbound traffic is restricted to package registries; " + "everything else is refused." + ), + ) + sandbox_egress_port: int = Field( + default=18443, + ge=1024, + le=65535, + description="Port the inline egress proxy binds to on 127.0.0.1.", + ) + sandbox_egress_allowlist: Optional[str] = Field( + default=None, + description=( + "Comma-separated list of additional hostnames the egress proxy " + "permits. Defaults already cover PyPI, npm, Go modules, and crates.io." + ), + ) + skill_deps_path: str = Field( + default="/opt/skill-deps", + description=( + "Host-side directory (mounted into every sandbox) that holds " + "user-installed skill dependencies. pip/npm/go/cargo are configured " + "to install here so the cache compounds across executions." 
+ ), + ) # Redis Configuration redis_host: str = Field(default="localhost") @@ -103,12 +143,13 @@ class Settings(BaseSettings): redis_socket_timeout: int = Field(default=5, ge=1) redis_socket_connect_timeout: int = Field(default=5, ge=1) - # MinIO/S3 Configuration - minio_endpoint: str = Field(default="localhost:9000") - minio_access_key: str = Field(default="test-access-key", min_length=3) - minio_secret_key: str = Field(default="test-secret-key", min_length=8) - minio_secure: bool = Field(default=False) - minio_bucket: str = Field(default="code-interpreter-files") + # S3 Storage Configuration + s3_endpoint: str = Field(default="localhost:3900") + s3_access_key: str = Field(default="test-access-key", min_length=3) + s3_secret_key: str = Field(default="test-secret-key", min_length=8) + s3_secure: bool = Field(default=False) + s3_bucket: str = Field(default="code-interpreter-files") + s3_region: str = Field(default="garage") # Sandbox (nsjail) Configuration nsjail_binary: str = Field( @@ -144,14 +185,19 @@ class Settings(BaseSettings): # Resource Limits - Files max_file_size_mb: int = Field(default=100, ge=1, le=500) - max_files_per_session: int = Field(default=50, ge=1, le=200) + # Default sized for skill bundles — Anthropic's pptx skill has 58 files + # (incl. ECMA XSD schemas under scripts/office/schemas/), docx and xlsx + # are similar. Legacy default of 50 caused 413s during /upload/batch + # priming. Ceiling raised to 1000 to leave headroom for multi-skill + # agents and future bundles. 
+ max_files_per_session: int = Field(default=300, ge=1, le=1000) max_output_files: int = Field(default=10, ge=1, le=50) max_filename_length: int = Field(default=255, ge=1, le=255) # Session Configuration session_ttl_hours: int = Field(default=24, ge=1, le=168) session_cleanup_interval_minutes: int = Field(default=60, ge=1, le=1440) - enable_orphan_minio_cleanup: bool = Field(default=True) + enable_orphan_s3_cleanup: bool = Field(default=True) # Sandbox Pool Configuration sandbox_pool_enabled: bool = Field(default=True) @@ -205,24 +251,24 @@ class Settings(BaseSettings): default=100, ge=1, le=500, - description="Max state size (MB, raw bytes) for Redis storage. Larger states go directly to MinIO", + description="Max state size (MB, raw bytes) for Redis storage. Larger states go directly to S3 cold storage", ) - # State Archival Configuration - Hybrid Redis + MinIO storage + # State Archival Configuration - Hybrid Redis + S3 storage state_archive_enabled: bool = Field( - default=True, description="Enable archiving inactive states from Redis to MinIO" + default=True, description="Enable archiving inactive states from Redis to S3" ) state_archive_after_seconds: int = Field( default=3600, ge=300, le=86400, - description="Archive state to MinIO after this many seconds of inactivity. Default: 1 hour", + description="Archive state to S3 after this many seconds of inactivity. Default: 1 hour", ) state_archive_ttl_days: int = Field( default=1, ge=1, le=30, - description="Keep archived states in MinIO for N days. Default: 1 (24 hours)", + description="Keep archived states in S3 for N days. 
Default: 1 (24 hours)", ) state_archive_check_interval_seconds: int = Field( default=300, @@ -404,12 +450,12 @@ def parse_api_keys(cls, v): """Parse comma-separated API keys into a list.""" return [key.strip() for key in v.split(",") if key.strip()] if v else None - @validator("minio_endpoint") - def validate_minio_endpoint(cls, v): - """Ensure MinIO endpoint doesn't include protocol.""" + @validator("s3_endpoint") + def validate_s3_endpoint(cls, v): + """Ensure S3 endpoint doesn't include protocol.""" if v.startswith(("http://", "https://")): raise ValueError( - "MinIO endpoint should not include protocol (use minio_secure instead)" + "S3 endpoint should not include protocol (use s3_secure instead)" ) return v @@ -460,14 +506,15 @@ def redis(self) -> RedisConfig: ) @property - def minio(self) -> MinIOConfig: - """Access MinIO configuration group.""" - return MinIOConfig( - minio_endpoint=self.minio_endpoint, - minio_access_key=self.minio_access_key, - minio_secret_key=self.minio_secret_key, - minio_secure=self.minio_secure, - minio_bucket=self.minio_bucket, + def s3(self) -> S3Config: + """Access S3 storage configuration group.""" + return S3Config( + s3_endpoint=self.s3_endpoint, + s3_access_key=self.s3_access_key, + s3_secret_key=self.s3_secret_key, + s3_secure=self.s3_secure, + s3_bucket=self.s3_bucket, + s3_region=self.s3_region, ) @property @@ -476,6 +523,7 @@ def security(self) -> SecurityConfig: return SecurityConfig( api_key=self.api_key, api_keys=self.api_keys if isinstance(self.api_keys, str) else None, + auth_enabled=self.auth_enabled, enable_network_isolation=self.enable_network_isolation, enable_filesystem_isolation=self.enable_filesystem_isolation, enable_security_logs=self.enable_security_logs, @@ -493,7 +541,7 @@ def resources(self) -> ResourcesConfig: max_filename_length=self.max_filename_length, session_ttl_hours=self.session_ttl_hours, session_cleanup_interval_minutes=self.session_cleanup_interval_minutes, - 
enable_orphan_minio_cleanup=self.enable_orphan_minio_cleanup, + enable_orphan_s3_cleanup=self.enable_orphan_s3_cleanup, ) @property @@ -566,7 +614,7 @@ def is_file_allowed(self, filename: str) -> bool: # Grouped configs "APIConfig", "RedisConfig", - "MinIOConfig", + "S3Config", "SecurityConfig", "ResourcesConfig", "LoggingConfig", diff --git a/src/config/minio.py b/src/config/minio.py deleted file mode 100644 index 11a8494..0000000 --- a/src/config/minio.py +++ /dev/null @@ -1,31 +0,0 @@ -"""MinIO/S3 configuration.""" - -from pydantic import Field, validator -from pydantic_settings import BaseSettings - - -class MinIOConfig(BaseSettings): - """MinIO/S3 storage settings.""" - - endpoint: str = Field(default="localhost:9000", alias="minio_endpoint") - access_key: str = Field( - default="test-access-key", min_length=3, alias="minio_access_key" - ) - secret_key: str = Field( - default="test-secret-key", min_length=8, alias="minio_secret_key" - ) - secure: bool = Field(default=False, alias="minio_secure") - bucket: str = Field(default="code-interpreter-files", alias="minio_bucket") - - @validator("endpoint") - def validate_endpoint(cls, v): - """Ensure endpoint doesn't include protocol.""" - if v.startswith(("http://", "https://")): - raise ValueError( - "MinIO endpoint should not include protocol (use secure instead)" - ) - return v - - class Config: - env_prefix = "" - extra = "ignore" diff --git a/src/config/resources.py b/src/config/resources.py index b4c5c44..8a57ab8 100644 --- a/src/config/resources.py +++ b/src/config/resources.py @@ -20,7 +20,7 @@ class ResourcesConfig(BaseSettings): # Session Lifecycle session_ttl_hours: int = Field(default=24, ge=1, le=168) session_cleanup_interval_minutes: int = Field(default=60, ge=1, le=1440) - enable_orphan_minio_cleanup: bool = Field(default=True) + enable_orphan_s3_cleanup: bool = Field(default=True) def get_session_ttl_minutes(self) -> int: """Get session TTL in minutes.""" diff --git a/src/config/s3.py 
b/src/config/s3.py new file mode 100644 index 0000000..0293279 --- /dev/null +++ b/src/config/s3.py @@ -0,0 +1,29 @@ +"""S3-compatible object storage configuration.""" + +from pydantic import Field +from pydantic_settings import BaseSettings + + +class S3Config(BaseSettings): + """S3-compatible storage settings (Garage, AWS S3, etc.).""" + + endpoint: str = Field(default="localhost:3900", alias="s3_endpoint") + access_key: str = Field( + default="test-access-key", min_length=3, alias="s3_access_key" + ) + secret_key: str = Field( + default="test-secret-key", min_length=8, alias="s3_secret_key" + ) + secure: bool = Field(default=False, alias="s3_secure") + bucket: str = Field(default="code-interpreter-files", alias="s3_bucket") + region: str = Field(default="garage", alias="s3_region") + + @property + def endpoint_url(self) -> str: + """Construct the full endpoint URL for boto3.""" + scheme = "https" if self.secure else "http" + return f"{scheme}://{self.endpoint}" + + class Config: + env_prefix = "" + extra = "ignore" diff --git a/src/config/security.py b/src/config/security.py index f61fbea..29d9fae 100644 --- a/src/config/security.py +++ b/src/config/security.py @@ -11,6 +11,7 @@ class SecurityConfig(BaseSettings): # API Key Authentication api_key: str = Field(default="test-api-key", min_length=16) api_keys: str | None = Field(default=None) # Comma-separated additional keys + auth_enabled: bool = Field(default=True) # Container Isolation enable_network_isolation: bool = Field(default=True) diff --git a/src/dependencies/auth.py b/src/dependencies/auth.py index e0042e6..e7db6ea 100644 --- a/src/dependencies/auth.py +++ b/src/dependencies/auth.py @@ -22,6 +22,11 @@ async def verify_api_key( Verify API key authentication. This dependency can be used in addition to middleware for extra security. """ + # Operator-controlled bypass: middleware seeds anonymous state, but this + # dependency may run before middleware in some test configurations. 
+ if not settings.auth_enabled: + return getattr(request.state, "api_key", "") + # First check if middleware already authenticated the request if hasattr(request.state, "authenticated") and request.state.authenticated: return getattr(request.state, "api_key", "") diff --git a/src/dependencies/services.py b/src/dependencies/services.py index 5de3703..521d661 100644 --- a/src/dependencies/services.py +++ b/src/dependencies/services.py @@ -53,7 +53,7 @@ def get_state_service() -> StateService: @lru_cache() def get_state_archival_service() -> StateArchivalService: - """Get state archival service instance for MinIO cold storage.""" + """Get state archival service instance for S3 cold storage.""" state_service = get_state_service() return StateArchivalService(state_service=state_service) diff --git a/src/main.py b/src/main.py index f1992b5..9df350e 100644 --- a/src/main.py +++ b/src/main.py @@ -139,6 +139,97 @@ async def _perform_health_checks() -> None: logger.error("Initial health checks failed", error=str(e)) +async def _startup_egress_proxy(app: FastAPI) -> None: + """Start the inline egress proxy if sandbox network access is enabled. + + Also prepares the persistent skill-deps directory: chmods it sticky + + world-writable so each language's sandbox uid can install packages + without root, while preserving package files across containers. + """ + if not settings.enable_sandbox_network: + return + + import os + from pathlib import Path + + from .services.sandbox.egress_proxy import DEFAULT_ALLOWLIST, EgressProxy + + deps_root = Path(settings.skill_deps_path) + try: + deps_root.mkdir(parents=True, exist_ok=True) + # Sticky + world-writable, like /tmp. The sandbox uid (e.g. 1001) needs + # to write here; keeping it sticky means one sandbox can't unlink + # another's files. 
+ os.chmod(str(deps_root), 0o1777) # nosec B103 + except OSError as exc: + logger.warning( + "Could not prepare skill-deps directory; " + "sandbox installs may fail with permission errors", + path=str(deps_root), + error=str(exc), + ) + + extra = ( + [h.strip() for h in settings.sandbox_egress_allowlist.split(",") if h.strip()] + if settings.sandbox_egress_allowlist + else [] + ) + proxy = EgressProxy( + port=settings.sandbox_egress_port, + allowlist=list(DEFAULT_ALLOWLIST) + extra, + ) + await proxy.start() + app.state.egress_proxy = proxy + + # Network-level enforcement so a malicious skill can't `socket.create_connection` + # around the application-level proxy. Without these iptables rules, sandbox + # processes — sharing the API container's net namespace — can directly reach + # Redis/S3 and any internal docker network. Refuse to enable network if the + # firewall can't be installed (better to fail loudly than to silently leak SSRF). + from .config.languages import SANDBOX_USER_ID + from .services.sandbox.egress_firewall import install_sandbox_egress_rules + + sandbox_uid = SANDBOX_USER_ID + firewall_ok = install_sandbox_egress_rules( + sandbox_uid=sandbox_uid, + proxy_port=settings.sandbox_egress_port, + ) + if not firewall_ok: + await proxy.stop() + app.state.egress_proxy = None + raise RuntimeError( + "ENABLE_SANDBOX_NETWORK=true but the iptables egress firewall could " + "not be installed. The container needs CAP_NET_ADMIN (cap_add: NET_ADMIN " + "in compose) and an iptables binary. Without these rules, sandboxes " + "could SSRF Redis/S3 via direct sockets — refusing to enable network." 
+ ) + + logger.info( + "Sandbox network access ENABLED via egress proxy + firewall", + port=settings.sandbox_egress_port, + skill_deps_path=str(deps_root), + sandbox_uid=sandbox_uid, + allowlist_extra=extra or None, + ) + + +async def _shutdown_egress_proxy(app: FastAPI) -> None: + proxy = getattr(app.state, "egress_proxy", None) + if proxy is None: + return + try: + await proxy.stop() + except Exception as exc: + logger.warning("Failed to stop egress proxy cleanly", error=str(exc)) + # Best-effort cleanup of the iptables rules so a restart doesn't accumulate. + try: + from .services.sandbox.egress_firewall import remove_existing_rules + + remove_existing_rules() + except Exception as exc: + logger.warning("Failed to remove sandbox egress firewall", error=str(exc)) + + async def _shutdown_services(app: FastAPI) -> None: """Stop monitoring services, sandbox pool, PTC contexts, and cleanup scheduler.""" try: @@ -188,13 +279,31 @@ async def lifespan(app: FastAPI): logger.warning("Using default API key - CHANGE THIS IN PRODUCTION!") if settings.api_debug: logger.warning("Debug mode is enabled - disable in production") + if not settings.auth_enabled: + logger.warning( + "AUTHENTICATION DISABLED via AUTH_ENABLED=false; " + "trusting network boundary for x-api-key endpoints " + "(master-key admin endpoints still require MASTER_API_KEY)" + ) if settings.master_api_key: logger.info("API key management enabled") logger.debug("Rate limiting", enabled=settings.rate_limit_enabled) + # Bash PTC requires `jq` inside the sandbox image. The Dockerfile installs + # it, but warn if running outside Docker so bash PTC failures aren't a + # surprise. 
+ import shutil + + if shutil.which("jq") is None: + logger.warning( + "jq not found on PATH; /exec/programmatic with lang='bash' will fail " + "(bash PTC tool wrappers depend on jq for JSON marshalling)" + ) + await _startup_monitoring(app) await _startup_cleanup_tasks() await _startup_sandbox_pool(app) + await _startup_egress_proxy(app) await _perform_health_checks() logger.info("Code Interpreter API startup completed") @@ -203,6 +312,7 @@ async def lifespan(app: FastAPI): logger.info("Shutting down Code Interpreter API") + await _shutdown_egress_proxy(app) await _shutdown_services(app) try: diff --git a/src/middleware/security.py b/src/middleware/security.py index 6a0caf7..9c89d51 100644 --- a/src/middleware/security.py +++ b/src/middleware/security.py @@ -94,7 +94,7 @@ async def send_wrapper(message): await self._validate_request(request) # Handle authentication (skip for excluded paths and OPTIONS) - if not self._should_skip_auth(request): + if not self._should_skip_auth(request, scope): await self._authenticate_request(request, scope) except HTTPException as e: @@ -138,18 +138,58 @@ async def _validate_request(self, request: Request): status_code=415, detail=f"Unsupported content type: {content_type}" ) - def _should_skip_auth(self, request: Request) -> bool: - """Check if authentication should be skipped.""" + def _should_skip_auth(self, request: Request, scope: dict) -> bool: + """Check if authentication should be skipped. + + Returns True for: + 1. Excluded paths (/health, /docs, /redoc, /openapi.json) or OPTIONS + requests — pure passthrough, no state seeding needed. + 2. /admin-dashboard HTML/static assets — UI loads without auth, but + API calls from the dashboard still require the master key. + 3. AUTH_ENABLED=false on non-admin paths — operator opted to trust + the network boundary. Master-key admin paths still authenticate. 
+ + For (2) and (3), seed scope["state"] with anonymous access markers so + downstream code that reads request.state.api_key_hash etc. doesn't + crash. + """ path = request.url.path if path in self.excluded_paths or request.method == "OPTIONS": return True - # Allow the admin dashboard UI (HTML/static assets) to load without auth. - # The dashboard itself has a login form where users enter the master key, - # which is then sent as a header with API requests. + + is_admin_path = path.startswith("/api/v1/admin") or path.startswith( + "/admin-dashboard" + ) + + # Dashboard UI loads without auth (its API calls below /api/v1/admin + # still authenticate via master key). if path.startswith("/admin-dashboard"): + self._grant_anonymous_access(scope) + return True + + # Operator-controlled bypass for trusted-network deployments. Admin + # paths are deliberately excluded so MASTER_API_KEY remains required. + if not is_admin_path and not settings.auth_enabled: + self._grant_anonymous_access(scope) return True + return False + @staticmethod + def _grant_anonymous_access(scope: dict) -> None: + """Seed scope state for bypassed-auth requests. + + Downstream code (exec endpoint, orchestrator metrics) reads + request.state.api_key_hash and request.state.is_env_key. Without + seeding, attribute access falls back to None which works but the + "anonymous" sentinel keeps metrics dashboards readable. 
+ """ + scope["state"] = scope.get("state", {}) + scope["state"]["authenticated"] = True + scope["state"]["api_key"] = "" + scope["state"]["api_key_hash"] = "anonymous" + scope["state"]["is_env_key"] = False + async def _authenticate_request(self, request: Request, scope: dict): """Handle API key authentication with rate limiting.""" # Extract API key using shared utility diff --git a/src/models/exec.py b/src/models/exec.py index a706c5b..dae1d20 100644 --- a/src/models/exec.py +++ b/src/models/exec.py @@ -2,7 +2,7 @@ # Standard library imports from datetime import datetime -from typing import List, Optional, Any +from typing import Dict, List, Optional, Any # Third-party imports from pydantic import BaseModel, Field @@ -15,6 +15,9 @@ class FileRef(BaseModel): name: str path: Optional[str] = None # Make path optional session_id: Optional[str] = None # Session ID for cross-message file persistence + inherited: Optional[bool] = None + entity_id: Optional[str] = None + modified_from: Optional[Dict[str, str]] = None class RequestFile(BaseModel): @@ -23,6 +26,7 @@ class RequestFile(BaseModel): id: str session_id: str name: str + entity_id: Optional[str] = None class ExecRequest(BaseModel): @@ -55,6 +59,12 @@ class ExecRequest(BaseModel): default_factory=list, description="Array of file references to be used during execution", ) + timeout: Optional[int] = Field( + default=None, + ge=1000, + le=300000, + description="Execution timeout in milliseconds", + ) class ExecResponse(BaseModel): diff --git a/src/models/execution.py b/src/models/execution.py index ac2877d..a3a5cc8 100644 --- a/src/models/execution.py +++ b/src/models/execution.py @@ -3,7 +3,7 @@ # Standard library imports from datetime import datetime from enum import Enum -from typing import List, Optional +from typing import Any, Dict, List, Optional # Third-party imports from pydantic import BaseModel, Field @@ -42,6 +42,7 @@ class ExecutionOutput(BaseModel): default=None, description="Size in bytes for file 
outputs" ) timestamp: datetime = Field(default_factory=datetime.utcnow) + metadata: Optional[Dict[str, Any]] = None class CodeExecution(BaseModel): diff --git a/src/models/files.py b/src/models/files.py index 038f471..654d5c0 100644 --- a/src/models/files.py +++ b/src/models/files.py @@ -40,6 +40,7 @@ class FileInfo(BaseModel): content_type: str created_at: datetime path: str = Field(..., description="File path in the session") + original_filename: Optional[str] = None class Config: json_encoders = {datetime: lambda v: v.isoformat()} diff --git a/src/models/programmatic.py b/src/models/programmatic.py index 9eb1d2f..7a0208c 100644 --- a/src/models/programmatic.py +++ b/src/models/programmatic.py @@ -8,7 +8,9 @@ from typing import Any, Dict, List, Optional -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, validator + +SUPPORTED_PTC_LANGUAGES = {"py", "bash"} class PTCToolDefinition(BaseModel): @@ -67,12 +69,28 @@ class ProgrammaticExecRequest(BaseModel): # Initial execution fields code: Optional[str] = Field( - default=None, description="Python code to execute (initial request)" + default=None, description="Code to execute (initial request)" + ) + lang: str = Field( + default="py", + description=( + "Language for the PTC sandbox: 'py' (default) or 'bash'. " + "LibreChat's BashProgrammaticToolCalling tool sends 'bash'." 
+ ), ) tools: List[PTCToolDefinition] = Field( default_factory=list, description="Tools available to the code (initial request)", ) + + @validator("lang") + def _validate_lang(cls, v: str) -> str: + if v not in SUPPORTED_PTC_LANGUAGES: + raise ValueError( + f"lang must be one of {sorted(SUPPORTED_PTC_LANGUAGES)}, got {v!r}" + ) + return v + session_id: Optional[str] = Field( default=None, description="Optional session ID for continuity" ) diff --git a/src/services/cleanup.py b/src/services/cleanup.py index 6d43183..0461ea5 100644 --- a/src/services/cleanup.py +++ b/src/services/cleanup.py @@ -7,7 +7,7 @@ immediately after execution by the orchestrator. This scheduler handles: - File cleanup when sessions are explicitly deleted - Legacy cleanup for non-pooled containers -- Periodic state archival from Redis to MinIO +- Periodic state archival from Redis to S3 """ import asyncio @@ -27,7 +27,7 @@ class CleanupScheduler: With the simplified container pool architecture: - Containers are destroyed immediately after execution (no TTL tracking) - This scheduler handles file cleanup and session-level resource cleanup - - Periodic state archival from Redis to MinIO + - Periodic state archival from Redis to S3 """ def __init__(self, delay_seconds: int = 5): @@ -178,7 +178,7 @@ def pending_count(self) -> int: return len(self._pending_cleanups) async def _archival_loop(self): - """Background loop for archiving inactive states to MinIO.""" + """Background loop for archiving inactive states to S3.""" interval = settings.state_archive_check_interval_seconds while True: diff --git a/src/services/execution/output.py b/src/services/execution/output.py index a7fb6d1..4fc68bb 100644 --- a/src/services/execution/output.py +++ b/src/services/execution/output.py @@ -3,6 +3,7 @@ import os import re import secrets +import unicodedata from pathlib import Path from typing import Any, Dict @@ -213,13 +214,27 @@ def format_error_message(cls, exit_code: int, stderr: str) -> str: return 
f"Execution failed (exit code {exit_code}):\n{stderr_clean}" + # ASCII chars safe in filenames — matches LibreChat's ASCII_FILENAME_SAFE_PATTERN. + _ASCII_SAFE = re.compile(r"[a-zA-Z0-9._\-]") + # C1 control characters (U+0080–U+009F) — unsafe in filenames. + _C1_CONTROLS = re.compile(r"[\x80-\x9f]") + + @classmethod + def _sanitize_char(cls, char: str) -> str: + """Replace unsafe ASCII; preserve Unicode letters, marks, numbers, and emoji.""" + if ord(char) <= 0x7F: + return char if cls._ASCII_SAFE.match(char) else "_" + return "_" if cls._C1_CONTROLS.match(char) else char + @classmethod def sanitize_filename(cls, input_name: str) -> str: - """Sanitize filename to match LibreChat's sanitization logic. + """Sanitize filename while preserving Unicode letters, digits, and emoji. - Replaces all non-alphanumeric characters (except '.' and '-') with - underscores. This ensures filenames on disk match what LibreChat - reports in the system prompt. + NFC-normalizes, then applies a two-pass approach matching + LibreChat's ``sanitizeFilenameSegment``: strict for ASCII + (only ``[a-zA-Z0-9._-]``), permissive for non-ASCII (keeps + Unicode letters, combining marks, numbers, emoji — blocks + only C1 control characters). Args: input_name: Original filename (may include path components) @@ -234,8 +249,12 @@ def sanitize_filename(cls, input_name: str) -> str: # Remove any directory components (path traversal prevention) name = os.path.basename(input_name) - # Replace any non-alphanumeric characters except for '.' and '-' - name = re.sub(r"[^a-zA-Z0-9.-]", "_", name) + # NFC-normalize so decomposed sequences (e + U+0301) become + # precomposed (é) before the regex runs. + name = unicodedata.normalize("NFC", name) + + # Two-pass sanitization: strict ASCII, permissive Unicode. 
+ name = "".join(cls._sanitize_char(c) for c in name) # Ensure the name doesn't start with a dot (hidden file in Unix) if name.startswith(".") or name == "": @@ -257,3 +276,38 @@ def sanitize_filename(cls, input_name: str) -> str: except Exception as e: logger.error(f"Failed to sanitize filename: {e}") return "_" + + @classmethod + def sanitize_relative_path(cls, input_path: str) -> str: + """Sanitize a relative path while preserving subdirectory structure. + + Calls `sanitize_filename` on each path segment and rejoins with `/`. + Used for filenames that legitimately contain subdirectories — both + on the input side (LibreChat sends `skills/foo/SKILL.md` for skill + bundles) and the output side (code that writes to `/mnt/data/charts/foo.png` + should round-trip back as `charts/foo.png`). + + Path traversal segments (`..`) are rejected, and the result is + guaranteed to be a non-empty relative path with forward slashes. + """ + if not input_path: + return "_" + + # Strip leading/trailing slashes and split into segments. + segments = [s for s in input_path.replace("\\", "/").split("/") if s] + if not segments: + return "_" + + sanitized_segments = [] + for segment in segments: + if segment == "..": + # Drop traversal attempts entirely rather than allowing them. + continue + sanitized = cls.sanitize_filename(segment) + if sanitized and sanitized != "_": + sanitized_segments.append(sanitized) + + if not sanitized_segments: + return "_" + + return "/".join(sanitized_segments) diff --git a/src/services/execution/runner.py b/src/services/execution/runner.py index 029ce28..89c24ea 100644 --- a/src/services/execution/runner.py +++ b/src/services/execution/runner.py @@ -238,15 +238,36 @@ async def execute( generated_files = [] if should_detect_files: + # _detect_generated_files now consults sandbox_info.mounted_file_stats + # internally to skip unchanged mounts and surface in-place edits as + # new generated files. 
The legacy _filter_generated_files blanket + # suppression was removed because it dropped real edits along with + # the noise. generated_files = await self._detect_generated_files(sandbox_info) - mounted_filenames = self._get_mounted_filenames(files) - filtered_files = self._filter_generated_files( - generated_files, mounted_filenames - ) - - for file_info in filtered_files: + for file_info in generated_files: if OutputProcessor.validate_generated_file(file_info): + meta: Dict[str, Any] = {} + if file_info.get("inherited"): + meta = { + k: file_info[k] + for k in ( + "inherited", + "original_file_id", + "original_session_id", + "original_entity_id", + ) + if k in file_info + } + elif file_info.get("modified_from_id"): + meta = { + k: file_info[k] + for k in ( + "modified_from_id", + "modified_from_session_id", + ) + if k in file_info + } outputs.append( ExecutionOutput( type=OutputType.FILE, @@ -254,6 +275,7 @@ async def execute( mime_type=file_info.get("mime_type"), size=file_info.get("size"), timestamp=end_time, + metadata=meta or None, ) ) @@ -346,30 +368,6 @@ def _process_outputs( return outputs - def _get_mounted_filenames(self, files: Optional[List[Dict[str, Any]]]) -> set: - """Get set of mounted filenames for filtering.""" - mounted = set() - if files: - try: - for f in files: - name = f.get("filename") or f.get("name") - if name: - mounted.add(name) - mounted.add(OutputProcessor.sanitize_filename(name)) - except Exception: - pass - return mounted - - def _filter_generated_files( - self, generated: List[Dict[str, Any]], mounted_filenames: set - ) -> List[Dict[str, Any]]: - """Filter out mounted files from generated files list.""" - return [ - f - for f in generated - if Path(f.get("path", "")).name not in mounted_filenames - ] - async def _create_fresh_sandbox( self, session_id: str, language: str ) -> SandboxInfo: @@ -703,9 +701,13 @@ async def _mount_files_to_sandbox( ) -> None: """Mount files to sandbox workspace. 
- Uses streaming (MinIO fget_object) to transfer files directly to the + Uses streaming (S3 download_file) to transfer files directly to the sandbox data directory without loading entire files into memory. This avoids blocking the asyncio event loop during large file transfers. + + Filenames may include subdirectories (e.g. `skills/foo/SKILL.md` from + LibreChat skill bundles). Parent directories are created and chowned + to match the sandbox uid before the file is written. """ try: from ..file import FileService @@ -713,10 +715,27 @@ async def _mount_files_to_sandbox( file_service = FileService() user_id = get_user_id_for_language(language) + data_dir = sandbox_info.data_dir + + def _ensure_parent_dirs(dest: Path, uid: int) -> None: + parent = dest.parent + if parent == data_dir or not parent.is_relative_to(data_dir): + return + parent.mkdir(parents=True, exist_ok=True) + # Chown each newly-created ancestor so the sandbox uid can + # traverse and write inside it. + for ancestor in [parent, *parent.parents]: + if ancestor == data_dir: + break + try: + os.chown(ancestor, uid, uid) + os.chmod(ancestor, 0o755) + except (PermissionError, FileNotFoundError): + pass - def _set_file_perms(path, uid): + def _set_file_perms(path, uid, read_only=False): os.chown(path, uid, uid) - os.chmod(path, 0o644) + os.chmod(path, 0o444 if read_only else 0o644) return os.path.getsize(path) for file_info in files: @@ -729,8 +748,12 @@ def _set_file_perms(path, uid): continue try: - normalized_filename = OutputProcessor.sanitize_filename(filename) - dest_path = str(sandbox_info.data_dir / normalized_filename) + normalized_filename = OutputProcessor.sanitize_relative_path( + filename + ) + dest = data_dir / normalized_filename + await asyncio.to_thread(_ensure_parent_dirs, dest, user_id) + dest_path = str(dest) file_size = file_info.get("size", 0) if file_size > 10 * 1024 * 1024: @@ -740,15 +763,37 @@ def _set_file_perms(path, uid): size_mb=round(file_size / 1024 / 1024, 1), ) - # Stream 
directly from MinIO to sandbox directory (non-blocking) + # Stream directly from S3 to sandbox directory (non-blocking) success = await file_service.stream_file_to_path( session_id, file_id, dest_path ) if success: + is_read_only = file_info.get("is_read_only", False) actual_size = await asyncio.to_thread( - _set_file_perms, dest_path, user_id + _set_file_perms, dest_path, user_id, read_only=is_read_only ) + # Snapshot stats so _detect_generated_files can tell + # later whether user code edited this file in place. + # Key by both the original (possibly-nested) filename + # and the normalized form, so lookup works regardless + # of which form the post-walk uses. + try: + st = await asyncio.to_thread(os.stat, dest_path) + entity_id = file_info.get("entity_id") + stat_tuple = ( + st.st_mtime_ns, + st.st_size, + file_id, + session_id, + entity_id, + ) + sandbox_info.mounted_file_stats[normalized_filename] = ( + stat_tuple + ) + sandbox_info.mounted_file_stats[filename] = stat_tuple + except OSError: + pass logger.debug( "Mounted file", filename=filename, @@ -768,9 +813,13 @@ def _set_file_perms(path, uid): async def _create_placeholder_file( self, sandbox_info: SandboxInfo, filename: str ) -> None: - """Create a placeholder file when content cannot be retrieved.""" + """Create a placeholder file when content cannot be retrieved. + + Preserves any subdirectory structure in the filename so the placeholder + lands at the path the caller expects (e.g. `/mnt/data/skills/foo/SKILL.md`). 
+ """ try: - normalized_filename = OutputProcessor.sanitize_filename(filename) + normalized_filename = OutputProcessor.sanitize_relative_path(filename) placeholder = f"# File: {filename}\n# This is a placeholder - original file could not be retrieved\n" self.sandbox_manager.copy_content_to_sandbox( sandbox_info, @@ -781,38 +830,144 @@ async def _create_placeholder_file( except Exception as e: logger.error(f"Failed to create placeholder file: {e}") + # Directory names we never descend into when scanning for generated + # artifacts. These are package-manager / build-tool caches: their contents + # aren't user-meaningful "artifacts" and they routinely contain tens of + # thousands of files which would (a) blow our max_output_files budget on + # noise and (b) get phantom-mounted into future executions, polluting the + # workspace. If a skill genuinely needs a file inside one of these dirs to + # round-trip, it should write a copy elsewhere in /mnt/data first. + _ARTIFACT_SCAN_SKIP_DIRS: frozenset = frozenset( + { + "node_modules", + "__pycache__", + ".git", + ".cache", + ".npm", + ".npm-cache", + ".venv", + "venv", + "env", + ".tox", + ".pytest_cache", + ".mypy_cache", + ".ruff_cache", + "target", # Rust + "dist", + "build", + "vendor", # Go vendor / PHP composer + ".bundle", + ".gradle", + ".m2", # Maven + ".cargo", + "pkg", # Go module cache mirror + } + ) + async def _detect_generated_files( self, sandbox_info: SandboxInfo ) -> List[Dict[str, Any]]: - """Detect files generated during execution.""" + """Detect files generated or modified during execution. + + Walks the sandbox data directory recursively so artifacts written to + subdirectories (e.g. `/mnt/data/charts/foo.png`) are discoverable. + Hidden segments and known dependency-cache directories are skipped, + and the per-execution output budget is enforced after sorting for + deterministic test results. 
+ + For files that match a previously-mounted file by basename or relative + path: include them only if (mtime_ns, size) changed since mount — + i.e., user code edited them in place. Unchanged mounted files are + skipped to avoid re-uploading them on every execution. + """ try: - generated_files = [] data_dir = sandbox_info.data_dir - if not data_dir.exists(): return [] - for name in os.listdir(data_dir): - # Skip code files - if name.startswith("code") or name.startswith("Code."): - continue + max_size_bytes = settings.max_file_size_mb * 1024 * 1024 + candidates: List[Dict[str, Any]] = [] + skip_dirs = self._ARTIFACT_SCAN_SKIP_DIRS + mounted_stats = sandbox_info.mounted_file_stats or {} + + for root, dirs, files in os.walk(data_dir): + # Filter hidden + known-cache directories in-place so os.walk + # doesn't descend into them. This is the critical fix that + # keeps `npm install` / `pip install` from polluting the + # session with thousands of dependency files. + dirs[:] = [ + d for d in dirs if not d.startswith(".") and d not in skip_dirs + ] + + for name in files: + # Skip hidden files and the source code we wrote in. + if name.startswith("."): + continue + if name.startswith("code") or name.startswith("Code."): + continue + + filepath = Path(root) / name + if not filepath.is_file(): + continue - filepath = data_dir / name - if filepath.is_file(): - size = filepath.stat().st_size - if size <= settings.max_file_size_mb * 1024 * 1024: - generated_files.append( - { - "path": f"/mnt/data/{name}", - "size": size, - "mime_type": OutputProcessor.guess_mime_type(name), - } - ) - - if len(generated_files) >= settings.max_output_files: - break + try: + st = filepath.stat() + except OSError: + continue + size = st.st_size + if size > max_size_bytes: + continue + + rel = filepath.relative_to(data_dir).as_posix() + + # If this file was mounted, only surface it when content + # changed during execution. 
We check both the relative path + # and the basename — _mount_files_to_sandbox snapshots both + # forms because we don't know which the caller originally + # used (LibreChat sometimes ships nested filenames like + # `skills/foo/SKILL.md`, sometimes flat). + prior = mounted_stats.get(rel) or mounted_stats.get(name) + if prior is not None: + if prior[:2] == (st.st_mtime_ns, size): + candidates.append( + { + "path": f"/mnt/data/{rel}", + "size": size, + "mime_type": OutputProcessor.guess_mime_type(rel), + "inherited": True, + "original_file_id": prior[2], + "original_session_id": prior[3], + "original_entity_id": prior[4], + } + ) + continue + else: + candidates.append( + { + "path": f"/mnt/data/{rel}", + "size": size, + "mime_type": OutputProcessor.guess_mime_type(rel), + "modified_from_id": prior[2], + "modified_from_session_id": prior[3], + } + ) + continue + + candidates.append( + { + "path": f"/mnt/data/{rel}", + "size": size, + "mime_type": OutputProcessor.guess_mime_type(rel), + } + ) - return generated_files + # Stable ordering before applying the output budget keeps tests + # deterministic when many files exist. Inherited files don't count + # against the budget — they're not new content. 
+ inherited = [c for c in candidates if c.get("inherited")] + generated = [c for c in candidates if not c.get("inherited")] + generated.sort(key=lambda f: f["path"]) + return inherited + generated[: settings.max_output_files] except Exception as e: logger.error(f"Failed to detect generated files: {e}") diff --git a/src/services/file.py b/src/services/file.py index 796c17a..5d12c01 100644 --- a/src/services/file.py +++ b/src/services/file.py @@ -1,15 +1,15 @@ -"""File management service with MinIO/S3 storage integration.""" +"""File management service with S3-compatible storage integration.""" # Standard library imports import asyncio -from datetime import datetime, timedelta +from datetime import datetime from typing import List, Optional, Tuple, Dict, Any # Third-party imports +import boto3 import redis.asyncio as redis import structlog -from minio import Minio -from minio.error import S3Error +from botocore.exceptions import ClientError # Local application imports from .interfaces import FileServiceInterface @@ -21,16 +21,16 @@ class FileService(FileServiceInterface): - """File management service with MinIO/S3 storage and Redis metadata.""" + """File management service with S3 storage and Redis metadata.""" def __init__(self): - """Initialize the file service with MinIO and Redis clients.""" - # Initialize MinIO client - self.minio_client = Minio( - settings.minio_endpoint, - access_key=settings.minio_access_key, - secret_key=settings.minio_secret_key, - secure=settings.minio_secure, + """Initialize the file service with S3 and Redis clients.""" + self.s3_client = boto3.client( + "s3", + endpoint_url=settings.s3.endpoint_url, + aws_access_key_id=settings.s3_access_key, + aws_secret_access_key=settings.s3_secret_key, + region_name=settings.s3_region, ) # Initialize Redis client @@ -38,24 +38,28 @@ def __init__(self): settings.get_redis_url(), decode_responses=True ) - self.bucket_name = settings.minio_bucket + self.bucket_name = settings.s3_bucket async def 
_ensure_bucket_exists(self) -> None: - """Ensure the MinIO bucket exists.""" + """Ensure the S3 bucket exists.""" try: - # Run in thread pool since minio client is synchronous loop = asyncio.get_event_loop() - bucket_exists = await loop.run_in_executor( - None, self.minio_client.bucket_exists, self.bucket_name - ) - - if not bucket_exists: + try: await loop.run_in_executor( - None, self.minio_client.make_bucket, self.bucket_name + None, + lambda: self.s3_client.head_bucket(Bucket=self.bucket_name), ) - logger.info("Created MinIO bucket", bucket=self.bucket_name) + except ClientError as e: + if e.response["Error"]["Code"] in ("404", "NoSuchBucket"): + await loop.run_in_executor( + None, + lambda: self.s3_client.create_bucket(Bucket=self.bucket_name), + ) + logger.info("Created S3 bucket", bucket=self.bucket_name) + else: + raise - except S3Error as e: + except ClientError as e: logger.error( "Failed to ensure bucket exists", error=str(e), bucket=self.bucket_name ) @@ -109,13 +113,13 @@ async def _has_link_references(self, session_id: str, file_id: str) -> bool: return bool(await self.redis_client.smembers(links_key)) async def _delete_object(self, object_key: str) -> None: - """Delete a backing object from MinIO.""" + """Delete a backing object from S3.""" loop = asyncio.get_event_loop() await loop.run_in_executor( None, - self.minio_client.remove_object, - self.bucket_name, - object_key, + lambda: self.s3_client.delete_object( + Bucket=self.bucket_name, Key=object_key + ), ) async def _find_linked_file( @@ -268,10 +272,11 @@ async def upload_file( loop = asyncio.get_event_loop() upload_url = await loop.run_in_executor( None, - self.minio_client.presigned_put_object, - self.bucket_name, - object_key, - timedelta(hours=1), + lambda: self.s3_client.generate_presigned_url( + "put_object", + Params={"Bucket": self.bucket_name, "Key": object_key}, + ExpiresIn=3600, + ), ) # Store initial metadata @@ -297,7 +302,7 @@ async def upload_file( return file_id, upload_url - 
except S3Error as e: + except ClientError as e: logger.error( "Failed to generate upload URL", error=str(e), session_id=session_id ) @@ -314,31 +319,36 @@ async def confirm_upload(self, session_id: str, file_id: str) -> FileInfo: try: # Get object info to confirm upload and get size loop = asyncio.get_event_loop() - stat = await loop.run_in_executor( - None, self.minio_client.stat_object, self.bucket_name, object_key + head = await loop.run_in_executor( + None, + lambda: self.s3_client.head_object( + Bucket=self.bucket_name, Key=object_key + ), ) + file_size = head["ContentLength"] + # Update metadata with actual file size - metadata["size"] = stat.size + metadata["size"] = file_size await self._store_file_metadata(session_id, file_id, metadata) logger.debug( "Confirmed file upload", session_id=session_id, file_id=file_id, - size=stat.size, + size=file_size, ) return FileInfo( file_id=file_id, filename=metadata["filename"], - size=stat.size, + size=file_size, content_type=metadata["content_type"], created_at=metadata["created_at"], path=metadata["path"], ) - except S3Error as e: + except ClientError as e: logger.error( "Failed to confirm upload", error=str(e), @@ -360,6 +370,7 @@ async def get_file_info(self, session_id: str, file_id: str) -> Optional[FileInf content_type=metadata["content_type"], created_at=metadata["created_at"], path=metadata["path"], + original_filename=metadata.get("original_filename"), ) async def list_files(self, session_id: str) -> List[FileInfo]: @@ -419,6 +430,9 @@ async def link_file_into_session( "source_session_id": source_session_id, "source_file_id": source_file_id, "is_read_only": "1", + "original_filename": source_metadata.get( + "original_filename", source_metadata["filename"] + ), } await self._store_file_metadata(target_session_id, linked_file_id, metadata) @@ -444,6 +458,7 @@ async def link_file_into_session( content_type=metadata["content_type"], created_at=datetime.fromisoformat(metadata["created_at"]), path=metadata["path"], 
+ original_filename=metadata.get("original_filename"), ) async def download_file(self, session_id: str, file_id: str) -> Optional[str]: @@ -459,15 +474,16 @@ async def download_file(self, session_id: str, file_id: str) -> Optional[str]: loop = asyncio.get_event_loop() download_url = await loop.run_in_executor( None, - self.minio_client.presigned_get_object, - self.bucket_name, - object_key, - timedelta(hours=1), + lambda: self.s3_client.generate_presigned_url( + "get_object", + Params={"Bucket": self.bucket_name, "Key": object_key}, + ExpiresIn=3600, + ), ) return download_url - except S3Error as e: + except ClientError as e: logger.error( "Failed to generate download URL", error=str(e), @@ -512,7 +528,7 @@ async def delete_file(self, session_id: str, file_id: str) -> bool: source_file_id=metadata["source_file_id"], object_key=metadata["object_key"], ) - except S3Error as e: + except ClientError as e: logger.warning( "Failed to delete orphaned shared object", source_session_id=metadata["source_session_id"], @@ -534,7 +550,6 @@ async def delete_file(self, session_id: str, file_id: str) -> bool: object_key = metadata["object_key"] try: - # Delete from MinIO await self._delete_object(object_key) # Delete metadata from Redis @@ -543,7 +558,7 @@ async def delete_file(self, session_id: str, file_id: str) -> bool: logger.debug("Deleted file", session_id=session_id, file_id=file_id) return True - except S3Error as e: + except ClientError as e: logger.error( "Failed to delete file", error=str(e), @@ -566,36 +581,39 @@ async def cleanup_session_files(self, session_id: str) -> int: # Clean up session files set await self.redis_client.delete(session_files_key) - # If no files were tracked in Redis, fall back to prefix-based deletion in MinIO + # If no files were tracked in Redis, fall back to prefix-based deletion if deleted_count == 0: try: loop = asyncio.get_event_loop() - # List objects under both uploads and outputs prefixes prefixes = [ f"sessions/{session_id}/uploads/", 
f"sessions/{session_id}/outputs/", ] for prefix in prefixes: - # MinIO list_objects returns an iterator; use recursive to get all - objects = await loop.run_in_executor( - None, - lambda: list( - self.minio_client.list_objects( - self.bucket_name, prefix=prefix, recursive=True - ) - ), - ) - for obj in objects: - await loop.run_in_executor( - None, - self.minio_client.remove_object, - self.bucket_name, - obj.object_name, + + def _list_prefix(p: str = prefix) -> list: + return list( + self.s3_client.get_paginator("list_objects_v2") + .paginate(Bucket=self.bucket_name, Prefix=p) + .search("Contents[]") ) + + objects = await loop.run_in_executor(None, _list_prefix) + for entry in objects: + if entry is None: + continue + key = entry["Key"] + + def _delete(k: str = key) -> None: + self.s3_client.delete_object( + Bucket=self.bucket_name, Key=k + ) + + await loop.run_in_executor(None, _delete) deleted_count += 1 except Exception as e: logger.error( - "Prefix-based MinIO cleanup failed", + "Prefix-based S3 cleanup failed", session_id=session_id, error=str(e), ) @@ -638,20 +656,19 @@ async def store_execution_output_file( object_key = self._get_file_key(session_id, file_id, "outputs") try: - # Convert bytes to BytesIO for MinIO import io content_stream = io.BytesIO(content) - # Upload file content directly loop = asyncio.get_event_loop() await loop.run_in_executor( None, - self.minio_client.put_object, - self.bucket_name, - object_key, - content_stream, - len(content), + lambda: self.s3_client.put_object( + Bucket=self.bucket_name, + Key=object_key, + Body=content_stream, + ContentLength=len(content), + ), ) now = datetime.utcnow() @@ -665,7 +682,7 @@ async def store_execution_output_file( "created_at": now.isoformat(), "size": len(content), "path": f"/outputs/{filename}", - "type": "output", # Mark as execution output + "type": "output", } await self._store_file_metadata(session_id, file_id, metadata) @@ -680,7 +697,7 @@ async def store_execution_output_file( return 
file_id - except S3Error as e: + except ClientError as e: logger.error( "Failed to store output file", error=str(e), @@ -701,17 +718,15 @@ async def get_file_content(self, session_id: str, file_id: str) -> Optional[byte loop = asyncio.get_event_loop() def _download(): - response = self.minio_client.get_object(self.bucket_name, object_key) - try: - return response.read() - finally: - response.close() - response.release_conn() + response = self.s3_client.get_object( + Bucket=self.bucket_name, Key=object_key + ) + return response["Body"].read() content = await loop.run_in_executor(None, _download) return content - except S3Error as e: + except ClientError as e: logger.error( "Failed to get file content", error=str(e), @@ -723,9 +738,9 @@ def _download(): async def stream_file_to_path( self, session_id: str, file_id: str, dest_path: str ) -> bool: - """Stream file content from MinIO directly to a local file path. + """Stream file content from S3 directly to a local file path. - Uses MinIO's fget_object for efficient disk-to-disk transfer + Uses boto3's download_file for efficient disk-to-disk transfer without loading the entire file into memory. Runs in a thread pool executor to avoid blocking the async event loop. @@ -747,13 +762,12 @@ async def stream_file_to_path( loop = asyncio.get_event_loop() await loop.run_in_executor( None, - self.minio_client.fget_object, - self.bucket_name, - object_key, - dest_path, + lambda: self.s3_client.download_file( + self.bucket_name, object_key, dest_path + ), ) return True - except S3Error as e: + except ClientError as e: logger.error( "Failed to stream file to path", error=str(e), @@ -770,15 +784,19 @@ async def store_uploaded_file( content: bytes, content_type: Optional[str] = None, is_agent_file: bool = False, + is_read_only: bool = False, + original_filename: Optional[str] = None, ) -> str: """Store an uploaded file directly. 
Args: session_id: Session identifier - filename: Original filename + filename: Sanitized filename used for storage and sandbox mounting content: File content as bytes content_type: MIME type of the file is_agent_file: If True, marks the file as read-only (agent-assigned) + is_read_only: If True, mounted file should be chmod 444 in sandbox + original_filename: Pre-sanitization filename for metadata recovery Returns: The generated file_id @@ -792,7 +810,6 @@ async def store_uploaded_file( object_key = self._get_file_key(session_id, file_id, "uploads") try: - # Upload file content directly from io import BytesIO content_stream = BytesIO(content) @@ -800,12 +817,13 @@ async def store_uploaded_file( loop = asyncio.get_event_loop() await loop.run_in_executor( None, - self.minio_client.put_object, - self.bucket_name, - object_key, - content_stream, - len(content), - content_type or "application/octet-stream", + lambda: self.s3_client.put_object( + Bucket=self.bucket_name, + Key=object_key, + Body=content_stream, + ContentLength=len(content), + ContentType=content_type or "application/octet-stream", + ), ) # Store metadata @@ -818,10 +836,10 @@ async def store_uploaded_file( "created_at": datetime.utcnow().isoformat(), "size": len(content), "path": f"/{filename}", - "type": "upload", # Mark as uploaded file - "is_agent_file": ( - "1" if is_agent_file else "0" - ), # Read-only if agent file + "type": "upload", + "is_agent_file": ("1" if is_agent_file else "0"), + "is_read_only": "1" if (is_read_only or is_agent_file) else "0", + "original_filename": original_filename or filename, } await self._store_file_metadata(session_id, file_id, metadata) @@ -836,7 +854,7 @@ async def store_uploaded_file( return file_id - except S3Error as e: + except ClientError as e: logger.error( "Failed to store uploaded file", error=str(e), @@ -846,7 +864,7 @@ async def store_uploaded_file( raise async def cleanup_orphan_objects(self, batch_limit: int = 1000) -> int: - """Delete MinIO objects 
under sessions/ whose sessions are not active in Redis. + """Delete S3 objects under sessions/ whose sessions are not active in Redis. Safety guards: - Skip if the session index is empty (avoid mass-deletes on cold start). @@ -859,53 +877,51 @@ async def cleanup_orphan_objects(self, batch_limit: int = 1000) -> int: active_session_ids = await self.redis_client.smembers("sessions:index") active_session_ids = active_session_ids or set() - # Guard 1: if index is empty, skip to avoid accidental bulk deletes if not active_session_ids: - logger.debug("Skipping orphan MinIO cleanup: empty sessions index") + logger.debug("Skipping orphan S3 cleanup: empty sessions index") return 0 loop = asyncio.get_event_loop() - # List all objects under the sessions/ prefix + + # List all objects under the sessions/ prefix using paginator objects = await loop.run_in_executor( None, lambda: list( - self.minio_client.list_objects( - self.bucket_name, prefix="sessions/", recursive=True - ) + self.s3_client.get_paginator("list_objects_v2") + .paginate(Bucket=self.bucket_name, Prefix="sessions/") + .search("Contents[]") ), ) deleted_count = 0 - # Cache existence checks to minimize Redis round-trips for unknown session IDs + # Cache existence checks to minimize Redis round-trips checked_missing_sessions: Dict[str, bool] = {} - # Determine age cutoff based on TTL (older than TTL are safe to remove) + # Determine age cutoff based on TTL ttl_minutes = settings.get_session_ttl_minutes() ttl_seconds = ttl_minutes * 60 now_ts = datetime.utcnow().timestamp() - for obj in objects: + for entry in objects: + if entry is None: + continue if deleted_count >= batch_limit: break - object_key = getattr(obj, "object_name", None) + object_key = entry.get("Key") if not object_key: continue parts = object_key.split("/") - # Expecting sessions/// if len(parts) < 3 or parts[0] != "sessions": continue object_session_id = parts[1] - # Guard 2: only delete if object is older than TTL (requires last_modified) try: - # 
minio list_objects entries typically have last_modified; if missing, skip - last_modified = getattr(obj, "last_modified", None) + last_modified = entry.get("LastModified") if last_modified is None: continue - # last_modified may be datetime; convert to timestamp obj_ts = ( last_modified.timestamp() if hasattr(last_modified, "timestamp") @@ -914,7 +930,6 @@ async def cleanup_orphan_objects(self, batch_limit: int = 1000) -> int: if obj_ts is None: continue if (now_ts - obj_ts) < ttl_seconds: - # Too new; skip to avoid racing with active sessions continue except Exception as e: logger.debug( @@ -924,7 +939,6 @@ async def cleanup_orphan_objects(self, batch_limit: int = 1000) -> int: ) continue - # Skip if known active if object_session_id in active_session_ids: continue @@ -934,7 +948,6 @@ async def cleanup_orphan_objects(self, batch_limit: int = 1000) -> int: ): continue - # Double-check via Redis existence in case index is stale if object_session_id not in checked_missing_sessions: try: exists = await self.redis_client.exists( @@ -950,34 +963,31 @@ async def cleanup_orphan_objects(self, batch_limit: int = 1000) -> int: checked_missing_sessions[object_session_id] = False if checked_missing_sessions.get(object_session_id, False): - # Session exists; keep the object continue - # Delete orphaned object try: - await loop.run_in_executor( - None, - self.minio_client.remove_object, - self.bucket_name, - object_key, - ) + + def _delete_orphan(k: str = object_key) -> None: + self.s3_client.delete_object(Bucket=self.bucket_name, Key=k) + + await loop.run_in_executor(None, _delete_orphan) deleted_count += 1 except Exception as e: logger.error( - "Failed to delete orphan MinIO object", + "Failed to delete orphan S3 object", object_key=object_key, error=str(e), ) if deleted_count > 0: - logger.info("Deleted orphan MinIO objects", deleted_count=deleted_count) + logger.info("Deleted orphan S3 objects", deleted_count=deleted_count) else: - logger.debug("No orphan MinIO objects 
found") + logger.debug("No orphan S3 objects found") return deleted_count except Exception as e: - logger.error("Orphan MinIO objects cleanup failed", error=str(e)) + logger.error("Orphan S3 objects cleanup failed", error=str(e)) return 0 async def update_file_content( @@ -988,7 +998,7 @@ async def update_file_content( ) -> bool: """Update the content of an existing file. - Overwrites the MinIO object and updates metadata. Used to persist + Overwrites the S3 object and updates metadata. Used to persist in-place edits to mounted files after execution. Args: @@ -1027,7 +1037,6 @@ async def update_file_content( ) return False - # Overwrite content in MinIO import io loop = asyncio.get_event_loop() @@ -1036,12 +1045,12 @@ async def update_file_content( await loop.run_in_executor( None, - lambda: self.minio_client.put_object( - self.bucket_name, - object_key, - content_stream, - len(content), - content_type, + lambda: self.s3_client.put_object( + Bucket=self.bucket_name, + Key=object_key, + Body=content_stream, + ContentLength=len(content), + ContentType=content_type, ), ) diff --git a/src/services/health.py b/src/services/health.py index 70e69c5..0027c51 100644 --- a/src/services/health.py +++ b/src/services/health.py @@ -11,10 +11,10 @@ from typing import Dict, Any, Optional # Third-party imports +import boto3 import redis.asyncio as redis import structlog -from minio import Minio -from minio.error import S3Error +from botocore.exceptions import ClientError # Local application imports from ..config import settings @@ -75,7 +75,7 @@ class HealthCheckService: def __init__(self): """Initialize health check service.""" self._redis_client: Optional[redis.Redis] = None - self._minio_client: Optional[Minio] = None + self._s3_client = None self._sandbox_pool = None self._last_check_time: Optional[datetime] = None self._cached_results: Dict[str, HealthCheckResult] = {} @@ -104,10 +104,10 @@ async def check_all_services( # Run all health checks concurrently tasks = [ 
self.check_redis(), - self.check_minio(), + self.check_s3(), self.check_nsjail(), ] - service_names = ["redis", "minio", "nsjail"] + service_names = ["redis", "s3", "nsjail"] # Add sandbox pool check if pool is configured if self._sandbox_pool and settings.sandbox_pool_enabled: @@ -218,122 +218,124 @@ async def check_redis(self) -> HealthCheckResult: error=str(e), ) - async def check_minio(self) -> HealthCheckResult: - """Check MinIO/S3 connectivity and performance.""" + async def check_s3(self) -> HealthCheckResult: + """Check S3 storage connectivity and performance.""" start_time = time.time() try: - # Create MinIO client if not exists - if not self._minio_client: - self._minio_client = Minio( - settings.minio_endpoint, - access_key=settings.minio_access_key, - secret_key=settings.minio_secret_key, - secure=settings.minio_secure, + if not self._s3_client: + self._s3_client = boto3.client( + "s3", + endpoint_url=settings.s3.endpoint_url, + aws_access_key_id=settings.s3_access_key, + aws_secret_access_key=settings.s3_secret_key, + region_name=settings.s3_region, ) - # Test basic connectivity by listing buckets loop = asyncio.get_event_loop() - buckets = await loop.run_in_executor(None, self._minio_client.list_buckets) - - # Check if our bucket exists - bucket_exists = await loop.run_in_executor( - None, self._minio_client.bucket_exists, settings.minio_bucket + buckets_resp = await loop.run_in_executor( + None, self._s3_client.list_buckets ) + buckets = buckets_resp.get("Buckets", []) - if not bucket_exists: - # Try to create the bucket + # Check if our bucket exists + try: + await loop.run_in_executor( + None, + lambda: self._s3_client.head_bucket(Bucket=settings.s3_bucket), + ) + bucket_exists = True + except ClientError: + bucket_exists = False await loop.run_in_executor( - None, self._minio_client.make_bucket, settings.minio_bucket + None, + lambda: self._s3_client.create_bucket(Bucket=settings.s3_bucket), ) - logger.info(f"Created missing bucket: 
{settings.minio_bucket}") + logger.info(f"Created missing bucket: {settings.s3_bucket}") # Test read/write operations test_object = f"health_check/test_{int(time.time())}.txt" test_content = b"health check test content" - # Create a BytesIO object for the upload from io import BytesIO test_data = BytesIO(test_content) - # Upload test object await loop.run_in_executor( None, - self._minio_client.put_object, - settings.minio_bucket, - test_object, - test_data, - len(test_content), + lambda: self._s3_client.put_object( + Bucket=settings.s3_bucket, + Key=test_object, + Body=test_data, + ContentLength=len(test_content), + ), ) - # Download test object response = await loop.run_in_executor( - None, self._minio_client.get_object, settings.minio_bucket, test_object + None, + lambda: self._s3_client.get_object( + Bucket=settings.s3_bucket, Key=test_object + ), ) - downloaded_content = response.read() - response.close() - response.release_conn() + downloaded_content = response["Body"].read() - # Clean up test object await loop.run_in_executor( None, - self._minio_client.remove_object, - settings.minio_bucket, - test_object, + lambda: self._s3_client.delete_object( + Bucket=settings.s3_bucket, Key=test_object + ), ) if downloaded_content != test_content: - raise Exception("MinIO read/write test failed") + raise Exception("S3 read/write test failed") response_time = (time.time() - start_time) * 1000 - # Determine status based on response time status = HealthStatus.HEALTHY - if response_time > 2000: # > 2 seconds + if response_time > 2000: status = HealthStatus.DEGRADED details = { - "endpoint": settings.minio_endpoint, - "bucket": settings.minio_bucket, + "endpoint": settings.s3_endpoint, + "bucket": settings.s3_bucket, "bucket_exists": bucket_exists, "total_buckets": len(buckets), - "secure": settings.minio_secure, + "secure": settings.s3_secure, } return HealthCheckResult( - service="minio", + service="s3", status=status, response_time_ms=response_time, details=details, ) - 
except S3Error as e: + except ClientError as e: response_time = (time.time() - start_time) * 1000 logger.error( - "MinIO health check failed", + "S3 health check failed", error=str(e), response_time_ms=response_time, ) return HealthCheckResult( - service="minio", + service="s3", status=HealthStatus.UNHEALTHY, response_time_ms=response_time, - error=f"S3 Error: {e.message if hasattr(e, 'message') else str(e)}", + error=str(e), ) except Exception as e: response_time = (time.time() - start_time) * 1000 logger.error( - "MinIO health check failed", + "S3 health check failed", error=str(e), response_time_ms=response_time, ) return HealthCheckResult( - service="minio", + service="s3", status=HealthStatus.UNHEALTHY, response_time_ms=response_time, error=str(e), @@ -470,7 +472,7 @@ async def check_sandbox_pool(self) -> HealthCheckResult: details = { "enabled": True, - "architecture": "stateless", # Sandboxes destroyed after each execution + "architecture": "stateless", "total_available": total_available, "total_acquisitions": total_acquisitions, "pool_hits": pool_hits, diff --git a/src/services/orchestrator.py b/src/services/orchestrator.py index 71a87bb..afb64c2 100644 --- a/src/services/orchestrator.py +++ b/src/services/orchestrator.py @@ -16,6 +16,7 @@ """ import asyncio +import math from dataclasses import dataclass from datetime import datetime from typing import Any, Dict, List, Optional @@ -55,6 +56,12 @@ class ExecutionContext: request_id: str session_id: Optional[str] = None mounted_files: Optional[List[Dict[str, Any]]] = None + # Snapshot of (mtime_ns, size) per mounted-file basename, captured AFTER mount + # but BEFORE user code runs. Used by _handle_generated_files to detect + # in-place edits — files whose stats changed get surfaced as new generated + # FileRefs in the current session, so LibreChat tracks the new version on + # the next call. Empty when no files were mounted. 
+ mounted_file_stats: Optional[Dict[str, tuple]] = None execution: Optional[Any] = None generated_files: Optional[List[FileRef]] = None stdout: str = "" @@ -148,10 +155,11 @@ async def execute( # Step 5.5: Save new state (Python only, before file handling) await self._save_state(ctx) - # Step 5.6: Update mounted files to capture in-place edits - await self._update_mounted_files_content(ctx) - - # Step 6: Handle generated files + # Step 6: Handle generated files. Includes in-place edits to mounted + # files now — runner._detect_generated_files compares pre-execution + # mtime/size against current state and surfaces edited files. Each + # such file becomes a new file_id owned by ctx.session_id, so + # LibreChat's next call references the updated content. ctx.generated_files = await self._handle_generated_files(ctx) # Step 7: Build response @@ -407,6 +415,13 @@ async def _mount_explicit_files( file_info.file_id, ) + file_metadata = await self.file_service.get_file_metadata( + file_ref.session_id, file_info.file_id + ) + is_read_only = ( + file_metadata.get("is_read_only") == "1" if file_metadata else False + ) + mounted.append( { "file_id": file_info.file_id, @@ -415,6 +430,8 @@ async def _mount_explicit_files( "size": file_info.size, "session_id": file_ref.session_id, "is_linked_input": False, + "entity_id": getattr(file_ref, "entity_id", None), + "is_read_only": is_read_only, } ) mounted_ids.add(key) @@ -450,6 +467,9 @@ async def _auto_mount_session_files( is_linked_input = ( file_metadata.get("type") == "linked_input" if file_metadata else False ) + is_read_only = ( + file_metadata.get("is_read_only") == "1" if file_metadata else False + ) # Skip duplicates (shouldn't happen, but defensive) key = (ctx.session_id, file_info.file_id) @@ -464,6 +484,7 @@ async def _auto_mount_session_files( "size": file_info.size, "session_id": ctx.session_id, "is_linked_input": is_linked_input, + "is_read_only": is_read_only, } ) mounted_ids.add(key) @@ -479,11 +500,11 @@ async def 
_auto_mount_session_files( return mounted async def _load_state(self, ctx: ExecutionContext) -> None: - """Load previous state from Redis (or MinIO fallback) for Python sessions. + """Load previous state from Redis (or S3 fallback) for Python sessions. Priority order: 1. Redis hot storage (within 2-hour TTL) - 2. MinIO cold storage (archived state) + 2. S3 cold storage (archived state) """ if not settings.state_persistence_enabled: return @@ -510,14 +531,14 @@ async def _load_state(self, ctx: ExecutionContext) -> None: ) return - # Try MinIO fallback (cold storage) + # Try S3 fallback (cold storage) if self.state_archival_service and settings.state_archive_enabled: ctx.initial_state = await self.state_archival_service.restore_state( ctx.session_id ) if ctx.initial_state: logger.debug( - "Restored state from MinIO", + "Restored state from S3", session_id=ctx.session_id[:12], state_size=len(ctx.initial_state), ) @@ -553,9 +574,9 @@ async def _save_state(self, ctx: ExecutionContext) -> None: max_redis_bytes = settings.state_max_redis_size_mb * 1024 * 1024 if raw_size > max_redis_bytes: - # Large state: store blob in MinIO, pointer in Redis + # Large state: store blob in S3, pointer in Redis logger.info( - "State exceeds Redis threshold, storing in MinIO", + "State exceeds Redis threshold, storing in S3", session_id=ctx.session_id[:12], state_size_mb=round(raw_size / 1024 / 1024, 1), threshold_mb=settings.state_max_redis_size_mb, @@ -572,9 +593,9 @@ async def _save_state(self, ctx: ExecutionContext) -> None: ttl_seconds=settings.state_ttl_seconds, ) else: - # MinIO failed, fall back to Redis anyway + # S3 archival failed, fall back to Redis anyway logger.warning( - "MinIO archival failed, falling back to Redis", + "S3 archival failed, falling back to Redis", session_id=ctx.session_id[:12], ) await self.state_service.save_state( @@ -604,96 +625,17 @@ async def _save_state(self, ctx: ExecutionContext) -> None: warning=error, ) - async def 
_update_mounted_files_content(self, ctx: ExecutionContext) -> None: - """Re-upload all mounted files to capture any modifications. - - This ensures in-place edits to mounted files persist after execution. - Called after execution completes, reads current content from container - and updates the file in MinIO storage. - - SECURITY: Only updates files that belong to the current session. - Files referenced from other sessions are read-only to prevent - cross-session/cross-user data modification. - """ - if not ctx.mounted_files or not ctx.container: - return - - sandbox_manager = self.execution_service.sandbox_manager - - for file_info in ctx.mounted_files: - try: - filename = file_info.get("filename") - file_id = file_info.get("file_id") - file_session_id = file_info.get("session_id") - - if not all([filename, file_id, file_session_id]): - continue - - # SECURITY: Only update files from the current session - # Files from other sessions are read-only - if file_session_id != ctx.session_id: - logger.debug( - "Skipping update for cross-session file", - filename=filename, - file_session=file_session_id[:12] if file_session_id else None, - exec_session=ctx.session_id[:12] if ctx.session_id else None, - ) - continue - - # SECURITY: Skip agent-assigned files (uploaded with entity_id) - # Agent files are read-only and cannot be modified by user code - file_metadata = await self.file_service.get_file_metadata( - file_session_id, file_id - ) - if file_metadata and file_metadata.get("is_agent_file") == "1": - logger.debug( - "Skipping update for agent-assigned file (read-only)", - filename=filename, - file_id=file_id, - ) - continue - - if file_metadata and file_metadata.get("is_read_only") == "1": - logger.debug( - "Skipping update for read-only linked file", - filename=filename, - file_id=file_id, - ) - continue - - # Read current content from container - file_path = f"/mnt/data/{filename}" - content = sandbox_manager.get_file_content_from_sandbox( - ctx.container, file_path 
- ) - - if content is None: - # File may have been deleted - that's ok - logger.debug( - "Mounted file not found after execution", - filename=filename, - ) - continue - - # Update file in storage - await self.file_service.update_file_content( - session_id=file_session_id, - file_id=file_id, - content=content, - ) - - logger.debug( - "Updated mounted file content", - filename=filename, - size=len(content), - ) - - except Exception as e: - logger.warning( - "Failed to update mounted file", - filename=file_info.get("filename"), - error=str(e), - ) + # NOTE: `_update_mounted_files_content` was removed in favor of letting + # `_handle_generated_files` handle in-place edits. The old in-place-update + # path silently dropped edits in three common scenarios (cross-session + # mounted files, agent-uploaded files, read-only linked aliases), and + # because the response carried no signal that an edit had occurred, + # LibreChat had no way to track the new content for the next call. The + # new model: if user code modifies a mounted file, the runner detects + # the mtime/size change and surfaces it as a regular generated file in + # the current session. Each iteration produces a fresh file_id which + # LibreChat then references on the next call. See `runner.py: + # _detect_generated_files` and `SandboxInfo.mounted_file_stats`. def _normalize_args(self, args: Any) -> Optional[List[str]]: """Normalize args parameter to List[str] or None. @@ -721,10 +663,18 @@ async def _execute_code(self, ctx: ExecutionContext) -> Any: # Normalize args from request normalized_args = self._normalize_args(ctx.request.args) + # Convert per-request timeout (ms) to seconds, clamped to server max. 
+ timeout_seconds = ( + math.ceil(ctx.request.timeout / 1000) + if ctx.request.timeout + else settings.max_execution_time + ) + timeout_seconds = min(timeout_seconds, settings.max_execution_time) + exec_request = ExecuteCodeRequest( code=ctx.request.code, language=ctx.request.lang, - timeout=settings.max_execution_time, + timeout=timeout_seconds, args=normalized_args, ) @@ -762,7 +712,13 @@ async def _execute_code(self, ctx: ExecutionContext) -> Any: return execution async def _handle_generated_files(self, ctx: ExecutionContext) -> List[FileRef]: - """Handle files generated during execution.""" + """Handle files generated during execution. + + Preserves any subdirectory structure under `/mnt/data/` so files + like `/mnt/data/charts/foo.png` come back as `name="charts/foo.png"` + in the response. LibreChat (PR #12848) preserves these paths in its + own rendering — collapsing them here would break that. + """ generated = [] for output in ctx.execution.outputs: @@ -770,9 +726,44 @@ async def _handle_generated_files(self, ctx: ExecutionContext) -> List[FileRef]: continue file_path = output.content - filename = file_path.split("/")[-1] if "/" in file_path else file_path + relative = ( + file_path[len("/mnt/data/") :] + if file_path.startswith("/mnt/data/") + else file_path + ) - if not filename or filename.startswith("."): + # Skip hidden files (any segment starting with `.`). Done on the + # raw path because sanitize_filename rewrites `.foo` to `_.foo`, + # which would defeat the check. + raw_segments = [s for s in relative.replace("\\", "/").split("/") if s] + if not raw_segments or any(s.startswith(".") for s in raw_segments): + continue + + filename = OutputProcessor.sanitize_relative_path(relative) + if not filename or filename == "_": + continue + + meta = output.metadata or {} + + # Inherited files: untouched mounted files. Skip download and emit + # the original FileRef so clients can split "Generated" from + # "Available" in LLM prompts and avoid re-uploading. 
+ if meta.get("inherited"): + generated.append( + FileRef( + id=meta["original_file_id"], + name=filename, + session_id=meta.get("original_session_id"), + inherited=True, + entity_id=meta.get("original_entity_id"), + ) + ) + logger.debug( + "Inherited file passed through", + session_id=ctx.session_id, + filename=filename, + original_file_id=meta.get("original_file_id"), + ) continue try: @@ -787,13 +778,17 @@ async def _handle_generated_files(self, ctx: ExecutionContext) -> List[FileRef]: file_content, ) - generated.append( - FileRef( - id=file_id, - name=filename, - session_id=ctx.session_id, # Include for cross-message persistence - ) + file_ref = FileRef( + id=file_id, + name=filename, + session_id=ctx.session_id, # Include for cross-message persistence ) + if meta.get("modified_from_id"): + file_ref.modified_from = { + "id": meta["modified_from_id"], + "session_id": meta.get("modified_from_session_id") or "", + } + generated.append(file_ref) logger.debug( "Generated file stored", session_id=ctx.session_id, diff --git a/src/services/programmatic.py b/src/services/programmatic.py index 43288e2..719e2dd 100644 --- a/src/services/programmatic.py +++ b/src/services/programmatic.py @@ -50,6 +50,7 @@ class PausedContext: sandbox_info: SandboxInfo process: asyncio.subprocess.Process session_id: str + lang: str = "py" round_trip_count: int = 0 timeout_handle: Optional[asyncio.TimerHandle] = None accumulated_stdout: str = "" @@ -82,53 +83,68 @@ async def start_execution( session_id: str, timeout: Optional[int] = None, files: Optional[List[PTCFileInput]] = None, + lang: str = "py", ) -> ProgrammaticExecResponse: """Start a new PTC execution. - Creates an nsjail sandbox, copies ptc_server.py into it, - and starts execution with the provided code and tools. + Creates an nsjail sandbox, copies the appropriate PTC server script + into it, and starts execution with the provided code and tools. 
Args: - code: Python code to execute + code: Code to execute (Python or bash, depending on `lang`) tools: Tool definitions available to the code session_id: Session identifier timeout: Execution timeout in seconds files: Optional referenced prior-session files to mount in sandbox + lang: PTC language. "py" runs ptc_server.py (asyncio + Python). + "bash" runs ptc_bash_server.py (Python wrapper that spawns + bash with one auto-generated function per tool). Returns: ProgrammaticExecResponse with status and optional tool_calls """ + if lang not in ("py", "bash"): + return ProgrammaticExecResponse( + status="error", + session_id=session_id, + error=f"Unsupported PTC lang: {lang!r}", + ) + execution_timeout = timeout or settings.max_execution_time execution_deadline = time.monotonic() + execution_timeout - # Create sandbox + # Bash PTC sandbox runs as the bash uid; python PTC sandbox runs as py. + sandbox_language = lang sandbox_info = self._sandbox_manager.create_sandbox( session_id=session_id, - language="py", + language=sandbox_language, repl_mode=False, ) try: - # Copy ptc_server.py into the sandbox data dir - ptc_server_path = Path("/opt/ptc_server.py") + ptc_server_filename = ( + "ptc_bash_server.py" if lang == "bash" else "ptc_server.py" + ) + ptc_server_path = Path("/opt") / ptc_server_filename if not ptc_server_path.exists(): # Fallback: try relative path (local development) ptc_server_path = ( - Path(__file__).parent.parent.parent / "docker" / "ptc_server.py" + Path(__file__).parent.parent.parent / "docker" / ptc_server_filename ) if ptc_server_path.exists(): self._sandbox_manager.copy_content_to_sandbox( sandbox_info, ptc_server_path.read_bytes(), - "/mnt/data/ptc_server.py", - language="py", + f"/mnt/data/{ptc_server_filename}", + language=sandbox_language, ) else: + self._sandbox_manager.destroy_sandbox(sandbox_info) return ProgrammaticExecResponse( status="error", session_id=session_id, - error="PTC server script not found", + error=f"PTC server script not 
found: {ptc_server_filename}", ) # Mount any provided files @@ -136,6 +152,7 @@ async def start_execution( file_error = await self._mount_requested_files( sandbox_info=sandbox_info, files=files, + language=sandbox_language, ) if file_error: self._sandbox_manager.destroy_sandbox(sandbox_info) @@ -145,18 +162,22 @@ async def start_execution( error=file_error, ) - # Build nsjail command - wrap in /bin/sh -c like SandboxExecutor - env = self._sandbox_manager.executor._build_sanitized_env("py") + # Both server scripts are launched via python3 — the bash variant + # is itself a Python wrapper that spawns bash internally. + env = self._sandbox_manager.executor._build_sanitized_env(sandbox_language) shell_command = [ "/bin/sh", "-c", - "python3 /mnt/data/ptc_server.py", + f"python3 /mnt/data/{ptc_server_filename}", ] nsjail_args = self._nsjail_config.build_args( sandbox_dir=str(sandbox_info.data_dir), command=shell_command, - language="py", + language=sandbox_language, timeout=execution_timeout, + # Honor ENABLE_SANDBOX_NETWORK so PTC sandboxes can also + # reach the inline egress proxy for skill installs. 
+ network=bool(settings.enable_sandbox_network), env=env, ) @@ -165,6 +186,10 @@ async def start_execution( shlex.quote(str(a)) for a in [settings.nsjail_binary] + nsjail_args ) + tmpfs_size = settings.sandbox_tmpfs_size_mb + noexec_tmpfs = "noexec,nosuid,nodev," + deps_path = settings.skill_deps_path + wrapper_cmd = ( f"mount --bind {shlex.quote(str(sandbox_info.data_dir))} /mnt/data && " f"mount -t tmpfs -o size=1k tmpfs /var/lib/code-interpreter/sandboxes && " @@ -173,7 +198,18 @@ async def start_execution( f"mount -t tmpfs -o size=1k tmpfs /app/ssl && " f"mount -t tmpfs -o size=1k tmpfs /app/dashboard && " f"mount -t tmpfs -o size=1k tmpfs /app/src && " - f"mount --bind /tmp/empty_proc /proc && " + f"mount --bind /var/lib/code-interpreter/empty_proc /proc && " + # BUG-007: Ephemeral /tmp with noexec,nosuid,nodev + f"mount -t tmpfs -o {noexec_tmpfs}size={tmpfs_size}m,mode=1777 tmpfs /tmp && " + # BUG-008: Lock down other writable paths + f"mount -t tmpfs -o {noexec_tmpfs}size=1m,mode=1777 tmpfs /var/tmp && " + f"mount -t tmpfs -o {noexec_tmpfs}size=1m,mode=1777 tmpfs /run/lock && " + f"mount -t tmpfs -o {noexec_tmpfs}size=1m,mode=1733 tmpfs /var/lib/php/sessions && " + # BUG-008: skill-deps nosuid,nodev (not noexec — installed CLIs need exec) + f"(test -d {shlex.quote(deps_path)} && " + f"mount --bind {shlex.quote(deps_path)} {shlex.quote(deps_path)} && " + f"mount -o remount,bind,nosuid,nodev {shlex.quote(deps_path)} " + f"|| true) && " f"{nsjail_cmd}" ) @@ -215,6 +251,7 @@ async def start_execution( timeout=execution_timeout, execution_deadline=execution_deadline, execution_timeout_seconds=execution_timeout, + lang=lang, ) except Exception as e: @@ -317,6 +354,7 @@ async def continue_execution( accumulated_stdout=ctx.accumulated_stdout, accumulated_stderr=ctx.accumulated_stderr, round_trip_count=ctx.round_trip_count, + lang=ctx.lang, ) except Exception as e: @@ -343,6 +381,7 @@ async def _read_ptc_response( accumulated_stdout: str = "", accumulated_stderr: str 
= "", round_trip_count: int = 0, + lang: str = "py", ) -> ProgrammaticExecResponse: """Read and process a response from the PTC server subprocess. @@ -472,6 +511,7 @@ async def read_until_delimiter() -> None: sandbox_info=sandbox_info, process=proc, session_id=session_id, + lang=lang, round_trip_count=round_trip_count, accumulated_stdout=total_stdout, accumulated_stderr=total_stderr, @@ -559,10 +599,13 @@ async def _mount_requested_files( self, sandbox_info: SandboxInfo, files: List[PTCFileInput], + language: str = "py", ) -> Optional[str]: """Mount referenced prior-session files into the sandbox.""" for file_info in files: - error = await self._mount_referenced_file(sandbox_info, file_info) + error = await self._mount_referenced_file( + sandbox_info, file_info, language=language + ) if error: return error @@ -572,6 +615,7 @@ async def _mount_referenced_file( self, sandbox_info: SandboxInfo, file_info: PTCFileInput, + language: str = "py", ) -> Optional[str]: """Resolve a stored file reference and mount it into /mnt/data.""" if self._file_service is None: @@ -604,15 +648,22 @@ async def _mount_referenced_file( sandbox_info, content, f"/mnt/data/{filename}", - language="py", + language=language, ) return None def _normalize_mount_filename(self, filename: Optional[str]) -> str: - """Collapse any path-like input to a safe basename for /mnt/data.""" + """Sanitize filename for /mnt/data while preserving subdirectories. + + Aligned with Item 4b's sanitize_relative_path so PTC file mounts use + the same rules as the main /exec mount path. Filenames may legitimately + contain `/` (skill bundles, nested data); only `..` traversal is rejected. 
+ """ + from .execution.output import OutputProcessor + candidate = (filename or "").strip() - normalized = Path(candidate).name - if not normalized: + normalized = OutputProcessor.sanitize_relative_path(candidate) + if not normalized or normalized == "_": raise ValueError("Referenced PTC file input must include a valid name") return normalized diff --git a/src/services/sandbox/egress_firewall.py b/src/services/sandbox/egress_firewall.py new file mode 100644 index 0000000..6afd3e5 --- /dev/null +++ b/src/services/sandbox/egress_firewall.py @@ -0,0 +1,174 @@ +"""iptables egress firewall for the sandbox uid. + +Without this, enabling ENABLE_SANDBOX_NETWORK shares the API container's +network namespace with sandbox processes, which gives them direct access to +internal services like Redis/S3 on the docker bridge — full SSRF. + +The hostname-allowlist proxy only protects HTTPS_PROXY-aware clients +(pip, npm, requests with proxy support). Raw socket calls — `socket.create_connection`, +direct TCP from a malicious skill — bypass the proxy entirely. + +This module installs iptables OUTPUT rules that match on the sandbox uid: + - ALLOW the sandbox uid → 127.0.0.1: (so pip etc. work) + - DROP everything else from the sandbox uid + +The API process itself runs as root (uid 0), so the proxy's own outbound +traffic to PyPI/npm/etc. is unaffected by these rules. + +Requires the container to have CAP_NET_ADMIN. If iptables fails (missing +binary, missing capability) we log a clear error and refuse to enable +network access — better to break loud than silently leak SSRF. +""" + +from __future__ import annotations + +import shutil +import subprocess +from typing import List + +import structlog + +logger = structlog.get_logger(__name__) + + +# Marker comment so we can find and remove our own rules without disturbing +# anyone else's. iptables --comment is supported by every modern build. 
+_RULE_COMMENT = "code-interpreter-sandbox-egress" + + +def _run_iptables(args: List[str]) -> tuple[int, str]: + """Run an iptables command. Returns (exit_code, combined_output).""" + iptables = shutil.which("iptables") + if iptables is None: + return 127, "iptables binary not found" + try: + proc = subprocess.run( + [iptables, *args], + capture_output=True, + text=True, + timeout=5, + ) + except subprocess.TimeoutExpired: + return 1, "iptables timed out" + except OSError as exc: + return 1, f"iptables failed to start: {exc}" + out = (proc.stdout or "") + (proc.stderr or "") + return proc.returncode, out.strip() + + +def remove_existing_rules() -> None: + """Idempotent cleanup of any rules left over from a previous run. + + iptables doesn't have a 'remove all rules matching comment X' verb, so + we list the OUTPUT chain and delete one rule at a time by line number. + Doing this in a loop because line numbers shift after each delete. + """ + while True: + code, out = _run_iptables(["-L", "OUTPUT", "--line-numbers", "-n"]) + if code != 0: + return + target_line: int | None = None + for line in out.splitlines(): + if _RULE_COMMENT in line: + first = line.split(None, 1)[0] + try: + target_line = int(first) + except ValueError: + continue + break + if target_line is None: + return + _run_iptables(["-D", "OUTPUT", str(target_line)]) + + +def install_sandbox_egress_rules(sandbox_uid: int, proxy_port: int) -> bool: + """Install iptables rules so the sandbox uid can only reach the proxy. + + Returns True on success, False if iptables isn't available or the rules + couldn't be installed (e.g., missing CAP_NET_ADMIN). + """ + # Clean up any rules we might have left from a previous start. + remove_existing_rules() + + # Order matters: ACCEPT rules must come before the catch-all DROP. + rules: List[List[str]] = [ + # Allow the sandbox uid to talk to the proxy on loopback. 
+ [ + "-A", + "OUTPUT", + "-m", + "owner", + "--uid-owner", + str(sandbox_uid), + "-d", + "127.0.0.1", + "-p", + "tcp", + "--dport", + str(proxy_port), + "-m", + "comment", + "--comment", + _RULE_COMMENT, + "-j", + "ACCEPT", + ], + # Allow loopback traffic generally (DNS to systemd-resolved on 127.0.0.53, + # localhost-only services, etc.). The proxy enforces hostname allowlist + # for actual outbound; this just keeps the sandbox uid able to talk + # to itself if it ever needs to. + [ + "-A", + "OUTPUT", + "-m", + "owner", + "--uid-owner", + str(sandbox_uid), + "-o", + "lo", + "-m", + "comment", + "--comment", + _RULE_COMMENT, + "-j", + "ACCEPT", + ], + # Drop everything else from the sandbox uid. This is what blocks + # direct connections to Redis/S3/internet. + [ + "-A", + "OUTPUT", + "-m", + "owner", + "--uid-owner", + str(sandbox_uid), + "-m", + "comment", + "--comment", + _RULE_COMMENT, + "-j", + "REJECT", + "--reject-with", + "icmp-net-unreachable", + ], + ] + + for rule in rules: + code, out = _run_iptables(rule) + if code != 0: + logger.error( + "Failed to install sandbox egress firewall rule; " + "ROLLING BACK to avoid leaving the rule chain in a partial state", + rule=rule, + code=code, + output=out, + ) + remove_existing_rules() + return False + + logger.info( + "Sandbox egress firewall installed", + sandbox_uid=sandbox_uid, + proxy_port=proxy_port, + ) + return True diff --git a/src/services/sandbox/egress_proxy.py b/src/services/sandbox/egress_proxy.py new file mode 100644 index 0000000..4f87bfa --- /dev/null +++ b/src/services/sandbox/egress_proxy.py @@ -0,0 +1,313 @@ +"""HTTP CONNECT proxy with hostname allowlist for sandbox egress. + +Runs as an asyncio task inside the API process. Sandboxes that have +network access enabled get `HTTPS_PROXY=http://127.0.0.1:` injected +into their env; pip, npm, go, cargo all honor that variable. The proxy: + +- Only handles `CONNECT host:port HTTP/1.1` (HTTPS tunneling). 
The proxy + never sees the encrypted body — TLS terminates between the sandbox and + the upstream. Allowlist enforcement happens on the requested host name. +- Refuses to open tunnels to private IP ranges (RFC 1918, loopback, link-local) + even if a public hostname resolves to one. This stops trivial SSRF against + Redis/S3/etc. on the same docker network. +- Refuses any request whose host doesn't match the allowlist. + +Allowlist defaults cover Python (PyPI), Node (npmjs), Go modules, and +Rust crates so `pip install`, `npm install`, `go get`, `cargo add` work +out of the box. Add more via SANDBOX_EGRESS_ALLOWLIST=host1,host2. +""" + +from __future__ import annotations + +import asyncio +import ipaddress +import socket +from typing import Iterable, Optional, Set + +import structlog + +logger = structlog.get_logger(__name__) + + +# Defaults cover the major package ecosystems for skills. Operators can +# extend via SANDBOX_EGRESS_ALLOWLIST. Subdomains are matched as suffixes +# (e.g., `pypi.org` permits `files.pypi.org`). +DEFAULT_ALLOWLIST: tuple[str, ...] 
= ( + # Python (PyPI) + "pypi.org", + "files.pythonhosted.org", + "pythonhosted.org", + # Node (npm + npx) + "registry.npmjs.org", + "registry.npmjs.com", + "npmjs.org", + "npmjs.com", + # Go modules + "proxy.golang.org", + "sum.golang.org", + "golang.org", + # Rust crates + "crates.io", + "static.crates.io", + "index.crates.io", +) + + +def _normalize_host(host: str) -> str: + """Lowercase and strip an optional surrounding `[ipv6]` notation.""" + host = host.strip().lower() + if host.startswith("[") and host.endswith("]"): + host = host[1:-1] + return host + + +def _is_private_ip(host: str) -> bool: + """True if `host` is an IP literal that's loopback, private, link-local, or otherwise non-public.""" + try: + ip = ipaddress.ip_address(host) + except ValueError: + return False + return ( + ip.is_private + or ip.is_loopback + or ip.is_link_local + or ip.is_multicast + or ip.is_reserved + or ip.is_unspecified + ) + + +def _matches_allowlist(host: str, allowlist: Set[str]) -> bool: + """True if `host` exactly matches an entry or is a subdomain of one.""" + host = _normalize_host(host) + if host in allowlist: + return True + # Subdomain match: `files.pypi.org` is allowed when `pypi.org` is in the list. + return any(host.endswith("." + entry) for entry in allowlist) + + +async def _resolve_first_addr(host: str, port: int) -> Optional[tuple[str, int]]: + """Resolve `host` once and return the first concrete (ip, port) pair. + + We resolve here instead of letting `asyncio.open_connection` do it so we can + reject the tunnel early if the host resolves only to private IPs. Returns + None if the host fails to resolve or all addresses are private. 
+ """ + loop = asyncio.get_event_loop() + try: + infos = await loop.getaddrinfo( + host, + port, + type=socket.SOCK_STREAM, + proto=socket.IPPROTO_TCP, + ) + except (socket.gaierror, OSError): + return None + for family, _stype, _proto, _canon, sockaddr in infos: + ip = sockaddr[0] + if _is_private_ip(ip): + continue + return ip, port + return None + + +async def _pipe( + src: asyncio.StreamReader, + dst: asyncio.StreamWriter, +) -> None: + """Copy bytes from `src` to `dst` until EOF or write failure.""" + try: + while True: + chunk = await src.read(65536) + if not chunk: + break + dst.write(chunk) + await dst.drain() + except (ConnectionResetError, BrokenPipeError, asyncio.IncompleteReadError): + pass + finally: + try: + dst.close() + except Exception: + pass + + +class EgressProxy: + """An asyncio CONNECT proxy with hostname allowlist enforcement. + + Bind to 127.0.0.1 only — sandboxes share the host network namespace + when network access is enabled, so 127.0.0.1 is reachable from inside. + No external listener. 
+ """ + + def __init__( + self, + port: int, + allowlist: Iterable[str] = DEFAULT_ALLOWLIST, + bind_host: str = "127.0.0.1", + ): + self.port = port + self.bind_host = bind_host + self.allowlist: Set[str] = {h.strip().lower() for h in allowlist if h.strip()} + self._server: Optional[asyncio.base_events.Server] = None + self._serve_task: Optional[asyncio.Task] = None + + async def start(self) -> None: + if self._server is not None: + return + self._server = await asyncio.start_server( + self._handle_client, + host=self.bind_host, + port=self.port, + ) + self._serve_task = asyncio.create_task(self._server.serve_forever()) + logger.info( + "Sandbox egress proxy started", + bind=f"{self.bind_host}:{self.port}", + allowlist_size=len(self.allowlist), + ) + + async def stop(self) -> None: + if self._server is None: + return + self._server.close() + try: + await self._server.wait_closed() + except Exception: + pass + if self._serve_task is not None: + self._serve_task.cancel() + try: + await self._serve_task + except (asyncio.CancelledError, Exception): + pass + self._server = None + self._serve_task = None + logger.info("Sandbox egress proxy stopped") + + async def _handle_client( + self, + client_reader: asyncio.StreamReader, + client_writer: asyncio.StreamWriter, + ) -> None: + peer = client_writer.get_extra_info("peername") + try: + request_line = await asyncio.wait_for(client_reader.readline(), timeout=5) + except (asyncio.TimeoutError, ConnectionError): + client_writer.close() + return + + if not request_line: + client_writer.close() + return + + # Drain headers (we don't act on them, but clients send them). 
+ try: + while True: + line = await asyncio.wait_for(client_reader.readline(), timeout=5) + if not line or line == b"\r\n": + break + except asyncio.TimeoutError: + await self._reply_and_close(client_writer, 408, "Request Timeout") + return + + method, _, target = request_line.decode("latin-1", errors="replace").partition( + " " + ) + method = method.upper() + if method != "CONNECT": + # Plain HTTP proxying isn't supported (and shouldn't be needed — + # pip etc. all use HTTPS). Reject with a clear status. + logger.warning( + "Egress proxy refused non-CONNECT request", + method=method, + peer=peer, + ) + await self._reply_and_close(client_writer, 405, "Method Not Allowed") + return + + target_host_port = target.split(" ", 1)[0] + host, _, port_str = target_host_port.rpartition(":") + try: + port = int(port_str) + except ValueError: + await self._reply_and_close(client_writer, 400, "Bad Request") + return + host = _normalize_host(host) + + # Allowlist check on the host *before* we resolve it, so audit logs show + # the requested host even when DNS would have failed. + if _is_private_ip(host): + logger.warning( + "Egress proxy refused private IP literal", host=host, peer=peer + ) + await self._reply_and_close(client_writer, 403, "Forbidden") + return + if not _matches_allowlist(host, self.allowlist): + logger.warning( + "Egress proxy refused non-allowlisted host", host=host, peer=peer + ) + await self._reply_and_close(client_writer, 403, "Forbidden") + return + + # Resolve and reject if it only points at private space. 
+ resolved = await _resolve_first_addr(host, port) + if resolved is None: + logger.warning( + "Egress proxy could not resolve host to public address", + host=host, + peer=peer, + ) + await self._reply_and_close(client_writer, 502, "Bad Gateway") + return + + ip, _ = resolved + try: + upstream_reader, upstream_writer = await asyncio.wait_for( + asyncio.open_connection(ip, port), + timeout=10, + ) + except (asyncio.TimeoutError, OSError) as e: + logger.warning( + "Egress proxy upstream connect failed", + host=host, + ip=ip, + error=str(e), + ) + await self._reply_and_close(client_writer, 502, "Bad Gateway") + return + + logger.debug( + "Egress proxy tunnel opened", + host=host, + ip=ip, + port=port, + peer=peer, + ) + + client_writer.write(b"HTTP/1.1 200 Connection Established\r\n\r\n") + try: + await client_writer.drain() + except ConnectionError: + upstream_writer.close() + return + + await asyncio.gather( + _pipe(client_reader, upstream_writer), + _pipe(upstream_reader, client_writer), + return_exceptions=True, + ) + + @staticmethod + async def _reply_and_close( + writer: asyncio.StreamWriter, status: int, reason: str + ) -> None: + try: + writer.write(f"HTTP/1.1 {status} {reason}\r\n\r\n".encode("ascii")) + await writer.drain() + except (ConnectionError, RuntimeError): + pass + try: + writer.close() + except Exception: + pass diff --git a/src/services/sandbox/executor.py b/src/services/sandbox/executor.py index e90ef9e..fae13f6 100644 --- a/src/services/sandbox/executor.py +++ b/src/services/sandbox/executor.py @@ -65,8 +65,11 @@ async def execute_command( # Use absolute path since nsjail uses execve (no PATH search) shell_command = ["/bin/sh", "-c", command] - # Build nsjail arguments - network = False # nsjail sandboxes run without network access + # Network access is operator-controlled via ENABLE_SANDBOX_NETWORK. + # Default off (sandboxes are isolated). 
When on, sandboxes share the + # host network namespace so they can reach the inline egress proxy + # at 127.0.0.1, which then enforces the package-registry allowlist. + network = bool(settings.enable_sandbox_network) nsjail_args = self._nsjail_config.build_args( sandbox_dir=str(sandbox_info.data_dir), command=shell_command, @@ -84,14 +87,29 @@ async def execute_command( shlex.quote(str(a)) for a in [settings.nsjail_binary] + nsjail_args ) # BUG-003: Mask /proc for most languages. - # Java and Rust need /proc/self/exe to locate shared libraries - # (JVM needs libjli.so, rustc needs its own binary path). - # For these languages, /proc remains accessible (known limitation). + # Some languages need /proc to function: + # - Java needs /proc/self/exe to locate libjli.so. + # - Rust needs /proc/self/exe to locate its own binary path. + # - Bash sandboxes are the typical entry point for skills (e.g., + # the Anthropic pptx/docx/xlsx skills) that shell out to + # LibreOffice (`soffice`) for PDF/image conversion. soffice + # hard-fails with "ERROR: /proc not mounted - LibreOffice is + # unlikely to work well if at all" without /proc. + # nsjail still creates a separate PID namespace so the visible + # /proc is restricted to the sandbox's own processes — main host + # info disclosure risk is /proc/cpuinfo and /proc/meminfo, which + # is acceptable in the trusted-tenant model these languages run in. 
lang = sandbox_info.language.lower().strip() - if lang in ("java", "rs"): + if lang in ("java", "rs", "bash"): proc_mask = "" else: - proc_mask = "mount --bind /tmp/empty_proc /proc && " + proc_mask = ( + "mount --bind /var/lib/code-interpreter/empty_proc /proc && " + ) + + tmpfs_size = settings.sandbox_tmpfs_size_mb + noexec_tmpfs = "noexec,nosuid,nodev," + deps_path = settings.skill_deps_path wrapper_cmd = ( # Bind sandbox dir to /mnt/data (before hiding sandboxes dir) @@ -108,6 +126,17 @@ async def execute_command( f"mount -t tmpfs -o size=1k tmpfs /app/src && " # BUG-003: Hide /proc (except Java which needs /proc/self/exe) f"{proc_mask}" + # BUG-007: Ephemeral /tmp with noexec,nosuid,nodev + f"mount -t tmpfs -o {noexec_tmpfs}size={tmpfs_size}m,mode=1777 tmpfs /tmp && " + # BUG-008: Lock down other writable paths + f"mount -t tmpfs -o {noexec_tmpfs}size=1m,mode=1777 tmpfs /var/tmp && " + f"mount -t tmpfs -o {noexec_tmpfs}size=1m,mode=1777 tmpfs /run/lock && " + f"mount -t tmpfs -o {noexec_tmpfs}size=1m,mode=1733 tmpfs /var/lib/php/sessions && " + # BUG-008: skill-deps nosuid,nodev (not noexec — installed CLIs need exec) + f"(test -d {shlex.quote(deps_path)} && " + f"mount --bind {shlex.quote(deps_path)} {shlex.quote(deps_path)} && " + f"mount -o remount,bind,nosuid,nodev {shlex.quote(deps_path)} " + f"|| true) && " # Execute nsjail f"{nsjail_cmd}" ) @@ -163,6 +192,7 @@ async def execute_command( def _build_sanitized_env(self, language: Optional[str]) -> Dict[str, str]: """Build environment whitelist for execution.""" normalized_lang = (language or "").lower().strip() + deps_root = settings.skill_deps_path # e.g. 
/opt/skill-deps env_whitelist: Dict[str, str] = { "PATH": "/usr/local/bin:/usr/bin:/bin", @@ -171,11 +201,15 @@ def _build_sanitized_env(self, language: Optional[str]) -> Dict[str, str]: } if normalized_lang in {"py", "python"}: + # PYTHONPATH includes the persistent skill-deps cache so installs + # from earlier executions (or other sessions) are importable. The + # cache lives under /opt/skill-deps and is mounted from a Docker + # named volume so it survives container restarts. env_whitelist.update( { "PYTHONUNBUFFERED": "1", "PYTHONDONTWRITEBYTECODE": "1", - "PYTHONPATH": "/mnt/data", + "PYTHONPATH": f"{deps_root}/python:/mnt/data", "MPLCONFIGDIR": "/tmp/mplconfig", "XDG_CACHE_HOME": "/tmp/.cache", "MPLBACKEND": "Agg", @@ -184,7 +218,9 @@ def _build_sanitized_env(self, language: Optional[str]) -> Dict[str, str]: elif normalized_lang in {"js", "ts"}: env_whitelist.update( { - "NODE_PATH": "/usr/local/lib/node_modules", + "NODE_PATH": ( + f"{deps_root}/node/lib/node_modules:/usr/local/lib/node_modules" + ), } ) elif normalized_lang == "java": @@ -250,6 +286,67 @@ def _build_sanitized_env(self, language: Optional[str]) -> Dict[str, str]: ) # bash and d use default PATH/HOME/TMPDIR only + # When sandbox network access is enabled, route outbound HTTPS through + # the inline egress proxy (allowlist-enforced) and point EVERY + # package manager at the persistent skill-deps cache. We set all of + # these regardless of `language` because skills routinely shell out + # — a bash skill might `pip install`, `npm install -g`, `go get`, + # etc. Limiting these to the matching language broke the bash case + # (no NPM_CONFIG_PREFIX → `npm -g` tries /usr/lib/node_modules). + # The proxy listens on 127.0.0.1 inside the API container's network + # namespace; sandboxes share that namespace via nsjail's + # --disable_clone_newnet so 127.0.0.1 reaches the proxy. 
+ if settings.enable_sandbox_network: + proxy_url = f"http://127.0.0.1:{settings.sandbox_egress_port}" + env_whitelist.update( + { + "HTTPS_PROXY": proxy_url, + "https_proxy": proxy_url, + "HTTP_PROXY": proxy_url, + "http_proxy": proxy_url, + "NO_PROXY": "127.0.0.1,localhost", + "no_proxy": "127.0.0.1,localhost", + # Python: pip installs land in the persistent cache. + "PIP_TARGET": f"{deps_root}/python", + "PIP_DISABLE_PIP_VERSION_CHECK": "1", + # Node: -g installs land in the persistent cache. + "NPM_CONFIG_PREFIX": f"{deps_root}/node", + "NPM_CONFIG_CACHE": f"{deps_root}/node/.npm-cache", + # Go: module cache is persistent. + "GOPATH": f"{deps_root}/go", + "GOMODCACHE": f"{deps_root}/go/pkg/mod", + # Rust: crates.io cache is persistent. + "CARGO_HOME": f"{deps_root}/cargo", + } + ) + # Make installed binaries immediately usable on PATH (npm -g, pip + # console scripts, cargo bins). Prepend so they win over system + # equivalents inside the sandbox. + env_whitelist["PATH"] = ( + f"{deps_root}/node/bin:{deps_root}/python/bin:" + f"{deps_root}/cargo/bin:{deps_root}/go/bin:" + f"{env_whitelist['PATH']}" + ) + # Runtime import paths so freshly-installed packages are loadable + # without further config. These have to be set for EVERY language + # (not just py/js) because skills routinely shell out — a bash + # skill might `node -e "require('foo')"` after `npm install -g foo`. + # If a language already set its own PYTHONPATH/NODE_PATH above, + # prepend the deps cache so it wins for newly-installed packages. 
+ existing_pythonpath = env_whitelist.get("PYTHONPATH", "") + env_whitelist["PYTHONPATH"] = ( + f"{deps_root}/python:{existing_pythonpath}" + if existing_pythonpath + else f"{deps_root}/python:/mnt/data" + ) + existing_node_path = env_whitelist.get("NODE_PATH", "") + node_dep_path = f"{deps_root}/node/lib/node_modules" + env_whitelist["NODE_PATH"] = ( + f"{node_dep_path}:{existing_node_path}" + if existing_node_path + else f"{node_dep_path}:/usr/local/lib/node_modules" + ) + return env_whitelist def _escape_env_value(self, value: str) -> str: diff --git a/src/services/sandbox/manager.py b/src/services/sandbox/manager.py index c9ebf2e..cee35fc 100644 --- a/src/services/sandbox/manager.py +++ b/src/services/sandbox/manager.py @@ -165,21 +165,50 @@ def copy_content_to_sandbox( Args: sandbox_info: Target sandbox content: File content as bytes - dest_path: Destination path (e.g., /mnt/data/file.py or file.py) + dest_path: Destination path. May be absolute (`/mnt/data/foo.py`, + `/mnt/data/skills/foo/SKILL.md`) or relative (`foo.py`). + Subdirectories under `/mnt/data/` are preserved; their parent + directories are created and chowned to the language uid. language: Programming language (used to set correct ownership) Returns: True if successful, False otherwise """ try: - # Extract filename from dest_path (may be absolute like /mnt/data/file.py) - filename = Path(dest_path).name - file_path = sandbox_info.data_dir / filename + user_id = get_user_id_for_language(language.lower().strip()) + + # Strip the bind-mount prefix so the remainder maps cleanly under + # data_dir; relative paths fall through unchanged. + relative = dest_path + if relative.startswith("/mnt/data/"): + relative = relative[len("/mnt/data/") :] + elif relative == "/mnt/data": + relative = "" + + # Use Path semantics to drop empty components but otherwise keep + # subdirectories. This is the one place we accept paths with `/` + # because the caller already controls them. 
+ relative_path = Path(relative) + file_path = sandbox_info.data_dir / relative_path + + parent = file_path.parent + if parent != sandbox_info.data_dir and parent.is_relative_to( + sandbox_info.data_dir + ): + parent.mkdir(parents=True, exist_ok=True) + # Chown each ancestor we may have created so the sandbox uid + # can traverse into the subdirectory. + for ancestor in [parent, *parent.parents]: + if ancestor == sandbox_info.data_dir: + break + try: + os.chown(str(ancestor), user_id, user_id) + os.chmod(str(ancestor), 0o755) + except (PermissionError, FileNotFoundError): + pass file_path.write_bytes(content) - # Set ownership to language-specific user - user_id = get_user_id_for_language(language.lower().strip()) os.chown(str(file_path), user_id, user_id) os.chmod(str(file_path), 0o644) diff --git a/src/services/sandbox/nsjail.py b/src/services/sandbox/nsjail.py index c3a2494..f9d83d9 100644 --- a/src/services/sandbox/nsjail.py +++ b/src/services/sandbox/nsjail.py @@ -7,7 +7,7 @@ from dataclasses import dataclass, field from datetime import datetime from pathlib import Path -from typing import Dict, List, Optional +from typing import Dict, List, Optional, Tuple import structlog @@ -33,6 +33,14 @@ class SandboxInfo: created_at: datetime repl_mode: bool = False labels: Dict[str, str] = field(default_factory=dict) + # Snapshot of (mtime_ns, size) for each mounted file basename, captured + # right after _mount_files_to_sandbox writes the file but BEFORE user code + # runs. Used by _detect_generated_files to distinguish "user edited a + # mounted file in place" from "mounted file is unchanged" so iterative + # edits to scripts get persisted as new file_ids in the current session. 
+ mounted_file_stats: Dict[ + str, Tuple[int, int, Optional[str], Optional[str], Optional[str]] + ] = field(default_factory=dict) @property def id(self) -> str: @@ -194,14 +202,21 @@ def build_args( # Seccomp policy: block dangerous syscalls # - ptrace: prevents process inspection/debugging (BUG-006a) - # - bind: prevents opening server sockets even with network access (BUG-006c) + # - bind: was originally blocked to prevent server sockets even with + # network access (BUG-006c), but bash sandboxes need it for tools + # like LibreOffice which use AF_UNIX sockets internally for IPC. + # Bash has the looser sandboxing model (also gets /proc), so allow + # bind there. For other languages, keep blocking. # Using ERRNO(1) so the process gets EPERM rather than SIGSYS - args.extend( - [ - "--seccomp_string", - "POLICY policy { ERRNO(1) { ptrace, bind } } USE policy DEFAULT ALLOW", - ] - ) + if normalized_lang == "bash": + seccomp_policy = ( + "POLICY policy { ERRNO(1) { ptrace } } USE policy DEFAULT ALLOW" + ) + else: + seccomp_policy = ( + "POLICY policy { ERRNO(1) { ptrace, bind } } USE policy DEFAULT ALLOW" + ) + args.extend(["--seccomp_string", seccomp_policy]) # Working directory: /mnt/data (bind-mounted by the executor wrapper) args.extend(["--cwd", "/mnt/data"]) diff --git a/src/services/sandbox/pool.py b/src/services/sandbox/pool.py index 197c289..9f3fa63 100644 --- a/src/services/sandbox/pool.py +++ b/src/services/sandbox/pool.py @@ -378,6 +378,9 @@ async def _start_repl_process( command=["/usr/bin/python3", "/opt/repl_server.py"], language="py", repl_mode=True, + # Honor ENABLE_SANDBOX_NETWORK so pooled REPL sandboxes can + # also reach the inline egress proxy for skill installs. 
+ network=bool(settings.enable_sandbox_network), env=env, ) @@ -387,6 +390,10 @@ async def _start_repl_process( nsjail_cmd = " ".join( shlex.quote(str(a)) for a in [settings.nsjail_binary] + nsjail_args ) + tmpfs_size = settings.sandbox_tmpfs_size_mb + noexec_tmpfs = "noexec,nosuid,nodev," + deps_path = settings.skill_deps_path + wrapper_cmd = ( # Bind sandbox dir to /mnt/data (before hiding sandboxes dir) f"mount --bind {shlex.quote(str(sandbox_info.data_dir))} /mnt/data && " @@ -401,7 +408,18 @@ async def _start_repl_process( f"mount -t tmpfs -o size=1k tmpfs /app/dashboard && " f"mount -t tmpfs -o size=1k tmpfs /app/src && " # BUG-003: Hide /proc (REPL is Python-only, always safe to mask) - f"mount --bind /tmp/empty_proc /proc && " + f"mount --bind /var/lib/code-interpreter/empty_proc /proc && " + # BUG-007: Ephemeral /tmp with noexec,nosuid,nodev + f"mount -t tmpfs -o {noexec_tmpfs}size={tmpfs_size}m,mode=1777 tmpfs /tmp && " + # BUG-008: Lock down other writable paths + f"mount -t tmpfs -o {noexec_tmpfs}size=1m,mode=1777 tmpfs /var/tmp && " + f"mount -t tmpfs -o {noexec_tmpfs}size=1m,mode=1777 tmpfs /run/lock && " + f"mount -t tmpfs -o {noexec_tmpfs}size=1m,mode=1733 tmpfs /var/lib/php/sessions && " + # BUG-008: skill-deps nosuid,nodev (not noexec — installed CLIs need exec) + f"(test -d {shlex.quote(deps_path)} && " + f"mount --bind {shlex.quote(deps_path)} {shlex.quote(deps_path)} && " + f"mount -o remount,bind,nosuid,nodev {shlex.quote(deps_path)} " + f"|| true) && " # Execute nsjail f"{nsjail_cmd}" ) diff --git a/src/services/session.py b/src/services/session.py index 58fbbca..638fd37 100644 --- a/src/services/session.py +++ b/src/services/session.py @@ -99,21 +99,19 @@ async def _cleanup_loop(self) -> None: else: logger.debug("No expired sessions to clean up") - # Opportunistically prune orphan MinIO objects (configurable) - if self._file_service and settings.enable_orphan_minio_cleanup: + # Opportunistically prune orphan S3 objects (configurable) + if 
self._file_service and settings.enable_orphan_s3_cleanup: try: deleted_orphans = ( await self._file_service.cleanup_orphan_objects() ) if deleted_orphans: logger.info( - "Pruned orphan MinIO objects", + "Pruned orphan S3 objects", deleted_orphans=deleted_orphans, ) except Exception as e: - logger.error( - "Failed pruning orphan MinIO objects", error=str(e) - ) + logger.error("Failed pruning orphan S3 objects", error=str(e)) # Wait for the configured cleanup interval await asyncio.sleep(settings.session_cleanup_interval_minutes * 60) diff --git a/src/services/state.py b/src/services/state.py index d26aca9..e3c99c6 100644 --- a/src/services/state.py +++ b/src/services/state.py @@ -9,7 +9,7 @@ Hybrid storage: - Hot storage: Redis with configurable TTL (default 2 hours) -- Cold storage: MinIO for long-term archival (handled by StateArchivalService) +- Cold storage: S3 for long-term archival (handled by StateArchivalService) Storage format: - Redis storage: Base64-encoded @@ -158,11 +158,11 @@ async def save_state_pointer( state_b64: str, ttl_seconds: Optional[int] = None, ) -> Tuple[bool, Optional[str]]: - """Save only hash and metadata to Redis (state blob stored in MinIO). + """Save only hash and metadata to Redis (state blob stored in S3). Used when state exceeds the Redis size threshold. The full state - is stored in MinIO; Redis only holds the hash and metadata for - fast lookups. The orchestrator's _load_state MinIO fallback + is stored in S3; Redis only holds the hash and metadata for + fast lookups. The orchestrator's _load_state S3 fallback handles retrieval. 
Args: @@ -192,7 +192,7 @@ async def save_state_pointer( "size_bytes": len(raw_bytes), "hash": state_hash, "created_at": now.isoformat(), - "storage": "minio", + "storage": "s3", } ) pipe.setex(self._meta_key(session_id), ttl_seconds, meta) @@ -200,7 +200,7 @@ async def save_state_pointer( await pipe.execute() logger.info( - "Saved state pointer to Redis (blob in MinIO)", + "Saved state pointer to Redis (blob in S3)", session_id=session_id[:12], state_size=len(raw_bytes), hash=state_hash[:12], diff --git a/src/services/state_archival.py b/src/services/state_archival.py index e95a7b8..2d05d12 100644 --- a/src/services/state_archival.py +++ b/src/services/state_archival.py @@ -1,18 +1,18 @@ -"""State archival service for MinIO cold storage. +"""State archival service for S3 cold storage. -This service handles archiving Python session states from Redis to MinIO +This service handles archiving Python session states from Redis to S3 for long-term storage, and restoring them on demand. Hybrid storage architecture: - Hot storage: Redis with 2-hour TTL (fast access) -- Cold storage: MinIO with 7-day TTL (long-term archival) +- Cold storage: S3 with configurable TTL (long-term archival) When a state is accessed: 1. Check Redis first (hot storage) -2. If not found, check MinIO (cold storage) -3. If found in MinIO, restore to Redis +2. If not found, check S3 (cold storage) +3. 
If found in S3, restore to Redis -States are archived to MinIO when: +States are archived to S3 when: - TTL in Redis drops below archive_after_seconds threshold - This indicates the session has been inactive for a while """ @@ -22,9 +22,9 @@ from datetime import datetime, timezone from typing import Optional, Dict, Any +import boto3 import structlog -from minio import Minio -from minio.error import S3Error +from botocore.exceptions import ClientError from ..config import settings from .state import StateService @@ -33,74 +33,79 @@ class StateArchivalService: - """Manages archiving and restoring Python session states to/from MinIO. + """Manages archiving and restoring Python session states to/from S3. - States are stored in MinIO under the path: + States are stored in S3 under the path: states/{session_id}/state.dat - Metadata is stored as object tags/custom metadata: + Metadata is stored as S3 object metadata: - archived_at: ISO timestamp - original_size: Size before any host-side compression - session_id: The session identifier """ - # MinIO path prefix for archived states STATE_PREFIX = "states" def __init__( self, state_service: Optional[StateService] = None, - minio_client: Optional[Minio] = None, + s3_client: Optional[Any] = None, ): """Initialize the archival service. 
Args: state_service: StateService instance for Redis operations - minio_client: Optional MinIO client (creates new one if not provided) + s3_client: Optional boto3 S3 client (creates new one if not provided) """ self.state_service = state_service or StateService() - self.minio_client = minio_client or Minio( - settings.minio_endpoint, - access_key=settings.minio_access_key, - secret_key=settings.minio_secret_key, - secure=settings.minio_secure, + self.s3_client = s3_client or boto3.client( + "s3", + endpoint_url=settings.s3.endpoint_url, + aws_access_key_id=settings.s3_access_key, + aws_secret_access_key=settings.s3_secret_key, + region_name=settings.s3_region, ) - self.bucket_name = settings.minio_bucket + self.bucket_name = settings.s3_bucket self._bucket_checked = False def _get_state_object_key(self, session_id: str) -> str: - """Generate MinIO object key for a session state.""" + """Generate S3 object key for a session state.""" return f"{self.STATE_PREFIX}/{session_id}/state.dat" async def _ensure_bucket_exists(self) -> None: - """Ensure the MinIO bucket exists.""" + """Ensure the S3 bucket exists.""" if self._bucket_checked: return try: loop = asyncio.get_event_loop() - bucket_exists = await loop.run_in_executor( - None, self.minio_client.bucket_exists, self.bucket_name - ) - - if not bucket_exists: + try: await loop.run_in_executor( - None, self.minio_client.make_bucket, self.bucket_name - ) - logger.info( - "Created MinIO bucket for state archival", bucket=self.bucket_name + None, + lambda: self.s3_client.head_bucket(Bucket=self.bucket_name), ) + except ClientError as e: + if e.response["Error"]["Code"] in ("404", "NoSuchBucket"): + await loop.run_in_executor( + None, + lambda: self.s3_client.create_bucket(Bucket=self.bucket_name), + ) + logger.info( + "Created S3 bucket for state archival", bucket=self.bucket_name + ) + else: + raise self._bucket_checked = True - except S3Error as e: + except ClientError as e: logger.error( "Failed to ensure bucket 
exists", error=str(e), bucket=self.bucket_name ) raise async def archive_state(self, session_id: str, state_data: str) -> bool: - """Archive a session state to MinIO. + """Archive a session state to S3. Args: session_id: Session identifier @@ -115,31 +120,29 @@ async def archive_state(self, session_id: str, state_data: str) -> bool: object_key = self._get_state_object_key(session_id) state_bytes = state_data.encode("utf-8") - # Create metadata metadata = { "archived_at": datetime.now(timezone.utc).isoformat(), "original_size": str(len(state_bytes)), "session_id": session_id, } - # Upload to MinIO loop = asyncio.get_event_loop() data_stream = io.BytesIO(state_bytes) await loop.run_in_executor( None, - lambda: self.minio_client.put_object( - self.bucket_name, - object_key, - data_stream, - len(state_bytes), - content_type="application/octet-stream", - metadata=metadata, + lambda: self.s3_client.put_object( + Bucket=self.bucket_name, + Key=object_key, + Body=data_stream, + ContentLength=len(state_bytes), + ContentType="application/octet-stream", + Metadata=metadata, ), ) logger.info( - "Archived state to MinIO", + "Archived state to S3", session_id=session_id[:12], size_bytes=len(state_bytes), object_key=object_key, @@ -153,7 +156,7 @@ async def archive_state(self, session_id: str, state_data: str) -> bool: return False async def restore_state(self, session_id: str) -> Optional[str]: - """Restore a session state from MinIO. + """Restore a session state from S3. If found, the state is also saved back to Redis for fast access. 
@@ -169,17 +172,16 @@ async def restore_state(self, session_id: str) -> Optional[str]: object_key = self._get_state_object_key(session_id) loop = asyncio.get_event_loop() - # Check if object exists try: response = await loop.run_in_executor( None, - lambda: self.minio_client.get_object(self.bucket_name, object_key), + lambda: self.s3_client.get_object( + Bucket=self.bucket_name, Key=object_key + ), ) - state_bytes = response.read() - response.close() - response.release_conn() - except S3Error as e: - if e.code == "NoSuchKey": + state_bytes = response["Body"].read() + except ClientError as e: + if e.response["Error"]["Code"] == "NoSuchKey": logger.debug("No archived state found", session_id=session_id[:12]) return None raise @@ -197,18 +199,17 @@ async def restore_state(self, session_id: str) -> Optional[str]: session_id, state_data, ttl_seconds=settings.state_ttl_seconds ) else: - # Too large for Redis — save only pointer await self.state_service.save_state_pointer( session_id, state_data, ttl_seconds=settings.state_ttl_seconds ) logger.info( - "State too large for Redis, kept in MinIO only", + "State too large for Redis, kept in S3 only", session_id=session_id[:12], state_size_mb=round(raw_size / 1024 / 1024, 1), ) logger.info( - "Restored state from MinIO", + "Restored state from S3", session_id=session_id[:12], size_bytes=len(state_bytes), ) @@ -221,7 +222,7 @@ async def restore_state(self, session_id: str) -> Optional[str]: return None async def delete_archived_state(self, session_id: str) -> bool: - """Delete an archived state from MinIO. + """Delete an archived state from S3. 
Args: session_id: Session identifier @@ -235,23 +236,17 @@ async def delete_archived_state(self, session_id: str) -> bool: object_key = self._get_state_object_key(session_id) loop = asyncio.get_event_loop() + # boto3 delete_object is idempotent — no error on missing key await loop.run_in_executor( None, - lambda: self.minio_client.remove_object(self.bucket_name, object_key), + lambda: self.s3_client.delete_object( + Bucket=self.bucket_name, Key=object_key + ), ) logger.debug("Deleted archived state", session_id=session_id[:12]) return True - except S3Error as e: - if e.code == "NoSuchKey": - return True # Already doesn't exist - logger.error( - "Failed to delete archived state", - session_id=session_id[:12], - error=str(e), - ) - return False except Exception as e: logger.error( "Failed to delete archived state", @@ -261,7 +256,7 @@ async def delete_archived_state(self, session_id: str) -> bool: return False async def has_archived_state(self, session_id: str) -> bool: - """Check if a session has archived state in MinIO. + """Check if a session has archived state in S3. Args: session_id: Session identifier @@ -278,11 +273,13 @@ async def has_archived_state(self, session_id: str) -> bool: try: await loop.run_in_executor( None, - lambda: self.minio_client.stat_object(self.bucket_name, object_key), + lambda: self.s3_client.head_object( + Bucket=self.bucket_name, Key=object_key + ), ) return True - except S3Error as e: - if e.code == "NoSuchKey": + except ClientError as e: + if e.response["Error"]["Code"] == "404": return False raise @@ -295,7 +292,7 @@ async def has_archived_state(self, session_id: str) -> bool: return False async def archive_inactive_states(self) -> Dict[str, Any]: - """Archive inactive states from Redis to MinIO. + """Archive inactive states from Redis to S3. This is the main archival task that runs periodically. It finds states with low TTL (indicating inactivity) and archives them. 
@@ -313,22 +310,18 @@ async def archive_inactive_states(self) -> Dict[str, Any]: } try: - # Find states ready for archival states_to_archive = await self.state_service.get_states_for_archival() for session_id, remaining_ttl, size in states_to_archive: try: - # Check if already archived if await self.has_archived_state(session_id): summary["already_archived"] += 1 continue - # Get the state data state_data = await self.state_service.get_state(session_id) if not state_data: continue - # Archive to MinIO if await self.archive_state(session_id, state_data): summary["archived"] += 1 else: @@ -379,22 +372,22 @@ async def cleanup_expired_archives(self) -> Dict[str, Any]: ttl_days = settings.state_archive_ttl_days cutoff = datetime.now(timezone.utc).timestamp() - (ttl_days * 24 * 3600) - # List all archived states objects = await loop.run_in_executor( None, lambda: list( - self.minio_client.list_objects( - self.bucket_name, prefix=prefix, recursive=True - ) + self.s3_client.get_paginator("list_objects_v2") + .paginate(Bucket=self.bucket_name, Prefix=prefix) + .search("Contents[]") ), ) - for obj in objects: + for entry in objects: + if entry is None: + continue try: - # Check object age - if obj.last_modified and obj.last_modified.timestamp() < cutoff: - # Extract session_id from path - parts = obj.object_name.split("/") + last_modified = entry.get("LastModified") + if last_modified and last_modified.timestamp() < cutoff: + parts = entry["Key"].split("/") if len(parts) >= 2: session_id = parts[1] if await self.delete_archived_state(session_id): @@ -405,7 +398,7 @@ async def cleanup_expired_archives(self) -> Dict[str, Any]: except Exception as e: logger.warning( "Failed to cleanup archived state", - object_name=obj.object_name, + object_name=entry.get("Key"), error=str(e), ) summary["failed"] += 1 diff --git a/src/utils/config_validator.py b/src/utils/config_validator.py index e44f65d..237ed6a 100644 --- a/src/utils/config_validator.py +++ b/src/utils/config_validator.py @@ 
-4,8 +4,8 @@ import shutil from typing import List, Dict, Any import redis -from minio import Minio -from minio.error import S3Error +import boto3 +from botocore.exceptions import ClientError from ..config import settings @@ -38,7 +38,7 @@ def validate_all(self) -> bool: # Validate external services self._validate_redis_connection() - self._validate_minio_connection() + self._validate_s3_connection() self._validate_nsjail() # Log results @@ -121,40 +121,40 @@ def _validate_redis_connection(self): else: self.errors.append(f"Redis validation error: {e}") - def _validate_minio_connection(self): - """Validate MinIO/S3 connection.""" + def _validate_s3_connection(self): + """Validate S3 storage connection.""" try: - client = Minio( - settings.minio_endpoint, - access_key=settings.minio_access_key, - secret_key=settings.minio_secret_key, - secure=settings.minio_secure, + client = boto3.client( + "s3", + endpoint_url=settings.s3.endpoint_url, + aws_access_key_id=settings.s3_access_key, + aws_secret_access_key=settings.s3_secret_key, + region_name=settings.s3_region, ) # Test connection by listing buckets - buckets = list(client.list_buckets()) + response = client.list_buckets() + buckets = response.get("Buckets", []) # Check if our bucket exists bucket_exists = any( - bucket.name == settings.minio_bucket for bucket in buckets + bucket["Name"] == settings.s3_bucket for bucket in buckets ) if not bucket_exists: self.warnings.append( - f"MinIO bucket '{settings.minio_bucket}' does not exist - will be created" + f"S3 bucket '{settings.s3_bucket}' does not exist - will be created" ) - except S3Error as e: - # Treat as warning in development mode to allow startup without MinIO + except ClientError as e: if settings.api_debug: - self.warnings.append(f"MinIO S3 error: {e}") + self.warnings.append(f"S3 error: {e}") else: - self.errors.append(f"MinIO S3 error: {e}") + self.errors.append(f"S3 error: {e}") except Exception as e: - # Treat as warning in development mode if 
settings.api_debug: - self.warnings.append(f"MinIO validation error: {e}") + self.warnings.append(f"S3 validation error: {e}") else: - self.errors.append(f"MinIO validation error: {e}") + self.errors.append(f"S3 validation error: {e}") def _validate_nsjail(self): """Validate nsjail sandbox availability.""" diff --git a/src/utils/logging.py b/src/utils/logging.py index 5d178e6..8155fa7 100644 --- a/src/utils/logging.py +++ b/src/utils/logging.py @@ -95,7 +95,8 @@ def configure_third_party_loggers() -> None: """Configure logging levels for third-party libraries.""" # Reduce noise from third-party libraries logging.getLogger("urllib3").setLevel(logging.WARNING) - logging.getLogger("minio").setLevel(logging.WARNING) + logging.getLogger("botocore").setLevel(logging.WARNING) + logging.getLogger("boto3").setLevel(logging.WARNING) # Suppress uvicorn access logs - RequestLoggingMiddleware handles this # with status-aware levels (DEBUG for 2xx, WARNING for 4xx, ERROR for 5xx). diff --git a/src/utils/request_helpers.py b/src/utils/request_helpers.py index 069a02d..bc282bf 100644 --- a/src/utils/request_helpers.py +++ b/src/utils/request_helpers.py @@ -4,6 +4,7 @@ the middleware and dependencies layers. """ +import base64 from typing import Optional from fastapi import Request @@ -11,7 +12,17 @@ def extract_api_key(request: Request) -> Optional[str]: """Extract API key from request headers. - Only checks the x-api-key header. + Checks two sources in order: + 1. x-api-key header (preserved for backwards compatibility with older + LibreChat versions and reverse-proxy setups that inject this header). + 2. Authorization: Basic header (single-token convention, matching how + Stripe / DigitalOcean / GitHub PATs work). Current LibreChat versions + no longer send x-api-key but axios/node-fetch will automatically + convert URL-embedded credentials (LIBRECHAT_CODE_BASEURL=https://KEY@host/v1) + into a Basic auth header. 
+ + The x-api-key header wins when both are present so deployments using a + reverse-proxy injection pattern have deterministic behavior. Args: request: FastAPI Request object @@ -19,7 +30,22 @@ def extract_api_key(request: Request) -> Optional[str]: Returns: API key string or None if not found """ - return request.headers.get("x-api-key") + key = request.headers.get("x-api-key") + if key: + return key + + auth = request.headers.get("authorization", "") + if auth.lower().startswith("basic "): + try: + decoded = base64.b64decode(auth.split(" ", 1)[1]).decode( + "utf-8", errors="replace" + ) + except Exception: + return None + user, _, password = decoded.partition(":") + return user or password or None + + return None def get_client_ip(request: Request) -> str: diff --git a/tests/conftest.py b/tests/conftest.py index fd279da..37e432c 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -5,7 +5,6 @@ import pytest import redis.asyncio as redis -from minio import Minio # Set test environment before importing config # These match the docker-compose infrastructure settings @@ -13,10 +12,10 @@ os.environ.setdefault("API_KEY", "test-api-key-for-testing-12345") os.environ.setdefault("REDIS_HOST", "localhost") os.environ.setdefault("REDIS_PORT", "6379") -os.environ.setdefault("MINIO_ENDPOINT", "localhost:9000") -os.environ.setdefault("MINIO_ACCESS_KEY", "minioadmin") -os.environ.setdefault("MINIO_SECRET_KEY", "minioadmin") -os.environ.setdefault("MINIO_SECURE", "false") +os.environ.setdefault("S3_ENDPOINT", "localhost:3900") +os.environ.setdefault("S3_ACCESS_KEY", "GKminioadmin0000") +os.environ.setdefault("S3_SECRET_KEY", "minioadminsecret") +os.environ.setdefault("S3_SECURE", "false") from src.services.session import SessionService from src.services.execution import CodeExecutionService @@ -48,19 +47,17 @@ def mock_redis(): @pytest.fixture -def mock_minio(): - """Mock MinIO client for testing.""" - mock_client = MagicMock(spec=Minio) - - # Mock common MinIO operations 
- mock_client.bucket_exists.return_value = True - mock_client.make_bucket.return_value = None - mock_client.presigned_put_object.return_value = "https://example.com/upload" - mock_client.presigned_get_object.return_value = "https://example.com/download" - mock_client.stat_object.return_value = MagicMock(size=1024) - mock_client.put_object.return_value = None - mock_client.get_object.return_value = MagicMock() - mock_client.remove_object.return_value = None +def mock_s3_client(): + """Mock S3 client for testing.""" + mock_client = MagicMock() + + mock_client.head_bucket.return_value = {} + mock_client.create_bucket.return_value = {} + mock_client.generate_presigned_url.return_value = "https://example.com/presigned" + mock_client.head_object.return_value = {"ContentLength": 1024} + mock_client.put_object.return_value = {} + mock_client.get_object.return_value = {"Body": MagicMock(read=MagicMock(return_value=b""))} + mock_client.delete_object.return_value = {} return mock_client @@ -116,14 +113,16 @@ def execution_service(mock_sandbox_manager): @pytest.fixture -def file_service(mock_minio, mock_redis): +def file_service(mock_s3_client, mock_redis): """Create FileService instance with mocked dependencies.""" - with patch("src.services.file.Minio", return_value=mock_minio), patch( - "src.services.file.redis.Redis", return_value=mock_redis + with patch("src.services.file.boto3") as mock_boto3, patch( + "src.services.file.redis.from_url", return_value=mock_redis ): + mock_boto3.client.return_value = mock_s3_client service = FileService() yield service + @pytest.fixture def mock_settings(): """Mock settings for testing.""" @@ -135,11 +134,12 @@ def mock_settings(): mock_settings.redis_url = None mock_settings.session_ttl_hours = 24 mock_settings.session_cleanup_interval_minutes = 60 - mock_settings.minio_endpoint = "localhost:9000" - mock_settings.minio_access_key = "test_key" - mock_settings.minio_secret_key = "test_secret" - mock_settings.minio_secure = False - 
mock_settings.minio_bucket = "test-bucket" + mock_settings.s3_endpoint = "localhost:3900" + mock_settings.s3_access_key = "test_key" + mock_settings.s3_secret_key = "test_secret" + mock_settings.s3_secure = False + mock_settings.s3_bucket = "test-bucket" + mock_settings.s3_region = "garage" mock_settings.api_key = "test-api-key-12345" mock_settings.max_execution_time = 30 mock_settings.max_file_size_mb = 10 @@ -151,6 +151,8 @@ def mock_settings(): ) yield mock_settings + + # ============================================================================ # Integration Test Fixtures # ============================================================================ @@ -170,6 +172,7 @@ def auth_headers(): """Provide authentication headers for integration tests.""" return {"x-api-key": "test-api-key-for-testing-12345"} + def pytest_collection_modifyitems(config, items): """Apply shared markers based on the suite layer.""" contract_only_files = ( @@ -185,9 +188,7 @@ def pytest_collection_modifyitems(config, items): "tests/functional/test_mounted_file_edits.py", "tests/functional/test_timing.py", ) - client_replay_files = ( - "tests/functional/test_client_replay.py", - ) + client_replay_files = ("tests/functional/test_client_replay.py",) for item in items: path = Path(str(item.fspath)).as_posix() diff --git a/tests/functional/test_client_replay.py b/tests/functional/test_client_replay.py index 6990b4b..187eefd 100644 --- a/tests/functional/test_client_replay.py +++ b/tests/functional/test_client_replay.py @@ -140,7 +140,8 @@ async def test_uploaded_files_follow_runtime_session_when_first_exec_has_no_outp injected_files=upload_refs, ) assert "a,b" in first["stdout"] - assert first["files"] == [] + generated = [f for f in first["files"] if not f.get("inherited")] + assert generated == [], f"Expected no generated files, got: {generated}" second = await _exec_like_runtime( async_client, diff --git a/tests/functional/test_concurrent_file_exec.py 
b/tests/functional/test_concurrent_file_exec.py index 0d28da0..8e3d3a0 100644 --- a/tests/functional/test_concurrent_file_exec.py +++ b/tests/functional/test_concurrent_file_exec.py @@ -1,7 +1,7 @@ """Functional tests for concurrent execution with large file uploads. Regression test for event loop blocking bug: when large files (>40MB) are -downloaded from MinIO during file mounting, response.read() blocks the +downloaded from S3 during file mounting, response.read() blocks the asyncio event loop, starving all concurrent HTTP connections. This manifests as "socket hang up" errors in clients like LibreChat. @@ -13,11 +13,12 @@ import httpx import pytest - # 50MB of CSV data — large enough to trigger measurable event loop blocking LARGE_FILE_SIZE_MB = 50 LARGE_CSV_ROW = b"col1,col2,col3,col4,col5,col6,col7,col8\n" -LARGE_CSV_DATA = LARGE_CSV_ROW * (LARGE_FILE_SIZE_MB * 1024 * 1024 // len(LARGE_CSV_ROW)) +LARGE_CSV_DATA = LARGE_CSV_ROW * ( + LARGE_FILE_SIZE_MB * 1024 * 1024 // len(LARGE_CSV_ROW) +) # Threshold: concurrent pings must complete within this time (seconds). # Without the fix, pings take 8-11s due to event loop blocking. 
@@ -46,7 +47,10 @@ async def test_large_file_exec_does_not_block_concurrent_requests( data = {"entity_id": unique_entity_id} upload_resp = await async_client.post( - "/upload", headers=upload_headers, files=files, data=data, + "/upload", + headers=upload_headers, + files=files, + data=data, timeout=120.0, ) assert upload_resp.status_code == 200, f"Upload failed: {upload_resp.text}" diff --git a/tests/functional/test_exec_workflow.py b/tests/functional/test_exec_workflow.py index 17885b1..9e4c5ed 100644 --- a/tests/functional/test_exec_workflow.py +++ b/tests/functional/test_exec_workflow.py @@ -219,16 +219,10 @@ async def test_file_ref_does_not_leak_session_across_users( assert "shared content" in r_b.json()["stdout"] # Neither user should reuse the upload session - assert session_a != upload_session, ( - "User A should not reuse the upload session" - ) - assert session_b != upload_session, ( - "User B should not reuse the upload session" - ) + assert session_a != upload_session, "User A should not reuse the upload session" + assert session_b != upload_session, "User B should not reuse the upload session" # Each user should get a different session - assert session_a != session_b, ( - "Different users should get different sessions" - ) + assert session_a != session_b, "Different users should get different sessions" class TestStatePersistence: diff --git a/tests/functional/test_files.py b/tests/functional/test_files.py index 3a084c0..6e057f6 100644 --- a/tests/functional/test_files.py +++ b/tests/functional/test_files.py @@ -219,9 +219,9 @@ async def test_detail_full_has_original_filename_metadata( for item in data: assert "metadata" in item, "Full detail must include 'metadata'" - assert "original-filename" in item["metadata"], ( - "metadata must include 'original-filename'" - ) + assert ( + "original-filename" in item["metadata"] + ), "metadata must include 'original-filename'" assert isinstance(item["metadata"]["original-filename"], str) assert 
len(item["metadata"]["original-filename"]) > 0 @@ -341,9 +341,7 @@ async def test_uploaded_file_readable_at_mnt_data( ), "lang": "py", "session_id": session_id, - "files": [ - {"id": file_id, "session_id": session_id, "name": filename} - ], + "files": [{"id": file_id, "session_id": session_id, "name": filename}], }, ) @@ -379,9 +377,7 @@ async def test_uploaded_file_readable_via_relative_path( "code": f"print(open('{filename}').read())", "lang": "py", "session_id": session_id, - "files": [ - {"id": file_id, "session_id": session_id, "name": filename} - ], + "files": [{"id": file_id, "session_id": session_id, "name": filename}], }, ) @@ -428,9 +424,7 @@ async def test_upload_execute_generate_download( ), "lang": "py", "session_id": session_id, - "files": [ - {"id": file_id, "session_id": session_id, "name": filename} - ], + "files": [{"id": file_id, "session_id": session_id, "name": filename}], }, ) diff --git a/tests/functional/test_mounted_file_edits.py b/tests/functional/test_mounted_file_edits.py index 3dfdcfb..32089af 100644 --- a/tests/functional/test_mounted_file_edits.py +++ b/tests/functional/test_mounted_file_edits.py @@ -1,16 +1,29 @@ -"""Functional tests for mounted file edit persistence against a live API.""" +"""Functional tests for mounted file edit persistence against a live API. + +Modified mounted files surface as new generated outputs with fresh file_ids +(not in-place overwrites of the original S3 object). The exec response +includes a `modified_from` reference back to the original upload. LibreChat +downloads the new file_id to capture the edited content. 
+""" import pytest +def _find_modified_file(exec_result, original_file_id): + """Find the generated file entry that was modified from the original.""" + for f in exec_result.get("files", []): + modified_from = f.get("modified_from") + if modified_from and modified_from.get("id") == original_file_id: + return f + return None + + class TestMountedFileEdits: - """Verify in-place edits to mounted files persist after execution.""" + """Verify in-place edits to mounted files surface as new generated outputs.""" @pytest.mark.asyncio - async def test_overwrite_mounted_file_persists( - self, async_client, auth_headers - ): - """Overwriting a mounted user file should persist the new content.""" + async def test_overwrite_mounted_file_persists(self, async_client, auth_headers): + """Overwriting a mounted file should produce a new output with modified content.""" upload = await async_client.post( "/upload", headers={"x-api-key": auth_headers["x-api-key"]}, @@ -38,20 +51,26 @@ async def test_overwrite_mounted_file_persists( }, ) assert execute.status_code == 200, execute.text - assert "File modified" in execute.json()["stdout"] + exec_result = execute.json() + assert "File modified" in exec_result["stdout"] + + modified = _find_modified_file(exec_result, file_id) + assert modified is not None, ( + f"No modified_from entry for {file_id} in files: {exec_result['files']}" + ) + assert modified.get("inherited") is not True + assert modified["name"] == "test.txt" download = await async_client.get( - f"/download/{session_id}/{file_id}", + f"/download/{session_id}/{modified['id']}", headers=auth_headers, ) assert download.status_code == 200 assert download.text == "modified content" @pytest.mark.asyncio - async def test_append_to_mounted_file_persists( - self, async_client, auth_headers - ): - """Appending to a mounted file should persist all new lines.""" + async def test_append_to_mounted_file_persists(self, async_client, auth_headers): + """Appending to a mounted file should 
produce a new output with all lines.""" upload = await async_client.post( "/upload", headers={"x-api-key": auth_headers["x-api-key"]}, @@ -78,9 +97,16 @@ async def test_append_to_mounted_file_persists( }, ) assert execute.status_code == 200, execute.text + exec_result = execute.json() + + modified = _find_modified_file(exec_result, file_id) + assert modified is not None, ( + f"No modified_from entry for {file_id} in files: {exec_result['files']}" + ) + assert modified.get("inherited") is not True download = await async_client.get( - f"/download/{session_id}/{file_id}", + f"/download/{session_id}/{modified['id']}", headers=auth_headers, ) assert download.status_code == 200 @@ -115,7 +141,9 @@ async def test_delete_mounted_file_does_not_error( "os.remove('/mnt/data/temp.txt')\n" "print('File deleted')\n" ), - "files": [{"id": file_id, "session_id": session_id, "name": "temp.txt"}], + "files": [ + {"id": file_id, "session_id": session_id, "name": "temp.txt"} + ], }, ) assert execute.status_code == 200, execute.text @@ -124,10 +152,8 @@ async def test_delete_mounted_file_does_not_error( assert "Failed to update mounted file" not in execute_result["stderr"] @pytest.mark.asyncio - async def test_edit_csv_file_persists( - self, async_client, auth_headers - ): - """Editing a mounted CSV file should persist the transformed data.""" + async def test_edit_csv_file_persists(self, async_client, auth_headers): + """Editing a mounted CSV file should produce a new output with transformed data.""" upload = await async_client.post( "/upload", headers={"x-api-key": auth_headers["x-api-key"]}, @@ -151,14 +177,23 @@ async def test_edit_csv_file_persists( "print('csv updated')\n" ), "session_id": session_id, - "files": [{"id": file_id, "session_id": session_id, "name": "data.csv"}], + "files": [ + {"id": file_id, "session_id": session_id, "name": "data.csv"} + ], }, ) assert execute.status_code == 200, execute.text - assert "csv updated" in execute.json()["stdout"] + exec_result = 
execute.json() + assert "csv updated" in exec_result["stdout"] + + modified = _find_modified_file(exec_result, file_id) + assert modified is not None, ( + f"No modified_from entry for {file_id} in files: {exec_result['files']}" + ) + assert modified.get("inherited") is not True download = await async_client.get( - f"/download/{session_id}/{file_id}", + f"/download/{session_id}/{modified['id']}", headers=auth_headers, ) assert download.status_code == 200 diff --git a/tests/functional/test_ptc.py b/tests/functional/test_ptc.py index df0594f..f1bcaa3 100644 --- a/tests/functional/test_ptc.py +++ b/tests/functional/test_ptc.py @@ -7,18 +7,14 @@ class TestPTCInitialExecution: """Test POST /exec/programmatic with initial code execution.""" @pytest.mark.asyncio - async def test_ptc_simple_code_completes( - self, async_client, auth_headers - ): + async def test_ptc_simple_code_completes(self, async_client, auth_headers): """PTC request with code that doesn't call any tools completes immediately.""" response = await async_client.post( "/exec/programmatic", headers=auth_headers, json={ "code": "print('hello from ptc')", - "tools": [ - {"name": "unused_tool", "description": "Not called"} - ], + "tools": [{"name": "unused_tool", "description": "Not called"}], }, ) @@ -29,9 +25,7 @@ async def test_ptc_simple_code_completes( assert "hello from ptc" in data["stdout"] @pytest.mark.asyncio - async def test_ptc_response_has_all_fields( - self, async_client, auth_headers - ): + async def test_ptc_response_has_all_fields(self, async_client, auth_headers): """PTC response includes all expected fields.""" response = await async_client.post( "/exec/programmatic", @@ -54,9 +48,7 @@ async def test_ptc_response_has_all_fields( assert "error" in data @pytest.mark.asyncio - async def test_ptc_no_code_returns_error( - self, async_client, auth_headers - ): + async def test_ptc_no_code_returns_error(self, async_client, auth_headers): """PTC request without code or continuation_token returns 
error.""" response = await async_client.post( "/exec/programmatic", @@ -70,18 +62,14 @@ async def test_ptc_no_code_returns_error( assert data["error"] is not None @pytest.mark.asyncio - async def test_ptc_timeout_uses_milliseconds( - self, async_client, auth_headers - ): + async def test_ptc_timeout_uses_milliseconds(self, async_client, auth_headers): """A 1000ms timeout should behave like a 1 second execution budget.""" response = await async_client.post( "/exec/programmatic", headers=auth_headers, json={ "code": ( - "import time\n" - "time.sleep(5)\n" - "print('should not complete')" + "import time\n" "time.sleep(5)\n" "print('should not complete')" ), "tools": [], "timeout": 1000, @@ -140,19 +128,14 @@ class TestPTCToolCallFlow: """Test the full PTC tool call round-trip: code calls tool, we supply result.""" @pytest.mark.asyncio - async def test_ptc_tool_call_and_continuation( - self, async_client, auth_headers - ): + async def test_ptc_tool_call_and_continuation(self, async_client, auth_headers): """Full PTC round-trip: code calls a tool, receives result, completes.""" # Step 1: Send code that calls a tool initial_response = await async_client.post( "/exec/programmatic", headers=auth_headers, json={ - "code": ( - "result = await get_number()\n" - "print(f'got: {result}')" - ), + "code": ("result = await get_number()\n" "print(f'got: {result}')"), "tools": [ { "name": "get_number", @@ -199,18 +182,13 @@ async def test_ptc_tool_call_and_continuation( assert "got: 42" in result["stdout"] @pytest.mark.asyncio - async def test_ptc_tool_with_arguments( - self, async_client, auth_headers - ): + async def test_ptc_tool_with_arguments(self, async_client, auth_headers): """Tool call passes arguments correctly.""" initial = await async_client.post( "/exec/programmatic", headers=auth_headers, json={ - "code": ( - "result = await add(a=3, b=7)\n" - "print(f'sum={result}')" - ), + "code": ("result = await add(a=3, b=7)\n" "print(f'sum={result}')"), "tools": [ { "name": 
"add", @@ -258,9 +236,7 @@ async def test_ptc_tool_with_arguments( assert "sum=10" in result["stdout"] @pytest.mark.asyncio - async def test_ptc_tool_error_result( - self, async_client, auth_headers - ): + async def test_ptc_tool_error_result(self, async_client, auth_headers): """Tool result with is_error=true is handled by the code.""" initial = await async_client.post( "/exec/programmatic", @@ -273,9 +249,7 @@ async def test_ptc_tool_error_result( "except Exception as e:\n" " print(f'caught: {e}')" ), - "tools": [ - {"name": "failing_tool", "description": "Will fail"} - ], + "tools": [{"name": "failing_tool", "description": "Will fail"}], }, ) @@ -311,18 +285,14 @@ class TestPTCInvalidToken: """Test PTC continuation with invalid/expired tokens.""" @pytest.mark.asyncio - async def test_ptc_invalid_continuation_token( - self, async_client, auth_headers - ): + async def test_ptc_invalid_continuation_token(self, async_client, auth_headers): """Invalid continuation token returns error status.""" response = await async_client.post( "/exec/programmatic", headers=auth_headers, json={ "continuation_token": "nonexistent-token-xyz", - "tool_results": [ - {"call_id": "fake-call", "result": "data"} - ], + "tool_results": [{"call_id": "fake-call", "result": "data"}], }, ) diff --git a/tests/functional/test_timing.py b/tests/functional/test_timing.py index f6906cc..4567dde 100644 --- a/tests/functional/test_timing.py +++ b/tests/functional/test_timing.py @@ -113,5 +113,3 @@ async def test_download_under_5s( assert response.status_code == 200 assert latency < 5.0, f"Download took {latency:.1f}s, expected < 5s" - - diff --git a/tests/integration/test_api_contracts.py b/tests/integration/test_api_contracts.py index e48fbcd..59f1221 100644 --- a/tests/integration/test_api_contracts.py +++ b/tests/integration/test_api_contracts.py @@ -133,7 +133,7 @@ def mock_file_service(): created_at=datetime.utcnow(), path="/test.txt", ) - service.download_file.return_value = 
"https://minio.example.com/download-url" + service.download_file.return_value = "https://s3.example.com/download-url" service.validate_uploads = MagicMock(return_value=None) return service diff --git a/tests/integration/test_auth_basic.py b/tests/integration/test_auth_basic.py new file mode 100644 index 0000000..26558e3 --- /dev/null +++ b/tests/integration/test_auth_basic.py @@ -0,0 +1,143 @@ +"""Integration tests for HTTP Basic auth in URL credentials. + +LibreChat (since librechat-agents commit dd3de99, April 2026) no longer sends +the X-API-Key header. Operators wanting per-client auth point LibreChat at +`https://KEY@your-api/v1` — axios/node-fetch automatically generate +`Authorization: Basic base64(KEY:)`. These tests verify our server accepts +that pattern, with x-api-key still taking precedence when both are present. +""" + +import base64 +from unittest.mock import AsyncMock, patch + +import pytest +from fastapi.testclient import TestClient + +from src.main import app + +VALID_KEY = "test-api-key-for-testing-12345" + + +def _basic_header(token_pair: str) -> str: + return "Basic " + base64.b64encode(token_pair.encode()).decode() + + +@pytest.fixture +def client(): + return TestClient(app) + + +@pytest.fixture +def mock_services(): + """Mock service deps so the request gets past handler stage; we only care about auth.""" + from src.dependencies.services import ( + get_session_service, + get_execution_service, + get_file_service, + get_state_service, + get_state_archival_service, + ) + + mocks = { + get_session_service: AsyncMock(), + get_execution_service: AsyncMock(), + get_file_service: AsyncMock(), + get_state_service: AsyncMock(), + get_state_archival_service: AsyncMock(), + } + for dep, mock in mocks.items(): + app.dependency_overrides[dep] = lambda m=mock: m + + yield mocks + app.dependency_overrides.clear() + + +class TestBasicAuthAccepted: + def test_valid_basic_auth_passes_authentication(self, client, mock_services): + """LibreChat-style URL 
credentials => Authorization: Basic base64(KEY:).""" + with patch("src.services.auth.settings") as mock_settings: + mock_settings.api_key = VALID_KEY + response = client.get( + "/files/some-session-id", + headers={"authorization": _basic_header(f"{VALID_KEY}:")}, + ) + assert response.status_code != 401, response.text + + def test_basic_auth_via_testclient_auth_param(self, client, mock_services): + """End-to-end: TestClient.auth=(KEY, '') generates the same header axios would.""" + with patch("src.services.auth.settings") as mock_settings: + mock_settings.api_key = VALID_KEY + response = client.get("/files/some-session-id", auth=(VALID_KEY, "")) + assert response.status_code != 401, response.text + + def test_invalid_basic_auth_rejected(self, client, mock_services): + with patch("src.services.auth.settings") as mock_settings: + mock_settings.api_key = VALID_KEY + response = client.get( + "/files/some-session-id", + headers={"authorization": _basic_header("wrong-key:")}, + ) + assert response.status_code == 401 + + def test_basic_auth_with_password_field_uses_username(self, client, mock_services): + """Conventionally the key is the username; password is empty. 
Verify username wins.""" + with patch("src.services.auth.settings") as mock_settings: + mock_settings.api_key = VALID_KEY + response = client.get( + "/files/some-session-id", + headers={ + "authorization": _basic_header(f"{VALID_KEY}:ignored-password") + }, + ) + assert response.status_code != 401 + + +class TestPrecedence: + def test_x_api_key_wins_when_both_present(self, client, mock_services): + """If both headers present, x-api-key is used (deterministic for proxy setups).""" + with patch("src.services.auth.settings") as mock_settings: + mock_settings.api_key = VALID_KEY + response = client.get( + "/files/some-session-id", + headers={ + "x-api-key": VALID_KEY, + "authorization": _basic_header("wrong-key:"), + }, + ) + assert response.status_code != 401 + + def test_invalid_x_api_key_does_not_fall_back_to_basic(self, client, mock_services): + """If x-api-key is present but invalid, we reject — no quiet Basic fallback.""" + with patch("src.services.auth.settings") as mock_settings: + mock_settings.api_key = VALID_KEY + response = client.get( + "/files/some-session-id", + headers={ + "x-api-key": "wrong-key", + "authorization": _basic_header(f"{VALID_KEY}:"), + }, + ) + assert response.status_code == 401 + + +class TestNonBasicSchemesRejected: + def test_bearer_still_rejected(self, client, mock_services): + response = client.get( + "/files/some-session-id", + headers={"authorization": f"Bearer {VALID_KEY}"}, + ) + assert response.status_code == 401 + + def test_apikey_scheme_still_rejected(self, client, mock_services): + response = client.get( + "/files/some-session-id", + headers={"authorization": f"ApiKey {VALID_KEY}"}, + ) + assert response.status_code == 401 + + def test_malformed_basic_auth_rejected(self, client, mock_services): + response = client.get( + "/files/some-session-id", + headers={"authorization": "Basic !!!not-base64!!!"}, + ) + assert response.status_code == 401 diff --git a/tests/integration/test_auth_disabled.py 
b/tests/integration/test_auth_disabled.py new file mode 100644 index 0000000..7d41719 --- /dev/null +++ b/tests/integration/test_auth_disabled.py @@ -0,0 +1,126 @@ +"""Integration tests for AUTH_ENABLED=false (operator-controlled bypass). + +When AUTH_ENABLED=false, requests to user endpoints (/exec, /upload, etc.) +no longer require x-api-key. This is for deployments behind a trusted +network boundary where auth is enforced at a layer above us. Admin +endpoints (/api/v1/admin/*) MUST still require the master key. +""" + +from unittest.mock import AsyncMock, patch + +import pytest +from fastapi.testclient import TestClient + +from src.main import app + + +@pytest.fixture +def client(): + return TestClient(app) + + +@pytest.fixture +def mock_services(): + from src.dependencies.services import ( + get_session_service, + get_execution_service, + get_file_service, + get_state_service, + get_state_archival_service, + ) + + mocks = { + get_session_service: AsyncMock(), + get_execution_service: AsyncMock(), + get_file_service: AsyncMock(), + get_state_service: AsyncMock(), + get_state_archival_service: AsyncMock(), + } + for dep, mock in mocks.items(): + app.dependency_overrides[dep] = lambda m=mock: m + + yield mocks + app.dependency_overrides.clear() + + +@pytest.fixture +def auth_disabled(monkeypatch): + """Flip AUTH_ENABLED off for the duration of the test.""" + monkeypatch.setattr("src.middleware.security.settings.auth_enabled", False) + monkeypatch.setattr("src.dependencies.auth.settings.auth_enabled", False) + yield + + +class TestUserEndpointsWithAuthDisabled: + def test_no_x_api_key_succeeds(self, client, mock_services, auth_disabled): + """No header at all => still gets past auth.""" + response = client.get("/files/some-session-id") + assert response.status_code != 401, response.text + + def test_invalid_x_api_key_succeeds(self, client, mock_services, auth_disabled): + """Invalid key is still accepted because the check is bypassed.""" + response = client.get( + 
"/files/some-session-id", + headers={"x-api-key": "literally-not-a-real-key"}, + ) + assert response.status_code != 401, response.text + + def test_health_endpoint_without_header(self, client, auth_disabled): + """Health endpoints with verify_api_key dep work without a header.""" + # /health/redis uses Depends(verify_api_key); should now pass without header + response = client.get("/health/redis") + # Endpoint returns 200 or 503 depending on Redis state, but never 401 + assert response.status_code != 401, response.text + + +class TestAdminEndpointsStillRequireMasterKey: + def test_admin_keys_without_master_key_rejected( + self, client, mock_services, auth_disabled + ): + """AUTH_ENABLED=false must NOT relax master-key requirement on admin paths.""" + with patch("src.middleware.security.settings") as ms: + ms.auth_enabled = False + ms.master_api_key = "master-secret-32chars-aaaaaaaaaa" + response = client.get("/api/v1/admin/keys") + # No master key => 401 (or 403 depending on the exact code path) + assert response.status_code in (401, 403), response.text + + def test_admin_keys_with_wrong_master_key_rejected( + self, client, mock_services, auth_disabled + ): + with patch("src.middleware.security.settings") as ms: + ms.auth_enabled = False + ms.master_api_key = "master-secret-32chars-aaaaaaaaaa" + response = client.get( + "/api/v1/admin/keys", + headers={"x-api-key": "wrong-master-key"}, + ) + assert response.status_code in (401, 403), response.text + + +class TestDashboardSkipUnchanged: + def test_dashboard_html_loads_without_master_key_when_auth_disabled( + self, client, auth_disabled + ): + """The /admin-dashboard skip-auth path is unchanged; HTML loads.""" + # Dashboard route returns the HTML shell; no auth required for the shell itself + response = client.get("/admin-dashboard/") + # Either 200 (HTML served), 404 (route shape), or 405 (method) — but never 401 + assert response.status_code != 401, response.text + + def 
test_dashboard_admin_api_still_requires_master_key( + self, client, mock_services, auth_disabled + ): + """Even with AUTH_ENABLED=false, /api/v1/admin/* still locked behind master key.""" + with patch("src.middleware.security.settings") as ms: + ms.auth_enabled = False + ms.master_api_key = "master-secret-32chars-aaaaaaaaaa" + response = client.get("/api/v1/admin/stats?hours=1") + assert response.status_code in (401, 403), response.text + + +class TestAuthEnabledDefaultUnchanged: + def test_default_settings_keep_auth_required(self, client, mock_services): + """Sanity: with AUTH_ENABLED untouched, requests without a key still 401.""" + response = client.get("/files/some-session-id") + assert response.status_code == 401, response.text diff --git a/tests/integration/test_exec_api.py b/tests/integration/test_exec_api.py index 9ab5d51..b9c65a1 100644 --- a/tests/integration/test_exec_api.py +++ b/tests/integration/test_exec_api.py @@ -417,9 +417,7 @@ def test_exec_service_error(self, client, auth_headers, mock_execution_service): assert response.status_code == 503 assert "error" in response.json() - def test_exec_delayed_service_error_after_stream_start( - self, client, auth_headers - ): + def test_exec_delayed_service_error_after_stream_start(self, client, auth_headers): """Delayed failures should return a JSON error payload, not crash the stream.""" async def _delayed_failure(*args, **kwargs): diff --git a/tests/integration/test_librechat_compat.py b/tests/integration/test_librechat_compat.py index 0310810..093e96e 100644 --- a/tests/integration/test_librechat_compat.py +++ b/tests/integration/test_librechat_compat.py @@ -1733,9 +1733,7 @@ def test_ptc_tool_call_required_response( session_id="ptc-tool-session", continuation_token="cont-token-xyz", tool_calls=[ - PTCToolCall( - id="call-abc", name="get_weather", input={"city": "NYC"} - ), + PTCToolCall(id="call-abc", name="get_weather", input={"city": "NYC"}), ], stdout="", stderr="", @@ -1876,3 +1874,244 @@ def 
test_ptc_error_tool_result(self, mock_get_service, client, auth_headers): # Response should be valid regardless of tool error assert data["status"] in ("completed", "error") mock_service.continue_execution.assert_called_once() + + +# ============================================================================= +# /upload/batch — multi-file uploads (LibreChat skill priming flow) +# ============================================================================= + + +class TestLibreChatUploadBatch: + """Tests for POST /upload/batch. + + LibreChat's `crud.js:118` posts repeated `file` fields plus optional + `entity_id`. The response must include `succeeded`/`failed` counts and + per-file `status` so LibreChat's caller can distinguish hard failures + from partial successes (`crud.js:146-172`). + + Filenames may include subdirectories — the agents library uploads skill + bundles like `skills/foo/SKILL.md` and verifies on the response with + `f.filename.endsWith('/SKILL.md')` (`packages/api/src/agents/skillFiles.ts:160`). + """ + + @pytest.fixture(autouse=True) + def setup_mocks(self): + # Track which filenames were stored to verify nested-path preservation + # without re-implementing storage in the mock. 
+ stored_filenames = [] + + async def fake_store( + session_id, + filename, + content, + content_type, + is_agent_file, + is_read_only=False, + original_filename=None, + ): + stored_filenames.append(filename) + return f"fid-{len(stored_filenames)}" + + mock_file_service = AsyncMock() + mock_file_service.store_uploaded_file = AsyncMock(side_effect=fake_store) + + mock_session_service = AsyncMock() + mock_session_service.create_session.return_value = Session( + session_id="batch-session-456", + status=SessionStatus.ACTIVE, + created_at=datetime.now(timezone.utc), + last_activity=datetime.now(timezone.utc), + expires_at=datetime.now(timezone.utc) + timedelta(hours=24), + metadata={}, + ) + + from src.dependencies.services import get_file_service, get_session_service + + app.dependency_overrides[get_file_service] = lambda: mock_file_service + app.dependency_overrides[get_session_service] = lambda: mock_session_service + + yield {"stored": stored_filenames, "file_service": mock_file_service} + + app.dependency_overrides.clear() + + def test_response_shape_matches_librechat_contract(self, client, auth_headers): + files = [ + ("file", ("a.txt", io.BytesIO(b"alpha"), "text/plain")), + ("file", ("b.txt", io.BytesIO(b"bravo"), "text/plain")), + ] + response = client.post("/upload/batch", files=files, headers=auth_headers) + + assert response.status_code == 200 + result = response.json() + # All five top-level keys present (LibreChat reads each one) + for key in ("message", "session_id", "files", "succeeded", "failed"): + assert key in result, f"Missing required key: {key}" + assert result["message"] == "success" + assert result["succeeded"] == 2 + assert result["failed"] == 0 + for entry in result["files"]: + # Per-file shape: status, fileId (success only), filename + assert entry["status"] == "success" + assert "fileId" in entry + assert "filename" in entry + + def test_partial_failure_reports_per_file_errors( + self, client, auth_headers, setup_mocks + ): + # `.exe` is not 
in allowed_file_extensions -> per-file error. + files = [ + ("file", ("good.txt", io.BytesIO(b"ok"), "text/plain")), + ("file", ("bad.exe", io.BytesIO(b"\x00"), "application/octet-stream")), + ] + response = client.post("/upload/batch", files=files, headers=auth_headers) + + assert response.status_code == 200 + result = response.json() + assert result["message"] == "partial" + assert result["succeeded"] == 1 + assert result["failed"] == 1 + + statuses = {f["filename"]: f for f in result["files"]} + assert statuses["good.txt"]["status"] == "success" + assert statuses["bad.exe"]["status"] == "error" + assert "error" in statuses["bad.exe"] + + def test_all_failures_report_message_error(self, client, auth_headers): + # All files have disallowed extensions + files = [ + ("file", ("bad1.exe", io.BytesIO(b"x"), "application/octet-stream")), + ("file", ("bad2.dll", io.BytesIO(b"y"), "application/octet-stream")), + ] + response = client.post("/upload/batch", files=files, headers=auth_headers) + + assert response.status_code == 200 + result = response.json() + # LibreChat throws if message=='error' (crud.js:158) + assert result["message"] == "error" + assert result["succeeded"] == 0 + assert result["failed"] == 2 + + def test_empty_batch_returns_422(self, client, auth_headers): + response = client.post("/upload/batch", headers=auth_headers) + assert response.status_code == 422 + + def test_files_field_name_must_be_singular(self, client, auth_headers): + # The repeated 'files' (plural) field name is not what LibreChat uses; + # we should treat it as missing and 422. 
+ files = [("files", ("a.txt", io.BytesIO(b"x"), "text/plain"))] + response = client.post("/upload/batch", files=files, headers=auth_headers) + assert response.status_code == 422 + + def test_entity_id_marks_files_as_agent(self, client, auth_headers, setup_mocks): + files = [("file", ("doc.txt", io.BytesIO(b"hi"), "text/plain"))] + data = {"entity_id": "asst_skill_123"} + response = client.post( + "/upload/batch", files=files, data=data, headers=auth_headers + ) + + assert response.status_code == 200 + # Verify is_agent_file=True was passed through + store = setup_mocks["file_service"].store_uploaded_file + assert store.await_count == 1 + kwargs = store.await_args.kwargs + assert kwargs["is_agent_file"] is True + + def test_nested_filename_preserved_in_response( + self, client, auth_headers, setup_mocks + ): + # LibreChat skill priming sends `skills/<name>/SKILL.md`. + files = [ + ( + "file", + ( + "skills/weather_lookup/SKILL.md", + io.BytesIO(b"# Weather skill"), + "text/markdown", + ), + ) + ] + response = client.post("/upload/batch", files=files, headers=auth_headers) + + assert response.status_code == 200 + result = response.json() + assert result["files"][0]["filename"] == "skills/weather_lookup/SKILL.md" + # The stored filename also preserves the path so S3/sandbox round-trip works. + assert "skills/weather_lookup/SKILL.md" in setup_mocks["stored"] + + +# ============================================================================= +# GET /sessions/{session_id}/objects/{file_id} — liveness probe +# ============================================================================= + + +class TestLibreChatSessionObjectMetadata: + """LibreChat's `process.js:363` reads `lastModified` to decide whether + a session is still valid (>23h old means re-upload). 
Format matches the + `?detail=summary` listing: ISO 8601 with `Z` and millisecond precision.""" + + @pytest.fixture(autouse=True) + def setup_mocks(self): + from src.dependencies.services import get_file_service + + self.mock = AsyncMock() + app.dependency_overrides[get_file_service] = lambda: self.mock + yield + app.dependency_overrides.clear() + + def test_returns_lastmodified_with_z_suffix(self, client, auth_headers): + self.mock.get_file_info.return_value = FileInfo( + file_id="fid-1", + filename="data.csv", + size=12, + content_type="text/csv", + created_at=datetime(2026, 4, 22, 9, 17, 6, tzinfo=timezone.utc), + path="/data/data.csv", + ) + response = client.get("/sessions/sess-1/objects/fid-1", headers=auth_headers) + assert response.status_code == 200 + body = response.json() + assert set(body.keys()) == {"lastModified"} + assert body["lastModified"].endswith("Z") + # Parseable by JS `new Date(...)` => parseable by Python's ISO parser too + assert "2026-04-22" in body["lastModified"] + + def test_naive_datetime_normalized_to_utc(self, client, auth_headers): + self.mock.get_file_info.return_value = FileInfo( + file_id="fid-2", + filename="x.txt", + size=1, + content_type="text/plain", + created_at=datetime(2026, 1, 15, 12, 0, 0), # naive + path="/data/x.txt", + ) + response = client.get("/sessions/sess-1/objects/fid-2", headers=auth_headers) + assert response.status_code == 200 + assert response.json()["lastModified"].endswith("Z") + + def test_missing_file_returns_404(self, client, auth_headers): + self.mock.get_file_info.return_value = None + response = client.get("/sessions/sess-1/objects/missing", headers=auth_headers) + # LibreChat catches and re-uploads on 404 -> the desired fallback. 
+ assert response.status_code == 404 + + def test_lastmodified_matches_summary_endpoint(self, client, auth_headers): + """Same FileInfo => same lastModified value across both endpoints + (so LibreChat's two read paths agree about session age).""" + file_info = FileInfo( + file_id="fid-3", + filename="y.txt", + size=2, + content_type="text/plain", + created_at=datetime(2026, 3, 14, 9, 26, 53, tzinfo=timezone.utc), + path="/data/y.txt", + ) + # Both endpoints share the same get_file_info / list_files-derived value + self.mock.get_file_info.return_value = file_info + self.mock.list_files.return_value = [file_info] + + obj_resp = client.get("/sessions/sess-1/objects/fid-3", headers=auth_headers) + list_resp = client.get("/files/sess-1?detail=summary", headers=auth_headers) + + obj_modified = obj_resp.json()["lastModified"] + list_modified = list_resp.json()[0]["lastModified"] + assert obj_modified == list_modified diff --git a/tests/integration/test_programmatic_api.py b/tests/integration/test_programmatic_api.py index a92d65b..811c760 100644 --- a/tests/integration/test_programmatic_api.py +++ b/tests/integration/test_programmatic_api.py @@ -561,3 +561,81 @@ def test_invalid_auth_returns_401(self, client): headers={"x-api-key": "wrong-key"}, ) assert response.status_code == 401 + + +class TestProgrammaticLangField: + """Tests for the `lang` field on /exec/programmatic. + + LibreChat's BashProgrammaticToolCalling sends {lang: "bash", ...}; the + Python tool sends nothing (default). 
Invalid languages must be rejected + so silent Python execution doesn't surprise callers.""" + + @patch("src.api.programmatic._get_ptc_service") + def test_lang_defaults_to_py( + self, + mock_get_service, + client, + auth_headers, + mock_session, + mock_ptc_completed_response, + ): + mock_service = AsyncMock() + mock_service.start_execution.return_value = mock_ptc_completed_response + mock_get_service.return_value = mock_service + + from src.dependencies.services import get_session_service + + mock_session_svc = AsyncMock() + mock_session_svc.create_session.return_value = mock_session + app.dependency_overrides[get_session_service] = lambda: mock_session_svc + + try: + response = client.post( + "/exec/programmatic", + json={"code": "print('hi')"}, + headers=auth_headers, + ) + finally: + app.dependency_overrides.clear() + + assert response.status_code == 200 + assert mock_service.start_execution.await_args.kwargs["lang"] == "py" + + @patch("src.api.programmatic._get_ptc_service") + def test_lang_bash_routed_to_service( + self, + mock_get_service, + client, + auth_headers, + mock_session, + mock_ptc_completed_response, + ): + mock_service = AsyncMock() + mock_service.start_execution.return_value = mock_ptc_completed_response + mock_get_service.return_value = mock_service + + from src.dependencies.services import get_session_service + + mock_session_svc = AsyncMock() + mock_session_svc.create_session.return_value = mock_session + app.dependency_overrides[get_session_service] = lambda: mock_session_svc + + try: + response = client.post( + "/exec/programmatic", + json={"code": "echo hello", "lang": "bash", "tools": []}, + headers=auth_headers, + ) + finally: + app.dependency_overrides.clear() + + assert response.status_code == 200 + assert mock_service.start_execution.await_args.kwargs["lang"] == "bash" + + def test_invalid_lang_returns_422(self, client, auth_headers): + response = client.post( + "/exec/programmatic", + json={"code": "puts 'hi'", "lang": "ruby"}, + 
headers=auth_headers, + ) + assert response.status_code == 422 diff --git a/tests/integration/test_session_behavior.py b/tests/integration/test_session_behavior.py index 7881f00..effa95e 100644 --- a/tests/integration/test_session_behavior.py +++ b/tests/integration/test_session_behavior.py @@ -498,7 +498,7 @@ def test_generated_file_downloadable(self, client, auth_headers): path="/output.txt", ) ] - mock_file_service.download_file.return_value = "https://minio.test/download" + mock_file_service.download_file.return_value = "https://s3.test/download" from src.dependencies.services import ( get_session_service, diff --git a/tests/unit/test_egress_proxy.py b/tests/unit/test_egress_proxy.py new file mode 100644 index 0000000..f53c1ea --- /dev/null +++ b/tests/unit/test_egress_proxy.py @@ -0,0 +1,231 @@ +"""Unit tests for the sandbox egress proxy.""" + +import asyncio +import socket + +import pytest + +from src.services.sandbox.egress_proxy import ( + EgressProxy, + _is_private_ip, + _matches_allowlist, + _normalize_host, +) + + +def _free_port() -> int: + s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + s.bind(("127.0.0.1", 0)) + port = s.getsockname()[1] + s.close() + return port + + +# --- Pure-function tests (no proxy server needed) --------------------------- + + +class TestAllowlistMatching: + def test_exact_match(self): + assert _matches_allowlist("pypi.org", {"pypi.org"}) + + def test_subdomain_match(self): + assert _matches_allowlist("files.pythonhosted.org", {"pythonhosted.org"}) + + def test_subdomain_match_two_levels(self): + assert _matches_allowlist("a.b.example.com", {"example.com"}) + + def test_unrelated_host_rejected(self): + assert not _matches_allowlist("evil.com", {"pypi.org"}) + + def test_substring_does_not_match(self): + # `evilpypi.org` is NOT a subdomain of `pypi.org`. 
+ assert not _matches_allowlist("evilpypi.org", {"pypi.org"}) + + def test_case_insensitive(self): + assert _matches_allowlist("PyPI.ORG", {"pypi.org"}) + + def test_normalize_strips_brackets(self): + assert _normalize_host("[::1]") == "::1" + + +class TestPrivateIpDetection: + def test_loopback(self): + assert _is_private_ip("127.0.0.1") + + def test_rfc1918_10(self): + assert _is_private_ip("10.0.0.1") + + def test_rfc1918_172(self): + assert _is_private_ip("172.16.5.5") + + def test_rfc1918_192(self): + assert _is_private_ip("192.168.1.1") + + def test_link_local(self): + assert _is_private_ip("169.254.169.254") + + def test_public_ipv4_not_private(self): + assert not _is_private_ip("8.8.8.8") + + def test_hostname_returns_false(self): + # Hostnames aren't IP literals. + assert not _is_private_ip("pypi.org") + + +# --- Proxy server tests (start a real EgressProxy + drive it as a client) --- + + +async def _send_connect( + proxy_port: int, target: str +) -> tuple[bytes, asyncio.StreamReader, asyncio.StreamWriter]: + """Open a TCP connection to the proxy, send a CONNECT, return status bytes.""" + reader, writer = await asyncio.open_connection("127.0.0.1", proxy_port) + writer.write(f"CONNECT {target} HTTP/1.1\r\nHost: {target}\r\n\r\n".encode()) + await writer.drain() + + raw = b"" + while b"\r\n\r\n" not in raw: + chunk = await asyncio.wait_for(reader.read(1024), timeout=2) + if not chunk: + break + raw += chunk + return raw, reader, writer + + +@pytest.mark.asyncio +async def test_disallowed_host_returns_403(): + port = _free_port() + proxy = EgressProxy(port=port, allowlist={"good.test"}) + await proxy.start() + try: + status, _r, w = await _send_connect(port, "evil.com:443") + w.close() + assert b"403" in status, status + finally: + await proxy.stop() + + +@pytest.mark.asyncio +async def test_private_ip_literal_returns_403(): + port = _free_port() + proxy = EgressProxy(port=port, allowlist={"good.test"}) + await proxy.start() + try: + status, _r, w = await 
_send_connect(port, "10.0.0.1:443") + w.close() + assert b"403" in status, status + finally: + await proxy.stop() + + +@pytest.mark.asyncio +async def test_loopback_literal_returns_403(): + port = _free_port() + proxy = EgressProxy(port=port, allowlist={"good.test"}) + await proxy.start() + try: + status, _r, w = await _send_connect(port, "127.0.0.1:443") + w.close() + assert b"403" in status, status + finally: + await proxy.stop() + + +@pytest.mark.asyncio +async def test_non_connect_method_returns_405(): + port = _free_port() + proxy = EgressProxy(port=port, allowlist={"good.test"}) + await proxy.start() + try: + reader, writer = await asyncio.open_connection("127.0.0.1", port) + writer.write(b"GET /something HTTP/1.1\r\nHost: pypi.org\r\n\r\n") + await writer.drain() + raw = b"" + while b"\r\n\r\n" not in raw: + chunk = await asyncio.wait_for(reader.read(1024), timeout=2) + if not chunk: + break + raw += chunk + writer.close() + assert b"405" in raw, raw + finally: + await proxy.stop() + + +@pytest.mark.asyncio +async def test_allowed_host_unresolvable_returns_502(): + """Allowlist passes but DNS fails => 502, NOT 403. 
Confirms the allowlist + check accepts the host before we try to connect.""" + port = _free_port() + proxy = EgressProxy(port=port, allowlist={"definitely-not-a-real-tld.test"}) + await proxy.start() + try: + status, _r, w = await _send_connect(port, "definitely-not-a-real-tld.test:443") + w.close() + # Allowlist passed; resolution failed -> 502 + assert b"502" in status, status + assert b"403" not in status + finally: + await proxy.stop() + + +@pytest.mark.asyncio +async def test_subdomain_allowed_via_parent(monkeypatch): + """`a.good.test` should match allowlist entry `good.test` and proceed + past the allowlist check (resolves to 502 since it doesn't exist).""" + port = _free_port() + proxy = EgressProxy(port=port, allowlist={"good.test"}) + await proxy.start() + try: + status, _r, w = await _send_connect(port, "a.good.test:443") + w.close() + assert b"403" not in status, status + finally: + await proxy.stop() + + +@pytest.mark.asyncio +async def test_tunnel_pipes_bytes_when_allowed(monkeypatch): + """Successful CONNECT then bidirectional byte pipe. + + We bypass the private-IP guard so we can test the tunnel against a local + echo server. The allowlist itself is what enforces this in production. + """ + from src.services.sandbox import egress_proxy as _ep + + monkeypatch.setattr(_ep, "_is_private_ip", lambda host: False) + + # Start a tiny echo server on localhost. 
+ echo_port = _free_port() + echo_received: bytearray = bytearray() + + async def echo_handler(reader, writer): + try: + data = await asyncio.wait_for(reader.read(64), timeout=2) + echo_received.extend(data) + writer.write(data) + await writer.drain() + finally: + writer.close() + + echo_server = await asyncio.start_server(echo_handler, "127.0.0.1", echo_port) + + proxy_port = _free_port() + proxy = EgressProxy(port=proxy_port, allowlist={"127.0.0.1"}) + await proxy.start() + try: + status, reader, writer = await _send_connect( + proxy_port, f"127.0.0.1:{echo_port}" + ) + assert b"200" in status, status + + writer.write(b"ping\n") + await writer.drain() + + echoed = await asyncio.wait_for(reader.read(64), timeout=2) + assert echoed == b"ping\n" + writer.close() + finally: + await proxy.stop() + echo_server.close() + await echo_server.wait_closed() diff --git a/tests/unit/test_exec_models.py b/tests/unit/test_exec_models.py new file mode 100644 index 0000000..e873bff --- /dev/null +++ b/tests/unit/test_exec_models.py @@ -0,0 +1,95 @@ +"""Unit tests for /exec request and response models. + +Covers the client-compatibility additions: FileRef.inherited / entity_id / +modified_from, RequestFile.entity_id, and ExecRequest.timeout (with bounds). +""" + +import pytest +from pydantic import ValidationError + +from src.models.exec import ExecRequest, FileRef, RequestFile + + +class TestFileRefSerialization: + """FileRef adds inherited / entity_id / modified_from. 
With + `exclude_none=True` (the API serializes responses this way) the + `inherited=None` case must drop the field entirely so non-inherited + files don't ship `"inherited": null`.""" + + def test_inherited_true_serializes(self): + ref = FileRef( + id="orig-1", + name="data.csv", + session_id="sess-1", + inherited=True, + entity_id="agent-1", + ) + dumped = ref.model_dump(exclude_none=True) + assert dumped["inherited"] is True + assert dumped["entity_id"] == "agent-1" + assert dumped["id"] == "orig-1" + assert dumped["session_id"] == "sess-1" + + def test_inherited_none_excluded_with_exclude_none(self): + ref = FileRef(id="fid", name="out.png", session_id="sess-1") + dumped = ref.model_dump(exclude_none=True) + assert "inherited" not in dumped + assert "entity_id" not in dumped + assert "modified_from" not in dumped + # Existing optional fields must also be excluded. + assert "path" not in dumped + + def test_modified_from_preserved(self): + ref = FileRef( + id="new-fid", + name="report.csv", + session_id="sess-2", + modified_from={"id": "old-fid", "session_id": "sess-1"}, + ) + dumped = ref.model_dump(exclude_none=True) + assert dumped["modified_from"] == {"id": "old-fid", "session_id": "sess-1"} + + +class TestRequestFileEntityId: + """RequestFile must accept and round-trip entity_id (Gap 3).""" + + def test_entity_id_preserved(self): + rf = RequestFile( + id="fid", + session_id="sess", + name="data.csv", + entity_id="agent-xyz", + ) + assert rf.entity_id == "agent-xyz" + + def test_entity_id_optional(self): + rf = RequestFile(id="fid", session_id="sess", name="data.csv") + assert rf.entity_id is None + + +class TestExecRequestTimeout: + """ExecRequest.timeout: optional, milliseconds, range 1000-300000.""" + + def test_timeout_within_range_accepted(self): + req = ExecRequest(code="print(1)", lang="py", timeout=5000) + assert req.timeout == 5000 + + def test_timeout_at_lower_bound(self): + req = ExecRequest(code="print(1)", lang="py", timeout=1000) + assert 
req.timeout == 1000 + + def test_timeout_at_upper_bound(self): + req = ExecRequest(code="print(1)", lang="py", timeout=300000) + assert req.timeout == 300000 + + def test_timeout_below_minimum_rejected(self): + with pytest.raises(ValidationError): + ExecRequest(code="print(1)", lang="py", timeout=999) + + def test_timeout_above_maximum_rejected(self): + with pytest.raises(ValidationError): + ExecRequest(code="print(1)", lang="py", timeout=300001) + + def test_timeout_optional(self): + req = ExecRequest(code="print(1)", lang="py") + assert req.timeout is None diff --git a/tests/unit/test_file_service.py b/tests/unit/test_file_service.py index a261fff..58aebb6 100644 --- a/tests/unit/test_file_service.py +++ b/tests/unit/test_file_service.py @@ -9,12 +9,14 @@ @pytest.fixture -def mock_minio_client(): - """Mock MinIO client.""" +def mock_s3_client(): + """Mock S3 client.""" client = MagicMock() - client.bucket_exists = MagicMock(return_value=True) + client.head_bucket = MagicMock(return_value={}) client.put_object = MagicMock() client.get_object = MagicMock() + client.delete_object = MagicMock() + client.head_object = MagicMock(return_value={"ContentLength": 1024}) return client @@ -35,14 +37,14 @@ def mock_redis_client(): @pytest.fixture -def file_service(mock_minio_client, mock_redis_client): +def file_service(mock_s3_client, mock_redis_client): """Create FileService with mocked clients.""" - with patch("src.services.file.Minio") as mock_minio_class: - mock_minio_class.return_value = mock_minio_client + with patch("src.services.file.boto3") as mock_boto3: + mock_boto3.client.return_value = mock_s3_client with patch("src.services.file.redis.from_url") as mock_redis_from_url: mock_redis_from_url.return_value = mock_redis_client service = FileService() - service.minio_client = mock_minio_client + service.s3_client = mock_s3_client service.redis_client = mock_redis_client return service @@ -52,7 +54,7 @@ class TestUpdateFileContent: @pytest.mark.asyncio async def 
test_update_file_content_rejects_read_only_file( - self, file_service, mock_minio_client, mock_redis_client + self, file_service, mock_s3_client, mock_redis_client ): """Read-only linked aliases must not overwrite the source object.""" session_id = "test-session" @@ -73,13 +75,13 @@ async def test_update_file_content_rejects_read_only_file( ) assert result is False - mock_minio_client.put_object.assert_not_called() + mock_s3_client.put_object.assert_not_called() @pytest.mark.asyncio async def test_update_file_content_success( - self, file_service, mock_minio_client, mock_redis_client + self, file_service, mock_s3_client, mock_redis_client ): - """Test that update_file_content overwrites file in MinIO.""" + """Test that update_file_content overwrites file in S3.""" session_id = "test-session-123" file_id = "test-file-456" new_content = b"modified file content" @@ -99,14 +101,12 @@ async def test_update_file_content_success( ) assert result is True - # Verify MinIO put_object was called - mock_minio_client.put_object.assert_called_once() - # Verify metadata was updated + mock_s3_client.put_object.assert_called_once() mock_redis_client.hset.assert_called() @pytest.mark.asyncio async def test_update_file_content_updates_metadata( - self, file_service, mock_minio_client, mock_redis_client + self, file_service, mock_s3_client, mock_redis_client ): """Test that update_file_content updates file size metadata.""" session_id = "test-session-123" @@ -177,10 +177,10 @@ async def test_update_file_content_no_object_key( assert result is False @pytest.mark.asyncio - async def test_update_file_content_minio_error( - self, file_service, mock_minio_client, mock_redis_client + async def test_update_file_content_s3_error( + self, file_service, mock_s3_client, mock_redis_client ): - """Test handling of MinIO error during update.""" + """Test handling of S3 error during update.""" session_id = "test-session" file_id = "file-id" @@ -191,8 +191,7 @@ async def 
test_update_file_content_minio_error( "content_type": "text/plain", } - # Mock MinIO error - mock_minio_client.put_object.side_effect = Exception("MinIO connection error") + mock_s3_client.put_object.side_effect = Exception("S3 connection error") result = await file_service.update_file_content( session_id=session_id, @@ -204,7 +203,7 @@ async def test_update_file_content_minio_error( @pytest.mark.asyncio async def test_update_file_content_preserves_content_type( - self, file_service, mock_minio_client, mock_redis_client + self, file_service, mock_s3_client, mock_redis_client ): """Test that content_type is preserved from original metadata.""" session_id = "test-session" @@ -225,14 +224,12 @@ async def test_update_file_content_preserves_content_type( ) assert result is True - # Verify put_object was called with preserved content_type - put_call = mock_minio_client.put_object.call_args - # The content_type should be "image/png" from the metadata - assert "image/png" in str(put_call) + put_call = mock_s3_client.put_object.call_args + assert put_call.kwargs.get("ContentType") == "image/png" @pytest.mark.asyncio async def test_update_file_content_only_updates_size( - self, file_service, mock_minio_client, mock_redis_client + self, file_service, mock_s3_client, mock_redis_client ): """Test that update_file_content only updates size metadata.""" session_id = "test-session" @@ -354,7 +351,7 @@ async def test_link_file_into_session_reuses_existing_alias( @pytest.mark.asyncio async def test_delete_linked_file_only_removes_metadata( - self, file_service, mock_minio_client, mock_redis_client + self, file_service, mock_s3_client, mock_redis_client ): """Deleting a linked alias must not delete the shared object.""" mock_redis_client.hgetall.return_value = { @@ -375,13 +372,13 @@ async def test_delete_linked_file_only_removes_metadata( result = await file_service.delete_file("target-session", "linked-file") assert result is True - 
mock_minio_client.remove_object.assert_not_called() + mock_s3_client.delete_object.assert_not_called() mock_redis_client.delete.assert_called_once() assert mock_redis_client.srem.call_count == 2 @pytest.mark.asyncio async def test_delete_source_file_keeps_object_when_aliases_exist( - self, file_service, mock_minio_client, mock_redis_client + self, file_service, mock_s3_client, mock_redis_client ): """Deleting the source metadata must not delete a shared object still referenced by aliases.""" mock_redis_client.hgetall.return_value = { @@ -400,12 +397,12 @@ async def test_delete_source_file_keeps_object_when_aliases_exist( result = await file_service.delete_file("source-session", "source-file") assert result is True - mock_minio_client.remove_object.assert_not_called() + mock_s3_client.delete_object.assert_not_called() mock_redis_client.delete.assert_called_once() @pytest.mark.asyncio async def test_delete_last_linked_file_cleans_orphaned_shared_object( - self, file_service, mock_minio_client, mock_redis_client + self, file_service, mock_s3_client, mock_redis_client ): """The final alias cleanup should delete the shared object once the source is gone.""" mock_redis_client.hgetall.side_effect = [ @@ -430,7 +427,7 @@ async def test_delete_last_linked_file_cleans_orphaned_shared_object( result = await file_service.delete_file("target-session", "linked-file") assert result is True - mock_minio_client.remove_object.assert_called_once_with( - file_service.bucket_name, - "sessions/source/uploads/source-file", + mock_s3_client.delete_object.assert_called_once_with( + Bucket=file_service.bucket_name, + Key="sessions/source/uploads/source-file", ) diff --git a/tests/unit/test_orchestrator.py b/tests/unit/test_orchestrator.py index 49f3a5b..3a39ef3 100644 --- a/tests/unit/test_orchestrator.py +++ b/tests/unit/test_orchestrator.py @@ -280,6 +280,7 @@ async def test_auto_mount_returns_session_files( "size": 100, "session_id": "test-session-123", "is_linked_input": False, + 
"is_read_only": False, } ] @@ -631,3 +632,271 @@ async def test_explicit_mount_skips_not_found_files( result = await orchestrator._mount_explicit_files(ctx) assert len(result) == 0 + + +class TestExecuteCodeTimeout: + """Per-request timeout (ms) → execution timeout (s), clamped to server max. + + Implementation lives in `_execute_code` at orchestrator.py:661+. We patch + the execution service to capture the constructed `ExecuteCodeRequest` and + assert on its `timeout` (seconds).""" + + @pytest.mark.asyncio + async def test_timeout_ms_to_seconds_with_ceil(self, orchestrator): + from types import SimpleNamespace + from src.models.execution import CodeExecution, ExecutionStatus + from src.models.exec import ExecRequest + + captured = {} + + async def _capture(session_id, exec_request, mounted_files, **kwargs): + captured["request"] = exec_request + return ( + CodeExecution( + execution_id="x", + session_id="s", + code="", + language="py", + status=ExecutionStatus.COMPLETED, + outputs=[], + started_at=datetime.now(), + ), + SimpleNamespace(), + None, + None, + None, + ) + + orchestrator.execution_service.execute_code = _capture + + ctx = ExecutionContext( + request=ExecRequest(code="x", lang="py", timeout=5000), + request_id="r", + session_id="s", + ) + await orchestrator._execute_code(ctx) + # 5000 ms == 5 s exactly. 
+ assert captured["request"].timeout == 5 + + @pytest.mark.asyncio + async def test_timeout_ms_ceil_for_non_integer_seconds(self, orchestrator): + from types import SimpleNamespace + from src.models.execution import CodeExecution, ExecutionStatus + from src.models.exec import ExecRequest + + captured = {} + + async def _capture(session_id, exec_request, mounted_files, **kwargs): + captured["request"] = exec_request + return ( + CodeExecution( + execution_id="x", + session_id="s", + code="", + language="py", + status=ExecutionStatus.COMPLETED, + outputs=[], + started_at=datetime.now(), + ), + SimpleNamespace(), + None, + None, + None, + ) + + orchestrator.execution_service.execute_code = _capture + # 4500 ms → ceil(4.5) == 5 + ctx = ExecutionContext( + request=ExecRequest(code="x", lang="py", timeout=4500), + request_id="r", + session_id="s", + ) + await orchestrator._execute_code(ctx) + assert captured["request"].timeout == 5 + + @pytest.mark.asyncio + async def test_timeout_none_uses_server_default(self, orchestrator): + from types import SimpleNamespace + from src.config import settings + from src.models.execution import CodeExecution, ExecutionStatus + from src.models.exec import ExecRequest + + captured = {} + + async def _capture(session_id, exec_request, mounted_files, **kwargs): + captured["request"] = exec_request + return ( + CodeExecution( + execution_id="x", + session_id="s", + code="", + language="py", + status=ExecutionStatus.COMPLETED, + outputs=[], + started_at=datetime.now(), + ), + SimpleNamespace(), + None, + None, + None, + ) + + orchestrator.execution_service.execute_code = _capture + + ctx = ExecutionContext( + request=ExecRequest(code="x", lang="py"), + request_id="r", + session_id="s", + ) + await orchestrator._execute_code(ctx) + assert captured["request"].timeout == settings.max_execution_time + + @pytest.mark.asyncio + async def test_timeout_clamped_to_server_max(self, orchestrator, monkeypatch): + """The pydantic validator caps `timeout` 
at 300000 ms == 300 s. The + orchestrator must additionally clamp to `settings.max_execution_time` + so a client can't exceed the per-server cap.""" + from types import SimpleNamespace + from src.config import settings + from src.models.execution import CodeExecution, ExecutionStatus + from src.models.exec import ExecRequest + + # Force the server max well below the validator's upper bound so + # we can observe clamping. + monkeypatch.setattr(settings, "max_execution_time", 10) + + captured = {} + + async def _capture(session_id, exec_request, mounted_files, **kwargs): + captured["request"] = exec_request + return ( + CodeExecution( + execution_id="x", + session_id="s", + code="", + language="py", + status=ExecutionStatus.COMPLETED, + outputs=[], + started_at=datetime.now(), + ), + SimpleNamespace(), + None, + None, + None, + ) + + orchestrator.execution_service.execute_code = _capture + + ctx = ExecutionContext( + request=ExecRequest(code="x", lang="py", timeout=300000), + request_id="r", + session_id="s", + ) + await orchestrator._execute_code(ctx) + assert captured["request"].timeout == 10 + + +class TestHandleGeneratedFilesNestedPaths: + """Tests that _handle_generated_files preserves subdirectory paths + (LibreChat PR #12848 expects e.g. name='charts/foo.png').""" + + async def test_nested_path_preserved_in_fileref_name( + self, orchestrator, mock_file_service + ): + from src.models.exec import ExecRequest + + # Mock the helper that pulls bytes out of the container. + orchestrator._get_file_from_container = AsyncMock(return_value=b"data") + mock_file_service.store_execution_output_file = AsyncMock(return_value="fid-1") + + request = ExecRequest(code="print()", lang="py") + + # Build a minimal execution stub with a single file output. Use a SimpleNamespace + # so we don't depend on the full CodeExecution constructor surface. 
+ from types import SimpleNamespace + from src.models import OutputType + + execution = SimpleNamespace( + outputs=[ + SimpleNamespace( + type=OutputType.FILE, + content="/mnt/data/charts/foo.png", + metadata=None, + ) + ] + ) + ctx = ExecutionContext( + request=request, + request_id="r1", + session_id="sess-abc", + execution=execution, + container=SimpleNamespace(), + ) + + refs = await orchestrator._handle_generated_files(ctx) + + assert len(refs) == 1 + assert refs[0].name == "charts/foo.png" + # Storage call uses the same nested path as the FileRef name. + mock_file_service.store_execution_output_file.assert_awaited_once() + args = mock_file_service.store_execution_output_file.call_args + assert args.args[1] == "charts/foo.png" + + async def test_top_level_path_unchanged(self, orchestrator, mock_file_service): + from src.models.exec import ExecRequest + from types import SimpleNamespace + from src.models import OutputType + + orchestrator._get_file_from_container = AsyncMock(return_value=b"data") + mock_file_service.store_execution_output_file = AsyncMock(return_value="fid") + + execution = SimpleNamespace( + outputs=[ + SimpleNamespace( + type=OutputType.FILE, + content="/mnt/data/foo.png", + metadata=None, + ) + ] + ) + ctx = ExecutionContext( + request=ExecRequest(code="print()", lang="py"), + request_id="r1", + session_id="s", + execution=execution, + container=SimpleNamespace(), + ) + + refs = await orchestrator._handle_generated_files(ctx) + + assert len(refs) == 1 + assert refs[0].name == "foo.png" + + async def test_hidden_basename_skipped(self, orchestrator, mock_file_service): + from src.models.exec import ExecRequest + from types import SimpleNamespace + from src.models import OutputType + + orchestrator._get_file_from_container = AsyncMock(return_value=b"data") + mock_file_service.store_execution_output_file = AsyncMock(return_value="fid") + + # Subdirectory is fine, but file basename starts with `.` -> skip. 
+ execution = SimpleNamespace( + outputs=[ + SimpleNamespace( + type=OutputType.FILE, + content="/mnt/data/charts/.hidden.png", + metadata=None, + ) + ] + ) + ctx = ExecutionContext( + request=ExecRequest(code="print()", lang="py"), + request_id="r1", + session_id="s", + execution=execution, + container=SimpleNamespace(), + ) + + refs = await orchestrator._handle_generated_files(ctx) + assert refs == [] diff --git a/tests/unit/test_output_processor.py b/tests/unit/test_output_processor.py index fffcf2d..fa1aa24 100644 --- a/tests/unit/test_output_processor.py +++ b/tests/unit/test_output_processor.py @@ -62,10 +62,61 @@ def test_absolute_path_stripped(self): result = OutputProcessor.sanitize_filename("/absolute/path/file.txt") assert result == "file.txt" - def test_unicode_characters_replaced(self): - """Test that non-ASCII characters are replaced.""" + def test_unicode_characters_preserved(self): + """Test that Unicode letters are preserved.""" result = OutputProcessor.sanitize_filename("résumé.docx") - assert result == "r_sum_.docx" + assert result == "résumé.docx" + + def test_cjk_characters_preserved(self): + """Test that CJK characters are preserved.""" + result = OutputProcessor.sanitize_filename("日本語レポート.xlsx") + assert result == "日本語レポート.xlsx" + + def test_cyrillic_characters_preserved(self): + """Test that Cyrillic characters are preserved.""" + result = OutputProcessor.sanitize_filename("файл.txt") + assert result == "файл.txt" + + def test_korean_characters_preserved(self): + """Test that Korean characters are preserved.""" + result = OutputProcessor.sanitize_filename("보고서.xlsx") + assert result == "보고서.xlsx" + + def test_arabic_characters_preserved(self): + """Test that Arabic characters are preserved.""" + result = OutputProcessor.sanitize_filename("تقرير.pdf") + assert result == "تقرير.pdf" + + def test_mixed_unicode_and_ascii(self): + """Test mixed Unicode and ASCII filename.""" + result = OutputProcessor.sanitize_filename("report_2024_報告.pdf") + 
assert result == "report_2024_報告.pdf" + + def test_unicode_with_spaces_sanitized(self): + """Test that spaces in Unicode filenames are still replaced.""" + result = OutputProcessor.sanitize_filename("日本語 レポート.xlsx") + assert result == "日本語_レポート.xlsx" + + def test_dangerous_chars_still_blocked(self): + """Test that shell metacharacters are still replaced.""" + result = OutputProcessor.sanitize_filename("file<>|&;$().txt") + assert result == "file________.txt" + + def test_underscores_preserved(self): + """Test that underscores are preserved.""" + result = OutputProcessor.sanitize_filename("my_file_name.txt") + assert result == "my_file_name.txt" + + def test_emoji_preserved(self): + """Test that emoji are preserved (matches LibreChat's \\p{Emoji}).""" + result = OutputProcessor.sanitize_filename("chart\U0001F4CA.csv") + assert result == "chart\U0001F4CA.csv" + + def test_nfd_normalized_to_nfc(self): + """Test that decomposed Unicode is NFC-normalized before sanitizing.""" + # e + combining acute (U+0301) -> precomposed e-acute + result = OutputProcessor.sanitize_filename("Café.txt") + assert result == "Café.txt" def test_brackets_replaced(self): """Test that brackets are replaced with underscores.""" @@ -102,3 +153,81 @@ def test_long_filename_truncated(self): assert result.endswith(".txt") # Should have a random suffix before extension assert "-" in result + + +class TestSanitizeRelativePath: + """Tests for sanitize_relative_path — used wherever filenames may legitimately + contain subdirectories (LibreChat skill bundles, nested artifacts).""" + + def test_simple_filename_unchanged(self): + assert OutputProcessor.sanitize_relative_path("foo.png") == "foo.png" + + def test_subdirs_preserved(self): + assert ( + OutputProcessor.sanitize_relative_path("charts/foo.png") == "charts/foo.png" + ) + + def test_deep_subdirs_preserved(self): + assert ( + OutputProcessor.sanitize_relative_path("a/b/c/d/file.txt") + == "a/b/c/d/file.txt" + ) + + def 
test_each_segment_sanitized(self): + assert ( + OutputProcessor.sanitize_relative_path("my charts/foo bar.png") + == "my_charts/foo_bar.png" + ) + + def test_traversal_segments_dropped(self): + # `..` is dropped per-segment; remaining segments survive. + assert OutputProcessor.sanitize_relative_path("a/../b/c.txt") == "a/b/c.txt" + + def test_only_traversal_returns_underscore(self): + assert OutputProcessor.sanitize_relative_path("../../..") == "_" + + def test_leading_slash_stripped(self): + assert ( + OutputProcessor.sanitize_relative_path("/charts/foo.png") + == "charts/foo.png" + ) + + def test_trailing_slash_dropped(self): + assert OutputProcessor.sanitize_relative_path("charts/") == "charts" + + def test_consecutive_slashes_collapsed(self): + assert ( + OutputProcessor.sanitize_relative_path("charts//foo.png") + == "charts/foo.png" + ) + + def test_empty_string_returns_underscore(self): + assert OutputProcessor.sanitize_relative_path("") == "_" + + def test_just_slash_returns_underscore(self): + assert OutputProcessor.sanitize_relative_path("/") == "_" + + def test_backslashes_treated_as_separators(self): + assert ( + OutputProcessor.sanitize_relative_path("charts\\foo.png") + == "charts/foo.png" + ) + + def test_librechat_skill_bundle_pattern(self): + # The exact shape LibreChat sends for skill priming uploads. + assert ( + OutputProcessor.sanitize_relative_path("skills/foo/SKILL.md") + == "skills/foo/SKILL.md" + ) + + def test_unicode_segments_preserved(self): + """Test that Unicode directory and file names are preserved.""" + assert ( + OutputProcessor.sanitize_relative_path("報告/2024年/レポート.xlsx") + == "報告/2024年/レポート.xlsx" + ) + + def test_sanitize_filename_unchanged_for_basename_callers(self): + """Regression: sanitize_filename still flattens (legacy upload behavior).""" + # Existing single-call sites rely on this. 
+ assert OutputProcessor.sanitize_filename("path/to/file.txt") == "file.txt" diff --git a/tests/unit/test_programmatic.py b/tests/unit/test_programmatic.py index ac79390..644b33e 100644 --- a/tests/unit/test_programmatic.py +++ b/tests/unit/test_programmatic.py @@ -497,9 +497,7 @@ async def test_start_execution_mounts_referenced_files( mock_proc.stdout = AsyncMock() mock_proc.stdout.read = AsyncMock( return_value=( - json.dumps( - {"type": "completed", "stdout": "ok\n", "stderr": ""} - ) + json.dumps({"type": "completed", "stdout": "ok\n", "stderr": ""}) + PTC_DELIMITER ).encode() ) @@ -529,10 +527,13 @@ async def test_start_execution_mounts_referenced_files( "upload-session", "file-123", ) + # Subdirectories are preserved (Item 4b symmetry — LibreChat skill + # bundles ship `skills//SKILL.md` and expect to read them at + # the nested path inside the sandbox). assert mock_sandbox_manager.copy_content_to_sandbox.call_args_list[1].args == ( mock_sandbox_manager.create_sandbox.return_value, b"col1,col2\n1,2\n", - "/mnt/data/report.csv", + "/mnt/data/nested/report.csv", ) async def test_start_execution_errors_when_referenced_file_missing( @@ -545,7 +546,9 @@ async def test_start_execution_errors_when_referenced_file_missing( with ( patch("pathlib.Path.exists", return_value=True), patch("pathlib.Path.read_bytes", return_value=b"# ptc_server.py"), - patch("asyncio.create_subprocess_exec", new_callable=AsyncMock) as mock_proc, + patch( + "asyncio.create_subprocess_exec", new_callable=AsyncMock + ) as mock_proc, ): response = await ptc_service.start_execution( code="print('hello')", @@ -591,11 +594,14 @@ async def test_continue_uses_remaining_execution_timeout(self, ptc_service): ) ptc_service._paused_contexts[token] = ctx - with patch("time.monotonic", return_value=70.2), patch.object( - ptc_service, - "_read_ptc_response", - new_callable=AsyncMock, - ) as mock_read: + with ( + patch("time.monotonic", return_value=70.2), + patch.object( + ptc_service, + 
"_read_ptc_response", + new_callable=AsyncMock, + ) as mock_read, + ): mock_read.return_value = ProgrammaticExecResponse(status="completed") await ptc_service.continue_execution( continuation_token=token, @@ -809,3 +815,76 @@ async def test_cleanup_all(self, ptc_service): await ptc_service.cleanup_all() assert len(ptc_service._paused_contexts) == 0 + + +class TestStartExecutionLangRouting: + """start_execution(lang=...) must select the matching PTC server script + and create the sandbox in the matching language. + + We short-circuit at copy_content_to_sandbox so we don't have to set up + nsjail / unshare / a real subprocess just to verify the routing. + """ + + async def _exercise_start(self, lang: str): + ptc_service = ProgrammaticService() + + sandbox_info = SandboxInfo( + sandbox_id="sb-test", + sandbox_dir=Path("/tmp/sb-test"), + data_dir=Path("/tmp/sb-test/data"), + language=lang, + session_id="sess-1", + created_at=datetime.utcnow(), + repl_mode=False, + ) + ptc_service._sandbox_manager = MagicMock() + ptc_service._sandbox_manager.create_sandbox.return_value = sandbox_info + # Make copy_content_to_sandbox raise so we abort before nsjail/subprocess. + boom = RuntimeError("__short_circuit__") + ptc_service._sandbox_manager.copy_content_to_sandbox.side_effect = boom + + with patch("src.services.programmatic.Path") as mock_path_cls: + inst = mock_path_cls.return_value + inst.exists.return_value = True + inst.read_bytes.return_value = b"# fake script" + # Path("/opt") / filename should also resolve through the mock. 
+ inst.__truediv__ = lambda self, other: inst + + response = await ptc_service.start_execution( + code="print('hi')" if lang == "py" else "echo hi", + tools=[], + session_id="sess-1", + lang=lang, + ) + + create_kwargs = ptc_service._sandbox_manager.create_sandbox.call_args.kwargs + copy_args = ptc_service._sandbox_manager.copy_content_to_sandbox.call_args.args + return response, create_kwargs, copy_args + + async def test_lang_py_routes_to_python_server(self): + response, create_kwargs, copy_args = await self._exercise_start("py") + assert create_kwargs["language"] == "py" + # 3rd positional arg is the destination path under /mnt/data. + assert copy_args[2] == "/mnt/data/ptc_server.py" + # We intentionally raised inside copy, so this is an error response — + # the routing assertions above are the real check. + assert response.status == "error" + + async def test_lang_bash_routes_to_bash_server(self): + response, create_kwargs, copy_args = await self._exercise_start("bash") + assert create_kwargs["language"] == "bash" + assert copy_args[2] == "/mnt/data/ptc_bash_server.py" + assert response.status == "error" + + async def test_invalid_lang_short_circuits_before_sandbox(self): + ptc_service = ProgrammaticService() + ptc_service._sandbox_manager = MagicMock() + + response = await ptc_service.start_execution( + code="x", tools=[], session_id="s", lang="ruby" + ) + + assert response.status == "error" + assert "Unsupported PTC lang" in (response.error or "") + # No sandbox creation attempt for an invalid lang. + ptc_service._sandbox_manager.create_sandbox.assert_not_called() diff --git a/tests/unit/test_ptc_bash_server.py b/tests/unit/test_ptc_bash_server.py new file mode 100644 index 0000000..46b2a40 --- /dev/null +++ b/tests/unit/test_ptc_bash_server.py @@ -0,0 +1,232 @@ +"""Local smoke tests for docker/ptc_bash_server.py. + +These are unit-level tests that run the bash PTC server as a subprocess +on the host (with PTC_BASH_DIR pointed at a temp dir). 
They verify the +end-to-end protocol — bash code calls the generated wrapper functions, +the server forwards them as `tool_calls` on its outer stdout, the test +sends `tool_results` back on stdin, and bash receives and prints the +result. + +Skipped automatically when `bash` or `jq` aren't on PATH. +""" + +import json +import os +import shutil +import subprocess +import sys +import time +from pathlib import Path + +import pytest + +_PTC_BASH_PATH = ( + Path(__file__).resolve().parent.parent.parent / "docker" / "ptc_bash_server.py" +) + +_REQUIRED = ("bash", "jq") +_missing = [b for b in _REQUIRED if shutil.which(b) is None] +pytestmark = pytest.mark.skipif( + bool(_missing), reason=f"Missing required binaries: {_missing}" +) + +DELIMITER = "\n---PTC_END---\n" + + +def _run_bash_ptc( + code: str, + tools: list, + tool_responder, + tmp_path: Path, + timeout: float = 15.0, +) -> dict: + """Spawn ptc_bash_server.py and drive its protocol from this process. + + `tool_responder(call)` is invoked for each tool_call message and must + return the JSON-serializable value to send back as the result. 
+ """ + env = os.environ.copy() + env["PTC_BASH_DIR"] = str(tmp_path) + + proc = subprocess.Popen( + [sys.executable, str(_PTC_BASH_PATH)], + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + env=env, + ) + + initial = json.dumps({"code": code, "tools": tools}) + DELIMITER + assert proc.stdin is not None + proc.stdin.write(initial.encode()) + proc.stdin.flush() + + deadline = time.monotonic() + timeout + out_buf = b"" + final_message: dict = {} + + try: + while time.monotonic() < deadline: + assert proc.stdout is not None + chunk = proc.stdout.read1(4096) + if not chunk: + if proc.poll() is not None: + break + continue + out_buf += chunk + + while DELIMITER.encode() in out_buf: + msg_bytes, _, rest = out_buf.partition(DELIMITER.encode()) + out_buf = rest + msg = json.loads(msg_bytes.decode()) + + if msg.get("type") == "tool_calls": + results = [] + for call in msg["calls"]: + try: + value = tool_responder(call) + results.append( + { + "call_id": call["id"], + "result": value, + "is_error": False, + } + ) + except Exception as exc: + results.append( + { + "call_id": call["id"], + "result": None, + "is_error": True, + "error_message": str(exc), + } + ) + response = ( + json.dumps({"type": "tool_results", "results": results}) + + DELIMITER + ) + proc.stdin.write(response.encode()) + proc.stdin.flush() + else: + final_message = msg + break + + if final_message: + break + + if not final_message: + proc.kill() + proc.wait(timeout=2) + raise AssertionError( + "ptc_bash_server did not return a final message before timeout. 
" + f"stderr: {proc.stderr.read().decode(errors='replace') if proc.stderr else ''}" + ) + finally: + try: + if proc.poll() is None: + proc.kill() + proc.wait(timeout=2) + except Exception: + pass + + return final_message + + +def test_no_tools_simple_echo(tmp_path): + """Bash code that doesn't call any tool should complete with stdout.""" + result = _run_bash_ptc( + code='echo "hello world"', + tools=[], + tool_responder=lambda call: None, + tmp_path=tmp_path, + ) + assert result["type"] == "completed" + assert "hello world" in result["stdout"] + + +def test_single_tool_call_round_trip(tmp_path): + """Bash function calls a tool, prints the JSON result.""" + code = """ +result=$(weather_lookup '{"city":"NYC"}') +echo "got: $result" +""" + calls_seen = [] + + def responder(call): + calls_seen.append(call) + # Return a JSON-serializable value + return {"temp": 72, "condition": "sunny"} + + tools = [{"name": "weather_lookup", "description": "Get weather"}] + result = _run_bash_ptc( + code=code, tools=tools, tool_responder=responder, tmp_path=tmp_path + ) + + assert result["type"] == "completed" + assert len(calls_seen) == 1 + assert calls_seen[0]["name"] == "weather_lookup" + assert calls_seen[0]["input"] == {"city": "NYC"} + # The bash code echoed the JSON result + assert '"temp"' in result["stdout"] + assert '"sunny"' in result["stdout"] + + +def test_multiple_sequential_tool_calls(tmp_path): + """Bash calls two different tools sequentially; both round-trip cleanly.""" + code = """ +a=$(get_temperature '{"city":"NYC"}') +b=$(get_humidity '{"city":"NYC"}') +echo "T=$a" +echo "H=$b" +""" + counter = {"i": 0} + + def responder(call): + counter["i"] += 1 + if call["name"] == "get_temperature": + return 72 + return 50 + + tools = [ + {"name": "get_temperature", "description": "Temp"}, + {"name": "get_humidity", "description": "Humid"}, + ] + result = _run_bash_ptc( + code=code, tools=tools, tool_responder=responder, tmp_path=tmp_path + ) + + assert result["type"] == 
"completed" + assert counter["i"] == 2 + assert "T=72" in result["stdout"] + assert "H=50" in result["stdout"] + + +def test_bash_nonzero_exit_returns_error(tmp_path): + """Bash code with `exit 7` should yield status=error with stderr.""" + code = "echo before; exit 7" + result = _run_bash_ptc( + code=code, tools=[], tool_responder=lambda c: None, tmp_path=tmp_path + ) + assert result["type"] == "error" + assert "exited with code 7" in result["error"] + assert "before" in result["stdout"] + + +def test_invalid_tool_name_not_wrapped(tmp_path): + """Tools whose names aren't valid bash identifiers are silently skipped. + + We assert the bash code can't call them — bash reports 'command not found' + on stderr but the script still completes (exit 127 since the last command + failed). + """ + code = "weird-name '{}'" + tools = [{"name": "weird-name", "description": "Has a hyphen"}] + result = _run_bash_ptc( + code=code, tools=tools, tool_responder=lambda c: None, tmp_path=tmp_path + ) + # Bash exits non-zero because the function isn't defined. + assert result["type"] == "error" + assert ( + "command not found" in result["stderr"].lower() + or "not found" in result["stderr"].lower() + ) diff --git a/tests/unit/test_ptc_name_normalization.py b/tests/unit/test_ptc_name_normalization.py new file mode 100644 index 0000000..a16ebbe --- /dev/null +++ b/tests/unit/test_ptc_name_normalization.py @@ -0,0 +1,78 @@ +"""Unit tests for PTC tool-name normalization. + +Both PTC servers (Python and Bash) accept arbitrary tool names from the host +but must turn them into legal identifiers in the language they expose. The +SDK runs the same normalization on the client when generating user code, so +the two halves must agree exactly. +""" + +import sys +from pathlib import Path + +import pytest + +# The docker/ scripts aren't a package — add the repo root so we can import +# them by module name. 
+_REPO_ROOT = Path(__file__).resolve().parent.parent.parent +sys.path.insert(0, str(_REPO_ROOT)) + + +from docker.ptc_server import _normalize_python_name # noqa: E402 +from docker.ptc_bash_server import _normalize_bash_name # noqa: E402 + + +class TestPythonNameNormalization: + """Python rules: replace [-\\s] with _, strip non-alnum/_, prefix _ for + leading digits, suffix _tool for keywords. Dots are stripped (not + replaced) — they fall through the strip step.""" + + def test_hyphen_replaced(self): + assert _normalize_python_name("my-tool") == "my_tool" + + def test_keyword_suffixed(self): + assert _normalize_python_name("for") == "for_tool" + + def test_leading_digit_prefixed(self): + assert _normalize_python_name("2fast") == "_2fast" + + def test_dot_stripped_not_replaced(self): + # Dots fall through the strip step (they're not in [-\s] and not + # in [a-zA-Z0-9_]) so they vanish entirely. + assert _normalize_python_name("my.tool") == "mytool" + + def test_space_replaced(self): + assert _normalize_python_name("my tool") == "my_tool" + + def test_async_keyword(self): + assert _normalize_python_name("async") == "async_tool" + + def test_already_valid_unchanged(self): + assert _normalize_python_name("good_name") == "good_name" + + +class TestBashNameNormalization: + """Bash rules: replace [-\\s.] with _ (note: dots ARE replaced), strip + non-alnum/_, prefix _ for leading digits, suffix _tool for reserved + words.""" + + def test_hyphen_replaced(self): + assert _normalize_bash_name("my-tool") == "my_tool" + + def test_reserved_suffixed(self): + assert _normalize_bash_name("if") == "if_tool" + + def test_dot_replaced_with_underscore(self): + # Different from Python: dots map to _, not stripped. 
+ assert _normalize_bash_name("my.tool") == "my_tool" + + def test_leading_digit_prefixed(self): + assert _normalize_bash_name("2fast") == "_2fast" + + def test_function_keyword(self): + assert _normalize_bash_name("function") == "function_tool" + + def test_space_replaced(self): + assert _normalize_bash_name("my tool") == "my_tool" + + def test_already_valid_unchanged(self): + assert _normalize_bash_name("good_name") == "good_name" diff --git a/tests/unit/test_request_helpers.py b/tests/unit/test_request_helpers.py new file mode 100644 index 0000000..b633a7f --- /dev/null +++ b/tests/unit/test_request_helpers.py @@ -0,0 +1,120 @@ +"""Unit tests for request helper utilities.""" + +import base64 +from unittest.mock import MagicMock + +import pytest + +from src.utils.request_helpers import extract_api_key, get_client_ip + + +def _make_request(headers: dict) -> MagicMock: + """Build a minimal Request stub with case-insensitive header access.""" + request = MagicMock() + # Lower-case the header dict to mimic Starlette's case-insensitive Headers. 
+ normalized = {k.lower(): v for k, v in headers.items()} + request.headers.get = lambda key, default=None: normalized.get(key.lower(), default) + return request + + +def _basic(token_pair: str) -> str: + """Build a `Basic ` Authorization header value.""" + return "Basic " + base64.b64encode(token_pair.encode()).decode() + + +class TestExtractApiKey: + def test_x_api_key_header_takes_precedence(self): + request = _make_request( + { + "x-api-key": "from-header", + "authorization": _basic("from-basic:"), + } + ) + assert extract_api_key(request) == "from-header" + + def test_x_api_key_header_only(self): + request = _make_request({"x-api-key": "the-key"}) + assert extract_api_key(request) == "the-key" + + def test_basic_auth_username_used_as_key(self): + request = _make_request({"authorization": _basic("the-key:")}) + assert extract_api_key(request) == "the-key" + + def test_basic_auth_username_with_password_prefers_username(self): + request = _make_request({"authorization": _basic("user-key:password")}) + assert extract_api_key(request) == "user-key" + + def test_basic_auth_password_only_falls_back_to_password(self): + request = _make_request({"authorization": _basic(":only-password")}) + assert extract_api_key(request) == "only-password" + + def test_basic_auth_case_insensitive_scheme(self): + request = _make_request( + {"authorization": "basic " + base64.b64encode(b"k:").decode()} + ) + assert extract_api_key(request) == "k" + request = _make_request( + {"authorization": "BASIC " + base64.b64encode(b"k:").decode()} + ) + assert extract_api_key(request) == "k" + + def test_bearer_auth_not_accepted(self): + request = _make_request({"authorization": "Bearer some-token"}) + assert extract_api_key(request) is None + + def test_apikey_auth_not_accepted(self): + request = _make_request({"authorization": "ApiKey some-token"}) + assert extract_api_key(request) is None + + def test_basic_auth_with_invalid_base64_returns_none(self): + request = _make_request({"authorization": 
"Basic !!!not-base64!!!"}) + assert extract_api_key(request) is None + + def test_basic_auth_with_empty_payload_returns_none(self): + request = _make_request( + {"authorization": "Basic " + base64.b64encode(b":").decode()} + ) + assert extract_api_key(request) is None + + def test_basic_auth_with_no_colon_treated_as_username(self): + # `partition(":")` on a string without ":" returns (whole, "", "") + request = _make_request( + {"authorization": "Basic " + base64.b64encode(b"justakey").decode()} + ) + assert extract_api_key(request) == "justakey" + + def test_no_headers_returns_none(self): + request = _make_request({}) + assert extract_api_key(request) is None + + def test_empty_x_api_key_header_falls_through_to_basic(self): + request = _make_request( + { + "x-api-key": "", + "authorization": _basic("from-basic:"), + } + ) + assert extract_api_key(request) == "from-basic" + + def test_basic_auth_unicode_in_key_decodes(self): + request = _make_request({"authorization": _basic("kéy:")}) + assert extract_api_key(request) == "kéy" + + +class TestGetClientIp: + """Light coverage so refactoring the helper module doesn't drop these.""" + + def test_x_forwarded_for_first_ip(self): + request = _make_request({"x-forwarded-for": "1.2.3.4, 5.6.7.8"}) + request.client = None + assert get_client_ip(request) == "1.2.3.4" + + def test_x_real_ip_fallback(self): + request = _make_request({"x-real-ip": "9.10.11.12"}) + request.client = None + assert get_client_ip(request) == "9.10.11.12" + + def test_unknown_when_no_source(self): + request = _make_request({}) + request.client = None + assert get_client_ip(request) == "unknown" diff --git a/tests/unit/test_runner_nested_paths.py b/tests/unit/test_runner_nested_paths.py new file mode 100644 index 0000000..88c376d --- /dev/null +++ b/tests/unit/test_runner_nested_paths.py @@ -0,0 +1,560 @@ +"""Unit tests for ExecutionRunner nested-path handling. 
+ +Covers the two filesystem-touching points where subdirectory structure must +survive the round-trip: + - `_detect_generated_files` (output side: scan /mnt/data recursively) + - `_mount_files_to_sandbox` (input side: create parent dirs before writing) + +Both are exercised against real temporary directories so we don't have to +mock the os.walk / mkdir / chown call graph. +""" + +import asyncio +import os +from pathlib import Path +from types import SimpleNamespace +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + +from src.services.execution.runner import CodeExecutionRunner + + +@pytest.fixture +def runner(): + """Build a runner with the dependencies it needs for these methods stubbed.""" + return CodeExecutionRunner( + sandbox_manager=MagicMock(), + sandbox_pool=None, + ) + + +def _sandbox_info(tmp_path: Path): + """Minimal SandboxInfo-shaped object with a real data_dir.""" + data_dir = tmp_path / "data" + data_dir.mkdir() + return SimpleNamespace( + sandbox_id="test-sandbox-id", + data_dir=data_dir, + repl_mode=False, + # _detect_generated_files reads this to skip unchanged mounted files + # and surface in-place edits. Empty for tests that don't exercise mounts. 
+ mounted_file_stats={}, + ) + + +class TestDetectGeneratedFilesRecursive: + async def test_walks_subdirectories(self, runner, tmp_path): + info = _sandbox_info(tmp_path) + # Top-level + one nested + two-deep nested + (info.data_dir / "top.png").write_bytes(b"x") + (info.data_dir / "charts").mkdir() + (info.data_dir / "charts" / "out.png").write_bytes(b"y") + (info.data_dir / "charts" / "sub").mkdir() + (info.data_dir / "charts" / "sub" / "deep.txt").write_bytes(b"z") + + files = await runner._detect_generated_files(info) + + names = sorted(f["path"] for f in files) + assert names == [ + "/mnt/data/charts/out.png", + "/mnt/data/charts/sub/deep.txt", + "/mnt/data/top.png", + ] + + async def test_skips_hidden_files_and_dirs(self, runner, tmp_path): + info = _sandbox_info(tmp_path) + (info.data_dir / "visible.txt").write_bytes(b"v") + (info.data_dir / ".hidden_file").write_bytes(b"h") + (info.data_dir / ".hidden_dir").mkdir() + (info.data_dir / ".hidden_dir" / "inside.txt").write_bytes(b"i") + + files = await runner._detect_generated_files(info) + + paths = [f["path"] for f in files] + assert "/mnt/data/visible.txt" in paths + assert all(".hidden" not in p for p in paths) + + async def test_skips_code_source_files(self, runner, tmp_path): + info = _sandbox_info(tmp_path) + (info.data_dir / "code.py").write_bytes(b"# code") + (info.data_dir / "Code.java").write_bytes(b"// Code") + (info.data_dir / "actual_output.txt").write_bytes(b"out") + + files = await runner._detect_generated_files(info) + + paths = [f["path"] for f in files] + assert paths == ["/mnt/data/actual_output.txt"] + + async def test_results_sorted_for_stability(self, runner, tmp_path): + info = _sandbox_info(tmp_path) + for name in ["zeta.txt", "alpha.txt", "mid.txt"]: + (info.data_dir / name).write_bytes(b"x") + + files = await runner._detect_generated_files(info) + + paths = [f["path"] for f in files] + assert paths == sorted(paths) + + async def test_oversized_files_excluded(self, runner, tmp_path): + 
info = _sandbox_info(tmp_path) + (info.data_dir / "small.txt").write_bytes(b"x") + + with patch("src.services.execution.runner.settings") as ms: + ms.max_file_size_mb = 0 # cap to 0 bytes -> nothing fits + ms.max_output_files = 50 + files = await runner._detect_generated_files(info) + + assert files == [] + + async def test_max_output_files_applied_after_sort(self, runner, tmp_path): + info = _sandbox_info(tmp_path) + for name in ["c.txt", "a.txt", "b.txt", "d.txt"]: + (info.data_dir / name).write_bytes(b"x") + + with patch("src.services.execution.runner.settings") as ms: + ms.max_file_size_mb = 100 + ms.max_output_files = 2 + files = await runner._detect_generated_files(info) + + # First two after sorting alphabetically + assert [f["path"] for f in files] == ["/mnt/data/a.txt", "/mnt/data/b.txt"] + + async def test_skips_node_modules(self, runner, tmp_path): + """A user file at the top level should be detected; the entire + node_modules tree (which can contain tens of thousands of files + from one `npm install`) should be ignored entirely.""" + info = _sandbox_info(tmp_path) + (info.data_dir / "user_output.png").write_bytes(b"x") + nm = info.data_dir / "node_modules" + nm.mkdir() + (nm / "package1").mkdir() + (nm / "package1" / "index.js").write_bytes(b"// pkg") + (nm / "package1" / "README.md").write_bytes(b"# readme") + (nm / "package2").mkdir() + (nm / "package2" / "index.js").write_bytes(b"// pkg2") + + files = await runner._detect_generated_files(info) + paths = [f["path"] for f in files] + + assert "/mnt/data/user_output.png" in paths + assert all("node_modules" not in p for p in paths), paths + + async def test_skips_pycache_and_other_dep_dirs(self, runner, tmp_path): + info = _sandbox_info(tmp_path) + (info.data_dir / "report.csv").write_bytes(b"data") + for skip in ("__pycache__", ".venv", "target", "dist", "build"): + d = info.data_dir / skip + d.mkdir() + (d / "junk.bin").write_bytes(b"x" * 100) + + files = await runner._detect_generated_files(info) + 
paths = [f["path"] for f in files] + + assert paths == ["/mnt/data/report.csv"] + + async def test_includes_user_subdirs_that_arent_dep_caches(self, runner, tmp_path): + """Don't over-exclude — `charts/`, `data/`, etc. are user content.""" + info = _sandbox_info(tmp_path) + (info.data_dir / "charts").mkdir() + (info.data_dir / "charts" / "out.png").write_bytes(b"png") + (info.data_dir / "data").mkdir() + (info.data_dir / "data" / "rows.csv").write_bytes(b"csv") + + files = await runner._detect_generated_files(info) + paths = sorted(f["path"] for f in files) + + assert paths == [ + "/mnt/data/charts/out.png", + "/mnt/data/data/rows.csv", + ] + + +class TestDetectGeneratedFilesInPlaceEdits: + """The mtime/size snapshot stored in `sandbox_info.mounted_file_stats` + drives whether a mounted file gets surfaced as a generated file. This + is the iteration-killer fix: edits to mounted scripts must produce a + new file_id so LibreChat tracks the edit on its next call.""" + + async def test_unchanged_mounted_file_is_skipped(self, runner, tmp_path): + info = _sandbox_info(tmp_path) + f = info.data_dir / "demo_deck.js" + f.write_bytes(b"// v1 content\n") + st = os.stat(f) + info.mounted_file_stats = { + "demo_deck.js": ( + st.st_mtime_ns, + st.st_size, + "file_id_123", + "session_123", + None, + ) + } + + files = await runner._detect_generated_files(info) + paths = [f["path"] for f in files] + + # No edit happened -> the mounted file comes back as "inherited" + # rather than as a regenerated artifact, so the path will appear + # but with inherited=True. Confirm it is marked inherited. 
+ assert paths == ["/mnt/data/demo_deck.js"] + assert files[0]["inherited"] is True + assert files[0]["original_file_id"] == "file_id_123" + assert files[0]["original_session_id"] == "session_123" + + async def test_edited_mounted_file_is_surfaced(self, runner, tmp_path): + info = _sandbox_info(tmp_path) + f = info.data_dir / "demo_deck.js" + f.write_bytes(b"// v1 content\n") + st = os.stat(f) + info.mounted_file_stats = { + "demo_deck.js": ( + st.st_mtime_ns, + st.st_size, + "file_id_123", + "session_123", + None, + ) + } + + # Simulate user code editing the file in place. Touching mtime is + # enough since size also changes here, but we'd want to detect either. + import time + + time.sleep(0.01) # ensure mtime_ns advances on coarse-grained FS + f.write_bytes(b"// v2 content with extra bytes\n") + + files = await runner._detect_generated_files(info) + paths = [f["path"] for f in files] + + # Edited mounted file is now surfaced as a generated file. + # Orchestrator will create a new file_id for it. + assert paths == ["/mnt/data/demo_deck.js"] + + async def test_size_change_is_detected_even_if_mtime_unchanged( + self, runner, tmp_path + ): + """Defensive: if mtime is somehow preserved but size differs, + treat as edited.""" + info = _sandbox_info(tmp_path) + f = info.data_dir / "report.csv" + f.write_bytes(b"col1\n") + st = os.stat(f) + # Pretend the prior snapshot had a different size at the same mtime. + info.mounted_file_stats = { + "report.csv": ( + st.st_mtime_ns, + st.st_size + 100, + "file_id_456", + "session_456", + None, + ) + } + + files = await runner._detect_generated_files(info) + paths = [f["path"] for f in files] + + assert paths == ["/mnt/data/report.csv"] + + async def test_nested_mounted_file_edit_is_surfaced(self, runner, tmp_path): + """Mounted file at a nested path (e.g. 
skills/foo/SKILL.md) — edit + detection must work whether the snapshot key is the rel path or the + basename.""" + info = _sandbox_info(tmp_path) + sub = info.data_dir / "skills" / "weather" + sub.mkdir(parents=True) + f = sub / "SKILL.md" + f.write_bytes(b"# v1\n") + st = os.stat(f) + stat_tuple = ( + st.st_mtime_ns, + st.st_size, + "file_id_789", + "session_789", + None, + ) + info.mounted_file_stats = { + "skills/weather/SKILL.md": stat_tuple, + "SKILL.md": stat_tuple, + } + + # No change: surfaced as inherited (not skipped under the new + # inherited-passthrough behavior). + unchanged = await runner._detect_generated_files(info) + assert len(unchanged) == 1 + assert unchanged[0]["inherited"] is True + assert unchanged[0]["path"] == "/mnt/data/skills/weather/SKILL.md" + + # Edit: surfaced. + import time + + time.sleep(0.01) + f.write_bytes(b"# v2 content edited\n") + files = await runner._detect_generated_files(info) + paths = [f["path"] for f in files] + assert paths == ["/mnt/data/skills/weather/SKILL.md"] + + async def test_new_file_alongside_unchanged_mount(self, runner, tmp_path): + """A truly-new file is detected even when an unchanged mount sits + next to it.""" + info = _sandbox_info(tmp_path) + existing = info.data_dir / "input.csv" + existing.write_bytes(b"data") + st = os.stat(existing) + info.mounted_file_stats = { + "input.csv": ( + st.st_mtime_ns, + st.st_size, + "input_file_id", + "input_session", + None, + ) + } + + # User code generates a new artifact. + (info.data_dir / "output.png").write_bytes(b"png") + + files = await runner._detect_generated_files(info) + # Inherited (input.csv) + new (output.png). 
+ by_path = {f["path"]: f for f in files} + assert set(by_path.keys()) == { + "/mnt/data/input.csv", + "/mnt/data/output.png", + } + assert by_path["/mnt/data/input.csv"]["inherited"] is True + assert "inherited" not in by_path["/mnt/data/output.png"] + + +class TestMountFilesNestedPaths: + """The mount path is harder to fully exercise because it pulls bytes from + S3. We patch FileService.stream_file_to_path and just confirm that + parent directories are created at the right nested location.""" + + async def test_nested_filename_creates_parent_dirs(self, runner, tmp_path): + info = _sandbox_info(tmp_path) + + # Avoid os.chown (would need root); patch the perm helpers to no-op. + async def _fake_stream(session_id, file_id, dest_path): + Path(dest_path).write_bytes(b"hello") + return True + + with patch("src.services.file.FileService") as MockFS, patch( + "src.services.execution.runner.os.chown" + ), patch("src.services.execution.runner.os.chmod"): + instance = MockFS.return_value + instance.stream_file_to_path = AsyncMock(side_effect=_fake_stream) + + files = [ + { + "filename": "skills/foo/SKILL.md", + "file_id": "fid-1", + "session_id": "sid-1", + "size": 10, + } + ] + await runner._mount_files_to_sandbox(info, files, language="py") + + landed = info.data_dir / "skills" / "foo" / "SKILL.md" + assert landed.is_file() + assert landed.read_bytes() == b"hello" + + async def test_top_level_filename_unchanged(self, runner, tmp_path): + info = _sandbox_info(tmp_path) + + async def _fake_stream(session_id, file_id, dest_path): + Path(dest_path).write_bytes(b"data") + return True + + with patch("src.services.file.FileService") as MockFS, patch( + "src.services.execution.runner.os.chown" + ), patch("src.services.execution.runner.os.chmod"): + instance = MockFS.return_value + instance.stream_file_to_path = AsyncMock(side_effect=_fake_stream) + + files = [ + { + "filename": "data.csv", + "file_id": "fid", + "session_id": "sid", + "size": 4, + } + ] + await 
runner._mount_files_to_sandbox(info, files, language="py") + + landed = info.data_dir / "data.csv" + assert landed.is_file() + + +class TestDetectGeneratedFilesInheritance: + """Direct coverage of the inherited / modified_from / new branches in + _detect_generated_files. These determine what the orchestrator hands back + to clients on the response.""" + + async def test_unchanged_mount_emits_inherited_with_lineage(self, runner, tmp_path): + info = _sandbox_info(tmp_path) + f = info.data_dir / "data.csv" + f.write_bytes(b"col1\n1\n") + st = os.stat(f) + info.mounted_file_stats = { + "data.csv": ( + st.st_mtime_ns, + st.st_size, + "orig-fid", + "orig-sess", + "agent-xyz", + ) + } + + files = await runner._detect_generated_files(info) + assert len(files) == 1 + info_ = files[0] + assert info_["inherited"] is True + assert info_["original_file_id"] == "orig-fid" + assert info_["original_session_id"] == "orig-sess" + assert info_["original_entity_id"] == "agent-xyz" + + async def test_edited_mount_emits_modified_from(self, runner, tmp_path): + import time + + info = _sandbox_info(tmp_path) + f = info.data_dir / "report.csv" + f.write_bytes(b"v1\n") + st = os.stat(f) + info.mounted_file_stats = { + "report.csv": ( + st.st_mtime_ns, + st.st_size, + "orig-fid", + "orig-sess", + None, + ) + } + + time.sleep(0.01) + f.write_bytes(b"v2 content extended\n") + + files = await runner._detect_generated_files(info) + assert len(files) == 1 + info_ = files[0] + assert info_.get("inherited") is None + assert info_["modified_from_id"] == "orig-fid" + assert info_["modified_from_session_id"] == "orig-sess" + + async def test_new_unmounted_file_has_no_lineage(self, runner, tmp_path): + info = _sandbox_info(tmp_path) + (info.data_dir / "fresh.png").write_bytes(b"png-bytes") + + files = await runner._detect_generated_files(info) + assert len(files) == 1 + info_ = files[0] + assert "inherited" not in info_ + assert "modified_from_id" not in info_ + assert "original_file_id" not in info_ + 
+ async def test_inherited_files_not_counted_against_budget( + self, runner, tmp_path, monkeypatch + ): + """Inherited files bypass `max_output_files`. Force the budget to 1 + and confirm that an inherited file plus a generated file both come + back.""" + from src.services.execution import runner as runner_mod + + monkeypatch.setattr(runner_mod.settings, "max_output_files", 1) + + info = _sandbox_info(tmp_path) + mounted = info.data_dir / "input.csv" + mounted.write_bytes(b"col1\n") + st = os.stat(mounted) + info.mounted_file_stats = { + "input.csv": ( + st.st_mtime_ns, + st.st_size, + "orig-fid", + "orig-sess", + None, + ) + } + # Two new generated files; budget=1 should clip the generated set + # to one but the inherited file must still come back. + (info.data_dir / "out_a.png").write_bytes(b"a") + (info.data_dir / "out_b.png").write_bytes(b"b") + + files = await runner._detect_generated_files(info) + inherited = [f for f in files if f.get("inherited")] + non_inherited = [f for f in files if not f.get("inherited")] + assert len(inherited) == 1 + assert inherited[0]["path"] == "/mnt/data/input.csv" + assert len(non_inherited) == 1 + + +class TestSetFilePerms: + """`_set_file_perms` is a closure inside `_mount_files_to_sandbox`. 
We + don't have direct access to it, so we exercise the chmod choice through + the public mount path with `read_only` toggled.""" + + async def test_read_only_false_sets_644(self, runner, tmp_path): + from unittest.mock import MagicMock as _MM + + info = _sandbox_info(tmp_path) + chmod_calls = [] + + def _capture_chmod(path, mode): + chmod_calls.append((path, mode)) + + async def _fake_stream(session_id, file_id, dest_path): + Path(dest_path).write_bytes(b"data") + return True + + with patch("src.services.file.FileService") as MockFS, patch( + "src.services.execution.runner.os.chown" + ), patch("src.services.execution.runner.os.chmod", side_effect=_capture_chmod): + MockFS.return_value.stream_file_to_path = AsyncMock( + side_effect=_fake_stream + ) + files = [ + { + "filename": "data.csv", + "file_id": "fid", + "session_id": "sid", + "size": 4, + "is_read_only": False, + } + ] + await runner._mount_files_to_sandbox(info, files, language="py") + + # The data file gets chmod 0o644 in the writable case. 
+ modes = [mode for path, mode in chmod_calls if path.endswith("data.csv")] + assert 0o644 in modes + assert 0o444 not in modes + + async def test_read_only_true_sets_444(self, runner, tmp_path): + info = _sandbox_info(tmp_path) + chmod_calls = [] + + def _capture_chmod(path, mode): + chmod_calls.append((path, mode)) + + async def _fake_stream(session_id, file_id, dest_path): + Path(dest_path).write_bytes(b"data") + return True + + with patch("src.services.file.FileService") as MockFS, patch( + "src.services.execution.runner.os.chown" + ), patch("src.services.execution.runner.os.chmod", side_effect=_capture_chmod): + MockFS.return_value.stream_file_to_path = AsyncMock( + side_effect=_fake_stream + ) + files = [ + { + "filename": "data.csv", + "file_id": "fid", + "session_id": "sid", + "size": 4, + "is_read_only": True, + } + ] + await runner._mount_files_to_sandbox(info, files, language="py") + + modes = [mode for path, mode in chmod_calls if path.endswith("data.csv")] + assert 0o444 in modes + assert 0o644 not in modes diff --git a/tests/unit/test_sandbox_executor.py b/tests/unit/test_sandbox_executor.py index cabe9f1..0011951 100644 --- a/tests/unit/test_sandbox_executor.py +++ b/tests/unit/test_sandbox_executor.py @@ -100,10 +100,22 @@ def test_fortran_env(self): assert env["FC"] == "gfortran" def test_unknown_language_has_base_env(self): - """Test unknown language gets base env only.""" - config = NsjailConfig() - executor = SandboxExecutor(config) - env = executor._build_sanitized_env("unknown") + """Test unknown language gets base env only. + + Note: when ENABLE_SANDBOX_NETWORK=true the network section adds + HTTPS_PROXY + per-language install paths regardless of `language` + (so bash skills can pip/npm install). Disable that toggle here so + the test pins the original "unknown lang -> base env only" intent. 
+ """ + from unittest.mock import patch + + with patch("src.services.sandbox.executor.settings") as ms: + ms.enable_sandbox_network = False + ms.skill_deps_path = "/opt/skill-deps" + config = NsjailConfig() + executor = SandboxExecutor(config) + env = executor._build_sanitized_env("unknown") + assert "PATH" in env assert "HOME" in env assert "TMPDIR" in env diff --git a/tests/unit/test_sandbox_manager.py b/tests/unit/test_sandbox_manager.py index 1be081f..54831c8 100644 --- a/tests/unit/test_sandbox_manager.py +++ b/tests/unit/test_sandbox_manager.py @@ -65,7 +65,9 @@ def test_get_initialization_error_from_init(self): manager._executor = MagicMock() manager._base_dir = Path("/tmp/test-sandboxes") manager._initialization_error = "Failed to create directory" - assert manager.get_initialization_error() == "Failed to create directory" + assert ( + manager.get_initialization_error() == "Failed to create directory" + ) class TestSandboxLifecycle: @@ -73,8 +75,7 @@ class TestSandboxLifecycle: def test_create_sandbox_creates_directory(self, tmp_path): """Test create_sandbox creates the data directory.""" - with patch("shutil.which", return_value="/usr/bin/nsjail"), \ - patch("os.chown"): + with patch("shutil.which", return_value="/usr/bin/nsjail"), patch("os.chown"): with patch.object(SandboxManager, "__init__", lambda self: None): manager = SandboxManager() manager._nsjail_config = MagicMock() @@ -90,8 +91,7 @@ def test_create_sandbox_creates_directory(self, tmp_path): def test_create_sandbox_sets_repl_mode(self, tmp_path): """Test create_sandbox sets repl_mode correctly.""" - with patch("shutil.which", return_value="/usr/bin/nsjail"), \ - patch("os.chown"): + with patch("shutil.which", return_value="/usr/bin/nsjail"), patch("os.chown"): with patch.object(SandboxManager, "__init__", lambda self: None): manager = SandboxManager() manager._nsjail_config = MagicMock() @@ -105,8 +105,7 @@ def test_create_sandbox_sets_repl_mode(self, tmp_path): def 
test_create_sandbox_sets_labels(self, tmp_path): """Test create_sandbox sets appropriate labels.""" - with patch("shutil.which", return_value="/usr/bin/nsjail"), \ - patch("os.chown"): + with patch("shutil.which", return_value="/usr/bin/nsjail"), patch("os.chown"): with patch.object(SandboxManager, "__init__", lambda self: None): manager = SandboxManager() manager._nsjail_config = MagicMock() @@ -122,8 +121,7 @@ def test_create_sandbox_sets_labels(self, tmp_path): def test_create_sandbox_generates_unique_ids(self, tmp_path): """Test create_sandbox generates unique sandbox IDs.""" - with patch("shutil.which", return_value="/usr/bin/nsjail"), \ - patch("os.chown"): + with patch("shutil.which", return_value="/usr/bin/nsjail"), patch("os.chown"): with patch.object(SandboxManager, "__init__", lambda self: None): manager = SandboxManager() manager._nsjail_config = MagicMock() @@ -138,8 +136,7 @@ def test_create_sandbox_generates_unique_ids(self, tmp_path): def test_destroy_sandbox_removes_directory(self, tmp_path): """Test destroy_sandbox removes the sandbox directory.""" - with patch("shutil.which", return_value="/usr/bin/nsjail"), \ - patch("os.chown"): + with patch("shutil.which", return_value="/usr/bin/nsjail"), patch("os.chown"): with patch.object(SandboxManager, "__init__", lambda self: None): manager = SandboxManager() manager._nsjail_config = MagicMock() @@ -156,8 +153,7 @@ def test_destroy_sandbox_removes_directory(self, tmp_path): def test_destroy_sandbox_nonexistent_returns_true(self, tmp_path): """Test destroying a non-existent sandbox returns True.""" - with patch("shutil.which", return_value="/usr/bin/nsjail"), \ - patch("os.chown"): + with patch("shutil.which", return_value="/usr/bin/nsjail"), patch("os.chown"): with patch.object(SandboxManager, "__init__", lambda self: None): manager = SandboxManager() manager._nsjail_config = MagicMock() @@ -186,9 +182,9 @@ class TestFileOperations: def test_copy_content_to_sandbox(self, tmp_path): """Test writing 
content to a sandbox.""" - with patch("shutil.which", return_value="/usr/bin/nsjail"), \ - patch("os.chown"), \ - patch("os.chmod"): + with patch("shutil.which", return_value="/usr/bin/nsjail"), patch( + "os.chown" + ), patch("os.chmod"): with patch.object(SandboxManager, "__init__", lambda self: None): manager = SandboxManager() manager._nsjail_config = MagicMock() @@ -203,11 +199,11 @@ def test_copy_content_to_sandbox(self, tmp_path): assert result is True assert (info.data_dir / "test.txt").read_bytes() == b"hello world" - def test_copy_content_extracts_filename(self, tmp_path): - """Test that copy extracts filename from full path.""" - with patch("shutil.which", return_value="/usr/bin/nsjail"), \ - patch("os.chown"), \ - patch("os.chmod"): + def test_copy_content_preserves_subdirectories(self, tmp_path): + """Subdirectories under /mnt/data/ are preserved (LibreChat skill bundles).""" + with patch("shutil.which", return_value="/usr/bin/nsjail"), patch( + "os.chown" + ), patch("os.chmod"): with patch.object(SandboxManager, "__init__", lambda self: None): manager = SandboxManager() manager._nsjail_config = MagicMock() @@ -220,14 +216,14 @@ def test_copy_content_extracts_filename(self, tmp_path): info, b"data", "/mnt/data/subdir/file.txt", "py" ) assert result is True - # Should extract just the filename - assert (info.data_dir / "file.txt").read_bytes() == b"data" + # Subdirectory is preserved; parent dir is created automatically. 
+ assert (info.data_dir / "subdir" / "file.txt").read_bytes() == b"data" def test_get_file_content_from_sandbox(self, tmp_path): """Test reading content from a sandbox.""" - with patch("shutil.which", return_value="/usr/bin/nsjail"), \ - patch("os.chown"), \ - patch("os.chmod"): + with patch("shutil.which", return_value="/usr/bin/nsjail"), patch( + "os.chown" + ), patch("os.chmod"): with patch.object(SandboxManager, "__init__", lambda self: None): manager = SandboxManager() manager._nsjail_config = MagicMock() @@ -244,8 +240,7 @@ def test_get_file_content_from_sandbox(self, tmp_path): def test_get_file_content_not_found(self, tmp_path): """Test reading non-existent file returns None.""" - with patch("shutil.which", return_value="/usr/bin/nsjail"), \ - patch("os.chown"): + with patch("shutil.which", return_value="/usr/bin/nsjail"), patch("os.chown"): with patch.object(SandboxManager, "__init__", lambda self: None): manager = SandboxManager() manager._nsjail_config = MagicMock() @@ -261,8 +256,7 @@ def test_get_file_content_not_found(self, tmp_path): def test_get_file_content_mnt_data_prefix(self, tmp_path): """Test reading file with /mnt/data/ prefix.""" - with patch("shutil.which", return_value="/usr/bin/nsjail"), \ - patch("os.chown"): + with patch("shutil.which", return_value="/usr/bin/nsjail"), patch("os.chown"): with patch.object(SandboxManager, "__init__", lambda self: None): manager = SandboxManager() manager._nsjail_config = MagicMock() diff --git a/tests/unit/test_upload_read_only.py b/tests/unit/test_upload_read_only.py new file mode 100644 index 0000000..d044ca5 --- /dev/null +++ b/tests/unit/test_upload_read_only.py @@ -0,0 +1,117 @@ +"""Unit tests for `read_only` form-field handling on /upload/batch. + +LibreChat sends `read_only=true` on skill-prime batch uploads. 
The endpoint +extracts it inline (`src/api/files.py:253-258`), passes it through to +`FileService.store_uploaded_file`, and the service stores it in Redis +metadata as `is_read_only="1"` (or `"0"`). The orchestrator later reads +that key when building the mounted-file dict. + +We don't spin up the API; instead we exercise: + - the same `read_only_raw` parsing expression with the inputs the API + passes through (string, missing, casing variants); and + - the service-side metadata write so the round-trip matches what the + runner / orchestrator expects. +""" + +from io import BytesIO +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + + +def _parse_read_only(form_value): + """Mirror the inline expression at `src/api/files.py:253-258`.""" + return isinstance(form_value, str) and form_value.lower() in ("1", "true", "yes") + + +class TestReadOnlyFormParsing: + """The endpoint accepts the form value as-is from `form.get("read_only")`, + so the parser must handle bare strings, missing values, and case.""" + + def test_read_only_true_string(self): + assert _parse_read_only("true") is True + + def test_read_only_true_uppercase(self): + assert _parse_read_only("TRUE") is True + + def test_read_only_one(self): + assert _parse_read_only("1") is True + + def test_read_only_yes(self): + assert _parse_read_only("yes") is True + + def test_read_only_false_string(self): + assert _parse_read_only("false") is False + + def test_read_only_zero(self): + assert _parse_read_only("0") is False + + def test_read_only_missing_returns_false(self): + # form.get() returns None when the field isn't present. 
+ assert _parse_read_only(None) is False + + def test_read_only_arbitrary_string(self): + assert _parse_read_only("maybe") is False + + +class TestStoreUploadedFileReadOnlyMetadata: + """`FileService.store_uploaded_file` is the boundary between the API + parsing and Redis storage — assert the metadata write reflects + `is_read_only`.""" + + @pytest.fixture + def file_service(self): + from src.services.file import FileService + + svc = FileService() + svc._ensure_bucket_exists = AsyncMock() + svc.s3_client = MagicMock() + svc.s3_client.put_object = MagicMock() + svc._store_file_metadata = AsyncMock() + return svc + + @pytest.mark.asyncio + async def test_read_only_true_stored_as_1(self, file_service): + await file_service.store_uploaded_file( + session_id="s", + filename="data.csv", + content=b"x", + is_read_only=True, + ) + meta = file_service._store_file_metadata.call_args.args[2] + assert meta["is_read_only"] == "1" + + @pytest.mark.asyncio + async def test_read_only_false_stored_as_0(self, file_service): + await file_service.store_uploaded_file( + session_id="s", + filename="data.csv", + content=b"x", + is_read_only=False, + ) + meta = file_service._store_file_metadata.call_args.args[2] + assert meta["is_read_only"] == "0" + + @pytest.mark.asyncio + async def test_read_only_default_is_false(self, file_service): + # No `is_read_only` passed — defaults to False unless is_agent_file. 
+ await file_service.store_uploaded_file( + session_id="s", + filename="data.csv", + content=b"x", + ) + meta = file_service._store_file_metadata.call_args.args[2] + assert meta["is_read_only"] == "0" + + @pytest.mark.asyncio + async def test_agent_file_implies_read_only(self, file_service): + """`is_agent_file=True` (skill prime) implies read-only even when + `is_read_only` isn't passed explicitly.""" + await file_service.store_uploaded_file( + session_id="s", + filename="SKILL.md", + content=b"# skill", + is_agent_file=True, + ) + meta = file_service._store_file_metadata.call_args.args[2] + assert meta["is_read_only"] == "1"