From 64f285f57ca14c26419173d6d52fa099ccddaca1 Mon Sep 17 00:00:00 2001 From: nprodromou <73134621+nprodromou@users.noreply.github.com> Date: Wed, 6 May 2026 22:39:51 -0700 Subject: [PATCH 1/3] refactor: multi-agent build (codex|claude) + auto-resume + agent-config sync MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Genericizes the image so the same Dockerfile produces both the codex variant and a new claude variant, each with its own GHCR tag. Dockerfile - ARG AGENT=codex|claude validated at build time - Per-agent npm install (@openai/codex vs @anthropic-ai/claude-code) - Non-root user named after AGENT (uid/gid 1000), HOME=/home/${AGENT} - npm upgraded to latest after Node install — NodeSource lags Entrypoint - Per-agent env-var contract: codex requires GH_TOKEN + optional CODEX_SESSION (seeds auth.json); claude requires only GH_TOKEN (interactive /login on first connect, persists on PVC) - Auto-resume on connect: `codex resume --last` / `claude --continue`, falling through to a fresh agent run if no session exists, then to bash if the agent exits - Pulls nprodromou/agent-config and symlinks instructions/CLAUDE.md into the agent's expected path: ~/.codex/AGENTS.md or ~/.claude/CLAUDE.md. Pulls fresh on every restart, no image rebuild needed for instruction updates. Workflow - Build matrix on agent: [codex, claude] - Per-agent tags only (no shared :latest): codex-latest, claude-latest, sha-XXXXX-codex, sha-XXXXX-claude, etc. Fresh tag namespace avoids the kubelet/containerd cache wedge that froze :latest on the older bubblewrap-less image. - Independent gha cache scopes per agent (cache-from/cache-to) Apk8s manifests still need updating to point at codex-latest (instead of :latest) and to add a parallel claude-cli app — separate follow-up PR. 
--- .github/workflows/build.yml | 26 +++++-- Dockerfile | 70 ++++++++++------- bin/entrypoint.sh | 148 ++++++++++++++++++++++++------------ 3 files changed, 158 insertions(+), 86 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 2063c9a..9cc988e 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -14,6 +14,12 @@ env: jobs: build: runs-on: ubuntu-latest + strategy: + # Don't cancel the other agent on a single failure — better to know + # which one broke and which one shipped. + fail-fast: false + matrix: + agent: [codex, claude] permissions: contents: read packages: write @@ -36,13 +42,15 @@ jobs: uses: docker/metadata-action@v5 with: images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} + flavor: | + latest=false tags: | - type=ref,event=branch - type=ref,event=pr - type=semver,pattern={{version}} - type=semver,pattern={{major}}.{{minor}} - type=sha,prefix=sha-,format=short - type=raw,value=latest,enable={{is_default_branch}} + type=raw,value=${{ matrix.agent }}-latest,enable=${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }} + type=ref,event=branch,suffix=-${{ matrix.agent }} + type=ref,event=pr,suffix=-${{ matrix.agent }} + type=sha,prefix=sha-,suffix=-${{ matrix.agent }},format=short + type=semver,pattern={{version}},suffix=-${{ matrix.agent }} + type=semver,pattern={{major}}.{{minor}},suffix=-${{ matrix.agent }} - name: Build and push uses: docker/build-push-action@v6 @@ -50,7 +58,9 @@ jobs: context: . 
platforms: linux/amd64 push: ${{ github.event_name != 'pull_request' }} + build-args: | + AGENT=${{ matrix.agent }} tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} - cache-from: type=gha - cache-to: type=gha,mode=max + cache-from: type=gha,scope=${{ matrix.agent }} + cache-to: type=gha,mode=max,scope=${{ matrix.agent }} diff --git a/Dockerfile b/Dockerfile index 815e303..71d0bfa 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,10 +1,13 @@ -# Codex CLI agent image — single-instance codex-cli runtime for apk8s. +# Multi-agent browser shell image — single-instance ttyd-fronted +# terminal that auto-launches an LLM coding agent. Built once per agent +# via the AGENT build arg (codex|claude). Each agent variant gets its +# own image tag (codex-latest, claude-latest) and is deployed as a +# separate pod in apk8s under kubernetes/apps/agents/${AGENT}-cli. # -# Runs ttyd → bash with codex-cli on PATH (codex-cli in the browser). # Identity is provided at runtime via env vars sourced from a k8s Secret # backed by 1Password (deploy vault, typically `Kubernetes`). gh + git -# are configured by the entrypoint script so commits/PRs from inside the -# pod attribute to codex-prodromou. +# are configured by the entrypoint so commits/PRs from inside the pod +# attribute to ${AGENT}-prodromou. # # code-server (VS Code in browser) is intentionally NOT installed here — # that is a separate concern tracked by WOVED-35. @@ -12,16 +15,23 @@ FROM debian:bookworm-slim ARG NODE_VERSION=22 +ARG AGENT=codex -ENV DEBIAN_FRONTEND=noninteractive \ +# Validate AGENT early so an unsupported value fails the build cleanly.
+RUN case "$AGENT" in codex|claude) ;; \ + *) echo "Unsupported AGENT: $AGENT (expected codex|claude)" >&2; exit 1 ;; \ + esac + +ENV AGENT=${AGENT} \ + DEBIAN_FRONTEND=noninteractive \ LANG=C.UTF-8 \ LC_ALL=C.UTF-8 \ TZ=America/Los_Angeles \ - HOME=/home/codex \ - PATH=/home/codex/.local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin + HOME=/home/${AGENT} \ + PATH=/home/${AGENT}/.local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin -# System deps + gh + tmux + standard CLI utilities. ttyd is fetched -# separately below — Debian Bookworm doesn't carry it. +# System deps + gh + tmux + Node + bubblewrap + standard CLI utilities. +# ttyd is fetched separately below — Debian Bookworm doesn't carry it. RUN set -eux; \ apt-get update; \ apt-get install -y --no-install-recommends \ @@ -30,11 +40,11 @@ RUN set -eux; \ build-essential python3 python3-pip \ bubblewrap \ passwd; \ - # Node.js from NodeSource (pinned major version). The previous - # node:*-bookworm-slim base shipped a phantom uid/gid 1000 user that - # collided with the codex user we add below. + # Node.js from NodeSource (pinned major version). Then upgrade + # npm to latest — NodeSource lags behind upstream by months. curl -fsSL "https://deb.nodesource.com/setup_${NODE_VERSION}.x" | bash -; \ apt-get install -y --no-install-recommends nodejs; \ + npm install -g npm@latest; \ # GitHub CLI from official apt repo. curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg \ | gpg --dearmor -o /usr/share/keyrings/githubcli-archive-keyring.gpg; \ @@ -45,10 +55,7 @@ RUN set -eux; \ apt-get install -y --no-install-recommends gh; \ rm -rf /var/lib/apt/lists/* -# ttyd — fetch the upstream static binary release. Debian Bookworm -# doesn't ship a ttyd package, and building from source pulls in -# libwebsockets + cmake + a long toolchain. The upstream releases -# publish per-arch static binaries that we drop into /usr/local/bin. 
+# ttyd — fetch the upstream static binary release. ARG TTYD_VERSION=1.7.7 RUN set -eux; \ arch="$(dpkg --print-architecture)"; \ @@ -63,24 +70,29 @@ RUN set -eux; \ chmod +x /usr/local/bin/ttyd; \ ttyd --version -# Codex CLI (OpenAI's terminal coding agent). -RUN npm install -g @openai/codex && npm cache clean --force +# Per-agent CLI install. Both are npm packages; the global install puts +# `codex` or `claude` on PATH for the non-root user. +RUN case "$AGENT" in \ + codex) npm install -g @openai/codex ;; \ + claude) npm install -g @anthropic-ai/claude-code ;; \ + esac && npm cache clean --force -# Non-root user. Same uid/gid as bjw-s defaults so PVCs work cleanly. -RUN groupadd -g 1000 codex \ - && useradd -m -u 1000 -g 1000 -s /bin/bash codex \ - && mkdir -p /home/codex/.config /home/codex/workspace \ - && chown -R codex:codex /home/codex +# Non-root user. uid/gid 1000, name = AGENT. Matching the AGENT name to +# the user keeps PVC ownership obvious and avoids shell prompts that +# lie about which agent is running. +RUN groupadd -g 1000 ${AGENT} \ + && useradd -m -u 1000 -g 1000 -s /bin/bash ${AGENT} \ + && mkdir -p /home/${AGENT}/.config /home/${AGENT}/workspace \ + && chown -R ${AGENT}:${AGENT} /home/${AGENT} # Entrypoint + bash profile. -COPY --chmod=0755 bin/entrypoint.sh /usr/local/bin/entrypoint.sh -COPY --chown=codex:codex profile/.bashrc /home/codex/.bashrc -COPY --chown=codex:codex profile/.tmux.conf /home/codex/.tmux.conf +COPY --chmod=0755 bin/entrypoint.sh /usr/local/bin/entrypoint.sh +COPY --chown=${AGENT}:${AGENT} profile/.bashrc /home/${AGENT}/.bashrc +COPY --chown=${AGENT}:${AGENT} profile/.tmux.conf /home/${AGENT}/.tmux.conf -USER codex -WORKDIR /home/codex/workspace +USER ${AGENT} +WORKDIR /home/${AGENT}/workspace EXPOSE 7681 -# tini reaps zombies; entrypoint sets up identity then exec's ttyd. 
ENTRYPOINT ["/usr/bin/tini", "--", "/usr/local/bin/entrypoint.sh"] diff --git a/bin/entrypoint.sh b/bin/entrypoint.sh index 2b9d99f..fda867b 100644 --- a/bin/entrypoint.sh +++ b/bin/entrypoint.sh @@ -1,75 +1,125 @@ #!/usr/bin/env bash -# Codex CLI pod entrypoint. +# Multi-agent shell pod entrypoint. # 1. Wires gh + git identity from env (mounted by k8s Secret from 1Password). -# 2. Exposes the configured codex-cli shell over ttyd on port 7681. +# 2. Bootstraps per-agent config + auth. +# 3. Pulls nprodromou/agent-config and symlinks the shared instructions +# file into the agent's expected path (~/.codex/AGENTS.md or +# ~/.claude/CLAUDE.md), so every fresh pod starts with the canonical +# Nate-org instructions in place. +# 4. Exposes the configured agent shell over ttyd on port 7681, +# auto-resuming the last session (or starting a fresh one). set -euo pipefail -: "${GH_TOKEN:?GH_TOKEN must be set (1Password: op://Kubernetes/codex-github-pat/pat)}" -: "${GIT_USER_NAME:=Codex CoWork}" -: "${GIT_USER_EMAIL:=codex@prodromou.com}" +# AGENT is set by the Dockerfile based on the build-time AGENT arg. +: "${AGENT:?AGENT must be set by the image (build bug if missing)}" -# Codex CLI config — sourced from a k8s ConfigMap mounted at -# /etc/codex-config/. The ConfigMap (managed in the apk8s repo) is the -# source of truth; we copy its contents into ~/.codex/ on every boot, -# overwriting any in-pod edits. To add MCPs or tweak config, edit the -# ConfigMap and push — Stakater Reloader will restart this pod. -if [ -d /etc/codex-config ]; then - mkdir -p "${HOME}/.codex" - # cp -L follows symlinks (configmap mounts are symlink farms). - cp -fL /etc/codex-config/. "${HOME}/.codex/" 2>/dev/null || true - chmod -R u+w "${HOME}/.codex" 2>/dev/null || true -fi +# Common env-var contract — required for every agent. 
+: "${GH_TOKEN:?GH_TOKEN must be set (1Password: op://Kubernetes/${AGENT}-github-pat/pat)}" +: "${GIT_USER_NAME:=${AGENT^} CoWork}" +: "${GIT_USER_EMAIL:=${AGENT}@prodromou.com}" -# Codex CLI session — seeded ONCE from the CODEX_SESSION env var (sourced -# from op://Kubernetes/codex-session/session) on first boot. After that, -# ~/.codex/auth.json lives on the Longhorn PVC and survives restarts; the -# in-shell `/login` flow is the supported path for re-auth when the -# session expires. To force a re-seed from 1Password, delete -# ~/.codex/auth.json from inside the pod and restart it. WOVED-38. -if [ ! -f "${HOME}/.codex/auth.json" ] && [ -n "${CODEX_SESSION:-}" ]; then - mkdir -p "${HOME}/.codex" - printf '%s' "${CODEX_SESSION}" > "${HOME}/.codex/auth.json" - chmod 600 "${HOME}/.codex/auth.json" -fi - -# git identity — applies to every commit made inside the pod. +# git identity + gh credential helper — set up early so the +# agent-config clone below can use it for private-repo HTTPS auth. git config --global user.name "${GIT_USER_NAME}" git config --global user.email "${GIT_USER_EMAIL}" git config --global init.defaultBranch main git config --global pull.rebase false - -# Prefer HTTPS over SSH so the gh-managed token is used. git config --global url."https://github.com/".insteadOf "git@github.com:" - -# Make `gh` the credential helper for HTTPS clones (uses GH_TOKEN automatically). gh auth setup-git # Plane Gateway env (consumed by tools/scripts the user runs in-shell). -# PLANE_TOKEN is the codex-prodromou Plane API key. -# PLANE_GATEWAY_URL points at the n8n Plane Gateway v2.1 webhook. export PLANE_GATEWAY_URL="${PLANE_GATEWAY_URL:-https://n8n.prodromou.com/webhook/plane-gateway-v21}" +# Per-agent config + auth bootstrap. 
Each agent variant declares: +# AGENT_CONFIG_DIR — where the agent CLI looks for config (~/.codex, ~/.claude) +# AGENT_CONFIG_SOURCE — ConfigMap mount path for managed config +# AGENT_LAUNCH_CMD — what ttyd runs on connect (auto-resume + bash fallback) +# INSTRUCTIONS_LINK — agent-specific path where the shared CLAUDE.md +# from agent-config gets symlinked +case "$AGENT" in +codex) + AGENT_CONFIG_DIR="${HOME}/.codex" + AGENT_CONFIG_SOURCE="/etc/codex-config" + AGENT_AUTH_FILE="${AGENT_CONFIG_DIR}/auth.json" + # Codex looks for AGENTS.md as the global instructions file. + INSTRUCTIONS_LINK="${AGENT_CONFIG_DIR}/AGENTS.md" + # Resume the last session; fall back to a fresh codex if there is + # none, then drop to bash if codex exits. + AGENT_LAUNCH_CMD='codex resume --last 2>/dev/null || codex; exec bash -l' + + mkdir -p "${AGENT_CONFIG_DIR}" + + # Optional: seed auth.json from CODEX_SESSION on first boot. + if [ ! -f "${AGENT_AUTH_FILE}" ] && [ -n "${CODEX_SESSION:-}" ]; then + printf '%s' "${CODEX_SESSION}" > "${AGENT_AUTH_FILE}" + chmod 600 "${AGENT_AUTH_FILE}" + fi + ;; +claude) + AGENT_CONFIG_DIR="${HOME}/.claude" + AGENT_CONFIG_SOURCE="/etc/claude-config" + INSTRUCTIONS_LINK="${AGENT_CONFIG_DIR}/CLAUDE.md" + # Continue the most recent session; fall back to fresh claude if + # none exists, then bash if claude exits. + AGENT_LAUNCH_CMD='claude --continue 2>/dev/null || claude; exec bash -l' + + mkdir -p "${AGENT_CONFIG_DIR}" + # Claude Code uses interactive `/login` on first connect; credentials + # persist on the PVC at ~/.claude/. No env-var session seed. + ;; +esac + +# Sync managed config from a ConfigMap mounted at /etc/${AGENT}-config/. +# The ConfigMap (apk8s repo) is the source of truth for model/MCP config; +# in-pod edits get blown away on restart. Stakater Reloader restarts the +# pod when the ConfigMap changes. +if [ -d "${AGENT_CONFIG_SOURCE}" ]; then + # cp -L follows symlinks (configmap mounts are symlink farms). + cp -fL "${AGENT_CONFIG_SOURCE}/."
"${AGENT_CONFIG_DIR}/" 2>/dev/null || true + chmod -R u+w "${AGENT_CONFIG_DIR}" 2>/dev/null || true +fi + +# Pull nprodromou/agent-config for the canonical Nate-org instructions +# file (CLAUDE.md). Symlinked into the agent's expected location so +# updates to agent-config reach the pod on next restart without an +# image rebuild. +AGENT_CONFIG_REPO_DIR="${HOME}/.agent-config" +if [ ! -d "${AGENT_CONFIG_REPO_DIR}/.git" ]; then + git clone --depth=1 https://github.com/nprodromou/agent-config "${AGENT_CONFIG_REPO_DIR}" \ + || echo "warning: could not clone agent-config (continuing without shared instructions)" >&2 +else + git -C "${AGENT_CONFIG_REPO_DIR}" fetch --depth=1 origin main 2>/dev/null || true + git -C "${AGENT_CONFIG_REPO_DIR}" reset --hard origin/main 2>/dev/null || true +fi + +if [ -f "${AGENT_CONFIG_REPO_DIR}/instructions/CLAUDE.md" ]; then + ln -sf "${AGENT_CONFIG_REPO_DIR}/instructions/CLAUDE.md" "${INSTRUCTIONS_LINK}" +fi + # Identity banner — surfaced by the bash prompt on login. -cat > "${HOME}/.codex-identity" < -Plane gw : ${PLANE_GATEWAY_URL} -Codex CLI : $(codex --version 2>/dev/null || echo unknown) +AGENT_CONFIG_SHA="$(git -C "${AGENT_CONFIG_REPO_DIR}" rev-parse --short HEAD 2>/dev/null || echo missing)" +cat > "${HOME}/.${AGENT}-identity" < +Plane gw : ${PLANE_GATEWAY_URL} +${AGENT^} CLI : $(${AGENT} --version 2>/dev/null || echo unknown) +agent-config : ${AGENT_CONFIG_SHA} EOF # ttyd flags: -# -W : writable (input enabled) -# -p 7681 : listen port -# -t titleFixed: avoids leaking shell pid/host into the title -# -T xterm-256color : sane terminal +# --writable : input enabled +# --port 7681 : listen port +# titleFixed : avoids leaking shell pid/host into the title +# --terminal-type : sane terminal # -# The shell command auto-launches codex on connect so the user lands -# straight in the agent UI. If codex exits or crashes, we drop to an -# interactive bash login so the pod isn't bricked. 
+# AGENT_LAUNCH_CMD auto-resumes the agent's last session. If no session +# exists, falls back to a fresh agent run. If the agent exits or +# crashes, drops to an interactive bash login so the pod isn't bricked. exec ttyd \ --writable \ --port 7681 \ --terminal-type xterm-256color \ - --client-option titleFixed='codex-cli' \ + --client-option titleFixed="${AGENT}-cli" \ --client-option fontSize=14 \ - bash -lc 'codex; exec bash -l' + bash -lc "${AGENT_LAUNCH_CMD}" From dfa92d18484f04e5296e915b76b99c31671d516a Mon Sep 17 00:00:00 2001 From: nprodromou <73134621+nprodromou@users.noreply.github.com> Date: Wed, 6 May 2026 22:40:49 -0700 Subject: [PATCH 2/3] README: document multi-agent build, runtime contract, connect flow --- README.md | 135 ++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 86 insertions(+), 49 deletions(-) diff --git a/README.md b/README.md index c820c1e..e2d6535 100644 --- a/README.md +++ b/README.md @@ -1,57 +1,82 @@ # codex-shell -Container image for running [OpenAI codex-cli](https://github.com/openai/codex) -as a single, browser-accessible shell on Kubernetes — codex CLI exposed over -ttyd, identity locked to a dedicated GitHub user, persistent home volume. +Multi-agent browser shell image. One Dockerfile, two image variants — +`codex` (OpenAI's [codex-cli](https://github.com/openai/codex)) and +`claude` (Anthropic's [Claude Code](https://github.com/anthropics/claude-code)) +— each exposed over ttyd as a single, browser-accessible terminal on +Kubernetes. Identity is locked to a dedicated GitHub user per agent; +home directory persists on a PVC; canonical Nate-org instructions +(from [`nprodromou/agent-config`](https://github.com/nprodromou/agent-config)) +are pulled at boot and symlinked into the agent's expected path. + +The repo name is a historical artifact — it started as codex-only and +gained the claude variant later. 
Image content is cluster-agnostic; the +canonical deploy lives in [`nprodromou/apk8s`](https://github.com/nprodromou/apk8s) +under `kubernetes/apps/agents/{codex,claude}-cli`. + +`code-server` (VS Code in the browser) is intentionally **not** in this +image — that's a separate concern tracked by WOVED-35. + +## Images + +| Variant | Tag | Agent CLI | +| ------- | -------------------------------------------------- | ------------------------ | +| codex | `ghcr.io/nprodromou/codex-shell:codex-latest` | `@openai/codex` | +| claude | `ghcr.io/nprodromou/codex-shell:claude-latest` | `@anthropic-ai/claude-code` | + +Both are built from the same `Dockerfile` via the `AGENT` build arg +(`codex` or `claude`). The build matrix in `.github/workflows/build.yml` +publishes both variants on every push to `main`. Per-commit tags follow +the pattern `sha-XXXXX-{codex,claude}` for pinning. -Cluster-agnostic: the image runs anywhere Kubernetes can pull from GHCR. The -canonical deploy lives in `nprodromou/apk8s`, but nothing in the image is -specific to that cluster. +## Runtime contract -## What it is +The entrypoint requires the following environment variables. They are +mounted into the pod by an `ExternalSecret` that pulls from the deploy's +1Password vault (typically `Kubernetes`) per the canonical +[Agent Secret Naming Convention](https://prodromou.atlassian.net/wiki/spaces/Operations/pages/63438850). -A long-running pod that exposes a `bash` shell with `codex` (and `gh`, `git`, -`tmux`, etc.) on `PATH` over [ttyd](https://github.com/tsl0922/ttyd). Hit it -from a browser and you get a terminal. Identity is locked to a dedicated -GitHub user (`codex-prodromou` in the canonical deploy) so commits, PRs, and -Plane tickets attribute deterministically — no more `gh auth` collisions with -whichever identity a developer machine logged in last. 
+### Common (both agents) -This image is the runtime; the cluster manifests for the canonical deploy -live in [`nprodromou/apk8s` → `kubernetes/apps/agents/codex-cli`](https://github.com/nprodromou/apk8s). +| Env var | 1Password reference | Purpose | +| ---------------- | ----------------------------------------------------- | -------------------------------------------------- | +| `GH_TOKEN` | `op://Kubernetes/${agent}-github-pat/pat` | GitHub PAT (`${agent}-prodromou`); used by `gh` | +| `GIT_USER_NAME` | `op://Kubernetes/${agent}-github-pat/git_user_name` | Defaults to `${Agent} CoWork` | +| `GIT_USER_EMAIL` | `op://Kubernetes/${agent}-github-pat/git_user_email` | Defaults to `${agent}@prodromou.com` | +| `PLANE_TOKEN` | `op://Kubernetes/${agent}-plane-token/token` | Plane API key for the agent's workspace user | -`code-server` (VS Code in the browser) is intentionally **not** in this image -— see WOVED-35 for that. +### Codex-specific -## Image +| Env var | 1Password reference | Purpose | +| ---------------- | ----------------------------------------- | -------------------------------------------------------------------------------- | +| `CODEX_SESSION` | `op://Kubernetes/codex-session/session` | OpenAI Codex CLI auth blob. Optional. Seeds `~/.codex/auth.json` on first boot. | -``` -ghcr.io/nprodromou/codex-shell:latest -``` +### Claude-specific -Built by `.github/workflows/build.yml` on push to `main` or version tag. - -## Runtime contract +Claude Code uses interactive `/login` on first connect — no env-var +session seed. Credentials persist on the PVC at `~/.claude/`. -The entrypoint requires the following environment variables. They are mounted -into the pod by an `ExternalSecret` that pulls from the deploy's 1Password -vault (typically `Kubernetes`) per the canonical [Agent Secret Naming -Convention](https://prodromou.atlassian.net/wiki/spaces/Operations/pages/63438850). 
- -| Env var | 1Password reference | Purpose | -| ---------------- | ------------------------------------------------ | -------------------------------------------------- | -| `GH_TOKEN` | `op://Kubernetes/codex-github-pat/pat` | GitHub PAT (`codex-prodromou`); used by `gh` | -| `CODEX_SESSION` | `op://Kubernetes/codex-session/session` | OpenAI Codex CLI auth blob | -| `PLANE_TOKEN` | `op://Kubernetes/codex-plane-token/token` | Plane API key for `codex-prodromou` workspace user | -| `GIT_USER_NAME` | `op://Kubernetes/codex-github-pat/git_user_name` | Defaults to `Codex CoWork` | -| `GIT_USER_EMAIL` | `op://Kubernetes/codex-github-pat/git_user_email` | Defaults to `codex@prodromou.com` | - -Optional: +### Optional (both agents) | Env var | Default | | ------------------- | ------------------------------------------------------- | | `PLANE_GATEWAY_URL` | `https://n8n.prodromou.com/webhook/plane-gateway-v21` | +## How a connect works + +1. ttyd accepts the browser connection and runs the configured shell command. +2. Entrypoint has already wired `gh`, `git`, the agent's auth state, and + pulled the latest `nprodromou/agent-config` into `~/.agent-config`, + symlinking `instructions/CLAUDE.md` into: + - **codex:** `~/.codex/AGENTS.md` + - **claude:** `~/.claude/CLAUDE.md` +3. The shell command attempts to resume the most recent session: + - **codex:** `codex resume --last` + - **claude:** `claude --continue` +4. If no prior session exists, falls through to a fresh agent run. +5. If the agent exits or crashes, drops to an interactive bash login so + the pod isn't bricked. + ## Ports | Port | Purpose | @@ -60,31 +85,43 @@ Optional: ## Persistence -The pod's `/home/codex` is backed by a Longhorn `ReadWriteOnce` PVC declared in -the apk8s manifests. That gives you durable shell history, codex-cli session -state, and any cloned repos under `~/workspace`. +The pod's `/home/${AGENT}` is backed by a Longhorn `ReadWriteOnce` PVC +declared in the apk8s manifests. 
That gives you durable shell history, +agent session state, persisted auth tokens, and any cloned repos under +`~/workspace`. ## Developing locally ```sh -# Build -docker build -t codex-shell:dev . +# Build the codex variant. +docker build -t codex-shell:codex --build-arg AGENT=codex . + +# Or the claude variant. +docker build -t codex-shell:claude --build-arg AGENT=claude . # Run with the env vars the entrypoint expects. docker run --rm -it -p 7681:7681 \ -e GH_TOKEN="$(gh auth token)" \ -e GIT_USER_NAME="Local Test" \ -e GIT_USER_EMAIL="$(git config user.email)" \ - codex-shell:dev + codex-shell:codex ``` Then open http://localhost:7681. ## Notes -- The image runs as non-root `codex` (uid 1000). +- Base is `debian:bookworm-slim`; Node 22 from NodeSource; `npm@latest` + installed on top because NodeSource lags upstream. +- The image runs as non-root with the user named after the agent + (`codex` or `claude`), uid/gid 1000. Matches the Longhorn PVC owner so + volume mounts work cleanly. - `tini` is PID 1 so zombie reaping is handled. -- `tmux` is preinstalled — start a session with `tmux` and your shell survives - closing the browser tab; `tmux attach` to reconnect. -- `gh` uses `GH_TOKEN` automatically; no interactive `gh auth login` needed. -- HTTPS clones via `gh` are seamless because `gh auth setup-git` runs at boot. +- `tmux`, `bubblewrap` (codex sandbox prereq), `ripgrep`-equivalents, + `jq`, `vim`, etc. are preinstalled. +- `gh` uses `GH_TOKEN` automatically; no interactive `gh auth login` + needed. HTTPS clones via `gh` are seamless because `gh auth setup-git` + runs at boot. +- Updates to `nprodromou/agent-config` reach the pod on next restart + (entrypoint pulls and resets to `origin/main`); no image rebuild + needed for instruction changes.
From 0a55ebdfb9fd6091c12b8baa8716aef4547f7399 Mon Sep 17 00:00:00 2001 From: nprodromou <73134621+nprodromou@users.noreply.github.com> Date: Wed, 6 May 2026 22:42:45 -0700 Subject: [PATCH 3/3] =?UTF-8?q?Dockerfile:=20revert=20npm=20self-upgrade?= =?UTF-8?q?=20=E2=80=94=20triggers=20module-resolution=20bug?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Dockerfile | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index 71d0bfa..f91d298 100644 --- a/Dockerfile +++ b/Dockerfile @@ -40,11 +40,13 @@ RUN set -eux; \ build-essential python3 python3-pip \ bubblewrap \ passwd; \ - # Node.js from NodeSource (pinned major version). Then upgrade - # npm to latest — NodeSource lags behind upstream by months. + # Node.js from NodeSource (pinned major version). NodeSource ships + # npm slightly behind upstream; we keep what they bundle since + # `npm install -g npm@latest` triggers a self-upgrade module-resolution + # bug at build time, and the bundled version works fine for installing + # the agent CLIs. curl -fsSL "https://deb.nodesource.com/setup_${NODE_VERSION}.x" | bash -; \ apt-get install -y --no-install-recommends nodejs; \ - npm install -g npm@latest; \ # GitHub CLI from official apt repo. curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg \ | gpg --dearmor -o /usr/share/keyrings/githubcli-archive-keyring.gpg; \