-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathDockerfile
More file actions
341 lines (322 loc) · 17.4 KB
/
Copy pathDockerfile
File metadata and controls
341 lines (322 loc) · 17.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
# Multi-agent browser shell image — single-instance ttyd-fronted
# terminal that auto-launches an LLM coding agent. Built once per agent
# via the AGENT build arg (codex|claude). Each agent variant gets its
# own image tag (codex-latest, claude-latest) and is deployed as a
# separate pod in apk8s under kubernetes/apps/agents/<agent>-cli.
#
# Identity is provided at runtime via env vars sourced from a k8s Secret
# backed by 1Password (deploy vault, typically `Kubernetes`). gh + git
# are configured by the entrypoint so commits/PRs from inside the pod
# attribute to <agent>-prodromou.
#
# code-server (VS Code in browser) is intentionally NOT installed here —
# that is a separate concern tracked by WOVED-35.
FROM debian:bookworm-slim
ARG NODE_VERSION=22
ARG AGENT=codex
# Guard: abort the build right away unless AGENT names one of the two
# supported agents, so a typo fails cleanly instead of producing a
# half-configured image.
RUN if [ "$AGENT" != "codex" ] && [ "$AGENT" != "claude" ]; then \
        echo "Unsupported AGENT: $AGENT (expected codex|claude)" >&2; \
        exit 1; \
    fi
# apt must run non-interactively during the build only. Declaring
# DEBIAN_FRONTEND as a build-time ARG keeps it visible to every RUN step
# in this stage without baking it into the environment of the running
# container (where it would silently change the behaviour of any later
# interactive apt/dpkg use).
ARG DEBIAN_FRONTEND=noninteractive
# Runtime environment shared by build steps and the running container:
#   AGENT  — which agent this image variant was built for.
#   HOME   — the agent user's home directory, /home/<agent>.
#   PATH   — user-local bin and the agent-config scripts directory come
#            before the standard system locations.
ENV AGENT=${AGENT} \
    LANG=C.UTF-8 \
    LC_ALL=C.UTF-8 \
    TZ=America/Los_Angeles \
    HOME=/home/${AGENT} \
    PATH=/home/${AGENT}/.local/bin:/home/${AGENT}/.agent-config/scripts:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
# System deps + tmux + Node + bubblewrap + standard CLI utilities.
# ttyd is fetched separately below — Debian Bookworm doesn't carry it.
#
# Node.js comes from NodeSource (pinned major version). NodeSource ships
# npm slightly behind upstream; we keep what they bundle since
# `npm install -g npm@latest` triggers a self-upgrade module-resolution
# bug at build time, and the bundled version works fine for installing
# the agent CLIs.
#
# The NodeSource setup script is downloaded to a file and then executed
# rather than piped straight into bash. RUN uses /bin/sh without
# pipefail, so in `curl ... | bash -` a failed download would be masked
# (bash exits 0 on empty input) and the following apt-get would fall
# back to whatever `nodejs` Debian itself offers. With a separate
# download step, `set -e` stops the build as soon as curl fails. The
# final version prints make a wrong or npm-less Node install fail here
# instead of in a later layer.
RUN set -eux; \
    apt-get update; \
    apt-get install -y --no-install-recommends \
        ca-certificates curl git gnupg less vim sudo tini \
        bash-completion locales tmux unzip zip openssh-client \
        build-essential python3 python3-pip python3-venv \
        ripgrep fd-find dnsutils iputils-ping \
        bubblewrap \
        passwd; \
    curl -fsSL "https://deb.nodesource.com/setup_${NODE_VERSION}.x" -o /tmp/nodesource_setup.sh; \
    bash /tmp/nodesource_setup.sh; \
    rm -f /tmp/nodesource_setup.sh; \
    apt-get install -y --no-install-recommends nodejs; \
    rm -rf /var/lib/apt/lists/*; \
    node --version; \
    npm --version
# ttyd — installed from the upstream project's static release binary.
# The release asset is chosen from the image's dpkg architecture; any
# architecture other than amd64/arm64 aborts the build.
ARG TTYD_VERSION=1.7.7
RUN set -eux; \
    deb_arch="$(dpkg --print-architecture)"; \
    if [ "$deb_arch" = "amd64" ]; then \
        ttyd_asset="ttyd.x86_64"; \
    elif [ "$deb_arch" = "arm64" ]; then \
        ttyd_asset="ttyd.aarch64"; \
    else \
        echo "unsupported arch: $deb_arch" >&2; \
        exit 1; \
    fi; \
    curl -fsSL -o /usr/local/bin/ttyd \
        "https://github.com/tsl0922/ttyd/releases/download/${TTYD_VERSION}/${ttyd_asset}"; \
    chmod +x /usr/local/bin/ttyd; \
    ttyd --version
# Debian ships fd under the name `fdfind`; provide the canonical `fd`
# name that codex/claude expect.
RUN ln --symbolic --force /usr/bin/fdfind /usr/local/bin/fd
# ----------------------------------------------------------------------
# Toolchain pinned to apk8s/.mise.toml — the canonical version source.
# When apk8s bumps a tool there, bump the matching ARG here in lockstep.
# Direct-install rather than mise-runtime so versions are reproducible
# from the image itself (no per-pod tool downloads, no PVC bloat, no
# trust prompts).
# ----------------------------------------------------------------------
# Python ecosystem (uv-managed; system python3 stays as Debian default
# for tools that hard-code /usr/bin/python3).
#
# The RUN below performs four steps, each ending in a version print so a
# bad download or install fails the build at that point:
#   1. uv + uvx from the upstream release tarball matching `uname -m`.
#   2. The pinned CPython, installed by uv under /opt/python and
#      symlinked to /usr/local/bin/python<major.minor> so pipx and
#      others can `--python` it. The <major.minor> suffix is derived
#      from PYTHON_VERSION, so bumping the ARG to a new minor series
#      does not require touching the commands.
#   3. pipx, installed with the system pip (kept on PATH for any user or
#      script that wants it; it uses the system python3 by default —
#      pass --python /usr/local/bin/python<major.minor> for tools that
#      require >=3.12).
#   4. makejinja, installed with `uv tool install` against the pinned
#      interpreter: makejinja requires Python >=3.12, which pipx's
#      default interpreter (3.11) does not satisfy, and uv's --python
#      pins it cleanly without a pipx flag dance.
ARG UV_VERSION=0.10.7
ARG PYTHON_VERSION=3.14.3
ARG PIPX_VERSION=1.8.0
ARG MAKEJINJA_VERSION=2.8.2
RUN set -eux; \
    arch="$(uname -m)"; \
    curl -fsSL "https://github.com/astral-sh/uv/releases/download/${UV_VERSION}/uv-${arch}-unknown-linux-gnu.tar.gz" \
        | tar -xz -C /tmp; \
    mv "/tmp/uv-${arch}-unknown-linux-gnu/uv" /usr/local/bin/uv; \
    mv "/tmp/uv-${arch}-unknown-linux-gnu/uvx" /usr/local/bin/uvx; \
    rm -rf "/tmp/uv-${arch}-unknown-linux-gnu"; \
    uv --version; \
    uvx --version; \
    # major.minor of the pinned interpreter, e.g. 3.14.3 -> 3.14
    # (a two-component PYTHON_VERSION such as 3.14 is left as-is).
    py_series="$(printf '%s' "${PYTHON_VERSION}" | cut -d. -f1,2)"; \
    UV_PYTHON_INSTALL_DIR=/opt/python uv python install "${PYTHON_VERSION}"; \
    PYTHON_BIN="$(find /opt/python -path "*/bin/python${py_series}" -type f -executable | head -1)"; \
    test -x "${PYTHON_BIN}" || (echo "did not find python${py_series} binary under /opt/python" >&2; exit 1); \
    ln -sf "${PYTHON_BIN}" "/usr/local/bin/python${py_series}"; \
    pip3 install --no-cache-dir --break-system-packages "pipx==${PIPX_VERSION}"; \
    pipx --version; \
    UV_TOOL_DIR=/opt/uv-tools UV_TOOL_BIN_DIR=/usr/local/bin \
        uv tool install --python "${PYTHON_VERSION}" "makejinja==${MAKEJINJA_VERSION}"; \
    makejinja --version
# Infra CLIs — apk8s/.mise.toml versions. Single-binary github releases
# unless noted. Grouped into one RUN to keep layers tight; each tool
# version-prints at the end so build logs catch a bad URL fast.
#
# Layout of the RUN below:
#   - `arch` is the dpkg architecture name, which is substituted
#     directly into every release asset name.
#   - Raw binaries are written straight to /usr/local/bin and made
#     executable.
#   - Archives are unpacked under /tmp (or a per-tool scratch directory
#     there); the binary is moved to /usr/local/bin and the unpacked
#     directory, where one was created, is removed in the same step.
#   - Some version variables carry a leading "v" that the asset file
#     name omits; those are stripped with ${VAR#v} before use.
ARG KUBECTL_VERSION=v1.35.2
ARG HELM_VERSION=v4.1.1
ARG FLUX_VERSION=2.8.1
ARG SOPS_VERSION=v3.12.1
ARG AGE_VERSION=v1.3.1
ARG CUE_VERSION=v0.15.4
ARG TASK_VERSION=v3.48.0
ARG KUSTOMIZE_VERSION=v5.7.1
ARG YQ_VERSION=v4.52.4
ARG JQ_VERSION=jq-1.8.1
ARG TALOSCTL_VERSION=v1.12.4
ARG KUBECONFORM_VERSION=v0.7.0
ARG HELMFILE_VERSION=v1.3.2
ARG TALHELPER_VERSION=v3.1.5
ARG CILIUM_CLI_VERSION=v0.19.2
ARG GH_VERSION=v2.87.3
ARG CLOUDFLARED_VERSION=2026.2.0
ARG OP_VERSION=2.31.1
RUN set -eux; \
arch="$(dpkg --print-architecture)"; \
# ----- raw single-binary downloads -----
# kubectl
curl -fsSL "https://dl.k8s.io/release/${KUBECTL_VERSION}/bin/linux/${arch}/kubectl" -o /usr/local/bin/kubectl; \
chmod +x /usr/local/bin/kubectl; kubectl version --client=true --output=yaml | head -2; \
# cloudflared
curl -fsSL "https://github.com/cloudflare/cloudflared/releases/download/${CLOUDFLARED_VERSION}/cloudflared-linux-${arch}" -o /usr/local/bin/cloudflared; \
chmod +x /usr/local/bin/cloudflared; cloudflared --version | head -1; \
# sops
curl -fsSL "https://github.com/getsops/sops/releases/download/${SOPS_VERSION}/sops-${SOPS_VERSION}.linux.${arch}" -o /usr/local/bin/sops; \
chmod +x /usr/local/bin/sops; sops --version | head -1; \
# jq (pinned, replaces apt-installed jq if any).
curl -fsSL "https://github.com/jqlang/jq/releases/download/${JQ_VERSION}/jq-linux-${arch}" -o /usr/local/bin/jq; \
chmod +x /usr/local/bin/jq; jq --version; \
# yq
curl -fsSL "https://github.com/mikefarah/yq/releases/download/${YQ_VERSION}/yq_linux_${arch}" -o /usr/local/bin/yq; \
chmod +x /usr/local/bin/yq; yq --version; \
# talosctl
curl -fsSL "https://github.com/siderolabs/talos/releases/download/${TALOSCTL_VERSION}/talosctl-linux-${arch}" -o /usr/local/bin/talosctl; \
chmod +x /usr/local/bin/talosctl; talosctl version --client | head -2; \
# ----- tar.gz extractions -----
# helm (4.x layout: linux-${arch}/helm)
curl -fsSL "https://get.helm.sh/helm-${HELM_VERSION}-linux-${arch}.tar.gz" | tar -xz -C /tmp; \
mv "/tmp/linux-${arch}/helm" /usr/local/bin/helm; rm -rf "/tmp/linux-${arch}"; \
helm version --short; \
# flux (FLUX_VERSION has no leading "v"; the release tag adds it)
curl -fsSL "https://github.com/fluxcd/flux2/releases/download/v${FLUX_VERSION}/flux_${FLUX_VERSION}_linux_${arch}.tar.gz" | tar -xz -C /tmp; \
mv /tmp/flux /usr/local/bin/flux; flux --version; \
# age + age-keygen (the age-keygen version print is best-effort: `|| true`)
curl -fsSL "https://github.com/FiloSottile/age/releases/download/${AGE_VERSION}/age-${AGE_VERSION}-linux-${arch}.tar.gz" | tar -xz -C /tmp; \
mv /tmp/age/age /tmp/age/age-keygen /usr/local/bin/; rm -rf /tmp/age; \
age --version; age-keygen --version 2>&1 | head -1 || true; \
# cue (unpacked into its own scratch directory, removed afterwards)
curl -fsSL "https://github.com/cue-lang/cue/releases/download/${CUE_VERSION}/cue_${CUE_VERSION}_linux_${arch}.tar.gz" | (mkdir -p /tmp/cue.d && tar -xz -C /tmp/cue.d); \
mv /tmp/cue.d/cue /usr/local/bin/cue; rm -rf /tmp/cue.d; \
cue version | head -2; \
# task
curl -fsSL "https://github.com/go-task/task/releases/download/${TASK_VERSION}/task_linux_${arch}.tar.gz" | (mkdir -p /tmp/task.d && tar -xz -C /tmp/task.d); \
mv /tmp/task.d/task /usr/local/bin/task; rm -rf /tmp/task.d; \
task --version; \
# kustomize (release tag is "kustomize/<version>", URL-encoded as %2F)
curl -fsSL "https://github.com/kubernetes-sigs/kustomize/releases/download/kustomize%2F${KUSTOMIZE_VERSION}/kustomize_${KUSTOMIZE_VERSION}_linux_${arch}.tar.gz" | tar -xz -C /tmp; \
mv /tmp/kustomize /usr/local/bin/kustomize; \
kustomize version; \
# kubeconform
curl -fsSL "https://github.com/yannh/kubeconform/releases/download/${KUBECONFORM_VERSION}/kubeconform-linux-${arch}.tar.gz" | (mkdir -p /tmp/kc.d && tar -xz -C /tmp/kc.d); \
mv /tmp/kc.d/kubeconform /usr/local/bin/kubeconform; rm -rf /tmp/kc.d; \
kubeconform -v; \
# helmfile (releases tag is vX.Y.Z, asset name is helmfile_X.Y.Z_linux_amd64.tar.gz)
HELMFILE_VER="${HELMFILE_VERSION#v}"; \
curl -fsSL "https://github.com/helmfile/helmfile/releases/download/${HELMFILE_VERSION}/helmfile_${HELMFILE_VER}_linux_${arch}.tar.gz" | (mkdir -p /tmp/hf.d && tar -xz -C /tmp/hf.d); \
mv /tmp/hf.d/helmfile /usr/local/bin/helmfile; rm -rf /tmp/hf.d; \
helmfile --version; \
# talhelper
curl -fsSL "https://github.com/budimanjojo/talhelper/releases/download/${TALHELPER_VERSION}/talhelper_linux_${arch}.tar.gz" | (mkdir -p /tmp/th.d && tar -xz -C /tmp/th.d); \
mv /tmp/th.d/talhelper /usr/local/bin/talhelper; rm -rf /tmp/th.d; \
talhelper --version; \
# cilium-cli
# The case maps amd64/arm64 to themselves and has no default branch: on
# any other architecture cilium_arch stays unset, and `set -u` aborts
# the build when the URL below expands it.
case "${arch}" in amd64) cilium_arch=amd64 ;; arm64) cilium_arch=arm64 ;; esac; \
curl -fsSL "https://github.com/cilium/cilium-cli/releases/download/${CILIUM_CLI_VERSION}/cilium-linux-${cilium_arch}.tar.gz" | tar -xz -C /tmp; \
mv /tmp/cilium /usr/local/bin/cilium; \
cilium version --client; \
# gh CLI (pinned; replaces the apt cli/cli source we used earlier).
# As with helmfile, the asset name drops the tag's leading "v".
GH_VER="${GH_VERSION#v}"; \
curl -fsSL "https://github.com/cli/cli/releases/download/${GH_VERSION}/gh_${GH_VER}_linux_${arch}.tar.gz" | tar -xz -C /tmp; \
mv "/tmp/gh_${GH_VER}_linux_${arch}/bin/gh" /usr/local/bin/gh; \
rm -rf "/tmp/gh_${GH_VER}_linux_${arch}"; \
gh --version | head -1; \
# ----- 1Password CLI (zip) -----
# Downloaded to a file first because unzip needs a file, not a pipe.
curl -fsSL "https://cache.agilebits.com/dist/1P/op2/pkg/v${OP_VERSION}/op_linux_${arch}_v${OP_VERSION}.zip" -o /tmp/op.zip; \
unzip -d /tmp/op /tmp/op.zip; mv /tmp/op/op /usr/local/bin/op; rm -rf /tmp/op /tmp/op.zip; \
op --version
# ----------------------------------------------------------------------
# Cloud CLIs — AWS CLI v2 + Azure CLI (WOVED-50).
#
# Both are needed by Phase 1 worker pods that execute infra runbooks
# (sandbox deploys, helm installs, ECR pushes, etc.) per WOVED-67's
# non-interactive credential design. Pinning to specific versions for
# image reproducibility — bump in lockstep with apk8s/.mise.toml when
# either CLI advances.
#
# AWS CLI v2 ships official prebuilt binaries; Azure CLI rides on top
# of Microsoft's Debian apt repo, which keeps Python deps + extension
# isolation handled upstream rather than us pinning a specific
# python/azure-cli compatibility matrix.
# ----------------------------------------------------------------------
ARG AWS_CLI_VERSION=2.18.0
ARG AZ_CLI_VERSION=2.66.0
# Both cloud CLIs are installed in a single layer:
#   - AWS CLI v2 from the official versioned zip, unpacked under /tmp and
#     installed to /usr/local/aws-cli with its launcher in /usr/local/bin.
#   - Azure CLI from Microsoft's bookworm apt repository, pinned to the
#     apt-versioned tag (X.Y.Z-1~bookworm) so layer caching and image
#     reproducibility both work.
RUN set -eux; \
    deb_arch="$(dpkg --print-architecture)"; \
    # AWS names its assets by CPU family rather than Debian architecture.
    if [ "$deb_arch" = "amd64" ]; then \
        aws_cpu="x86_64"; \
    elif [ "$deb_arch" = "arm64" ]; then \
        aws_cpu="aarch64"; \
    else \
        echo "unsupported arch for aws-cli: $deb_arch" >&2; \
        exit 1; \
    fi; \
    aws_zip=/tmp/awscliv2.zip; \
    curl -fsSL -o "$aws_zip" \
        "https://awscli.amazonaws.com/awscli-exe-linux-${aws_cpu}-${AWS_CLI_VERSION}.zip"; \
    unzip -q "$aws_zip" -d /tmp; \
    /tmp/aws/install -i /usr/local/aws-cli -b /usr/local/bin; \
    rm -rf /tmp/aws "$aws_zip"; \
    aws --version; \
    # Microsoft signing key, stored de-armored and world-readable so apt
    # can verify the repository declared right after it.
    ms_keyring=/etc/apt/keyrings/microsoft.gpg; \
    install -d -m 0755 /etc/apt/keyrings; \
    curl -fsSL https://packages.microsoft.com/keys/microsoft.asc \
        | gpg --dearmor -o "$ms_keyring"; \
    chmod a+r "$ms_keyring"; \
    printf '%s\n' "deb [arch=${deb_arch} signed-by=${ms_keyring}] https://packages.microsoft.com/repos/azure-cli/ bookworm main" \
        > /etc/apt/sources.list.d/azure-cli.list; \
    apt-get update; \
    apt-get install -y --no-install-recommends "azure-cli=${AZ_CLI_VERSION}-1~bookworm"; \
    rm -rf /var/lib/apt/lists/*; \
    az --version | head -1
# Agent CLI. Exactly one of the two npm packages is installed globally,
# selected by AGENT, which puts `codex` or `claude` on PATH for the
# non-root user. Versions are pinned so Renovate can auto-PR patch bumps
# (see renovate.json); an unpinned install would pull latest on every
# build, which is non-reproducible and skips the patch auto-merge gate.
# renovate: datasource=npm depName=@anthropic-ai/claude-code
ARG CLAUDE_CODE_VERSION=2.1.139
# renovate: datasource=npm depName=@openai/codex
ARG OPENAI_CODEX_VERSION=0.130.0
# Claude Code's runtime auto-updater is switched off: the version comes
# from the pinned ARG above and moves only through Renovate PRs, never
# at runtime, matching the rest of the codex-shell pinning model. The
# original rationale also notes the updater cannot succeed inside the
# container (the npm rename within /usr/bin/ requires root, which the
# agent user is not), so leaving it on only produces a noisy banner at
# TUI startup.
ENV DISABLE_AUTOUPDATER=true
# The install runs as root (npm writes to /usr/lib/node_modules). The
# package's scope directory and its /usr/bin entrypoint symlink are then
# handed to uid/gid 10001; the stated intent is to let the agent user
# run `npm install -g` without EACCES on the rename inside the @scope/
# parent.
# NOTE(review): that intent sits awkwardly next to the disabled
# auto-updater above — confirm which one reflects current behaviour.
# The agent user itself is created by the groupadd/useradd step further
# down, but 10001 is a pinned constant, so chown-by-number here is safe
# and order-independent.
RUN if [ "$AGENT" = "codex" ]; then \
        npm install -g "@openai/codex@${OPENAI_CODEX_VERSION}" \
        && chown -R 10001:10001 /usr/lib/node_modules/@openai \
        && chown -h 10001:10001 /usr/bin/codex; \
    elif [ "$AGENT" = "claude" ]; then \
        npm install -g "@anthropic-ai/claude-code@${CLAUDE_CODE_VERSION}" \
        && chown -R 10001:10001 /usr/lib/node_modules/@anthropic-ai \
        && chown -h 10001:10001 /usr/bin/claude; \
    fi && npm cache clean --force
# Non-root user. uid/gid 10001, name = AGENT.
#
# DO NOT bump uid/gid casually (WOVED-147). The slot OAuth init flow
# writes auth state to a PersistentVolumeClaim mounted at /home/${AGENT}.
# That PVC outlives any single pod — when the chart's pinned image tag
# rolls forward (Renovate, WOVED-148), kubernetes recreates the pod
# with the new image. If the new image's user has a different uid/gid,
# the new pod silently can't read its own credentials.json (file
# ownership is by uid, not username) and the operator sees the OAuth
# dance prompt re-fire on every task.
#
# 10001 picked to align with the woved worker images (which already
# baked it in) and to avoid colliding with the typical uid=1000
# first-user on host machines if an operator ever bind-mounts a path.
# Username stays AGENT for kubectl-exec UX (`whoami` reports the
# agent identity), but the load-bearing invariant is the uid/gid pin.
RUN groupadd --gid 10001 "${AGENT}" \
    && useradd --create-home --uid 10001 --gid 10001 --shell /bin/bash "${AGENT}" \
    && mkdir -p "/home/${AGENT}/.config" "/home/${AGENT}/workspace" \
    && chown -R "${AGENT}:${AGENT}" "/home/${AGENT}"
# Per-agent default config baseline. Installed into /etc/<agent>-defaults/
# at build time; the entrypoint applies these BEFORE the ConfigMap
# overlay at /etc/<agent>-config/, so a deployment without a ConfigMap
# still gets sensible runtime config and the ConfigMap only needs to
# carry the deltas.
#
# The build context's defaults/ directory is bind-mounted for this one
# RUN step instead of being COPYed into a staging directory and deleted
# by a later RUN: files removed in a later layer still remain in the
# earlier layer of the image, so the old approach shipped every agent's
# defaults in the image history. With the bind mount, only the selected
# <agent>-config.toml (if present) is written into the image. The
# /etc/<agent>-defaults directory is created whether or not that file
# exists.
RUN --mount=type=bind,source=defaults,target=/tmp/defaults-staging \
    mkdir -p "/etc/${AGENT}-defaults" \
    && if [ -f "/tmp/defaults-staging/${AGENT}-config.toml" ]; then \
        cp "/tmp/defaults-staging/${AGENT}-config.toml" "/etc/${AGENT}-defaults/config.toml"; \
    fi
# Runtime scripts, copied executable (0755) into /usr/local/bin:
#
#   entrypoint.sh  Container entrypoint; dispatches on AGENT_MODE to the
#                  scripts below.
#   worker.py      Worker mode (WOVED-126): headless task execution
#                  path, used when AGENT_MODE=worker; ttyd is bypassed
#                  entirely.
#   auth_init.py   Slot OAuth init mode (WOVED-126): operator-driven
#                  device-code dance, used when AGENT_MODE=auth-init; a
#                  one-shot init pod that exits once the slot's PVC
#                  carries valid auth state. A per-slot bearer token
#                  (WOVED-128) authenticates callbacks to the Manager —
#                  see bin/auth_init.py.
#   smoke_test.py  Smoke-test mode (WOVED-147): startup probe, used when
#                  AGENT_MODE=smoke-test, that verifies the CLI binary
#                  works and credentials are present and parseable.
#                  Structured exit codes (64/65/66) tell the Manager
#                  which recovery path to take. No network calls — safe
#                  on every kubernetes startupProbe tick.
COPY --chmod=0755 \
    bin/entrypoint.sh \
    bin/worker.py \
    bin/auth_init.py \
    bin/smoke_test.py \
    /usr/local/bin/
# Shell profile for the agent user. Ownership uses the pinned numeric
# uid/gid (10001) rather than the user name, matching the numeric chown
# used for the agent CLI install.
COPY --chown=10001:10001 profile/.bashrc /home/${AGENT}/.bashrc
COPY --chown=10001:10001 profile/.tmux.conf /home/${AGENT}/.tmux.conf
# Run as the agent user, declared numerically. Kubernetes can only
# enforce `runAsNonRoot: true` against an image whose USER is a numeric
# uid; with a user *name* the kubelet refuses to start the container
# unless the pod spec also sets runAsUser. uid 10001 still resolves to
# the AGENT account created above, so `whoami` reports the agent name
# as before.
USER 10001:10001
WORKDIR /home/${AGENT}/workspace
# ttyd's listening port (documentation only; publishing is done by the
# deployment).
EXPOSE 7681
# tini runs as PID 1 to reap zombies and forward signals to the
# entrypoint script.
ENTRYPOINT ["/usr/bin/tini", "--", "/usr/local/bin/entrypoint.sh"]