Skip to content

Commit a461606

Browse files
authored
fix(security): stop passing NVIDIA_API_KEY into sandbox and command lines (#675)
The OpenShell gateway proxies inference requests and injects stored credentials server-side (proxy.rs strips client auth headers, backend.rs re-authenticates upstream). The raw key was never needed inside the sandbox but was passed via env args, setup.sh, walkthrough commands, and the setupSpark sudo call — exposing it in ps aux, /proc/pid/cmdline, docker inspect, and k3s audit logs.

Changes:
- Remove NVIDIA_API_KEY from openshell sandbox create env args
- Use env-name-only credential form in setup.sh
- Remove key from walkthrough.sh tmux/connect commands
- Remove unnecessary key + ensureApiKey() from setupSpark
- Clear key from process.env after setupInference handoff
- Add 6 regression tests for credential exposure

Does NOT fix /proc/pid/environ (kernel snapshot is immutable after exec — requires file-based credential loading in OpenShell). Messaging tokens left in sandbox env pending #617 merge.

Closes #429.
1 parent de2554f commit a461606

File tree

5 files changed

+92
-11
lines changed

5 files changed

+92
-11
lines changed

bin/lib/onboard.js

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1386,11 +1386,15 @@ async function createSandbox(gpu, model, provider, preferredInferenceApi = null)
13861386
console.log(` Creating sandbox '${sandboxName}' (this takes a few minutes on first run)...`);
13871387
const chatUiUrl = process.env.CHAT_UI_URL || "http://127.0.0.1:18789";
13881388
patchStagedDockerfile(stagedDockerfile, model, chatUiUrl, String(Date.now()), provider, preferredInferenceApi);
1389+
// Only pass non-sensitive env vars to the sandbox. NVIDIA_API_KEY is NOT
1390+
// needed inside the sandbox — inference is proxied through the OpenShell
1391+
// gateway which injects the stored credential server-side. The gateway
1392+
// also strips any Authorization headers sent by the sandbox client.
1393+
// See: crates/openshell-sandbox/src/proxy.rs (header stripping),
1394+
// crates/openshell-router/src/backend.rs (server-side auth injection).
13891395
const envArgs = [formatEnvAssignment("CHAT_UI_URL", chatUiUrl)];
13901396
const sandboxEnv = { ...process.env };
1391-
if (process.env.NVIDIA_API_KEY) {
1392-
sandboxEnv.NVIDIA_API_KEY = process.env.NVIDIA_API_KEY;
1393-
}
1397+
delete sandboxEnv.NVIDIA_API_KEY;
13941398
const discordToken = getCredential("DISCORD_BOT_TOKEN") || process.env.DISCORD_BOT_TOKEN;
13951399
if (discordToken) {
13961400
sandboxEnv.DISCORD_BOT_TOKEN = discordToken;
@@ -2142,6 +2146,11 @@ async function onboard(opts = {}) {
21422146
process.env.NEMOCLAW_OPENSHELL_BIN = getOpenshellBinary();
21432147
await startGateway(gpu);
21442148
await setupInference(GATEWAY_NAME, model, provider, endpointUrl, credentialEnv);
2149+
// The key is now stored in openshell's provider config. Clear it from our
2150+
// process environment so new child processes don't inherit it. Note: this
2151+
// does NOT clear /proc/pid/environ (kernel snapshot is immutable after exec),
2152+
// but it prevents run()'s { ...process.env } from propagating the key.
2153+
delete process.env.NVIDIA_API_KEY;
21452154
const sandboxName = await createSandbox(gpu, model, provider, preferredInferenceApi);
21462155
if (nimContainer) {
21472156
registry.updateSandbox(sandboxName, { nimContainer });

bin/nemoclaw.js

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -96,8 +96,8 @@ async function setup() {
9696
}
9797

9898
async function setupSpark() {
99-
await ensureApiKey();
100-
run(`sudo -E NVIDIA_API_KEY=${shellQuote(process.env.NVIDIA_API_KEY)} bash "${SCRIPTS}/setup-spark.sh"`);
99+
// setup-spark.sh configures Docker cgroups — it does not use NVIDIA_API_KEY.
100+
run(`sudo bash "${SCRIPTS}/setup-spark.sh"`);
101101
}
102102

103103
async function deploy(instanceName) {

scripts/setup.sh

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -133,10 +133,12 @@ fi
133133
info "Setting up inference providers..."
134134

135135
# nvidia-nim (build.nvidia.com)
136+
# Use env-name-only form so openshell reads the value from the environment
137+
# internally — the literal key value never appears in the process argument list.
136138
upsert_provider \
137139
"nvidia-nim" \
138140
"openai" \
139-
"NVIDIA_API_KEY=$NVIDIA_API_KEY" \
141+
"NVIDIA_API_KEY" \
140142
"OPENAI_BASE_URL=https://integrate.api.nvidia.com/v1"
141143

142144
# vllm-local (if vLLM is installed or running)
@@ -193,9 +195,11 @@ rm -rf "$BUILD_CTX/nemoclaw/node_modules"
193195
# detect failures. The raw log is kept on failure for debugging.
194196
CREATE_LOG=$(mktemp /tmp/nemoclaw-create-XXXXXX.log)
195197
set +e
198+
# NVIDIA_API_KEY is NOT passed into the sandbox. Inference is proxied through
199+
# the OpenShell gateway which injects the stored credential server-side.
196200
openshell sandbox create --from "$BUILD_CTX/Dockerfile" --name "$SANDBOX_NAME" \
197201
--provider nvidia-nim \
198-
-- env NVIDIA_API_KEY="$NVIDIA_API_KEY" >"$CREATE_LOG" 2>&1
202+
>"$CREATE_LOG" 2>&1
199203
CREATE_RC=$?
200204
set -e
201205
rm -rf "$BUILD_CTX"

scripts/walkthrough.sh

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -72,9 +72,7 @@ if ! command -v tmux >/dev/null 2>&1; then
7272
echo ""
7373
echo " Terminal 2 (Agent):"
7474
echo " openshell sandbox connect nemoclaw"
75-
echo ' export NVIDIA_API_KEY=<your-key>'
76-
echo " nemoclaw-start"
77-
echo " openclaw agent --agent main --local --session-id live"
75+
echo " nemoclaw-start openclaw agent --agent main --local --session-id live"
7876
exit 0
7977
fi
8078

@@ -87,8 +85,10 @@ tmux kill-session -t "$SESSION" 2>/dev/null || true
8785
tmux new-session -d -s "$SESSION" -x 200 -y 50 "openshell term"
8886

8987
# Split right pane for the agent
88+
# NVIDIA_API_KEY is not needed inside the sandbox — inference is proxied
89+
# through the OpenShell gateway which injects credentials server-side.
9090
tmux split-window -h -t "$SESSION" \
91-
"openshell sandbox connect nemoclaw -- bash -c 'export NVIDIA_API_KEY=$NVIDIA_API_KEY && nemoclaw-start openclaw agent --agent main --local --session-id live'"
91+
"openshell sandbox connect nemoclaw -- bash -c 'nemoclaw-start openclaw agent --agent main --local --session-id live'"
9292

9393
# Even split
9494
tmux select-layout -t "$SESSION" even-horizontal

test/runner.test.js

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -224,4 +224,72 @@ describe("regression guards", () => {
224224
expect(src.includes("validateName(SANDBOX")).toBeTruthy();
225225
expect(src.includes("execSync")).toBeFalsy();
226226
});
227+
228+
describe("credential exposure guards (#429)", () => {
229+
it("onboard createSandbox does not pass NVIDIA_API_KEY to sandbox env", () => {
230+
const fs = require("fs");
231+
const src = fs.readFileSync(path.join(__dirname, "..", "bin", "lib", "onboard.js"), "utf-8");
232+
// Find the envArgs block in createSandbox — it should not contain NVIDIA_API_KEY
233+
const envArgsMatch = src.match(/const envArgs = \[[\s\S]*?\];/);
234+
expect(envArgsMatch).toBeTruthy();
235+
expect(envArgsMatch[0].includes("NVIDIA_API_KEY")).toBe(false);
236+
});
237+
238+
it("onboard clears NVIDIA_API_KEY from process.env after setupInference", () => {
239+
const fs = require("fs");
240+
const src = fs.readFileSync(path.join(__dirname, "..", "bin", "lib", "onboard.js"), "utf-8");
241+
expect(src.includes("delete process.env.NVIDIA_API_KEY")).toBeTruthy();
242+
});
243+
244+
it("setup.sh uses env-name-only form for nvidia-nim credential", () => {
245+
const fs = require("fs");
246+
const src = fs.readFileSync(path.join(__dirname, "..", "scripts", "setup.sh"), "utf-8");
247+
// Should use "NVIDIA_API_KEY" (name only), not "NVIDIA_API_KEY=$NVIDIA_API_KEY" (value)
248+
const lines = src.split("\n");
249+
for (const line of lines) {
250+
if (line.includes("upsert_provider") || line.includes("--credential")) continue;
251+
if (line.trim().startsWith("#")) continue;
252+
// Check credential argument lines passed to upsert_provider
253+
if (line.includes('"NVIDIA_API_KEY=')) {
254+
// Allow "NVIDIA_API_KEY" alone but not "NVIDIA_API_KEY=$..."
255+
expect(line.includes("NVIDIA_API_KEY=$")).toBe(false);
256+
}
257+
}
258+
});
259+
260+
it("setup.sh does not pass NVIDIA_API_KEY in sandbox create env args", () => {
261+
const fs = require("fs");
262+
const src = fs.readFileSync(path.join(__dirname, "..", "scripts", "setup.sh"), "utf-8");
263+
// Find sandbox create command — should not have env NVIDIA_API_KEY
264+
const createLines = src.split("\n").filter((l) => l.includes("sandbox create"));
265+
for (const line of createLines) {
266+
expect(line.includes("NVIDIA_API_KEY")).toBe(false);
267+
}
268+
});
269+
270+
it("setupSpark does not pass NVIDIA_API_KEY to sudo", () => {
271+
const fs = require("fs");
272+
const src = fs.readFileSync(path.join(__dirname, "..", "bin", "nemoclaw.js"), "utf-8");
273+
// Find the run() call inside setupSpark — it should not contain the key
274+
const sparkLines = src.split("\n").filter(
275+
(l) => l.includes("setup-spark") && l.includes("run(")
276+
);
277+
for (const line of sparkLines) {
278+
expect(line.includes("NVIDIA_API_KEY")).toBe(false);
279+
}
280+
});
281+
282+
it("walkthrough.sh does not embed NVIDIA_API_KEY in tmux or sandbox commands", () => {
283+
const fs = require("fs");
284+
const src = fs.readFileSync(path.join(__dirname, "..", "scripts", "walkthrough.sh"), "utf-8");
285+
// Check only executable lines (tmux spawn, openshell connect) — not comments/docs
286+
const cmdLines = src.split("\n").filter(
287+
(l) => !l.trim().startsWith("#") && !l.trim().startsWith("echo") &&
288+
(l.includes("tmux") || l.includes("openshell sandbox connect"))
289+
);
290+
for (const line of cmdLines) {
291+
expect(line.includes("NVIDIA_API_KEY")).toBe(false);
292+
}
293+
});
294+
});
227295
});

0 commit comments

Comments
 (0)