Skip to content

Commit 08e6e79

Browse files
authored
Merge branch 'main' into security/strip-credentials-and-blueprint-digest
2 parents 1e964a7 + f0f53e4 commit 08e6e79

File tree

12 files changed

+1538
-197
lines changed

12 files changed

+1538
-197
lines changed

.github/workflows/commit-lint.yaml

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,6 @@ jobs:
2121
steps:
2222
- name: Checkout
2323
uses: actions/checkout@v6
24-
with:
25-
fetch-depth: 0
2624

2725
- name: Setup Node.js
2826
uses: actions/setup-node@v6
@@ -33,14 +31,7 @@ jobs:
3331
- name: Install dependencies
3432
run: npm install --ignore-scripts
3533

36-
- name: Lint commits
37-
if: github.event.action != 'edited'
38-
env:
39-
FROM_SHA: ${{ github.event.pull_request.base.sha }}
40-
TO_SHA: ${{ github.event.pull_request.head.sha }}
41-
run: npx commitlint --from "$FROM_SHA" --to "$TO_SHA" --verbose
42-
43-
- name: Lint PR title (squash-merge path)
34+
- name: Lint PR title
4435
env:
4536
PR_TITLE: ${{ github.event.pull_request.title }}
4637
run: printf '%s\n' "$PR_TITLE" | npx commitlint --verbose

README.md

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,26 @@ The sandbox image is approximately 2.4 GB compressed. During image push, the Doc
6565
| macOS | Podman | Not supported yet. NemoClaw currently depends on OpenShell support for Podman on macOS. |
6666
| Windows WSL | Docker Desktop (WSL backend) | Supported target path |
6767

68+
#### macOS first-run checklist
69+
70+
On a fresh macOS machine, install the prerequisites in this order:
71+
72+
1. Install Xcode Command Line Tools:
73+
74+
```bash
75+
xcode-select --install
76+
```
77+
78+
2. Install and start a supported container runtime:
79+
- Docker Desktop
80+
- Colima
81+
3. Run the NemoClaw installer.
82+
83+
This avoids the two most common first-run failures on macOS:
84+
85+
- missing developer tools needed by the installer and Node.js toolchain
86+
- Docker connection errors when no supported container runtime is installed or running
87+
6888
> **💡 Tip**
6989
>
7090
> For DGX Spark, follow the [DGX Spark setup guide](https://github.com/NVIDIA/NemoClaw/blob/main/spark-install.md). It covers Spark-specific prerequisites, such as cgroup v2 and Docker configuration, before running the standard installer.

bin/lib/onboard.js

Lines changed: 109 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,28 @@ function hasStaleGateway(gwInfoOutput) {
185185
return typeof gwInfoOutput === "string" && gwInfoOutput.length > 0 && gwInfoOutput.includes(GATEWAY_NAME);
186186
}
187187

188+
// ANSI CSI escape sequences, following the ECMA-48 grammar: ESC "[" followed
// by any number of parameter bytes (0x30-0x3F — digits, ";" and the "?" used
// by private-mode sequences such as cursor hide/show), any number of
// intermediate bytes (0x20-0x2F), and exactly one final byte (0x40-0x7E).
const ANSI_ESCAPE = String.fromCharCode(27);
const ANSI_REGEX = new RegExp(`${ANSI_ESCAPE}\\[[0-?]*[ -/]*[@-~]`, "g");

/**
 * Remove ANSI CSI escape sequences (colors, cursor control) from text.
 *
 * @param {string} [value=""] - Text that may contain escape sequences.
 * @returns {string} The text with CSI sequences removed, or "" when the
 *   input is not a string (for example `null`).
 */
function stripAnsi(value = "") {
  // A default parameter only covers `undefined`; guard the other non-string
  // inputs explicitly so callers never hit a TypeError on `.replace`.
  if (typeof value !== "string") {
    return "";
  }
  return value.replace(ANSI_REGEX, "");
}
194+
195+
function getActiveGatewayName(statusOutput = "") {
196+
if (typeof statusOutput !== "string" || statusOutput.length === 0) {
197+
return "";
198+
}
199+
const match = stripAnsi(statusOutput)
200+
.match(/^\s*Gateway:\s+(.+?)\s*$/m);
201+
return match ? match[1].trim() : "";
202+
}
203+
204+
/**
 * Decide whether the existing gateway can be reused as-is.
 *
 * All three conditions must hold:
 *   1. the status text contains "Connected",
 *   2. the gateway named in the status text equals GATEWAY_NAME,
 *   3. hasStaleGateway() accepts the gateway-info text.
 *
 * @param {string} [statusOutput=""] - Status text to inspect.
 * @param {string} [gwInfoOutput=""] - Gateway-info text to inspect.
 * @returns {boolean} true only when every condition above is met.
 */
function isGatewayHealthy(statusOutput = "", gwInfoOutput = "") {
  if (typeof statusOutput !== "string" || !statusOutput.includes("Connected")) {
    return false;
  }
  if (getActiveGatewayName(statusOutput) !== GATEWAY_NAME) {
    return false;
  }
  return hasStaleGateway(gwInfoOutput);
}
209+
188210
function streamSandboxCreate(command, env = process.env, options = {}) {
189211
const child = spawn("bash", ["-lc", command], {
190212
cwd: ROOT,
@@ -1237,8 +1259,16 @@ async function preflight() {
12371259
// A previous onboard run may have left the gateway container and port
12381260
// forward running. If a NemoClaw-owned gateway is still present, tear
12391261
// it down so the port check below doesn't fail on our own leftovers.
1262+
const gatewayStatus = runCaptureOpenshell(["status"], { ignoreError: true });
12401263
const gwInfo = runCaptureOpenshell(["gateway", "info", "-g", GATEWAY_NAME], { ignoreError: true });
1241-
if (hasStaleGateway(gwInfo)) {
1264+
const healthyGateway = isGatewayHealthy(gatewayStatus, gwInfo);
1265+
if (healthyGateway) {
1266+
console.log(" Reusing existing NemoClaw gateway...");
1267+
runOpenshell(["forward", "stop", "18789"], { ignoreError: true });
1268+
runOpenshell(["gateway", "select", GATEWAY_NAME], { ignoreError: true });
1269+
process.env.OPENSHELL_GATEWAY = GATEWAY_NAME;
1270+
console.log(" ✓ Existing gateway selected");
1271+
} else if (hasStaleGateway(gwInfo)) {
12421272
console.log(" Cleaning up previous NemoClaw session...");
12431273
runOpenshell(["forward", "stop", "18789"], { ignoreError: true });
12441274
runOpenshell(["gateway", "destroy", "-g", GATEWAY_NAME], { ignoreError: true });
@@ -1251,6 +1281,10 @@ async function preflight() {
12511281
{ port: 18789, label: "NemoClaw dashboard" },
12521282
];
12531283
for (const { port, label } of requiredPorts) {
1284+
if (port === 8080 && healthyGateway) {
1285+
console.log(` ✓ Port ${port} already in use by active NemoClaw gateway (${label})`);
1286+
continue;
1287+
}
12541288
const portCheck = await checkPortAvailable(port);
12551289
if (!portCheck.ok) {
12561290
console.error("");
@@ -1294,13 +1328,32 @@ async function preflight() {
12941328
return gpu;
12951329
}
12961330

1331+
// ── Gateway cleanup ──────────────────────────────────────────────
1332+
1333+
function destroyGateway() {
1334+
runOpenshell(["gateway", "destroy", "-g", GATEWAY_NAME], { ignoreError: true });
1335+
// openshell gateway destroy doesn't remove Docker volumes, which leaves
1336+
// corrupted cluster state that breaks the next gateway start. Clean them up.
1337+
run(`docker volume ls -q --filter "name=openshell-cluster-${GATEWAY_NAME}" | grep . && docker volume ls -q --filter "name=openshell-cluster-${GATEWAY_NAME}" | xargs docker volume rm || true`, { ignoreError: true });
1338+
}
1339+
12971340
// ── Step 2: Gateway ──────────────────────────────────────────────
12981341

1299-
async function startGateway(_gpu) {
1342+
async function startGatewayWithOptions(_gpu, { exitOnFailure = true } = {}) {
13001343
step(3, 7, "Starting OpenShell gateway");
13011344

1302-
// Destroy old gateway
1303-
runOpenshell(["gateway", "destroy", "-g", GATEWAY_NAME], { ignoreError: true });
1345+
const gatewayStatus = runCaptureOpenshell(["status"], { ignoreError: true });
1346+
const gwInfo = runCaptureOpenshell(["gateway", "info", "-g", GATEWAY_NAME], { ignoreError: true });
1347+
if (isGatewayHealthy(gatewayStatus, gwInfo)) {
1348+
console.log(" ✓ Reusing existing gateway");
1349+
runOpenshell(["gateway", "select", GATEWAY_NAME], { ignoreError: true });
1350+
process.env.OPENSHELL_GATEWAY = GATEWAY_NAME;
1351+
return;
1352+
}
1353+
1354+
if (hasStaleGateway(gwInfo)) {
1355+
runOpenshell(["gateway", "destroy", "-g", GATEWAY_NAME], { ignoreError: true });
1356+
}
13041357

13051358
const gwArgs = ["--name", GATEWAY_NAME];
13061359
// Do NOT pass --gpu here. On DGX Spark (and most GPU hosts), inference is
@@ -1319,18 +1372,33 @@ async function startGateway(_gpu) {
13191372
console.log(` Using pinned OpenShell gateway image: ${stableGatewayImage}`);
13201373
}
13211374

1322-
runOpenshell(["gateway", "start", ...gwArgs], { ignoreError: false, env: gatewayEnv });
1375+
const startResult = runOpenshell(["gateway", "start", ...gwArgs], { ignoreError: true, env: gatewayEnv });
1376+
if (startResult.status !== 0) {
1377+
console.error(" Gateway failed to start. Cleaning up stale state...");
1378+
destroyGateway();
1379+
if (exitOnFailure) {
1380+
console.error(" Stale state removed. Please rerun: nemoclaw onboard");
1381+
process.exit(1);
1382+
}
1383+
throw new Error("Gateway failed to start");
1384+
}
13231385

13241386
// Verify health
13251387
for (let i = 0; i < 5; i++) {
13261388
const status = runCaptureOpenshell(["status"], { ignoreError: true });
1327-
if (status.includes("Connected")) {
1389+
const gwInfo = runCaptureOpenshell(["gateway", "info", "-g", GATEWAY_NAME], { ignoreError: true });
1390+
if (isGatewayHealthy(status, gwInfo)) {
13281391
console.log(" ✓ Gateway is healthy");
13291392
break;
13301393
}
13311394
if (i === 4) {
1332-
console.error(" Gateway failed to start. Run: openshell gateway info");
1333-
process.exit(1);
1395+
console.error(" Gateway health check failed. Cleaning up stale state...");
1396+
destroyGateway();
1397+
if (exitOnFailure) {
1398+
console.error(" Stale state removed. Please rerun: nemoclaw onboard");
1399+
process.exit(1);
1400+
}
1401+
throw new Error("Gateway failed to start");
13341402
}
13351403
sleep(2);
13361404
}
@@ -1347,6 +1415,14 @@ async function startGateway(_gpu) {
13471415
process.env.OPENSHELL_GATEWAY = GATEWAY_NAME;
13481416
}
13491417

1418+
async function startGateway(_gpu) {
1419+
return startGatewayWithOptions(_gpu, { exitOnFailure: true });
1420+
}
1421+
1422+
async function startGatewayForRecovery(_gpu) {
1423+
return startGatewayWithOptions(_gpu, { exitOnFailure: false });
1424+
}
1425+
13501426
// ── Step 3: Sandbox ──────────────────────────────────────────────
13511427

13521428
async function createSandbox(gpu, model, provider, preferredInferenceApi = null) {
@@ -1398,6 +1474,7 @@ async function createSandbox(gpu, model, provider, preferredInferenceApi = null)
13981474
run(`cp -r "${path.join(ROOT, "nemoclaw-blueprint")}" "${buildCtx}/nemoclaw-blueprint"`);
13991475
run(`cp -r "${path.join(ROOT, "scripts")}" "${buildCtx}/scripts"`);
14001476
run(`rm -rf "${buildCtx}/nemoclaw/node_modules"`, { ignoreError: true });
1477+
run(`bash "${buildCtx}/scripts/clean-staged-tree.sh" "${buildCtx}/nemoclaw-blueprint"`, { ignoreError: true });
14011478

14021479
// Create sandbox (use -- echo to avoid dropping into interactive shell)
14031480
// Pass the base policy so sandbox starts in proxy mode (required for policy updates later)
@@ -1534,9 +1611,7 @@ async function setupNim(gpu) {
15341611
const options = [];
15351612
options.push({
15361613
key: "build",
1537-
label:
1538-
"NVIDIA Endpoints" +
1539-
(!ollamaRunning && !(EXPERIMENTAL && vllmRunning) ? " (recommended)" : ""),
1614+
label: "NVIDIA Endpoints",
15401615
});
15411616
options.push({ key: "openai", label: "OpenAI" });
15421617
options.push({ key: "custom", label: "Other OpenAI-compatible endpoint" });
@@ -2138,7 +2213,7 @@ async function setupPolicies(sandboxName) {
21382213
// ── Dashboard ────────────────────────────────────────────────────
21392214

21402215
const CONTROL_UI_PORT = 18789;
2141-
const CONTROL_UI_CHAT_PATH = "/chat?session=main";
2216+
const CONTROL_UI_PATH = "/";
21422217

21432218
function findOpenclawJsonPath(dir) {
21442219
if (!fs.existsSync(dir)) return null;
@@ -2184,17 +2259,13 @@ function fetchGatewayAuthTokenFromSandbox(sandboxName) {
21842259
}
21852260
}
21862261

2187-
function buildControlUiChatUrls(token) {
2262+
function buildControlUiUrls(token) {
21882263
const hash = token ? `#token=${token}` : "";
2189-
const pathChat = `${CONTROL_UI_CHAT_PATH}${hash}`;
2190-
const bases = [
2191-
`http://127.0.0.1:${CONTROL_UI_PORT}`,
2192-
`http://localhost:${CONTROL_UI_PORT}`,
2193-
];
2264+
const baseUrl = `http://127.0.0.1:${CONTROL_UI_PORT}`;
2265+
const urls = [`${baseUrl}${CONTROL_UI_PATH}${hash}`];
21942266
const chatUi = (process.env.CHAT_UI_URL || "").trim().replace(/\/$/, "");
2195-
const urls = bases.map((b) => `${b}${pathChat}`);
2196-
if (chatUi && /^https?:\/\//i.test(chatUi) && !bases.includes(chatUi)) {
2197-
urls.push(`${chatUi}${pathChat}`);
2267+
if (chatUi && /^https?:\/\//i.test(chatUi) && chatUi !== baseUrl) {
2268+
urls.push(`${chatUi}${CONTROL_UI_PATH}${hash}`);
21982269
}
21992270
return [...new Set(urls)];
22002271
}
@@ -2222,22 +2293,26 @@ function printDashboard(sandboxName, model, provider, nimContainer = null) {
22222293
console.log(` Model ${model} (${providerLabel})`);
22232294
console.log(` NIM ${nimLabel}`);
22242295
console.log(` ${"─".repeat(50)}`);
2225-
console.log(` Next:`);
2296+
console.log(` Run: nemoclaw ${sandboxName} connect`);
2297+
console.log(` Status: nemoclaw ${sandboxName} status`);
2298+
console.log(` Logs: nemoclaw ${sandboxName} logs --follow`);
2299+
console.log("");
22262300
if (token) {
2227-
note(" URLs below embed the gateway token — treat them like a password.");
2228-
console.log(` Control UI: copy one line into your browser (port ${CONTROL_UI_PORT} must be forwarded):`);
2229-
for (const u of buildControlUiChatUrls(token)) {
2230-
console.log(` ${u}`);
2301+
console.log(" OpenClaw UI (tokenized URL; treat it like a password)");
2302+
console.log(` Port ${CONTROL_UI_PORT} must be forwarded before opening this URL.`);
2303+
for (const url of buildControlUiUrls(token)) {
2304+
console.log(` ${url}`);
22312305
}
22322306
} else {
22332307
note(" Could not read gateway token from the sandbox (download failed).");
2234-
console.log(` Control UI: http://127.0.0.1:${CONTROL_UI_PORT}${CONTROL_UI_CHAT_PATH}`);
2308+
console.log(" OpenClaw UI");
2309+
console.log(` Port ${CONTROL_UI_PORT} must be forwarded before opening this URL.`);
2310+
for (const url of buildControlUiUrls()) {
2311+
console.log(` ${url}`);
2312+
}
22352313
console.log(` Token: nemoclaw ${sandboxName} connect → jq -r '.gateway.auth.token' /sandbox/.openclaw/openclaw.json`);
22362314
console.log(` append #token=<token> to the URL, or see /tmp/gateway.log inside the sandbox.`);
22372315
}
2238-
console.log(` Run: nemoclaw ${sandboxName} connect`);
2239-
console.log(` Status: nemoclaw ${sandboxName} status`);
2240-
console.log(` Logs: nemoclaw ${sandboxName} logs --follow`);
22412316
console.log(` ${"─".repeat(50)}`);
22422317
console.log("");
22432318
}
@@ -2280,12 +2355,16 @@ module.exports = {
22802355
getInstalledOpenshellVersion,
22812356
getStableGatewayImageRef,
22822357
hasStaleGateway,
2358+
isGatewayHealthy,
22832359
isSandboxReady,
22842360
onboard,
2361+
preflight,
22852362
pruneStaleSandboxEntry,
22862363
runCaptureOpenshell,
22872364
setupInference,
22882365
setupNim,
2366+
startGateway,
2367+
startGatewayForRecovery,
22892368
writeSandboxConfigSyncFile,
22902369
patchStagedDockerfile,
22912370
};

0 commit comments

Comments
 (0)