@@ -185,6 +185,28 @@ function hasStaleGateway(gwInfoOutput) {
185185 return typeof gwInfoOutput === "string" && gwInfoOutput . length > 0 && gwInfoOutput . includes ( GATEWAY_NAME ) ;
186186}
187187
// ESC (0x1B), built from its char code so no raw control byte lives in the source.
const ANSI_ESCAPE = String.fromCharCode(27);
// A CSI control sequence as laid out by ECMA-48:
//   ESC "[" , parameter bytes 0x30-0x3F ("0"-"?"), intermediate bytes
//   0x20-0x2F (" "-"/"), then one final byte 0x40-0x7E ("@"-"~").
// This is a superset of the plain `[0-9;]*[A-Za-z]` form, so private-mode
// sequences such as ESC[?25l (hide cursor) are stripped as well.
const ANSI_REGEX = new RegExp(`${ANSI_ESCAPE}\\[[0-?]*[ -/]*[@-~]`, "g");

/**
 * Remove ANSI CSI escape sequences (colors, cursor control) from text.
 *
 * @param {string} [value=""] - Text that may contain escape sequences.
 * @returns {string} The text with every CSI sequence removed; "" when the
 *   argument is not a string (e.g. null from a failed capture).
 */
function stripAnsi(value = "") {
  // The default parameter only covers `undefined`; guard the other
  // non-string cases so callers never hit a TypeError on `.replace`.
  if (typeof value !== "string") {
    return "";
  }
  return value.replace(ANSI_REGEX, "");
}
194+
/**
 * Extract the gateway name from the first "Gateway: <name>" line of a
 * status output, after escape sequences have been stripped.
 *
 * @param {string} [statusOutput=""] - Captured status text.
 * @returns {string} The trimmed gateway name, or "" when the input is not a
 *   non-empty string or no "Gateway:" line is present.
 */
function getActiveGatewayName(statusOutput = "") {
  const isUsable = typeof statusOutput === "string" && statusOutput.length > 0;
  if (!isUsable) {
    return "";
  }
  const plainText = stripAnsi(statusOutput);
  // Multiline mode: the "Gateway:" label may sit on any line of the output.
  const found = /^\s*Gateway:\s+(.+?)\s*$/m.exec(plainText);
  if (found === null) {
    return "";
  }
  return found[1].trim();
}
203+
/**
 * Decide whether the existing gateway can be reused as-is.
 *
 * All three conditions must hold: the status text contains "Connected",
 * the gateway named in the status text equals GATEWAY_NAME, and
 * hasStaleGateway() accepts the gateway-info output.
 *
 * @param {string} [statusOutput=""] - Captured status text.
 * @param {string} [gwInfoOutput=""] - Captured gateway-info text.
 * @returns {boolean} true only when every check passes.
 */
function isGatewayHealthy(statusOutput = "", gwInfoOutput = "") {
  // NOTE(review): this is a plain substring check on the raw text; confirm
  // the status output never prints "Connected" in a negative phrase.
  if (typeof statusOutput !== "string" || !statusOutput.includes("Connected")) {
    return false;
  }
  if (getActiveGatewayName(statusOutput) !== GATEWAY_NAME) {
    return false;
  }
  return hasStaleGateway(gwInfoOutput);
}
209+
188210function streamSandboxCreate ( command , env = process . env , options = { } ) {
189211 const child = spawn ( "bash" , [ "-lc" , command ] , {
190212 cwd : ROOT ,
@@ -1237,8 +1259,16 @@ async function preflight() {
12371259 // A previous onboard run may have left the gateway container and port
12381260 // forward running. If a NemoClaw-owned gateway is still present, tear
12391261 // it down so the port check below doesn't fail on our own leftovers.
1262+ const gatewayStatus = runCaptureOpenshell ( [ "status" ] , { ignoreError : true } ) ;
12401263 const gwInfo = runCaptureOpenshell ( [ "gateway" , "info" , "-g" , GATEWAY_NAME ] , { ignoreError : true } ) ;
1241- if ( hasStaleGateway ( gwInfo ) ) {
1264+ const healthyGateway = isGatewayHealthy ( gatewayStatus , gwInfo ) ;
1265+ if ( healthyGateway ) {
1266+ console . log ( " Reusing existing NemoClaw gateway..." ) ;
1267+ runOpenshell ( [ "forward" , "stop" , "18789" ] , { ignoreError : true } ) ;
1268+ runOpenshell ( [ "gateway" , "select" , GATEWAY_NAME ] , { ignoreError : true } ) ;
1269+ process . env . OPENSHELL_GATEWAY = GATEWAY_NAME ;
1270+ console . log ( " ✓ Existing gateway selected" ) ;
1271+ } else if ( hasStaleGateway ( gwInfo ) ) {
12421272 console . log ( " Cleaning up previous NemoClaw session..." ) ;
12431273 runOpenshell ( [ "forward" , "stop" , "18789" ] , { ignoreError : true } ) ;
12441274 runOpenshell ( [ "gateway" , "destroy" , "-g" , GATEWAY_NAME ] , { ignoreError : true } ) ;
@@ -1251,6 +1281,10 @@ async function preflight() {
12511281 { port : 18789 , label : "NemoClaw dashboard" } ,
12521282 ] ;
12531283 for ( const { port, label } of requiredPorts ) {
1284+ if ( port === 8080 && healthyGateway ) {
1285+ console . log ( ` ✓ Port ${ port } already in use by active NemoClaw gateway (${ label } )` ) ;
1286+ continue ;
1287+ }
12541288 const portCheck = await checkPortAvailable ( port ) ;
12551289 if ( ! portCheck . ok ) {
12561290 console . error ( "" ) ;
@@ -1294,13 +1328,32 @@ async function preflight() {
12941328 return gpu ;
12951329}
12961330
1331+ // ── Gateway cleanup ──────────────────────────────────────────────
1332+
/**
 * Destroy the gateway named GATEWAY_NAME and remove its Docker volumes.
 * Both steps are best-effort: each command is run with ignoreError set.
 */
function destroyGateway() {
  runOpenshell(["gateway", "destroy", "-g", GATEWAY_NAME], { ignoreError: true });
  // openshell gateway destroy doesn't remove Docker volumes, which leaves
  // corrupted cluster state that breaks the next gateway start. Clean them up.
  const listVolumes = `docker volume ls -q --filter "name=openshell-cluster-${GATEWAY_NAME}"`;
  // `grep .` gates the removal so `docker volume rm` only runs when the
  // listing printed at least one volume name.
  run(`${listVolumes} | grep . && ${listVolumes} | xargs docker volume rm || true`, { ignoreError: true });
}
1339+
12971340// ── Step 2: Gateway ──────────────────────────────────────────────
12981341
1299- async function startGateway ( _gpu ) {
1342+ async function startGatewayWithOptions ( _gpu , { exitOnFailure = true } = { } ) {
13001343 step ( 3 , 7 , "Starting OpenShell gateway" ) ;
13011344
1302- // Destroy old gateway
1303- runOpenshell ( [ "gateway" , "destroy" , "-g" , GATEWAY_NAME ] , { ignoreError : true } ) ;
1345+ const gatewayStatus = runCaptureOpenshell ( [ "status" ] , { ignoreError : true } ) ;
1346+ const gwInfo = runCaptureOpenshell ( [ "gateway" , "info" , "-g" , GATEWAY_NAME ] , { ignoreError : true } ) ;
1347+ if ( isGatewayHealthy ( gatewayStatus , gwInfo ) ) {
1348+ console . log ( " ✓ Reusing existing gateway" ) ;
1349+ runOpenshell ( [ "gateway" , "select" , GATEWAY_NAME ] , { ignoreError : true } ) ;
1350+ process . env . OPENSHELL_GATEWAY = GATEWAY_NAME ;
1351+ return ;
1352+ }
1353+
1354+ if ( hasStaleGateway ( gwInfo ) ) {
1355+ runOpenshell ( [ "gateway" , "destroy" , "-g" , GATEWAY_NAME ] , { ignoreError : true } ) ;
1356+ }
13041357
13051358 const gwArgs = [ "--name" , GATEWAY_NAME ] ;
13061359 // Do NOT pass --gpu here. On DGX Spark (and most GPU hosts), inference is
@@ -1319,18 +1372,33 @@ async function startGateway(_gpu) {
13191372 console . log ( ` Using pinned OpenShell gateway image: ${ stableGatewayImage } ` ) ;
13201373 }
13211374
1322- runOpenshell ( [ "gateway" , "start" , ...gwArgs ] , { ignoreError : false , env : gatewayEnv } ) ;
1375+ const startResult = runOpenshell ( [ "gateway" , "start" , ...gwArgs ] , { ignoreError : true , env : gatewayEnv } ) ;
1376+ if ( startResult . status !== 0 ) {
1377+ console . error ( " Gateway failed to start. Cleaning up stale state..." ) ;
1378+ destroyGateway ( ) ;
1379+ if ( exitOnFailure ) {
1380+ console . error ( " Stale state removed. Please rerun: nemoclaw onboard" ) ;
1381+ process . exit ( 1 ) ;
1382+ }
1383+ throw new Error ( "Gateway failed to start" ) ;
1384+ }
13231385
13241386 // Verify health
13251387 for ( let i = 0 ; i < 5 ; i ++ ) {
13261388 const status = runCaptureOpenshell ( [ "status" ] , { ignoreError : true } ) ;
1327- if ( status . includes ( "Connected" ) ) {
1389+ const gwInfo = runCaptureOpenshell ( [ "gateway" , "info" , "-g" , GATEWAY_NAME ] , { ignoreError : true } ) ;
1390+ if ( isGatewayHealthy ( status , gwInfo ) ) {
13281391 console . log ( " ✓ Gateway is healthy" ) ;
13291392 break ;
13301393 }
13311394 if ( i === 4 ) {
1332- console . error ( " Gateway failed to start. Run: openshell gateway info" ) ;
1333- process . exit ( 1 ) ;
1395+ console . error ( " Gateway health check failed. Cleaning up stale state..." ) ;
1396+ destroyGateway ( ) ;
1397+ if ( exitOnFailure ) {
1398+ console . error ( " Stale state removed. Please rerun: nemoclaw onboard" ) ;
1399+ process . exit ( 1 ) ;
1400+ }
1401+ throw new Error ( "Gateway failed to start" ) ;
13341402 }
13351403 sleep ( 2 ) ;
13361404 }
@@ -1347,6 +1415,14 @@ async function startGateway(_gpu) {
13471415 process . env . OPENSHELL_GATEWAY = GATEWAY_NAME ;
13481416}
13491417
/**
 * Start the gateway with exitOnFailure enabled, so a failed start ends
 * the process instead of surfacing an error to the caller.
 *
 * @param {*} _gpu - Forwarded unchanged to startGatewayWithOptions.
 * @returns {Promise<*>} Whatever startGatewayWithOptions resolves to.
 */
async function startGateway(_gpu) {
  const exitingOptions = { exitOnFailure: true };
  return startGatewayWithOptions(_gpu, exitingOptions);
}
1421+
/**
 * Start the gateway with exitOnFailure disabled, so a failed start is
 * reported as a rejected promise that the caller can handle.
 *
 * @param {*} _gpu - Forwarded unchanged to startGatewayWithOptions.
 * @returns {Promise<*>} Whatever startGatewayWithOptions resolves to.
 */
async function startGatewayForRecovery(_gpu) {
  const recoverableOptions = { exitOnFailure: false };
  return startGatewayWithOptions(_gpu, recoverableOptions);
}
1425+
13501426// ── Step 3: Sandbox ──────────────────────────────────────────────
13511427
13521428async function createSandbox ( gpu , model , provider , preferredInferenceApi = null ) {
@@ -1398,6 +1474,7 @@ async function createSandbox(gpu, model, provider, preferredInferenceApi = null)
13981474 run ( `cp -r "${ path . join ( ROOT , "nemoclaw-blueprint" ) } " "${ buildCtx } /nemoclaw-blueprint"` ) ;
13991475 run ( `cp -r "${ path . join ( ROOT , "scripts" ) } " "${ buildCtx } /scripts"` ) ;
14001476 run ( `rm -rf "${ buildCtx } /nemoclaw/node_modules"` , { ignoreError : true } ) ;
1477+ run ( `bash "${ buildCtx } /scripts/clean-staged-tree.sh" "${ buildCtx } /nemoclaw-blueprint"` , { ignoreError : true } ) ;
14011478
14021479 // Create sandbox (use -- echo to avoid dropping into interactive shell)
14031480 // Pass the base policy so sandbox starts in proxy mode (required for policy updates later)
@@ -1534,9 +1611,7 @@ async function setupNim(gpu) {
15341611 const options = [ ] ;
15351612 options . push ( {
15361613 key : "build" ,
1537- label :
1538- "NVIDIA Endpoints" +
1539- ( ! ollamaRunning && ! ( EXPERIMENTAL && vllmRunning ) ? " (recommended)" : "" ) ,
1614+ label : "NVIDIA Endpoints" ,
15401615 } ) ;
15411616 options . push ( { key : "openai" , label : "OpenAI" } ) ;
15421617 options . push ( { key : "custom" , label : "Other OpenAI-compatible endpoint" } ) ;
@@ -2138,7 +2213,7 @@ async function setupPolicies(sandboxName) {
21382213// ── Dashboard ────────────────────────────────────────────────────
21392214
21402215const CONTROL_UI_PORT = 18789 ;
2141- const CONTROL_UI_CHAT_PATH = "/chat?session=main " ;
2216+ const CONTROL_UI_PATH = "/" ;
21422217
21432218function findOpenclawJsonPath ( dir ) {
21442219 if ( ! fs . existsSync ( dir ) ) return null ;
@@ -2184,17 +2259,13 @@ function fetchGatewayAuthTokenFromSandbox(sandboxName) {
21842259 }
21852260}
21862261
/**
 * Build the list of Control UI URLs to show the user.
 *
 * The loopback URL on CONTROL_UI_PORT always comes first. When the
 * CHAT_UI_URL environment variable holds an http(s) URL that differs from
 * the loopback base (after trimming and dropping one trailing slash), a
 * second URL built on it is appended.
 *
 * @param {string} [token] - Gateway auth token; when truthy it is appended
 *   to every URL as a `#token=` fragment.
 * @returns {string[]} De-duplicated URLs in insertion order.
 */
function buildControlUiUrls(token) {
  const fragment = token ? `#token=${token}` : "";
  const localBase = `http://127.0.0.1:${CONTROL_UI_PORT}`;
  // A Set keeps insertion order and drops any duplicate entry.
  const candidates = new Set([`${localBase}${CONTROL_UI_PATH}${fragment}`]);
  const override = (process.env.CHAT_UI_URL || "").trim().replace(/\/$/, "");
  const isHttpUrl = /^https?:\/\//i.test(override);
  if (override && isHttpUrl && override !== localBase) {
    candidates.add(`${override}${CONTROL_UI_PATH}${fragment}`);
  }
  return [...candidates];
}
@@ -2222,22 +2293,26 @@ function printDashboard(sandboxName, model, provider, nimContainer = null) {
22222293 console . log ( ` Model ${ model } (${ providerLabel } )` ) ;
22232294 console . log ( ` NIM ${ nimLabel } ` ) ;
22242295 console . log ( ` ${ "─" . repeat ( 50 ) } ` ) ;
2225- console . log ( ` Next:` ) ;
2296+ console . log ( ` Run: nemoclaw ${ sandboxName } connect` ) ;
2297+ console . log ( ` Status: nemoclaw ${ sandboxName } status` ) ;
2298+ console . log ( ` Logs: nemoclaw ${ sandboxName } logs --follow` ) ;
2299+ console . log ( "" ) ;
22262300 if ( token ) {
2227- note ( " URLs below embed the gateway token — treat them like a password. " ) ;
2228- console . log ( ` Control UI: copy one line into your browser (port ${ CONTROL_UI_PORT } must be forwarded): ` ) ;
2229- for ( const u of buildControlUiChatUrls ( token ) ) {
2230- console . log ( ` ${ u } ` ) ;
2301+ console . log ( " OpenClaw UI (tokenized URL; treat it like a password) " ) ;
2302+ console . log ( ` Port ${ CONTROL_UI_PORT } must be forwarded before opening this URL. ` ) ;
2303+ for ( const url of buildControlUiUrls ( token ) ) {
2304+ console . log ( ` ${ url } ` ) ;
22312305 }
22322306 } else {
22332307 note ( " Could not read gateway token from the sandbox (download failed)." ) ;
2234- console . log ( ` Control UI: http://127.0.0.1:${ CONTROL_UI_PORT } ${ CONTROL_UI_CHAT_PATH } ` ) ;
2308+ console . log ( " OpenClaw UI" ) ;
2309+ console . log ( ` Port ${ CONTROL_UI_PORT } must be forwarded before opening this URL.` ) ;
2310+ for ( const url of buildControlUiUrls ( ) ) {
2311+ console . log ( ` ${ url } ` ) ;
2312+ }
22352313 console . log ( ` Token: nemoclaw ${ sandboxName } connect → jq -r '.gateway.auth.token' /sandbox/.openclaw/openclaw.json` ) ;
22362314 console . log ( ` append #token=<token> to the URL, or see /tmp/gateway.log inside the sandbox.` ) ;
22372315 }
2238- console . log ( ` Run: nemoclaw ${ sandboxName } connect` ) ;
2239- console . log ( ` Status: nemoclaw ${ sandboxName } status` ) ;
2240- console . log ( ` Logs: nemoclaw ${ sandboxName } logs --follow` ) ;
22412316 console . log ( ` ${ "─" . repeat ( 50 ) } ` ) ;
22422317 console . log ( "" ) ;
22432318}
@@ -2280,12 +2355,16 @@ module.exports = {
22802355 getInstalledOpenshellVersion,
22812356 getStableGatewayImageRef,
22822357 hasStaleGateway,
2358+ isGatewayHealthy,
22832359 isSandboxReady,
22842360 onboard,
2361+ preflight,
22852362 pruneStaleSandboxEntry,
22862363 runCaptureOpenshell,
22872364 setupInference,
22882365 setupNim,
2366+ startGateway,
2367+ startGatewayForRecovery,
22892368 writeSandboxConfigSyncFile,
22902369 patchStagedDockerfile,
22912370} ;
0 commit comments