Skip to content

Commit 9450827

Browse files
committed
feat: expand provider onboarding and validation
1 parent a1e7485 commit 9450827

30 files changed

Lines changed: 3768 additions & 559 deletions

.dockerignore

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
node_modules
22
/dist
3-
!nemoclaw/dist
43
.git
54
*.pyc
65
__pycache__

Dockerfile

Lines changed: 27 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,12 @@ RUN chmod +x /usr/local/bin/nemoclaw-start
8080
# Build args for config that varies per deployment.
8181
# nemoclaw onboard passes these at image build time.
8282
ARG NEMOCLAW_MODEL=nvidia/nemotron-3-super-120b-a12b
83+
ARG NEMOCLAW_PROVIDER_KEY=nvidia
84+
ARG NEMOCLAW_PRIMARY_MODEL_REF=nvidia/nemotron-3-super-120b-a12b
8385
ARG CHAT_UI_URL=http://127.0.0.1:18789
86+
ARG NEMOCLAW_INFERENCE_BASE_URL=https://inference.local/v1
87+
ARG NEMOCLAW_INFERENCE_API=openai-completions
88+
ARG NEMOCLAW_INFERENCE_COMPAT_B64=e30=
8489
# Unique per build to ensure each image gets a fresh auth token.
8590
# Pass --build-arg NEMOCLAW_BUILD_ID=$(date +%s) to bust the cache.
8691
ARG NEMOCLAW_BUILD_ID=default
@@ -89,7 +94,12 @@ ARG NEMOCLAW_BUILD_ID=default
8994
# via os.environ, never via string interpolation into Python source code.
9095
# Direct ARG interpolation into python3 -c is a code injection vector (C-2).
9196
ENV NEMOCLAW_MODEL=${NEMOCLAW_MODEL} \
92-
CHAT_UI_URL=${CHAT_UI_URL}
97+
NEMOCLAW_PROVIDER_KEY=${NEMOCLAW_PROVIDER_KEY} \
98+
NEMOCLAW_PRIMARY_MODEL_REF=${NEMOCLAW_PRIMARY_MODEL_REF} \
99+
CHAT_UI_URL=${CHAT_UI_URL} \
100+
NEMOCLAW_INFERENCE_BASE_URL=${NEMOCLAW_INFERENCE_BASE_URL} \
101+
NEMOCLAW_INFERENCE_API=${NEMOCLAW_INFERENCE_API} \
102+
NEMOCLAW_INFERENCE_COMPAT_B64=${NEMOCLAW_INFERENCE_COMPAT_B64}
93103

94104
WORKDIR /sandbox
95105
USER sandbox
@@ -100,30 +110,30 @@ USER sandbox
100110
# Build args (NEMOCLAW_MODEL, CHAT_UI_URL) customize per deployment.
101111
# Auth token is generated per build so each image has a unique token.
102112
RUN python3 -c "\
103-
import json, os, secrets; \
113+
import base64, json, os, secrets; \
104114
from urllib.parse import urlparse; \
105115
model = os.environ['NEMOCLAW_MODEL']; \
106116
chat_ui_url = os.environ['CHAT_UI_URL']; \
117+
provider_key = os.environ['NEMOCLAW_PROVIDER_KEY']; \
118+
primary_model_ref = os.environ['NEMOCLAW_PRIMARY_MODEL_REF']; \
119+
inference_base_url = os.environ['NEMOCLAW_INFERENCE_BASE_URL']; \
120+
inference_api = os.environ['NEMOCLAW_INFERENCE_API']; \
121+
inference_compat = json.loads(base64.b64decode(os.environ['NEMOCLAW_INFERENCE_COMPAT_B64']).decode('utf-8')); \
107122
parsed = urlparse(chat_ui_url); \
108123
chat_origin = f'{parsed.scheme}://{parsed.netloc}' if parsed.scheme and parsed.netloc else 'http://127.0.0.1:18789'; \
109124
origins = ['http://127.0.0.1:18789']; \
110125
origins = list(dict.fromkeys(origins + [chat_origin])); \
126+
providers = { \
127+
provider_key: { \
128+
'baseUrl': inference_base_url, \
129+
'apiKey': 'unused', \
130+
'api': inference_api, \
131+
'models': [{**({'compat': inference_compat} if inference_compat else {}), 'id': model, 'name': primary_model_ref, 'reasoning': False, 'input': ['text'], 'cost': {'input': 0, 'output': 0, 'cacheRead': 0, 'cacheWrite': 0}, 'contextWindow': 131072, 'maxTokens': 4096}] \
132+
} \
133+
}; \
111134
config = { \
112-
'agents': {'defaults': {'model': {'primary': f'inference/{model}'}}}, \
113-
'models': {'mode': 'merge', 'providers': { \
114-
'nvidia': { \
115-
'baseUrl': 'https://inference.local/v1', \
116-
'apiKey': 'openshell-managed', \
117-
'api': 'openai-completions', \
118-
'models': [{'id': model.split('/')[-1], 'name': model, 'reasoning': False, 'input': ['text'], 'cost': {'input': 0, 'output': 0, 'cacheRead': 0, 'cacheWrite': 0}, 'contextWindow': 131072, 'maxTokens': 4096}] \
119-
}, \
120-
'inference': { \
121-
'baseUrl': 'https://inference.local/v1', \
122-
'apiKey': 'unused', \
123-
'api': 'openai-completions', \
124-
'models': [{'id': model, 'name': model, 'reasoning': False, 'input': ['text'], 'cost': {'input': 0, 'output': 0, 'cacheRead': 0, 'cacheWrite': 0}, 'contextWindow': 131072, 'maxTokens': 4096}] \
125-
} \
126-
}}, \
135+
'agents': {'defaults': {'model': {'primary': primary_model_ref}}}, \
136+
'models': {'mode': 'merge', 'providers': providers}, \
127137
'channels': {'defaults': {'configWrites': False}}, \
128138
'gateway': { \
129139
'mode': 'local', \

README.md

Lines changed: 23 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ When the install completes, a summary confirms the running environment:
8686
```text
8787
──────────────────────────────────────────────────
8888
Sandbox my-assistant (Landlock + seccomp + netns)
89-
Model nvidia/nemotron-3-super-120b-a12b (NVIDIA Endpoint API)
89+
Model nvidia/nemotron-3-super-120b-a12b (NVIDIA Endpoints)
9090
──────────────────────────────────────────────────
9191
Run: nemoclaw my-assistant connect
9292
Status: nemoclaw my-assistant status
@@ -162,14 +162,14 @@ curl -fsSL https://raw.githubusercontent.com/NVIDIA/NemoClaw/refs/heads/main/uni
162162

163163
## How It Works
164164

165-
NemoClaw installs the NVIDIA OpenShell runtime and Nemotron models, then uses a versioned blueprint to create a sandboxed environment where every network request, file access, and inference call is governed by declarative policy. The `nemoclaw` CLI orchestrates the full stack: OpenShell gateway, sandbox, inference provider, and network policy.
165+
NemoClaw installs the NVIDIA OpenShell runtime, then creates a sandboxed OpenClaw environment where every network request, file access, and inference call is governed by declarative policy. The `nemoclaw` CLI orchestrates the full stack: OpenShell gateway, sandbox, inference provider, and network policy.
166166

167167
| Component | Role |
168168
|------------------|-------------------------------------------------------------------------------------------|
169169
| **Plugin** | TypeScript CLI commands for launch, connect, status, and logs. |
170170
| **Blueprint** | Versioned Python artifact that orchestrates sandbox creation, policy, and inference setup. |
171171
| **Sandbox** | Isolated OpenShell container running OpenClaw with policy-enforced egress and filesystem. |
172-
| **Inference** | NVIDIA Endpoint model calls, routed through the OpenShell gateway, transparent to the agent. |
172+
| **Inference** | Model calls routed to the configured provider through the OpenShell gateway, transparent to the agent. |
173173

174174
The blueprint lifecycle follows four stages: resolve the artifact, verify its digest, plan the resources, and apply through the OpenShell CLI.
175175

@@ -179,15 +179,28 @@ When something goes wrong, errors may originate from either NemoClaw or the Open
179179

180180
## Inference
181181

182-
Inference requests from the agent never leave the sandbox directly. OpenShell intercepts every call and routes it to the NVIDIA Endpoint provider.
182+
Inference requests from the agent never leave the sandbox directly. OpenShell intercepts every call and routes it to the provider you selected during onboarding.
183183

184-
| Provider | Model | Use Case |
185-
|--------------|--------------------------------------|-------------------------------------------------|
186-
| NVIDIA Endpoint | `nvidia/nemotron-3-super-120b-a12b` | Production. Requires an NVIDIA API key. |
184+
Supported non-experimental onboarding paths:
187185

188-
Get an API key from [build.nvidia.com](https://build.nvidia.com). The `nemoclaw onboard` command prompts for this key during setup.
186+
| Provider | Notes |
187+
|---|---|
188+
| NVIDIA Endpoints | Curated hosted models on `integrate.api.nvidia.com`. |
189+
| OpenAI | Curated GPT models plus `Other...` for manual model entry. |
190+
| Other OpenAI-compatible endpoint | For proxies and compatible gateways. |
191+
| Anthropic | Curated Claude models plus `Other...` for manual model entry. |
192+
| Other Anthropic-compatible endpoint | For Claude proxies and compatible gateways. |
193+
| Google Gemini | Google's OpenAI-compatible endpoint. |
189194

190-
Local inference options such as Ollama and vLLM are still experimental. On macOS, they also depend on OpenShell host-routing support in addition to the local service itself being reachable on the host.
195+
During onboarding, NemoClaw validates the selected provider and model before it creates the sandbox:
196+
197+
- OpenAI-compatible providers: tries `/responses` first, then `/chat/completions`
198+
- Anthropic-compatible providers: tries `/v1/messages`
199+
- If validation fails, the wizard prompts you to fix the selection before continuing
200+
201+
Credentials stay on the host in `~/.nemoclaw/credentials.json`. The sandbox only sees the routed `inference.local` endpoint, not your raw provider key.
202+
203+
Local Ollama is supported in the standard onboarding flow. Local vLLM remains experimental. On macOS, local host-routed inference additionally depends on OpenShell host-routing support, as well as the local service itself being reachable on the host.
191204

192205
---
193206

@@ -252,7 +265,7 @@ Refer to the documentation for more information on NemoClaw.
252265
- [Overview](https://docs.nvidia.com/nemoclaw/latest/about/overview.html): Learn what NemoClaw does and how it fits together.
253266
- [How It Works](https://docs.nvidia.com/nemoclaw/latest/about/how-it-works.html): Learn about the plugin, blueprint, and sandbox lifecycle.
254267
- [Architecture](https://docs.nvidia.com/nemoclaw/latest/reference/architecture.html): Learn about the plugin structure, blueprint lifecycle, and sandbox environment.
255-
- [Inference Profiles](https://docs.nvidia.com/nemoclaw/latest/reference/inference-profiles.html): Learn about the NVIDIA Endpoint inference configuration.
268+
- [Inference Profiles](https://docs.nvidia.com/nemoclaw/latest/reference/inference-profiles.html): Learn how NemoClaw configures routed inference providers.
256269
- [Network Policies](https://docs.nvidia.com/nemoclaw/latest/reference/network-policies.html): Learn about egress control and policy customization.
257270
- [CLI Commands](https://docs.nvidia.com/nemoclaw/latest/reference/commands.html): Learn about the full command reference.
258271
- [Troubleshooting](https://docs.nvidia.com/nemoclaw/latest/reference/troubleshooting.html): Troubleshoot common issues and resolution steps.

bin/lib/credentials.js

Lines changed: 92 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
const fs = require("fs");
55
const path = require("path");
66
const readline = require("readline");
7-
const { execSync } = require("child_process");
87

98
const CREDS_DIR = path.join(process.env.HOME || "/tmp", ".nemoclaw");
109
const CREDS_FILE = path.join(CREDS_DIR, "credentials.json");
@@ -31,8 +30,97 @@ function getCredential(key) {
3130
return creds[key] || null;
3231
}
3332

34-
function prompt(question) {
33+
function promptSecret(question) {
34+
return new Promise((resolve, reject) => {
35+
const input = process.stdin;
36+
const output = process.stderr;
37+
let answer = "";
38+
let rawModeEnabled = false;
39+
let finished = false;
40+
41+
function cleanup() {
42+
input.removeListener("data", onData);
43+
if (rawModeEnabled && typeof input.setRawMode === "function") {
44+
input.setRawMode(false);
45+
}
46+
if (typeof input.pause === "function") {
47+
input.pause();
48+
}
49+
}
50+
51+
function finish(fn, value) {
52+
if (finished) return;
53+
finished = true;
54+
cleanup();
55+
output.write("\n");
56+
fn(value);
57+
}
58+
59+
function onData(chunk) {
60+
const text = chunk.toString("utf8");
61+
for (let i = 0; i < text.length; i += 1) {
62+
const ch = text[i];
63+
64+
if (ch === "\u0003") {
65+
finish(reject, Object.assign(new Error("Prompt interrupted"), { code: "SIGINT" }));
66+
return;
67+
}
68+
69+
if (ch === "\r" || ch === "\n") {
70+
finish(resolve, answer.trim());
71+
return;
72+
}
73+
74+
if (ch === "\u0008" || ch === "\u007f") {
75+
answer = answer.slice(0, -1);
76+
continue;
77+
}
78+
79+
if (ch === "\u001b") {
80+
// Ignore terminal escape/control sequences such as Delete, arrows,
81+
// Home/End, etc. while leaving the buffered secret untouched.
82+
const rest = text.slice(i);
83+
const match = rest.match(/^\u001b(?:\[[0-9;?]*[~A-Za-z]|\][^\u0007]*\u0007|.)/);
84+
if (match) {
85+
i += match[0].length - 1;
86+
}
87+
continue;
88+
}
89+
90+
if (ch >= " ") {
91+
answer += ch;
92+
}
93+
}
94+
}
95+
96+
output.write(question);
97+
input.setEncoding("utf8");
98+
if (typeof input.resume === "function") {
99+
input.resume();
100+
}
101+
if (typeof input.setRawMode === "function") {
102+
input.setRawMode(true);
103+
rawModeEnabled = true;
104+
}
105+
input.on("data", onData);
106+
});
107+
}
108+
109+
function prompt(question, opts = {}) {
35110
return new Promise((resolve) => {
111+
const silent = opts.secret === true && process.stdin.isTTY && process.stderr.isTTY;
112+
if (silent) {
113+
promptSecret(question)
114+
.then(resolve)
115+
.catch((err) => {
116+
if (err && err.code === "SIGINT") {
117+
process.kill(process.pid, "SIGINT");
118+
return;
119+
}
120+
throw err;
121+
});
122+
return;
123+
}
36124
const rl = readline.createInterface({ input: process.stdin, output: process.stderr });
37125
rl.question(question, (answer) => {
38126
rl.close();
@@ -67,7 +155,7 @@ async function ensureApiKey() {
67155
console.log(" └─────────────────────────────────────────────────────────────────┘");
68156
console.log("");
69157

70-
key = await prompt(" NVIDIA API Key: ");
158+
key = await prompt(" NVIDIA API Key: ", { secret: true });
71159

72160
if (!key || !key.startsWith("nvapi-")) {
73161
console.error(" Invalid key. Must start with nvapi-");
@@ -114,7 +202,7 @@ async function ensureGithubToken() {
114202
console.log(" └──────────────────────────────────────────────────┘");
115203
console.log("");
116204

117-
token = await prompt(" GitHub Token: ");
205+
token = await prompt(" GitHub Token: ", { secret: true });
118206

119207
if (!token) {
120208
console.error(" Token required for deploy (repo is private).");

bin/lib/inference-config.js

Lines changed: 57 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ const { DEFAULT_OLLAMA_MODEL } = require("./local-inference");
1818

1919
function getProviderSelectionConfig(provider, model) {
2020
switch (provider) {
21+
case "nvidia-prod":
2122
case "nvidia-nim":
2223
return {
2324
endpointType: "custom",
@@ -27,7 +28,62 @@ function getProviderSelectionConfig(provider, model) {
2728
profile: DEFAULT_ROUTE_PROFILE,
2829
credentialEnv: DEFAULT_ROUTE_CREDENTIAL_ENV,
2930
provider,
30-
providerLabel: "NVIDIA Endpoint API",
31+
providerLabel: "NVIDIA Endpoints",
32+
};
33+
case "openai-api":
34+
return {
35+
endpointType: "custom",
36+
endpointUrl: INFERENCE_ROUTE_URL,
37+
ncpPartner: null,
38+
model: model || "gpt-5.4",
39+
profile: DEFAULT_ROUTE_PROFILE,
40+
credentialEnv: "OPENAI_API_KEY",
41+
provider,
42+
providerLabel: "OpenAI",
43+
};
44+
case "anthropic-prod":
45+
return {
46+
endpointType: "custom",
47+
endpointUrl: INFERENCE_ROUTE_URL,
48+
ncpPartner: null,
49+
model: model || "claude-sonnet-4-6",
50+
profile: DEFAULT_ROUTE_PROFILE,
51+
credentialEnv: "ANTHROPIC_API_KEY",
52+
provider,
53+
providerLabel: "Anthropic",
54+
};
55+
case "compatible-anthropic-endpoint":
56+
return {
57+
endpointType: "custom",
58+
endpointUrl: INFERENCE_ROUTE_URL,
59+
ncpPartner: null,
60+
model: model || "custom-anthropic-model",
61+
profile: DEFAULT_ROUTE_PROFILE,
62+
credentialEnv: "COMPATIBLE_ANTHROPIC_API_KEY",
63+
provider,
64+
providerLabel: "Other Anthropic-compatible endpoint",
65+
};
66+
case "gemini-api":
67+
return {
68+
endpointType: "custom",
69+
endpointUrl: INFERENCE_ROUTE_URL,
70+
ncpPartner: null,
71+
model: model || "gemini-2.5-flash",
72+
profile: DEFAULT_ROUTE_PROFILE,
73+
credentialEnv: "GEMINI_API_KEY",
74+
provider,
75+
providerLabel: "Google Gemini",
76+
};
77+
case "compatible-endpoint":
78+
return {
79+
endpointType: "custom",
80+
endpointUrl: INFERENCE_ROUTE_URL,
81+
ncpPartner: null,
82+
model: model || "custom-model",
83+
profile: DEFAULT_ROUTE_PROFILE,
84+
credentialEnv: "COMPATIBLE_API_KEY",
85+
provider,
86+
providerLabel: "Other OpenAI-compatible endpoint",
3187
};
3288
case "vllm-local":
3389
return {

0 commit comments

Comments
 (0)