diff --git a/.agents/skills/nemoclaw-user-configure-inference/SKILL.md b/.agents/skills/nemoclaw-user-configure-inference/SKILL.md
index ffc9ea1917..22f79a7c53 100644
--- a/.agents/skills/nemoclaw-user-configure-inference/SKILL.md
+++ b/.agents/skills/nemoclaw-user-configure-inference/SKILL.md
@@ -128,6 +128,35 @@ $ openshell inference set --provider compatible-anthropic-endpoint --model <mode
 
 If the provider itself needs to change, rerun `nemoclaw onboard`.
 
+#### Switching from Responses API to Chat Completions
+
+If onboarding selected `/v1/responses` but the agent fails at runtime (for
+example, because the backend does not emit the streaming events OpenClaw
+requires), re-run onboarding so the wizard re-probes the endpoint and bakes
+the correct API path into the image:
+
+```console
+$ nemoclaw onboard
+```
+
+Select the same provider and endpoint again.
+The updated streaming probe will detect incomplete `/v1/responses` support
+and select `/v1/chat/completions` automatically.
+
+To force `/v1/chat/completions` and skip the `/v1/responses` probe, set
+`NEMOCLAW_PREFERRED_API` before onboarding:
+
+```console
+$ NEMOCLAW_PREFERRED_API=openai-completions nemoclaw onboard
+```
+
+> **Note:** `NEMOCLAW_INFERENCE_API_OVERRIDE` patches the config at container startup but
+> does not update the Dockerfile ARG baked into the image.
+> If you recreate the sandbox without the override env var, it falls back to
+> the API path originally baked into the image.
+> A fresh `nemoclaw onboard` is the reliable fix because it updates both the
+> session and the baked image.
+
 ## Step 2: Cross-Provider Switching
 
 Switching to a different provider family (for example, from NVIDIA Endpoints to Anthropic) requires updating both the gateway route and the sandbox config.
@@ -147,7 +176,7 @@ $ nemoclaw onboard --resume --recreate-sandbox
 ```
 
 The entrypoint patches `openclaw.json` at container startup with the override values.
-No image rebuild is needed.
+You do not need to rebuild the image.
 Remove the env vars and recreate the sandbox to revert to the original model.
 
 `NEMOCLAW_INFERENCE_API_OVERRIDE` accepts `openai-completions` (for NVIDIA, OpenAI, Gemini, compatible endpoints) or `anthropic-messages` (for Anthropic and Anthropic-compatible endpoints).
@@ -281,6 +310,33 @@ $ NEMOCLAW_PROVIDER=custom \
 | `NEMOCLAW_MODEL` | Model ID as reported by the server. |
 | `COMPATIBLE_API_KEY` | API key for the endpoint. Use any non-empty value if authentication is not required. |
 
+### Forcing Chat Completions API
+
+Some OpenAI-compatible servers (such as SGLang) expose `/v1/responses` but do
+not emit the granular streaming events that OpenClaw requires.
+NemoClaw tests streaming events during onboarding and falls back to
+`/v1/chat/completions` automatically when it detects incomplete streaming.
+
+If you need to bypass the `/v1/responses` probe entirely, set
+`NEMOCLAW_PREFERRED_API` before running onboard:
+
+```console
+$ NEMOCLAW_PREFERRED_API=openai-completions nemoclaw onboard
+```
+
+Set this variable to make the wizard skip the `/v1/responses` probe and use
+`/v1/chat/completions` directly.
+You can use it in both interactive and non-interactive mode.
+
+| Variable | Values | Default |
+|---|---|---|
+| `NEMOCLAW_PREFERRED_API` | `openai-completions`, `chat-completions` | unset (auto-detect) |
+
+If you already onboarded and the sandbox is failing at runtime, re-run
+`nemoclaw onboard` to re-probe the endpoint and bake the correct API path
+into the image.
+Refer to "Switching from Responses API to Chat Completions" earlier in this skill for details.
+
 ## Step 7: Anthropic-Compatible Server
 
 If your local server implements the Anthropic Messages API (`/v1/messages`), choose **Other Anthropic-compatible endpoint** during onboarding instead.
diff --git a/.agents/skills/nemoclaw-user-reference/references/troubleshooting.md b/.agents/skills/nemoclaw-user-reference/references/troubleshooting.md
index 333ed815e3..3304cb082c 100644
--- a/.agents/skills/nemoclaw-user-reference/references/troubleshooting.md
+++ b/.agents/skills/nemoclaw-user-reference/references/troubleshooting.md
@@ -256,6 +256,33 @@ $ export NEMOCLAW_LOCAL_INFERENCE_TIMEOUT=300
 $ nemoclaw onboard
 ```
 
+### Agent fails at runtime after onboarding succeeds with a compatible endpoint
+
+Some OpenAI-compatible servers (such as SGLang) expose `/v1/responses` and pass
+the onboarding validation probe, but their streaming mode is incomplete.
+OpenClaw requires granular streaming events like `response.output_text.delta`
+that these backends do not emit.
+
+NemoClaw now tests streaming events during the `/v1/responses` probe and falls
+back to `/v1/chat/completions` automatically.
+If you onboarded before this check was added, re-run onboarding so the wizard
+re-probes the endpoint and bakes the correct API path into the image:
+
+```console
+$ nemoclaw onboard
+```
+
+To force `/v1/chat/completions` and skip the `/v1/responses` probe, set `NEMOCLAW_PREFERRED_API`:
+
+```console
+$ NEMOCLAW_PREFERRED_API=openai-completions nemoclaw onboard
+```
+
+Do not rely on `NEMOCLAW_INFERENCE_API_OVERRIDE` alone — it patches the config
+at container startup but does not update the Dockerfile ARG baked into the
+image.
+A fresh `nemoclaw onboard` is the reliable fix.
+
 ### `NEMOCLAW_DISABLE_DEVICE_AUTH=1` does not change an existing sandbox
 
 This is expected behavior.
diff --git a/docs/inference/switch-inference-providers.md b/docs/inference/switch-inference-providers.md
index cda2e7a3cd..2ac5df5ad3 100644
--- a/docs/inference/switch-inference-providers.md
+++ b/docs/inference/switch-inference-providers.md
@@ -73,6 +73,37 @@ $ openshell inference set --provider compatible-anthropic-endpoint --model <mode
 
 If the provider itself needs to change, rerun `nemoclaw onboard`.
 
+#### Switching from Responses API to Chat Completions
+
+If onboarding selected `/v1/responses` but the agent fails at runtime (for
+example, because the backend does not emit the streaming events OpenClaw
+requires), re-run onboarding so the wizard re-probes the endpoint and bakes
+the correct API path into the image:
+
+```console
+$ nemoclaw onboard
+```
+
+Select the same provider and endpoint again.
+The updated streaming probe will detect incomplete `/v1/responses` support
+and select `/v1/chat/completions` automatically.
+
+To force `/v1/chat/completions` and skip the `/v1/responses` probe, set
+`NEMOCLAW_PREFERRED_API` before onboarding:
+
+```console
+$ NEMOCLAW_PREFERRED_API=openai-completions nemoclaw onboard
+```
+
+:::{note}
+`NEMOCLAW_INFERENCE_API_OVERRIDE` patches the config at container startup but
+does not update the Dockerfile ARG baked into the image.
+If you recreate the sandbox without the override env var, it falls back to
+the API path originally baked into the image.
+A fresh `nemoclaw onboard` is the reliable fix because it updates both the
+session and the baked image.
+:::
+
 ## Cross-Provider Switching
 
 Switching to a different provider family (for example, from NVIDIA Endpoints to Anthropic) requires updating both the gateway route and the sandbox config.
@@ -92,7 +123,7 @@ $ nemoclaw onboard --resume --recreate-sandbox
 ```
 
 The entrypoint patches `openclaw.json` at container startup with the override values.
-No image rebuild is needed.
+You do not need to rebuild the image.
 Remove the env vars and recreate the sandbox to revert to the original model.
 
 `NEMOCLAW_INFERENCE_API_OVERRIDE` accepts `openai-completions` (for NVIDIA, OpenAI, Gemini, compatible endpoints) or `anthropic-messages` (for Anthropic and Anthropic-compatible endpoints).
diff --git a/docs/inference/use-local-inference.md b/docs/inference/use-local-inference.md
index 4f09296d89..037e0471b5 100644
--- a/docs/inference/use-local-inference.md
+++ b/docs/inference/use-local-inference.md
@@ -130,6 +130,33 @@ $ NEMOCLAW_PROVIDER=custom \
 | `NEMOCLAW_MODEL` | Model ID as reported by the server. |
 | `COMPATIBLE_API_KEY` | API key for the endpoint. Use any non-empty value if authentication is not required. |
 
+### Forcing Chat Completions API
+
+Some OpenAI-compatible servers (such as SGLang) expose `/v1/responses` but do
+not emit the granular streaming events that OpenClaw requires.
+NemoClaw tests streaming events during onboarding and falls back to
+`/v1/chat/completions` automatically when it detects incomplete streaming.
+
+If you need to bypass the `/v1/responses` probe entirely, set
+`NEMOCLAW_PREFERRED_API` before running onboard:
+
+```console
+$ NEMOCLAW_PREFERRED_API=openai-completions nemoclaw onboard
+```
+
+Set this variable to make the wizard skip the `/v1/responses` probe and use
+`/v1/chat/completions` directly.
+You can use it in both interactive and non-interactive mode.
+
+| Variable | Values | Default |
+|---|---|---|
+| `NEMOCLAW_PREFERRED_API` | `openai-completions`, `chat-completions` | unset (auto-detect) |
+
+If you already onboarded and the sandbox is failing at runtime, re-run
+`nemoclaw onboard` to re-probe the endpoint and bake the correct API path
+into the image.
+Refer to [Switch Inference Models](switch-inference-providers.md) for details.
+
 ## Anthropic-Compatible Server
 
 If your local server implements the Anthropic Messages API (`/v1/messages`), choose **Other Anthropic-compatible endpoint** during onboarding instead.
diff --git a/docs/reference/troubleshooting.md b/docs/reference/troubleshooting.md
index d370cb3ea5..f5110aee38 100644
--- a/docs/reference/troubleshooting.md
+++ b/docs/reference/troubleshooting.md
@@ -286,6 +286,33 @@ $ export NEMOCLAW_LOCAL_INFERENCE_TIMEOUT=300
 $ nemoclaw onboard
 ```
 
+### Agent fails at runtime after onboarding succeeds with a compatible endpoint
+
+Some OpenAI-compatible servers (such as SGLang) expose `/v1/responses` and pass
+the onboarding validation probe, but their streaming mode is incomplete.
+OpenClaw requires granular streaming events like `response.output_text.delta`
+that these backends do not emit.
+
+NemoClaw now tests streaming events during the `/v1/responses` probe and falls
+back to `/v1/chat/completions` automatically.
+If you onboarded before this check was added, re-run onboarding so the wizard
+re-probes the endpoint and bakes the correct API path into the image:
+
+```console
+$ nemoclaw onboard
+```
+
+To force `/v1/chat/completions` and skip the `/v1/responses` probe, set `NEMOCLAW_PREFERRED_API`:
+
+```console
+$ NEMOCLAW_PREFERRED_API=openai-completions nemoclaw onboard
+```
+
+Do not rely on `NEMOCLAW_INFERENCE_API_OVERRIDE` alone — it patches the config
+at container startup but does not update the Dockerfile ARG baked into the
+image.
+A fresh `nemoclaw onboard` is the reliable fix.
+
 ### `NEMOCLAW_DISABLE_DEVICE_AUTH=1` does not change an existing sandbox
 
 This is expected behavior.
diff --git a/src/lib/http-probe.test.ts b/src/lib/http-probe.test.ts
index 513d738596..d13ed8a2be 100644
--- a/src/lib/http-probe.test.ts
+++ b/src/lib/http-probe.test.ts
@@ -8,6 +8,7 @@ import { describe, expect, it } from "vitest";
 import {
   getCurlTimingArgs,
   runCurlProbe,
+  runStreamingEventProbe,
   summarizeCurlFailure,
   summarizeProbeError,
   summarizeProbeFailure,
@@ -85,3 +86,154 @@ describe("http-probe helpers", () => {
     expect(result.stderr).toContain("spawn ENOENT");
   });
 });
+
+describe("runStreamingEventProbe", () => {
+  /** Builds a fake spawnSync that writes `sseBody` to the path following `-o` and reports `exitCode` as the exit status. */
+  function mockStreaming(sseBody: string, exitCode = 0) {
+    return (_command: string, args: readonly string[]) => {
+      const oIdx = args.indexOf("-o");
+      if (oIdx !== -1) {
+        const outputPath = args[oIdx + 1] as string;
+        fs.writeFileSync(outputPath, sseBody);
+      }
+      return {
+        pid: 1,
+        output: [],
+        stdout: "",
+        stderr: "",
+        status: exitCode,
+        signal: null,
+      };
+    };
+  }
+
+  it("passes when all required streaming events are present", () => {
+    const sseBody = [
+      "event: response.created",
+      'data: {"id":"resp_1"}',
+      "",
+      "event: response.in_progress",
+      'data: {"id":"resp_1"}',
+      "",
+      "event: response.output_item.added",
+      'data: {"id":"resp_1"}',
+      "",
+      "event: response.content_part.added",
+      'data: {"id":"resp_1"}',
+      "",
+      "event: response.output_text.delta",
+      'data: {"delta":"OK"}',
+      "",
+      "event: response.output_text.done",
+      'data: {"text":"OK"}',
+      "",
+      "event: response.content_part.done",
+      'data: {"id":"resp_1"}',
+      "",
+      "event: response.completed",
+      'data: {"id":"resp_1"}',
+      "",
+    ].join("\n");
+
+    const result = runStreamingEventProbe(
+      ["-sS", "--max-time", "15", "https://example.test/v1/responses"],
+      { spawnSyncImpl: mockStreaming(sseBody) },
+    );
+
+    expect(result.ok).toBe(true);
+    expect(result.missingEvents).toEqual([]);
+  });
+
+  it("fails when only basic lifecycle events are present (SGLang-like)", () => {
+    const sseBody = [
+      "event: response.created",
+      'data: {"id":"resp_1"}',
+      "",
+      "event: response.in_progress",
+      'data: {"id":"resp_1"}',
+      "",
+      "event: response.completed",
+      'data: {"id":"resp_1","text":"OK"}',
+      "",
+    ].join("\n");
+
+    const result = runStreamingEventProbe(
+      ["-sS", "--max-time", "15", "https://example.test/v1/responses"],
+      { spawnSyncImpl: mockStreaming(sseBody) },
+    );
+
+    expect(result.ok).toBe(false);
+    expect(result.missingEvents).toContain("response.output_text.delta");
+    expect(result.message).toContain("response.output_text.delta");
+  });
+
+  it("still passes if curl exits with 28 (timeout) but events were captured", () => {
+    const sseBody = [
+      "event: response.created",
+      'data: {"id":"resp_1"}',
+      "",
+      "event: response.output_text.delta",
+      'data: {"delta":"O"}',
+      "",
+    ].join("\n");
+
+    const result = runStreamingEventProbe(
+      ["-sS", "--max-time", "15", "https://example.test/v1/responses"],
+      { spawnSyncImpl: mockStreaming(sseBody, 28) },
+    );
+
+    expect(result.ok).toBe(true);
+    expect(result.missingEvents).toEqual([]);
+  });
+
+  it("fails on spawn error", () => {
+    const result = runStreamingEventProbe(
+      ["-sS", "https://example.test/v1/responses"],
+      {
+        spawnSyncImpl: () => {
+          const error = Object.assign(new Error("spawn ENOENT"), { code: "ENOENT" });
+          return {
+            pid: 1,
+            output: [],
+            stdout: "",
+            stderr: "",
+            status: null,
+            signal: null,
+            error,
+          };
+        },
+      },
+    );
+
+    expect(result.ok).toBe(false);
+    expect(result.message).toContain("Streaming probe failed");
+  });
+
+  it("cleans up temp files after probe", () => {
+    let outputPath = "";
+    runStreamingEventProbe(
+      ["-sS", "--max-time", "15", "https://example.test/v1/responses"],
+      {
+        spawnSyncImpl: (_command, args) => {
+          const oIdx = args.indexOf("-o");
+          if (oIdx !== -1) {
+            outputPath = args[oIdx + 1] as string;
+            fs.writeFileSync(outputPath, "event: response.output_text.delta\ndata: {}\n");
+          }
+          return {
+            pid: 1,
+            output: [],
+            stdout: "",
+            stderr: "",
+            status: 0,
+            signal: null,
+          };
+        },
+      },
+    );
+
+    expect(outputPath).not.toBe("");
+    expect(fs.existsSync(outputPath)).toBe(false);
+    expect(fs.existsSync(path.dirname(outputPath))).toBe(false);
+  });
+});
diff --git a/src/lib/http-probe.ts b/src/lib/http-probe.ts
index 9883c1f78a..983ccd9fe3 100644
--- a/src/lib/http-probe.ts
+++ b/src/lib/http-probe.ts
@@ -26,6 +26,12 @@ export interface CurlProbeOptions {
   ) => SpawnSyncReturns<string>;
 }
 
+export interface StreamingProbeResult {
+  ok: boolean; // true only when every required streaming event was observed
+  missingEvents: string[]; // required event names reported as missing; empty when ok
+  message: string; // human-readable failure detail; empty string when ok
+}
+
 function secureTempFile(prefix: string, ext = ""): string {
   const dir = fs.mkdtempSync(path.join(os.tmpdir(), `${prefix}-`));
   return path.join(dir, `${prefix}${ext}`);
@@ -147,3 +153,92 @@ export function runCurlProbe(argv: string[], opts: CurlProbeOptions = {}): CurlP
     cleanupTempDir(bodyFile, "nemoclaw-curl-probe");
   }
 }
+
+/**
+ * The minimum set of streaming events that OpenClaw requires from a
+ * `/v1/responses` endpoint. Backends that only emit the top-level lifecycle
+ * events (created / in_progress / completed) will cause runtime failures
+ * because OpenClaw never receives the incremental content deltas.
+ */
+const REQUIRED_STREAMING_EVENTS: readonly string[] = ["response.output_text.delta"];
+
+/**
+ * Send a streaming request to a `/v1/responses`-style endpoint and verify
+ * that the SSE event stream includes the granular events OpenClaw needs.
+ *
+ * Returns `ok: false` with the absent event names in `missingEvents` when the
+ * backend streams but omits required events (e.g. SGLang). When curl itself
+ * fails or the probe throws, `missingEvents` is empty so callers can tell a
+ * transport error from an incomplete stream. curl exit 28 is tolerated.
+ */
+export function runStreamingEventProbe(
+  argv: string[],
+  opts: CurlProbeOptions = {},
+): StreamingProbeResult {
+  const bodyFile = secureTempFile("nemoclaw-streaming-probe", ".sse");
+  try {
+    const args = [...argv];
+    const url = args.pop();
+    const spawnSyncImpl = opts.spawnSyncImpl ?? spawnSync;
+    const result = spawnSyncImpl(
+      "curl",
+      [...args, "-N", "-o", bodyFile, String(url || "")],
+      {
+        cwd: opts.cwd ?? ROOT,
+        encoding: "utf8",
+        timeout: 30_000,
+        env: {
+          ...process.env,
+          ...opts.env,
+        },
+      },
+    );
+
+    // curl exit 28 = timeout, which is tolerated — callers cap with --max-time
+    // and enough events may have been collected before the timeout.
+    if (result.error || (result.status !== null && result.status !== 0 && result.status !== 28)) {
+      const detail = result.error
+        ? String((result.error as Error).message || result.error)
+        : String(result.stderr || "");
+      return {
+        ok: false,
+        missingEvents: [], // transport failure, not an incomplete stream
+        message: `Streaming probe failed: ${compactText(detail).slice(0, 200)}`,
+      };
+    }
+
+    const body = fs.existsSync(bodyFile) ? fs.readFileSync(bodyFile, "utf8") : "";
+
+    // Parse SSE event types from the raw output.
+    // Each event line looks like: "event: response.output_text.delta"
+    const eventTypes = new Set<string>();
+    for (const line of body.split("\n")) {
+      const match = /^event:\s*(.+)$/i.exec(line.trim());
+      if (match) {
+        eventTypes.add(match[1].trim());
+      }
+    }
+
+    const missing = REQUIRED_STREAMING_EVENTS.filter((e) => !eventTypes.has(e));
+    if (missing.length > 0) {
+      return {
+        ok: false,
+        missingEvents: missing,
+        message:
+          `Responses API streaming is missing required events: ${missing.join(", ")}. ` +
+          "Falling back to chat completions API.",
+      };
+    }
+
+    return { ok: true, missingEvents: [], message: "" };
+  } catch (error) {
+    const detail = error instanceof Error ? error.message : String(error);
+    return {
+      ok: false,
+      missingEvents: [], // execution failure, not an incomplete stream
+      message: `Streaming probe error: ${detail}`,
+    };
+  } finally {
+    cleanupTempDir(bodyFile, "nemoclaw-streaming-probe");
+  }
+}
diff --git a/src/lib/onboard.ts b/src/lib/onboard.ts
index 6c8b101272..1156c0cc8d 100644
--- a/src/lib/onboard.ts
+++ b/src/lib/onboard.ts
@@ -495,7 +495,7 @@ function hydrateCredentialEnv(envName) {
   return value || null;
 }
 
-const { getCurlTimingArgs, summarizeCurlFailure, summarizeProbeFailure, runCurlProbe } = httpProbe;
+const { getCurlTimingArgs, summarizeCurlFailure, summarizeProbeFailure, runCurlProbe, runStreamingEventProbe } = httpProbe;
 
 function getNavigationChoice(value = "") {
   const normalized = String(value || "")
@@ -523,6 +523,7 @@ const {
   isNvcfFunctionNotFoundForAccount,
   nvcfFunctionNotFoundMessage,
   shouldSkipResponsesProbe,
+  shouldForceCompletionsApi,
 } = validation;
 
 // validateNvidiaApiKeyValue — see validation import above
@@ -1188,6 +1189,56 @@ function probeOpenAiLikeEndpoint(endpointUrl, model, apiKey, options = {}) {
   for (const probe of probes) {
     const result = probe.execute();
     if (result.ok) {
+      // Streaming event validation — catch backends like SGLang that return
+      // valid non-streaming responses but emit incomplete SSE events in
+      // streaming mode. Only run for /responses probes on custom endpoints
+      // where probeStreaming was requested.
+      if (probe.api === "openai-responses" && options.probeStreaming === true) {
+        const streamResult = runStreamingEventProbe([
+          "-sS",
+          ...getValidationProbeCurlArgs(),
+          "-H",
+          "Content-Type: application/json",
+          ...(apiKey ? ["-H", `Authorization: Bearer ${normalizeCredentialValue(apiKey)}`] : []),
+          "-d",
+          JSON.stringify({
+            model,
+            input: "Reply with exactly: OK",
+            stream: true,
+          }),
+          `${String(endpointUrl).replace(/\/+$/, "")}/responses`,
+        ]);
+        if (!streamResult.ok && streamResult.missingEvents.length > 0) {
+          // Backend responds but lacks required streaming events — fall back
+          // to /chat/completions silently.
+          console.log(`  ℹ ${streamResult.message}`);
+          failures.push({
+            name: probe.name + " (streaming)",
+            httpStatus: 0,
+            curlStatus: 0,
+            message: streamResult.message,
+            body: "",
+          });
+          continue;
+        }
+        if (!streamResult.ok) {
+          // Transport or execution failure — surface as a hard error instead
+          // of silently switching APIs.
+          return {
+            ok: false,
+            message: `${probe.name} (streaming): ${streamResult.message}`,
+            failures: [
+              {
+                name: probe.name + " (streaming)",
+                httpStatus: 0,
+                curlStatus: 0,
+                message: streamResult.message,
+                body: "",
+              },
+            ],
+          };
+        }
+      }
       return { ok: true, api: probe.api, label: probe.name };
     }
     // Preserve the raw response body alongside the summarized message so the
@@ -1336,6 +1387,8 @@ async function validateCustomOpenAiLikeSelection(
   const apiKey = getCredential(credentialEnv);
   const probe = probeOpenAiLikeEndpoint(endpointUrl, model, apiKey, {
     requireResponsesToolCalling: true,
+    skipResponsesProbe: shouldForceCompletionsApi(process.env.NEMOCLAW_PREFERRED_API),
+    probeStreaming: true,
   });
   if (probe.ok) {
     console.log(`  ${probe.label} available — OpenClaw will use ${probe.api}.`);
diff --git a/src/lib/validation.test.ts b/src/lib/validation.test.ts
index 7aa135ba27..e1b9774b88 100644
--- a/src/lib/validation.test.ts
+++ b/src/lib/validation.test.ts
@@ -12,6 +12,7 @@ import {
   isNvcfFunctionNotFoundForAccount,
   nvcfFunctionNotFoundMessage,
   shouldSkipResponsesProbe,
+  shouldForceCompletionsApi,
 } from "../../dist/lib/validation";
 
 describe("classifyValidationFailure", () => {
@@ -222,3 +223,29 @@ describe("shouldSkipResponsesProbe", () => {
     expect(shouldSkipResponsesProbe("")).toBe(false);
   });
 });
+
+describe("shouldForceCompletionsApi", () => {
+  it("returns true when passed openai-completions", () => {
+    expect(shouldForceCompletionsApi("openai-completions")).toBe(true);
+  });
+
+  it("returns true for the chat-completions alias", () => {
+    expect(shouldForceCompletionsApi("chat-completions")).toBe(true);
+  });
+
+  it("is case-insensitive", () => {
+    expect(shouldForceCompletionsApi("OpenAI-Completions")).toBe(true);
+  });
+
+  it("returns false when undefined", () => {
+    expect(shouldForceCompletionsApi(undefined)).toBe(false);
+  });
+
+  it("returns false for openai-responses", () => {
+    expect(shouldForceCompletionsApi("openai-responses")).toBe(false);
+  });
+
+  it("returns false for empty string", () => {
+    expect(shouldForceCompletionsApi("")).toBe(false);
+  });
+});
diff --git a/src/lib/validation.ts b/src/lib/validation.ts
index 8429e90e3a..6942dc6877 100644
--- a/src/lib/validation.ts
+++ b/src/lib/validation.ts
@@ -128,3 +128,14 @@ export function nvcfFunctionNotFoundMessage(model: string): string {
 export function shouldSkipResponsesProbe(provider: string): boolean {
   return provider === "nvidia-prod";
 }
+
+/**
+ * Reports whether the caller explicitly asked for the chat completions API.
+ * Feed it the value of `NEMOCLAW_PREFERRED_API` (or an equivalent setting);
+ * matching ignores letter case and surrounding whitespace. Lets onboarding
+ * skip the `/v1/responses` probe for backends with partial streaming (e.g. SGLang).
+ */
+export function shouldForceCompletionsApi(preferredApi?: string): boolean {
+  const normalized = preferredApi ? preferredApi.trim().toLowerCase() : "";
+  return ["openai-completions", "chat-completions"].includes(normalized);
+}