diff --git a/connectors/sandbox--mirage.md b/connectors/sandbox--mirage.md new file mode 100644 index 0000000..f37085e --- /dev/null +++ b/connectors/sandbox--mirage.md @@ -0,0 +1,391 @@ +--- +{ + "category": "sandbox", + "website": "https://docs.mirage.strukto.ai", + "aliases": ["@struktoai/mirage-node", "@struktoai/mirage-browser"] +} +--- + +# Add a Flue Connector: Mirage + +You are an AI coding agent installing the Mirage sandbox connector for a +Flue project. Follow these instructions exactly. Confirm with the user only +when something is genuinely ambiguous (e.g. an unusual project layout). + +## What this connector does + +Wraps an already-initialized Mirage `Workspace` (created with the user's own +`@struktoai/mirage-node` or `@struktoai/mirage-browser` SDK) into Flue's +`SandboxFactory` interface. The user owns the workspace and its mounts; +this connector just adapts the workspace. + +Things to know before installing: + +- Mirage publishes two runtime packages with the same `Workspace` API: + `@struktoai/mirage-node` for `--target node`, and + `@struktoai/mirage-browser` for `--target cloudflare` (Cloudflare Workers + are a browser-class runtime). The connector itself imports types from + `@struktoai/mirage-core` (re-exported by both) so the same file works for + either target. The user picks the right runtime package in their agent + code based on their build target. +- Some Mirage resources are Node-only (`SSHResource`, `PostgresResource`, + `MongoDBResource`, `EmailResource`, FUSE). Importing them from + `@struktoai/mirage-browser` is a build error, so using any of those + pins the user to `--target node`. +- If you see `@struktoai/mirage-agents` in Mirage's docs, **don't install + it for Flue** — it's an adapter for other agent frameworks, not for Flue. 
+ +## Where to write the file + +Pick the location based on the user's project layout: + +- **`.flue/` layout** (project has files at the root and uses `.flue/agents/` + etc.): write to `./.flue/connectors/mirage.ts`. +- **Root layout** (the project root itself contains `agents/` and friends): + write to `./connectors/mirage.ts`. + +If neither feels right (uncommon layout, multiple workspaces, etc.), ask the +user before writing. + +Create any missing parent directories. + +## File contents + +Write this file verbatim. Do not "improve" it — it conforms to the published +`SandboxApi` contract. + +```ts +/** + * Mirage connector for Flue. + * + * Wraps an already-initialized Mirage `Workspace` (from + * `@struktoai/mirage-node` or `@struktoai/mirage-browser`) into Flue's + * SandboxFactory interface. The user constructs the Workspace and mounts + * resources directly using the Mirage SDK — Flue just adapts it. + * + * @example + * ```typescript + * import { Workspace, RAMResource, MountMode } from '@struktoai/mirage-node'; + * import { mirage } from '../connectors/mirage'; + * + * const ws = new Workspace({ '/data': new RAMResource() }, { mode: MountMode.WRITE }); + * const agent = await init({ sandbox: mirage(ws), model: 'anthropic/claude-sonnet-4-6' }); + * const session = await agent.session(); + * ``` + */ +import { createSandboxSessionEnv } from '@flue/sdk/sandbox'; +import type { SandboxApi, SandboxFactory, SessionEnv, FileStat } from '@flue/sdk/sandbox'; +import type { Workspace as MirageWorkspace } from '@struktoai/mirage-core'; + +export interface MirageConnectorOptions { + /** + * Default working directory for `exec()` calls when the caller doesn't + * pass one. Mirage workspaces are rooted at `/` (mounts hang off this + * root), so `/` is the safe default. Pin to a specific writable mount + * (e.g. `/data`) if you want the agent to default to working there. + */ + cwd?: string; +} + +/** + * Quote a string for safe inclusion in a `bash`-style command line. 
+ * Mirage's shell executor parses POSIX-ish syntax, so the same single-quote + * escape used for real bash works here. + */ +function shellQuote(value: string): string { + return `'${value.replace(/'/g, `'\\''`)}'`; +} + +/** + * Implements SandboxApi by wrapping a Mirage Workspace. + * + * Each Flue session maps onto a dedicated Mirage session (created lazily + * by id) so that cwd, env, history, and lastExitCode stay isolated when + * one Workspace is shared across multiple Flue sessions. + * + * Filesystem operations route through `workspace.fs.*` (Mirage's direct + * VFS API) for read/write/readdir/stat/exists/single-level mkdir. + * Recursive `mkdir -p` and `rm -rf` shell out via `workspace.execute()` + * because `WorkspaceFS` exposes only single-level `mkdir` and + * `unlink`/`rmdir`. + * + * `cwd`, `env`, and `signal` (including `AbortSignal.timeout(...)`) all + * pass directly through to `ExecuteOptions` — Mirage runs each call in an + * isolated session for `cwd`/`env`, and observes the signal cooperatively + * at LIST/PIPELINE/loop boundaries. No shell-prefix workarounds. + */ +class MirageSandboxApi implements SandboxApi { + constructor( + private workspace: MirageWorkspace, + private flueSessionId: string, + ) {} + + async readFile(path: string): Promise<string> { + const bytes = await this.workspace.fs.readFile(path); + return new TextDecoder('utf-8').decode(bytes); + } + + async readFileBuffer(path: string): Promise<Uint8Array> { + // Defensive copy: Mirage may hand back a view onto an internal buffer. + const bytes = await this.workspace.fs.readFile(path); + return new Uint8Array(bytes); + } + + async writeFile(path: string, content: string | Uint8Array): Promise<void> { + const bytes = + typeof content === 'string' ? 
new TextEncoder().encode(content) : content; + await this.workspace.fs.writeFile(path, bytes); + } + + async stat(path: string): Promise<FileStat> { + const s = await this.workspace.fs.stat(path); + // Mirage's FileStat: { name, size: number|null, modified: string|null, + // type: FileType|null }. FileType.DIRECTORY is the literal 'directory'. + const isDirectory = s.type === 'directory'; + return { + isFile: !isDirectory, + isDirectory, + isSymbolicLink: false, // Mirage doesn't model symlinks. + size: s.size ?? 0, + // Use Unix epoch as the "missing mtime" sentinel so callers + // comparing mtimes (e.g. cache layers) can't confuse it with + // a real recent modification. + mtime: s.modified ? new Date(s.modified) : new Date(0), + }; + } + + async readdir(path: string): Promise<string[]> { + // Mirage returns absolute paths; some implementations include a + // trailing `/` for directories. Trim it before taking the basename, + // otherwise those directories would be dropped from the listing. + const entries = await this.workspace.fs.readdir(path); + return entries.map((p) => p.replace(/\/+$/, '')).map((p) => p.slice(p.lastIndexOf('/') + 1)).filter((n) => n.length > 0); + } + + async exists(path: string): Promise<boolean> { + return this.workspace.fs.exists(path); + } + + async mkdir(path: string, options?: { recursive?: boolean }): Promise<void> { + if (options?.recursive) { + // `WorkspaceFS.mkdir` is single-level. Mirage's executor implements + // `mkdir -p` natively, so shell out for the recursive case. + const result = await this.runShell(`mkdir -p ${shellQuote(path)}`); + if (result.exitCode !== 0) { + throw new Error( + `[flue:mirage] mkdir -p failed for ${path}: ` + + (result.stderr || result.stdout || `exit ${result.exitCode}`), + ); + } + return; + } + await this.workspace.fs.mkdir(path); + } + + async rm(path: string, options?: { recursive?: boolean; force?: boolean }): Promise<void> { + // `WorkspaceFS` only exposes `unlink` (file) and `rmdir` (empty dir). + // For Flue's `recursive` / `force`, shell out to Mirage's `rm`. 
+ if (options?.recursive || options?.force) { + const flags: string[] = []; + if (options.recursive) flags.push('r'); + if (options.force) flags.push('f'); + const result = await this.runShell(`rm -${flags.join('')} ${shellQuote(path)}`); + if (result.exitCode !== 0) { + throw new Error( + `[flue:mirage] rm failed for ${path}: ` + + (result.stderr || result.stdout || `exit ${result.exitCode}`), + ); + } + return; + } + // Plain delete: try unlink first, fall back to rmdir for empty dirs. + try { + await this.workspace.fs.unlink(path); + } catch { + await this.workspace.fs.rmdir(path); + } + } + + async exec( + command: string, + options?: { + cwd?: string; + env?: Record<string, string>; + timeout?: number; + signal?: AbortSignal; + }, + ): Promise<{ stdout: string; stderr: string; exitCode: number }> { + return this.runShell(command, options); + } + + private async runShell( + command: string, + options?: { + cwd?: string; + env?: Record<string, string>; + timeout?: number; + signal?: AbortSignal; + }, + ): Promise<{ stdout: string; stderr: string; exitCode: number }> { + // Build the AbortSignal: prefer the caller's signal, fall back to a + // timeout-derived one, or compose both if both are set. + let signal: AbortSignal | undefined; + if (typeof options?.timeout === 'number' && options?.signal) { + signal = AbortSignal.any([ + options.signal, + AbortSignal.timeout(options.timeout * 1000), + ]); + } else if (typeof options?.timeout === 'number') { + signal = AbortSignal.timeout(options.timeout * 1000); + } else if (options?.signal) { + signal = options.signal; + } + + try { + const result = await this.workspace.execute(command, { + sessionId: this.flueSessionId, + cwd: options?.cwd, + env: options?.env, + signal, + }); + return { + stdout: result.stdoutText, + stderr: result.stderrText, + exitCode: result.exitCode, + }; + } catch (err) { + // On timeout: synthesize a 124-shaped result (matches `timeout(1)`), + // matching what other Flue sandbox connectors return. 
+ const isTimeout = + typeof options?.timeout === 'number' && + !options.signal?.aborted && // caller abort wins: rethrow instead of reporting a timeout + err instanceof Error && (err.name === 'AbortError' || err.name === 'TimeoutError'); + if (isTimeout) { + return { + stdout: '', + stderr: `[flue:mirage] Command timed out after ${options.timeout} seconds.`, + exitCode: 124, + }; + } + throw err; + } + } +} + +/** + * Create a Flue sandbox factory from an initialized Mirage Workspace. + * The user owns the workspace lifecycle; Flue wraps it into a SessionEnv + * for agent use. + */ +export function mirage( + workspace: MirageWorkspace, + options?: MirageConnectorOptions, +): SandboxFactory { + return { + async createSessionEnv({ id, cwd }: { id: string; cwd?: string }): Promise<SessionEnv> { + // Map this Flue session to a dedicated Mirage session so cwd, env, + // history, and lastExitCode stay isolated across Flue sessions + // sharing the same Workspace. createSession throws on duplicate + // ids, so fall back to getSession if the id is already registered + // (e.g. session resumed after a reload). + try { + workspace.createSession(id); + } catch { + workspace.getSession(id); + } + + // Mirage workspaces are mount-rooted at `/`. `/` is a safe no-op + // default; pin via `options.cwd` to default to a specific writable + // mount (e.g. `/data`). + const sandboxCwd = cwd ?? options?.cwd ?? '/'; + const api = new MirageSandboxApi(workspace, id); + return createSandboxSessionEnv(api, sandboxCwd); + }, + }; +} +``` + +## Required dependencies + +Pick the runtime package that matches the user's Flue build target. If +you can't tell which target they're on, check `package.json` scripts for +`flue dev` / `flue build` invocations and look for a `wrangler.jsonc` (or +`.toml` / `.json`) at the project root. If still unclear, ask. + +For `--target node`: + +```bash +npm install @struktoai/mirage-node +``` + +For `--target cloudflare`: + +```bash +npm install @struktoai/mirage-browser +``` + +(Use the user's package manager — `pnpm add`, `yarn add`, etc. 
if their +lockfile indicates a different one.) + +## Authentication + +**Mirage itself has no API key.** It runs in-process — there's no remote +service to authenticate against. + +Authentication is per-mounted-resource. Each backend the user mounts +(`S3Resource`, `SlackResource`, `GitHubResource`, `PostgresResource`, …) +has its own credentials, configured when the user constructs the resource +in their own agent code. The connector never touches them. + +**Never invent values for any of these credentials** — they must come from +the user. Mirage's docs have a per-resource setup guide for every +supported backend at +`https://docs.mirage.strukto.ai/typescript/setup/` (e.g. +`…/setup/s3`, `…/setup/slack`). + +Use the project's existing conventions (`AGENTS.md`, `.env`, `.dev.vars`, +a secret manager, CI vars) for storing whatever credentials the mounted +resources need. If nothing in the project gives you a clear signal, ask +the user. + +For reference: `flue dev --env <file>` and `flue run --env <file>` load +any `.env`-format file the user points them at. + +## Wiring it into an agent + +Here's what using this connector looks like inside a Flue agent. If the +user is already working on an agent that this connector is meant to plug +into, you can finish that work by wiring the connector into it. Otherwise, +share this snippet so they can wire it up themselves. 
+ +```ts +import type { FlueContext } from '@flue/sdk/client'; +import { Workspace, RAMResource, MountMode } from '@struktoai/mirage-node'; +import { mirage } from '../connectors/mirage'; // adjust path to match the user's layout + +export const triggers = { webhook: true }; + +export default async function ({ init }: FlueContext) { + const ws = new Workspace({ '/data': new RAMResource() }, { mode: MountMode.WRITE }); + + const agent = await init({ + sandbox: mirage(ws, { cwd: '/data' }), + model: 'anthropic/claude-sonnet-4-6', + }); + const session = await agent.session(); + + return await session.shell('echo "hello mirage" > /data/hello.txt && cat /data/hello.txt'); +} +``` + +## Verify + +1. Run the user's typechecker (`npx tsc --noEmit` is a safe default) and + confirm the new file has no errors. +2. Confirm the import path you used for the connector matches where you + actually wrote the file. +3. Tell the user the next steps: install `@struktoai/mirage-node` or + `@struktoai/mirage-browser` (whichever matches their target), make sure + any credentials for resources they mount are available at runtime (per + the Authentication section above), and run `flue dev` (or + `flue run `) to try it. diff --git a/connectors/sandbox--vercel.md b/connectors/sandbox--vercel.md index 746b7e2..0da52df 100644 --- a/connectors/sandbox--vercel.md +++ b/connectors/sandbox--vercel.md @@ -107,12 +107,29 @@ class VercelSandboxApi implements SandboxApi { async exec( command: string, - options?: { cwd?: string; env?: Record; timeout?: number }, + options?: { + cwd?: string; + env?: Record; + timeout?: number; + signal?: AbortSignal; + }, ): Promise<{ stdout: string; stderr: string; exitCode: number }> { - const signal = + // Vercel's SDK accepts an AbortSignal directly, so we forward both + // `timeout` (synthesized as a signal) and the caller's `signal`. + // Compose them with AbortSignal.any so whichever fires first wins: + // - timeout-only → recoverable 124-shape ShellResult. 
+ // - caller-only → rethrow so the host abort propagates. + // - both → if the caller's signal fired, propagate; + // otherwise treat as timeout. + const timeoutSignal = typeof options?.timeout === 'number' ? AbortSignal.timeout(options.timeout * 1000) : undefined; + const callerSignal = options?.signal; + const signal = + callerSignal && timeoutSignal + ? AbortSignal.any([callerSignal, timeoutSignal]) + : (callerSignal ?? timeoutSignal); try { const response = await this.sandbox.runCommand({ @@ -128,9 +145,11 @@ class VercelSandboxApi implements SandboxApi { ]); return { stdout, stderr, exitCode: response.exitCode }; } catch (err) { + // If the caller's signal fired, rethrow so the host abort wins. + if (callerSignal?.aborted) throw err; const aborted = - signal?.aborted && - (err === signal.reason || + timeoutSignal?.aborted && + (err === timeoutSignal.reason || (err instanceof Error && (err.name === 'AbortError' || err.name === 'TimeoutError'))); if (aborted) { return { diff --git a/connectors/sandbox.md b/connectors/sandbox.md index 6e9edc1..d8c4d6d 100644 --- a/connectors/sandbox.md +++ b/connectors/sandbox.md @@ -50,6 +50,13 @@ These are the things that aren't obvious from the spec or the example. the user if their layout is unusual. - **Imports.** The published surface is `@flue/sdk/sandbox`. Don't import from `@flue/sdk/internal` or any other internal path. +- **Cancellation.** `SandboxApi.exec()` receives `timeout` (primary) and + optionally `signal`. Always forward `timeout` to the provider's native + timeout option when one exists — that's how the LLM bash tool stops a + command. Forward `signal` only if the provider has a real cancellation + primitive (`AbortSignal`, process kill, cancel token); otherwise leave + it alone. The SDK does pre/post `signal.aborted` checks at the + `SandboxApi` boundary, so you don't need to add them yourself. - **Credentials.** If the provider needs secrets at runtime, never invent values for them. 
Let the project's conventions (`AGENTS.md`, an existing `.env` / `.dev.vars`, a secret manager, CI vars, etc.) decide where they diff --git a/docs/sandbox-connector-spec.md b/docs/sandbox-connector-spec.md index 1df46b9..e3d70dd 100644 --- a/docs/sandbox-connector-spec.md +++ b/docs/sandbox-connector-spec.md @@ -105,11 +105,22 @@ export interface SandboxApi { rm(path: string, options?: { recursive?: boolean; force?: boolean }): Promise; exec( command: string, - options?: { cwd?: string; env?: Record; timeout?: number }, + options?: { + cwd?: string; + env?: Record; + timeout?: number; + signal?: AbortSignal; + }, ): Promise<{ stdout: string; stderr: string; exitCode: number }>; } ``` +`timeout` is the **primary** cancellation contract — every connector should +honor it by forwarding to the provider SDK's native timeout option. +`signal` is an *optional* enhancement: connectors whose provider SDK +supports mid-flight cancellation (e.g. accepts an `AbortSignal`) should +forward it; others may ignore it. See "Cancellation" below. + ### `SandboxFactory` (your factory returns this) ```ts @@ -187,9 +198,29 @@ Delete a file or directory. Honor `options.recursive` and `options.force`. ### `exec(command, options?) → Promise<{ stdout, stderr, exitCode }>` Run a shell command. Honor `options.cwd`, `options.env`, and -`options.timeout` if your provider supports them. If `stderr` is not -separately surfaced, return `''` for it; do the same for `exitCode` if -unavailable, defaulting to `0` only when the call clearly succeeded. +`options.timeout`. If your provider's SDK doesn't expose a native timeout +option, translate `timeout` into an `AbortSignal.timeout(ms)` and pass it +to whatever the SDK accepts — or, as a last resort, race the call against +a `setTimeout` and reject. Connectors **must** make a best-effort attempt +at honoring `timeout`: it's how the LLM bash tool tells the agent "stop +this command after N seconds and let me retry." 
Returning a 124-shaped +`ShellResult` (`exitCode: 124`, `stderr` describing the timeout) on +deadline expiry matches the convention used by other Flue connectors and +the `timeout(1)` utility. + +If your provider's SDK *also* supports an `AbortSignal`, forward +`options.signal` too — this gives SDK-level callers (`agent.shell(cmd, +{ signal })`) true mid-flight cancellation. Connectors whose provider +SDK can't observe a signal should ignore `signal`: Flue's +`createSandboxSessionEnv` wrapper performs pre/post `signal.aborted` +checks for you, so post-completion abort still surfaces correctly without +any work in the connector. Do not attempt to fake mid-flight cancellation +with `Promise.race` against the signal — the underlying remote process +will keep running, which surprises users. + +If `stderr` is not separately surfaced, return `''` for it; do the same +for `exitCode` if unavailable, defaulting to `0` only when the call +clearly succeeded. --- diff --git a/packages/sdk/src/agent.ts b/packages/sdk/src/agent.ts index 2346549..fac666a 100644 --- a/packages/sdk/src/agent.ts +++ b/packages/sdk/src/agent.ts @@ -216,8 +216,61 @@ function createBashTool(env: SessionEnv): AgentTool { parameters: BashParams, async execute(_toolCallId: string, params: Static, signal?: AbortSignal) { throwIfAborted(signal); - const result = await env.exec(params.command, { timeout: params.timeout, signal }); - return formatBashResult(result, params.command); + + // Two layers cooperate to enforce `params.timeout`: + // + // 1. Pass `timeout` to env.exec as a hint. Sandbox connectors + // forward it to their provider's native timeout option + // (E2B `timeoutMs`, Daytona `timeout`, etc.) so signal- + // blind providers still observe the deadline with full + // fidelity. Bash factories translate it into a signal + // internally. + // 2. Compose a local AbortSignal.timeout into `signal` as a + // backstop. 
Connectors that ignore both fields will at + // least see the merged signal aborted on the way out. + // + // On timeout we return a 124-shaped ShellResult so the model + // can recover. On host abort we rethrow so the outer call + // cancels. This timeout-as-recoverable-result behavior lives + // here in the LLM-facing tool, not in SessionEnv/SandboxApi: + // SDK callers express timeouts via AbortSignal.timeout(...) and + // accept abort semantics; the model can only emit JSON, so it + // needs `params.timeout` and a recoverable shape on timeout. + const timeoutSignal = + typeof params.timeout === 'number' ? AbortSignal.timeout(params.timeout * 1000) : undefined; + const execSignal = + signal && timeoutSignal + ? AbortSignal.any([signal, timeoutSignal]) + : (signal ?? timeoutSignal); + + const timedOut = () => + formatBashResult( + { + stdout: '', + stderr: `[flue] Command timed out after ${params.timeout} seconds.`, + exitCode: 124, + }, + params.command, + ); + try { + const result = await env.exec(params.command, { + timeout: params.timeout, + signal: execSignal, + }); + // Some connectors don't observe the signal mid-flight and + // just return whatever the remote produced. If the timeout + // fired during that window and the host signal didn't, + // surface it as a recoverable timeout instead of a stale + // success. + if (timeoutSignal?.aborted && !signal?.aborted) return timedOut(); + return formatBashResult(result, params.command); + } catch (err) { + // Same rule on the throwing path: timeout-only → recoverable + // 124-shape; host signal involved → rethrow so the caller's + // cancellation surfaces as an AbortError. 
+ if (timeoutSignal?.aborted && !signal?.aborted) return timedOut(); + throw err; + } }, }; } diff --git a/packages/sdk/src/cloudflare/cf-sandbox.ts b/packages/sdk/src/cloudflare/cf-sandbox.ts index 63956f0..649b888 100644 --- a/packages/sdk/src/cloudflare/cf-sandbox.ts +++ b/packages/sdk/src/cloudflare/cf-sandbox.ts @@ -98,8 +98,9 @@ export async function cfSandboxToSessionEnv( signal?: AbortSignal; }, ): Promise<{ stdout: string; stderr: string; exitCode: number }> { - // The Cloudflare sandbox API has no signal param, so we can - // only check before and after the remote call. + // The Cloudflare sandbox API has no signal param, so we rely on + // `timeout` for deadline enforcement and only observe `signal` + // before and after the remote call. const externalSignal = execOpts?.signal; if (externalSignal?.aborted) throw abortErrorFor(externalSignal); diff --git a/packages/sdk/src/sandbox.ts b/packages/sdk/src/sandbox.ts index 1325cb8..db8518d 100644 --- a/packages/sdk/src/sandbox.ts +++ b/packages/sdk/src/sandbox.ts @@ -81,43 +81,27 @@ function createBashSessionEnv( command: string, options?: { cwd?: string; env?: Record; signal?: AbortSignal }, ) => Promise; - const timeout = opts?.timeout; - const externalSignal = opts?.signal; - // Track the timeout signal separately so we can tell which - // source aborted after exec returns. - let timeoutSignal: AbortSignal | undefined; - let timer: ReturnType | undefined; - if (typeof timeout === 'number') { - const ctrl = new AbortController(); - timeoutSignal = ctrl.signal; - timer = setTimeout(() => ctrl.abort(), timeout * 1000); - } + // Just-bash has no native timeout option. Translate `timeout` + // into an AbortSignal and compose with the caller's signal so + // bash factories observe deadlines with the same fidelity as + // signal-aware sandbox connectors. + const timeoutSignal = + typeof opts?.timeout === 'number' + ? 
AbortSignal.timeout(opts.timeout * 1000) + : undefined; const mergedSignal = - externalSignal && timeoutSignal - ? AbortSignal.any([externalSignal, timeoutSignal]) - : (externalSignal ?? timeoutSignal); + opts?.signal && timeoutSignal + ? AbortSignal.any([opts.signal, timeoutSignal]) + : (opts?.signal ?? timeoutSignal); - try { - const result = await exec.call( - bash, - cmd, - opts ? { cwd: opts.cwd, env: opts.env, signal: mergedSignal } : undefined, - ); - // External signal throws (caller wants cancellation); numeric - // timeout returns a ShellResult so the LLM bash tool sees a - // recoverable value. - if (externalSignal?.aborted) throw abortErrorFor(externalSignal); - if (timeoutSignal?.aborted) { - return { - ...result, - stderr: result.stderr || `[flue] Command timed out after ${timeout} seconds.`, - }; - } - return result; - } finally { - if (timer) clearTimeout(timer); - } + const result = await exec.call( + bash, + cmd, + opts ? { cwd: opts.cwd, env: opts.env, signal: mergedSignal } : undefined, + ); + if (opts?.signal?.aborted) throw abortErrorFor(opts.signal); + return result; }, scope: (options) => createScope(options?.commands ?? []), readFile: (p) => fs.readFile(resolve(p)), @@ -171,7 +155,25 @@ function assertBashLike(value: unknown): asserts value is BashLike { } } -/** Interface that remote sandbox providers must implement. */ +/** + * Interface that remote sandbox providers must implement. + * + * `exec()` cancellation is expressed two ways. Connectors should honor at + * least one — preferably `timeout`, since most provider SDKs expose a + * native timeout option but few support mid-flight cancellation: + * + * - `timeout?: number` (seconds): the **primary** cancellation contract. + * Forward to the provider's native timeout option (E2B `timeoutMs`, + * Daytona `timeout`, Modal `timeout`, etc.). Required for parity with + * the LLM bash tool, which always passes a `timeout` hint when the + * model requests one. 
+ * - `signal?: AbortSignal` (optional): for connectors whose SDK supports + * mid-flight cancellation (Mirage's executor, in-process bash). Lets + * SDK callers do ad-hoc `abort()`. Connectors that can't honor it + * should ignore it; the deadline is still enforced via `timeout`. + * + * Connectors that support both should observe whichever fires first. + */ export interface SandboxApi { readFile(path: string): Promise; readFileBuffer(path: string): Promise; @@ -210,12 +212,24 @@ export function createSandboxSessionEnv(api: SandboxApi, cwd: string): SessionEn signal?: AbortSignal; }, ): Promise { - return api.exec(command, { + // Pre/post abort checks here — not in every connector. Most + // provider SDKs (E2B, Daytona, Modal, Boxd, etc.) don't accept + // an AbortSignal, so a caller that aborts during a long-running + // remote command would otherwise see the call return + // successfully and the abort silently dropped. Centralizing the + // check means connectors only need to wire `signal` into their + // SDK when one supports it (Mirage, Vercel); the rest get + // correct abort semantics for free. + const signal = options?.signal; + if (signal?.aborted) throw abortErrorFor(signal); + const result = await api.exec(command, { cwd: options?.cwd ?? cwd, env: options?.env, timeout: options?.timeout, - signal: options?.signal, + signal, }); + if (signal?.aborted) throw abortErrorFor(signal); + return result; }, async readFile(path: string): Promise { diff --git a/packages/sdk/src/types.ts b/packages/sdk/src/types.ts index e416d56..9792fc1 100644 --- a/packages/sdk/src/types.ts +++ b/packages/sdk/src/types.ts @@ -92,13 +92,24 @@ export interface SessionEnv { options?: { cwd?: string; env?: Record; + /** + * Wall-clock deadline hint in seconds. Forwarded to the underlying + * sandbox connector's native timeout option (E2B `timeoutMs`, + * Daytona `timeout`, etc.) so signal-blind providers still observe + * the deadline with full fidelity. 
+ * + * Independent of `signal`. Callers that have a deadline AND want + * mid-flight cancellation should pass both: `timeout` for + * provider-native enforcement, `signal` for ad-hoc abort. The + * bash tool does this when the model emits a `timeout` parameter. + */ timeout?: number; /** * Cancel the in-flight command. Aborting rejects with an - * `AbortError`. Composes with `timeout`; whichever fires first - * wins. `timeout` and `signal` differ on purpose: `signal` - * throws, `timeout` returns a `ShellResult` whose stderr - * reports the timeout. + * `AbortError`. Connectors that wrap a signal-aware SDK observe + * this mid-flight; others see it only before/after the remote + * call returns. Use `timeout` for guaranteed deadline enforcement + * on signal-blind connectors. */ signal?: AbortSignal; }, @@ -557,7 +568,7 @@ export interface SandboxFactory { export interface BashLike { exec( command: string, - options?: { cwd?: string; env?: Record }, + options?: { cwd?: string; env?: Record; signal?: AbortSignal }, ): Promise; getCwd(): string; fs: {