diff --git a/.craft.yml b/.craft.yml
index 85a4021..54d78cc 100644
--- a/.craft.yml
+++ b/.craft.yml
@@ -9,6 +9,10 @@ targets:
     id: "@vitest-evals/harness-ai-sdk"
     access: public
     includeNames: /^vitest-evals-harness-ai-sdk-\d.*\.tgz$/
+  - name: npm
+    id: "@vitest-evals/harness-openai-agents"
+    access: public
+    includeNames: /^vitest-evals-harness-openai-agents-\d.*\.tgz$/
   - name: npm
     id: "@vitest-evals/harness-pi-ai"
     access: public
diff --git a/.github/workflows/merge-jobs.yml b/.github/workflows/merge-jobs.yml
index 939df3a..d5932ea 100644
--- a/.github/workflows/merge-jobs.yml
+++ b/.github/workflows/merge-jobs.yml
@@ -66,6 +66,7 @@ jobs:
           mkdir -p artifacts
           pnpm --filter vitest-evals pack --pack-destination artifacts
           pnpm --filter @vitest-evals/harness-ai-sdk pack --pack-destination artifacts
+          pnpm --filter @vitest-evals/harness-openai-agents pack --pack-destination artifacts
           pnpm --filter @vitest-evals/harness-pi-ai pack --pack-destination artifacts
           ls -la artifacts
 
diff --git a/.gitignore b/.gitignore
index ef70c63..2d3b640 100644
--- a/.gitignore
+++ b/.gitignore
@@ -88,6 +88,9 @@ dist
 # Build files
 /dist
 
+# vitest-evals replay recordings
+.vitest-evals/
+
 # Gatsby files
 .cache/
 # Comment in the public line in if your project uses Gatsby and not Next.js
diff --git a/README.md b/README.md
index 6085fa2..cd1eb27 100644
--- a/README.md
+++ b/README.md
@@ -5,9 +5,12 @@ Monorepo for the explicit-run `vitest-evals` shape:
 - `packages/vitest-evals`: core suite API, judges, normalized harness/session
   types, reporter, and legacy compatibility exports
 - `packages/harness-ai-sdk`: `ai-sdk`-focused harness adapter
+- `packages/harness-openai-agents`: `@openai/agents`-focused harness adapter
 - `packages/harness-pi-ai`: `pi-ai`-focused harness adapter with tool replay
 - `apps/demo-pi`: end-to-end Pi Mono demo evals with an app-local refund agent
 - `apps/demo-ai-sdk`: end-to-end AI SDK demo evals with app-local refund tools
+- `apps/demo-openai-agents`: end-to-end OpenAI Agents demo evals with
+  app-local refund tools
 
 ## Workspace Layout
 
@@ -15,9 +18,11 @@ Monorepo for the explicit-run `vitest-evals` shape:
 packages/
   vitest-evals/
   harness-ai-sdk/
+  harness-openai-agents/
   harness-pi-ai/
 apps/
   demo-ai-sdk/
+  demo-openai-agents/
   demo-pi/
 ```
 
@@ -158,8 +163,8 @@ when the judge needs richer run/session data or the suite's configured model
 prompt seam.
 
 Tool replay is available for opt-in tools in the first-party harnesses.
-Configure it globally in Vitest and then mark individual tools with
-`replay: true`:
+Configure the replay mode and directory globally in Vitest, then opt individual
+tools in from the harness with `toolReplay: { toolName: true }`.
 
 ```ts
 import tsconfigPaths from "vite-tsconfig-paths";
@@ -187,5 +192,7 @@ errors on missing recordings. Recordings are stored under
 `.vitest-evals/recordings/<tool-name>/`.
 
 `pnpm evals` fans out to each workspace package or app that exposes an `evals`
-script. The demo apps expect provider keys in `.env` or `.env.local`. The
+script. The shared eval CLI defaults `VITEST_EVALS_REPLAY_MODE` to `auto` and
+`VITEST_EVALS_REPLAY_DIR` to `.vitest-evals/recordings` unless those variables
+are already set. Demo apps expect provider keys in `.env` or `.env.local`. The
 intentional failing examples remain under the `evals:fail` scripts.
diff --git a/apps/demo-ai-sdk/evals/shared.ts b/apps/demo-ai-sdk/evals/shared.ts
index 9dde534..353bc28 100644
--- a/apps/demo-ai-sdk/evals/shared.ts
+++ b/apps/demo-ai-sdk/evals/shared.ts
@@ -91,7 +91,6 @@ async function createRefund({
 const refundTools = {
   lookupInvoice: {
     description: "Look up invoice details inside demo billing.",
-    replay: true,
     inputSchema: z.object({
       invoiceId: z
         .string()
@@ -111,6 +110,9 @@ const refundTools = {
 
 export const refundHarness = aiSdkHarness({
   tools: refundTools,
+  toolReplay: {
+    lookupInvoice: true,
+  },
   prompt: (input, options) =>
     generateText({
       model: anthropic("claude-sonnet-4-5"),
diff --git a/apps/demo-openai-agents/README.md b/apps/demo-openai-agents/README.md
new file mode 100644
index 0000000..16b30b5
--- /dev/null
+++ b/apps/demo-openai-agents/README.md
@@ -0,0 +1,34 @@
+# Demo OpenAI Agents App
+
+This app demonstrates an `@openai/agents` harness wired into `vitest-evals`
+through the workspace packages:
+
+- `vitest-evals`
+- `@vitest-evals/harness-openai-agents`
+
+The passing live eval lives in `evals/refund.eval.ts`.
+It demonstrates a real OpenAI Agents `Agent`, `Runner`, local function tools,
+tool replay configured from the harness, and explicit Vitest assertions on
+`run.output` and the normalized session trace.
+
+The intentionally failing examples live in `evals/refund.fail.eval.ts`.
+One fails an automatic harness-backed judge, and one fails explicit assertions
+after the harness completes.
+
+Run them with:
+
+```sh
+pnpm --filter @demo/demo-openai-agents run evals
+pnpm --filter @demo/demo-openai-agents run evals -- -v
+pnpm --filter @demo/demo-openai-agents run evals -- -vv
+pnpm --filter @demo/demo-openai-agents run evals -- -vvv
+pnpm --filter @demo/demo-openai-agents run evals -- -vvvv
+pnpm --filter @demo/demo-openai-agents run evals:verbose
+pnpm --filter @demo/demo-openai-agents run evals:fail
+```
+
+`pnpm --filter @demo/demo-openai-agents run evals` runs only the passing eval.
+Use `pnpm --filter @demo/demo-openai-agents run evals:fail` to run just the
+intentional failures.
+
+Both scripts expect `OPENAI_API_KEY` to be present in `.env` or `.env.local`.
diff --git a/apps/demo-openai-agents/evals/refund.eval.ts b/apps/demo-openai-agents/evals/refund.eval.ts
new file mode 100644
index 0000000..de16be7
--- /dev/null
+++ b/apps/demo-openai-agents/evals/refund.eval.ts
@@ -0,0 +1,47 @@
+import {
+  describeEval,
+  StructuredOutputJudge,
+  ToolCallJudge,
+} from "vitest-evals";
+import { expect } from "vitest";
+import { assertRefundCase, refundHarness } from "./shared";
+import type { RefundCase } from "../src/refundAgent";
+
+const outputJudge = StructuredOutputJudge();
+
+describeEval(
+  "demo openai agents refund agent",
+  {
+    skipIf: () => !process.env.OPENAI_API_KEY,
+    harness: refundHarness,
+    judges: [ToolCallJudge()],
+  },
+  (it) => {
+    it.for<RefundCase>([
+      {
+        name: "approves refundable invoice",
+        input: "Refund invoice inv_123",
+        expectedStatus: "approved",
+        expectedTools: ["lookupInvoice", "createRefund"],
+      },
+      {
+        name: "denies non-refundable invoice",
+        input: "Refund invoice inv_404",
+        expectedStatus: "denied",
+        expectedTools: ["lookupInvoice"],
+      },
+    ])("$name", async ({ input, ...metadata }, { run }) => {
+      const result = await run(input, {
+        metadata,
+      });
+
+      await assertRefundCase(result, metadata);
+      await expect(result).toSatisfyJudge(outputJudge, {
+        metadata,
+        expected: {
+          status: metadata.expectedStatus,
+        },
+      });
+    });
+  },
+);
diff --git a/apps/demo-openai-agents/evals/refund.fail.eval.ts b/apps/demo-openai-agents/evals/refund.fail.eval.ts
new file mode 100644
index 0000000..b7031fe
--- /dev/null
+++ b/apps/demo-openai-agents/evals/refund.fail.eval.ts
@@ -0,0 +1,63 @@
+import { expect } from "vitest";
+import { describeEval, StructuredOutputJudge } from "vitest-evals";
+import { refundHarness } from "./shared";
+import type { RefundCase } from "../src/refundAgent";
+
+type AssertionRefundCase = RefundCase;
+type ScoredRefundCase = RefundCase & {
+  expected: Record<string, unknown>;
+};
+
+describeEval(
+  "demo openai agents refund scorer failing example",
+  {
+    skipIf: () => !process.env.OPENAI_API_KEY,
+    harness: refundHarness,
+    judges: [StructuredOutputJudge()],
+  },
+  (it) => {
+    it.for<ScoredRefundCase>([
+      {
+        name: "judge expects approval for a denied invoice",
+        input: "Refund invoice inv_404",
+        expectedStatus: "denied",
+        expectedTools: ["lookupInvoice"],
+        expected: {
+          status: "approved",
+        },
+      },
+    ])("$name", async ({ input, ...metadata }, { run }) => {
+      await run(input, {
+        metadata,
+      });
+    });
+  },
+);
+
+describeEval(
+  "demo openai agents refund assertion failing example",
+  {
+    skipIf: () => !process.env.OPENAI_API_KEY,
+    harness: refundHarness,
+  },
+  (it) => {
+    it.for<AssertionRefundCase>([
+      {
+        name: "asserts the wrong refund id after approval",
+        input: "Refund invoice inv_123",
+        expectedStatus: "approved",
+        expectedTools: ["lookupInvoice", "createRefund"],
+      },
+    ])("$name", async ({ input, ...metadata }, { run }) => {
+      const result = await run(input, {
+        metadata,
+      });
+
+      expect(result.output).toMatchObject({
+        status: "approved",
+        invoiceId: "inv_123",
+        refundId: "rf_wrong",
+      });
+    });
+  },
+);
diff --git a/apps/demo-openai-agents/evals/shared.ts b/apps/demo-openai-agents/evals/shared.ts
new file mode 100644
index 0000000..3286125
--- /dev/null
+++ b/apps/demo-openai-agents/evals/shared.ts
@@ -0,0 +1,40 @@
+import { openaiAgentsHarness } from "@vitest-evals/harness-openai-agents";
+import { expect } from "vitest";
+import { type HarnessRun, toolCalls } from "vitest-evals";
+import {
+  createRefundAgent,
+  createRefundRunner,
+  parseRefundDecision,
+  promptRefundModel,
+  resolveResultText,
+  type RefundCase,
+} from "../src/refundAgent";
+
+export const refundHarness = openaiAgentsHarness({
+  createAgent: () => createRefundAgent(),
+  createRunner: () => createRefundRunner(),
+  prompt: promptRefundModel,
+  runOptions: {
+    maxTurns: 5,
+  },
+  toolReplay: {
+    lookupInvoice: true,
+  },
+  normalize: {
+    output: ({ result }) => parseRefundDecision(resolveResultText(result)),
+  },
+});
+
+export async function assertRefundCase(
+  run: HarnessRun,
+  expected: Pick<RefundCase, "expectedStatus" | "expectedTools">,
+) {
+  // Checks the decision, the exact tool-call order, then the reported usage.
+  const { expectedStatus, expectedTools } = expected;
+  expect(run.output).toMatchObject({ status: expectedStatus });
+
+  const calledTools = toolCalls(run.session).map(({ name }) => name);
+  expect(calledTools).toEqual(expectedTools);
+  expect(run.usage.model).toContain("gpt");
+  expect(run.usage.totalTokens).toBeGreaterThan(0);
+}
diff --git a/apps/demo-openai-agents/package.json b/apps/demo-openai-agents/package.json
new file mode 100644
index 0000000..5cf014b
--- /dev/null
+++ b/apps/demo-openai-agents/package.json
@@ -0,0 +1,16 @@
+{
+  "name": "@demo/demo-openai-agents",
+  "private": true,
+  "version": "0.1.0",
+  "scripts": {
+    "evals": "node ./scripts/run-evals.mjs",
+    "evals:verbose": "node ./scripts/run-evals.mjs -v",
+    "evals:fail": "node ./scripts/run-evals.mjs --fail"
+  },
+  "dependencies": {
+    "@openai/agents": "^0.8.5",
+    "@vitest-evals/harness-openai-agents": "workspace:*",
+    "vitest-evals": "workspace:*",
+    "zod": "^4.3.6"
+  }
+}
diff --git a/apps/demo-openai-agents/scripts/run-evals.mjs b/apps/demo-openai-agents/scripts/run-evals.mjs
new file mode 100644
index 0000000..9ab807c
--- /dev/null
+++ b/apps/demo-openai-agents/scripts/run-evals.mjs
@@ -0,0 +1,50 @@
+import { spawnSync } from "node:child_process";
+import { dirname, resolve } from "node:path";
+import { fileURLToPath } from "node:url";
+import { createEvalEnv, parseEvalCliArgs } from "../../../scripts/eval-cli.mjs";
+
+const WORKSPACE_ROOT = resolve(
+  dirname(fileURLToPath(import.meta.url)),
+  "../../..",
+);
+// Separate this script's own options from the arguments forwarded to Vitest.
+const { failMode, forwardedArgs, toolDetailLevel } = parseEvalCliArgs(
+  process.argv.slice(2),
+);
+const env = createEvalEnv(process.env, toolDetailLevel);
+// First non-flag arg becomes the target (NOTE(review): even a flag's value).
+const explicitTargetIndex = forwardedArgs.findIndex(
+  (arg) => !arg.startsWith("-"),
+);
+const target =
+  explicitTargetIndex >= 0
+    ? forwardedArgs.splice(explicitTargetIndex, 1)[0]
+    : failMode
+      ? "apps/demo-openai-agents/evals/refund.fail.eval.ts"
+      : "apps/demo-openai-agents/evals/refund.eval.ts";
+// Arguments for `pnpm`: exec dotenv (loading .env, .env.local), then Vitest.
+const command = [
+  "exec",
+  "dotenv",
+  "-e",
+  ".env",
+  "-e",
+  ".env.local",
+  "--",
+  "vitest",
+  "run",
+  target,
+  "--config",
+  "vitest.config.ts",
+  "--reporter",
+  "packages/vitest-evals/src/reporter.ts",
+  ...forwardedArgs,
+];
+
+const result = spawnSync("pnpm", command, {
+  cwd: WORKSPACE_ROOT,
+  env,
+  stdio: "inherit",
+});
+if (result.error) console.error(`Failed to run pnpm: ${result.error.message}`);
+process.exit(result.status ?? 1); // null status (spawn failure/signal) exits 1
diff --git a/apps/demo-openai-agents/src/refundAgent.test.ts b/apps/demo-openai-agents/src/refundAgent.test.ts
new file mode 100644
index 0000000..b7842f5
--- /dev/null
+++ b/apps/demo-openai-agents/src/refundAgent.test.ts
@@ -0,0 +1,109 @@
+import { describe, expect, test } from "vitest";
+import {
+  createRefund,
+  createRefundAgent,
+  createRefundRunner,
+  lookupInvoice,
+  parseRefundDecision,
+  resolveResultText,
+} from "./refundAgent";
+
+describe("parseRefundDecision", () => {
+  test("parses plain approved JSON", () => {
+    expect(
+      parseRefundDecision(
+        '{"status":"approved","invoiceId":"inv_123","refundId":"rf_inv_123","amount":4200}',
+      ),
+    ).toEqual({
+      status: "approved",
+      invoiceId: "inv_123",
+      refundId: "rf_inv_123",
+      amount: 4200,
+    });
+  });
+
+  test("parses fenced denied JSON", () => {
+    expect(
+      parseRefundDecision(
+        [
+          "```json",
+          '{"status":"denied","invoiceId":"inv_404","reason":"not refundable"}',
+          "```",
+        ].join("\n"),
+      ),
+    ).toEqual({
+      status: "denied",
+      invoiceId: "inv_404",
+      reason: "not refundable",
+    });
+  });
+
+  test("parses JSON embedded in surrounding text", () => {
+    expect(
+      parseRefundDecision(
+        [
+          "Here is the decision:",
+          '{"status":"denied","invoiceId":"inv_404","reason":"not refundable"}',
+        ].join("\n"),
+      ),
+    ).toEqual({
+      status: "denied",
+      invoiceId: "inv_404",
+      reason: "not refundable",
+    });
+  });
+
+  test("parses embedded JSON with braces inside string values", () => {
+    expect(
+      parseRefundDecision(
+        [
+          "Decision payload:",
+          '{"status":"denied","invoiceId":"inv_404","reason":"saw literal {brace} text"}',
+          "Thanks.",
+        ].join("\n"),
+      ),
+    ).toEqual({
+      status: "denied",
+      invoiceId: "inv_404",
+      reason: "saw literal {brace} text",
+    });
+  });
+});
+
+test("demo billing tools are deterministic", async () => {
+  await expect(lookupInvoice({ invoiceId: "inv_123" })).resolves.toEqual({
+    invoiceId: "inv_123",
+    amount: 4200,
+    refundable: true,
+    customer: "Acme Co",
+  });
+  await expect(
+    createRefund({ invoiceId: "inv_123", amount: 4200 }),
+  ).resolves.toEqual({
+    refundId: "rf_inv_123",
+    amount: 4200,
+    status: "submitted",
+  });
+});
+
+test("createRefundAgent wires OpenAI Agents tools", () => {
+  const agent = createRefundAgent();
+
+  expect(agent.name).toBe("demo_refund_agent");
+  expect(agent.tools.map((tool) => tool.name)).toEqual([
+    "lookupInvoice",
+    "createRefund",
+  ]);
+});
+
+test("createRefundRunner disables tracing for demo eval runs", () => {
+  expect(createRefundRunner().config.tracingDisabled).toBe(true);
+});
+
+test("resolveResultText reads OpenAI Agents final output", () => {
+  expect(
+    resolveResultText({
+      finalOutput: '{"status":"denied","invoiceId":"inv_404","reason":"no"}',
+    }),
+  ).toBe('{"status":"denied","invoiceId":"inv_404","reason":"no"}');
+});
diff --git a/apps/demo-openai-agents/src/refundAgent.ts b/apps/demo-openai-agents/src/refundAgent.ts
new file mode 100644
index 0000000..fa36976
--- /dev/null
+++ b/apps/demo-openai-agents/src/refundAgent.ts
@@ -0,0 +1,301 @@
+import { Agent, Runner, tool } from "@openai/agents";
+import type { HarnessPromptOptions } from "vitest-evals";
+import { z } from "zod";
+
+export type InvoiceRecord = {
+  invoiceId: string;
+  amount: number;
+  refundable: boolean;
+  customer: string;
+};
+
+export type RefundDecision =
+  | {
+      status: "approved";
+      invoiceId: string;
+      refundId: string;
+      amount: number;
+    }
+  | {
+      status: "denied";
+      invoiceId: string;
+      reason: string;
+    };
+
+export type RefundEvalMetadata = {
+  name?: string;
+  expectedStatus: RefundDecision["status"];
+  expectedTools: string[];
+};
+
+export type RefundCase = RefundEvalMetadata & {
+  input: string;
+};
+
+export type LookupInvoiceInput = {
+  invoiceId: string;
+};
+
+export type CreateRefundInput = {
+  invoiceId: string;
+  amount: number;
+};
+
+export const LOOKUP_INVOICE_DESCRIPTION =
+  "Look up invoice details inside demo billing.";
+export const CREATE_REFUND_DESCRIPTION =
+  "Create a refund for a refundable invoice.";
+export const DEFAULT_REFUND_MODEL = "gpt-4.1-mini";
+export const REFUND_SYSTEM_PROMPT = [
+  "You are the demo refund operations agent.",
+  "You must decide whether a refund should be approved for the invoice in the user's request.",
+  "Always call lookupInvoice before making a decision.",
+  "If the invoice is refundable, call createRefund with the full invoice amount.",
+  "If the invoice is not refundable, do not call createRefund.",
+  "Return JSON only and do not wrap it in markdown.",
+  'Approved shape: {"status":"approved","invoiceId":"...","refundId":"...","amount":4200}',
+  'Denied shape: {"status":"denied","invoiceId":"...","reason":"..."}',
+].join("\n");
+
+const INVOICES: Record<string, InvoiceRecord> = {
+  inv_123: {
+    invoiceId: "inv_123",
+    amount: 4200,
+    refundable: true,
+    customer: "Acme Co",
+  },
+  inv_404: {
+    invoiceId: "inv_404",
+    amount: 1700,
+    refundable: false,
+    customer: "Globex",
+  },
+};
+
+/** Looks up a demo invoice; inherited keys such as "constructor" never match. */
+export async function lookupInvoice({
+  invoiceId,
+}: LookupInvoiceInput): Promise<InvoiceRecord> {
+  const known = Object.prototype.hasOwnProperty.call(INVOICES, invoiceId);
+  const invoice = known ? INVOICES[invoiceId] : undefined;
+  if (!invoice) {
+    throw new Error(`Invoice ${invoiceId} not found`);
+  }
+  return invoice;
+}
+
+/** Builds the demo refund receipt; the id is derived from the invoice id. */
+export async function createRefund(request: CreateRefundInput): Promise<{
+  refundId: string;
+  amount: number;
+  status: "submitted";
+}> {
+  // Pure function of the input, so repeated calls yield the same receipt.
+  const refundId = `rf_${request.invoiceId}`;
+  const status = "submitted" as const;
+  return {
+    refundId,
+    amount: request.amount,
+    status,
+  };
+}
+
+function createRefundTools() {
+  const refundCreation = tool({
+    name: "createRefund",
+    description: CREATE_REFUND_DESCRIPTION,
+    parameters: z.object({
+      invoiceId: z.string().describe("The invoice id that should be refunded."),
+      amount: z.number().describe("The amount to refund in cents."),
+    }),
+    execute: createRefund,
+  });
+
+  const invoiceLookup = tool({
+    name: "lookupInvoice",
+    description: LOOKUP_INVOICE_DESCRIPTION,
+    parameters: z.object({
+      invoiceId: z
+        .string()
+        .describe("The invoice id to inspect, such as inv_123."),
+    }),
+    execute: lookupInvoice,
+  });
+  // Lookup is listed first; the unit test asserts this exact tool order.
+  return [invoiceLookup, refundCreation];
+}
+
+/** Builds a new refund agent with freshly created tools on every call. */
+export function createRefundAgent(options?: { model?: string }) {
+  // An omitted (or nullish) model falls back to DEFAULT_REFUND_MODEL.
+  const model = options?.model ?? DEFAULT_REFUND_MODEL;
+  return new Agent({
+    name: "demo_refund_agent",
+    instructions: REFUND_SYSTEM_PROMPT,
+    model,
+    modelSettings: { temperature: 0 },
+    tools: createRefundTools(),
+  });
+}
+
+/** Builds the demo Runner with tracing disabled and temperature fixed at 0. */
+export function createRefundRunner() {
+  const runnerConfig = {
+    tracingDisabled: true,
+    modelSettings: { temperature: 0 },
+  };
+
+  return new Runner(runnerConfig);
+}
+
+/** Runs a one-off, tool-less agent so judges can prompt the same model stack. */
+export async function promptRefundModel(
+  input: string,
+  options?: HarnessPromptOptions,
+) {
+  // Fall back to a generic instruction when the caller gives no system prompt.
+  const instructions = options?.system ?? "Return a concise answer.";
+  const judgeAgent = new Agent({
+    name: "demo_refund_prompt",
+    instructions,
+    model: DEFAULT_REFUND_MODEL,
+    modelSettings: { temperature: 0 },
+  });
+
+  const runResult = await createRefundRunner().run(judgeAgent, input, {
+    maxTurns: 2,
+  });
+  const answer = resolveResultText(runResult);
+  if (answer) {
+    return answer;
+  }
+
+  // An empty string here means the run produced no usable final output.
+  throw new Error("Prompt model returned an empty response.");
+}
+
+/**
+ * Parses the demo agent's final JSON payload into a typed refund decision.
+ *
+ * Markdown fences and surrounding prose are stripped before parsing. Throws a
+ * SyntaxError (from JSON.parse) when no valid JSON is found, and an Error
+ * naming the raw text when the JSON is not a valid approved/denied decision,
+ * including non-object JSON such as `null`.
+ */
+export function parseRefundDecision(text: string): RefundDecision {
+  const jsonText = extractJsonObjectText(stripMarkdownFence(text));
+  const parsed: unknown = JSON.parse(jsonText);
+  // `null` parses fine but has no properties; treat it as an empty payload
+  // so it reaches the descriptive error below instead of a TypeError.
+  const payload: Record<string, unknown> =
+    typeof parsed === "object" && parsed !== null
+      ? (parsed as Record<string, unknown>)
+      : {};
+  const { status, invoiceId, refundId, amount, reason } = payload;
+
+  if (typeof invoiceId === "string") {
+    if (
+      status === "approved" &&
+      typeof refundId === "string" &&
+      typeof amount === "number"
+    ) {
+      return { status, invoiceId, refundId, amount };
+    }
+    if (status === "denied" && typeof reason === "string") {
+      return { status, invoiceId, reason };
+    }
+  }
+
+  throw new Error(`Refund agent returned an invalid decision payload: ${text}`);
+}
+
+/**
+ * Extracts trimmed text from an OpenAI Agents run result.
+ *
+ * Prefers a string `finalOutput`, then a string `output`, then the JSON form
+ * of a structured `finalOutput`. Returns "" when nothing usable is present,
+ * including a `null` final output, so callers can detect empty responses.
+ */
+export function resolveResultText(result: unknown): string {
+  if (typeof result === "string") return result.trim();
+  if (!result || typeof result !== "object") return "";
+
+  const { finalOutput, output } = result as Record<string, unknown>;
+  if (typeof finalOutput === "string") return finalOutput.trim();
+  if (typeof output === "string") return output.trim();
+
+  // JSON.stringify yields undefined for functions/symbols; report that as "".
+  return finalOutput == null ? "" : (JSON.stringify(finalOutput) ?? "");
+}
+
+function stripMarkdownFence(text: string) {
+  // Removes one surrounding ``` or ```json fence; anything else is returned
+  // trimmed but otherwise untouched.
+  const FENCE = "```";
+  const body = text.trim();
+  const headerEnd = body.indexOf("\n");
+  const isFenced =
+    body.startsWith(FENCE) && body.endsWith(FENCE) && headerEnd !== -1;
+  if (!isFenced) {
+    return body;
+  }
+
+  const language = body.slice(FENCE.length, headerEnd).trim().toLowerCase();
+  if (language === "" || language === "json") {
+    return body.slice(headerEnd + 1, -FENCE.length).trim();
+  }
+  return body;
+}
+
+function extractJsonObjectText(text: string) {
+  // Returns the first balanced `{...}` span of `text`, or `text` unchanged when
+  // it has no "{" or the braces never balance. Braces inside double-quoted
+  // strings (with backslash escapes honored) do not affect the nesting count.
+  const openAt = text.indexOf("{");
+  if (openAt < 0) {
+    return text;
+  }
+
+  let nesting = 0;
+  let quoted = false;
+  let skipNext = false;
+
+  for (let i = openAt; i < text.length; i += 1) {
+    const ch = text.charAt(i);
+
+    if (skipNext) {
+      // The character after a backslash inside a string is taken literally.
+      skipNext = false;
+      continue;
+    }
+
+    if (quoted) {
+      if (ch === "\\") {
+        skipNext = true;
+      } else if (ch === '"') {
+        quoted = false;
+      }
+      continue;
+    }
+
+    switch (ch) {
+      case '"':
+        quoted = true;
+        break;
+      case "{":
+        nesting += 1;
+        break;
+      case "}":
+        nesting -= 1;
+        if (nesting === 0) {
+          return text.slice(openAt, i + 1);
+        }
+        break;
+      default:
+        break;
+    }
+  }
+
+  // Unbalanced input: hand everything back and let JSON.parse report it.
+  return text;
+}
diff --git a/apps/demo-openai-agents/tsconfig.json b/apps/demo-openai-agents/tsconfig.json
new file mode 100644
index 0000000..5f62fc7
--- /dev/null
+++ b/apps/demo-openai-agents/tsconfig.json
@@ -0,0 +1,4 @@
+{
+  "extends": "../../tsconfig.base.json",
+  "include": ["**/*.ts"]
+}
diff --git a/apps/demo-pi/evals/refund.eval.ts b/apps/demo-pi/evals/refund.eval.ts
index 0f97ba6..319e2cb 100644
--- a/apps/demo-pi/evals/refund.eval.ts
+++ b/apps/demo-pi/evals/refund.eval.ts
@@ -20,6 +20,9 @@ describeEval(
     skipIf: () => !process.env.ANTHROPIC_API_KEY,
     harness: piAiHarness({
       createAgent: () => createRefundAgent(),
+      toolReplay: {
+        lookupInvoice: true,
+      },
       prompt: promptRefundModel,
     }),
     judges: [ToolCallJudge()],
diff --git a/apps/demo-pi/evals/refund.fail.eval.ts b/apps/demo-pi/evals/refund.fail.eval.ts
index a326949..01f986b 100644
--- a/apps/demo-pi/evals/refund.fail.eval.ts
+++ b/apps/demo-pi/evals/refund.fail.eval.ts
@@ -14,6 +14,9 @@ type ScoredRefundCase = RefundCase & {
 
 const harness = piAiHarness({
   createAgent: () => createRefundAgent(),
+  toolReplay: {
+    lookupInvoice: true,
+  },
   prompt: promptRefundModel,
 });
 
diff --git a/apps/demo-pi/src/refundAgent.ts b/apps/demo-pi/src/refundAgent.ts
index 1814c5a..29be95d 100644
--- a/apps/demo-pi/src/refundAgent.ts
+++ b/apps/demo-pi/src/refundAgent.ts
@@ -108,7 +108,6 @@ export async function createRefund({
 const refundAgentTools = {
   lookupInvoice: {
     description: LOOKUP_INVOICE_DESCRIPTION,
-    replay: true,
     execute: lookupInvoice,
   },
   createRefund: {
diff --git a/docs/architecture.md b/docs/architecture.md
index e91fad8..ba2aa07 100644
--- a/docs/architecture.md
+++ b/docs/architecture.md
@@ -27,9 +27,11 @@ packages/
       judges/
       legacy/
   harness-ai-sdk/
+  harness-openai-agents/
   harness-pi-ai/
 apps/
   demo-ai-sdk/
+  demo-openai-agents/
   demo-pi/
 ```
 
@@ -127,6 +129,10 @@ the judge in matcher options.
 
 ## First-Party Harness Packages
 
+Replay/VCR policy is configured at the harness boundary with `toolReplay` and
+global Vitest environment settings. Tool definitions should describe tool
+behavior only.
+
 ### `@vitest-evals/harness-ai-sdk`
 
 Adapts `ai-sdk`-style results into the normalized run/session shape. It can
@@ -134,6 +140,14 @@ derive output, usage, messages, tool calls, and errors from common AI SDK
 result objects, while still allowing custom `run`, `session`, `output`, and
 `usage` overrides.
 
+### `@vitest-evals/harness-openai-agents`
+
+Adapts `@openai/agents` `Runner.run(agent, input, options)` workflows into the
+normalized run/session shape. It accepts an existing agent or `createAgent()`
+factory, supports custom app entrypoints, normalizes `RunResult` output,
+messages, usage, tool calls, tool results, errors, and trace metadata, and
+records replay metadata for opt-in local function tools.
+
 ### `@vitest-evals/harness-pi-ai`
 
 Adapts `pi-ai` style agents into the same normalized shape. It also owns the
@@ -149,9 +163,10 @@ surface it.
 
 ## Demo Apps
 
-`apps/demo-pi` and `apps/demo-ai-sdk` own their demo fixtures locally. They stay
-under `apps/` because they are product demos, while `packages/` is reserved for
-real package surfaces that can be published or consumed independently.
+`apps/demo-pi`, `apps/demo-ai-sdk`, and `apps/demo-openai-agents` own their demo
+fixtures locally. They stay under `apps/` because they are product demos, while
+`packages/` is reserved for real package surfaces that can be published or
+consumed independently.
 
 ## Extension Points
 
diff --git a/docs/development-guide.md b/docs/development-guide.md
index c94fc65..c5e4770 100644
--- a/docs/development-guide.md
+++ b/docs/development-guide.md
@@ -65,6 +65,14 @@ Owns:
 - adapting AI SDK results into `HarnessRun`
 - AI SDK specific usage/session normalization
 
+### `packages/harness-openai-agents`
+
+Owns:
+
+- adapting OpenAI Agents SDK `Runner.run(...)` results into `HarnessRun`
+- OpenAI Agents specific `RunResult` and function-tool normalization
+- replay metadata for opt-in local function tools
+
 ### `packages/harness-pi-ai`
 
 Owns:
@@ -75,9 +83,10 @@ Owns:
 
 ## Demo Apps
 
-`apps/demo-pi` and `apps/demo-ai-sdk` own live demo eval coverage and any
-app-local refund fixtures they need. Keep them realistic; they are part of the
-product story, not just smoke tests. `packages/` is for real package surfaces.
+`apps/demo-pi`, `apps/demo-ai-sdk`, and `apps/demo-openai-agents` own live demo
+eval coverage and any app-local refund fixtures they need. Keep them realistic;
+they are part of the product story, not just smoke tests. `packages/` is for
+real package surfaces.
 
 ## Adding a New Judge
 
diff --git a/docs/testing.md b/docs/testing.md
index 708494a..e47bc5b 100644
--- a/docs/testing.md
+++ b/docs/testing.md
@@ -23,12 +23,18 @@ packages/vitest-evals/src/
     scorers/*.test.ts
 packages/harness-ai-sdk/src/
   index.test.ts
+packages/harness-openai-agents/src/
+  index.test.ts
 packages/harness-pi-ai/src/
   index.test.ts
 apps/demo-pi/src/
   refundAgent.test.ts
+apps/demo-openai-agents/src/
+  refundAgent.test.ts
 apps/demo-ai-sdk/evals/
   *.eval.ts
+apps/demo-openai-agents/evals/
+  *.eval.ts
 apps/demo-pi/evals/
   *.eval.ts
 ```
diff --git a/package.json b/package.json
index dd7b740..a58c884 100644
--- a/package.json
+++ b/package.json
@@ -13,9 +13,9 @@
     "prepare": "simple-git-hooks",
     "release:check": "node ./scripts/check-release-config.mjs",
     "typecheck": "tsc --noEmit",
-    "test": "dotenv -e .env -e .env.local -- vitest run packages apps --config=./vitest.config.ts --reporter=./packages/vitest-evals/src/reporter.ts",
-    "test:watch": "dotenv -e .env -e .env.local -- vitest packages apps --config=./vitest.config.ts --reporter=./packages/vitest-evals/src/reporter.ts",
-    "test:ci": "dotenv -e .env -e .env.local -- vitest run packages apps --config=./vitest.config.ts --coverage --reporter=./packages/vitest-evals/src/reporter.ts --reporter=junit --outputFile=tests.junit.xml"
+    "test": "dotenv -e .env -e .env.local -- vitest run packages apps scripts --config=./vitest.config.ts --reporter=./packages/vitest-evals/src/reporter.ts",
+    "test:watch": "dotenv -e .env -e .env.local -- vitest packages apps scripts --config=./vitest.config.ts --reporter=./packages/vitest-evals/src/reporter.ts",
+    "test:ci": "dotenv -e .env -e .env.local -- vitest run packages apps scripts --config=./vitest.config.ts --coverage --reporter=./packages/vitest-evals/src/reporter.ts --reporter=junit --outputFile=tests.junit.xml"
   },
   "repository": {
     "type": "git",
diff --git a/packages/harness-ai-sdk/README.md b/packages/harness-ai-sdk/README.md
index e2461a4..0c79b08 100644
--- a/packages/harness-ai-sdk/README.md
+++ b/packages/harness-ai-sdk/README.md
@@ -11,13 +11,14 @@ npm install -D ai vitest-evals @vitest-evals/harness-ai-sdk
 ## Usage
 
 ```ts
+import { expect } from "vitest";
 import { generateText, stepCountIs } from "ai";
 import { openai } from "@ai-sdk/openai";
 import { aiSdkHarness } from "@vitest-evals/harness-ai-sdk";
+import { describeEval, toolCalls } from "vitest-evals";
 
 const tools = {
   lookupInvoice: {
-    replay: true,
     inputSchema: lookupInvoiceSchema,
     execute: lookupInvoice,
   },
@@ -25,6 +26,9 @@ const tools = {
 
 const harness = aiSdkHarness({
   tools,
+  toolReplay: {
+    lookupInvoice: true,
+  },
   prompt: (input, options) =>
     generateText({
       model: openai("gpt-4o-mini"),
@@ -39,6 +43,19 @@ const harness = aiSdkHarness({
       stopWhen: stepCountIs(5),
     }),
 });
+
+describeEval("refund agent", { harness }, (it) => {
+  it("approves a refundable invoice", async ({ run }) => {
+    const result = await run("Refund invoice inv_123");
+
+    expect(result.output).toMatchObject({
+      status: "approved",
+    });
+    expect(toolCalls(result.session).map((call) => call.name)).toContain(
+      "lookupInvoice",
+    );
+  });
+});
 ```
 
 If your existing AI SDK app exposes its own entrypoint, wire that in directly:
@@ -61,7 +78,7 @@ The adapter infers:
 - usage diagnostics from `totalUsage` / `usage`
 - `run.output` from common AI SDK result fields such as `output`, `object`, and
   `text`
-- replay/cassette metadata for opt-in tools when they set `replay: true`
+- replay/cassette metadata for local tools configured with `toolReplay`
 
 See the workspace demo app in `apps/demo-ai-sdk` and the RFC notes in
 `docs/harness-first-rfc.md`.
diff --git a/packages/harness-ai-sdk/src/index.test.ts b/packages/harness-ai-sdk/src/index.test.ts
index 5fcdeea..aaba2d4 100644
--- a/packages/harness-ai-sdk/src/index.test.ts
+++ b/packages/harness-ai-sdk/src/index.test.ts
@@ -403,7 +403,6 @@ test("default agent run receives wrapped runtime tools", async () => {
     }),
     tools: {
       lookupInvoice: {
-        replay: true,
         inputSchema: z.object({
           invoiceId: z.string(),
         }),
@@ -1387,9 +1386,11 @@ test("records and replays opt-in tools in auto mode", async () => {
 
   const replayHarness = aiSdkHarness({
     prompt: judgePrompt,
+    toolReplay: {
+      lookupInvoice: true,
+    },
     tools: {
       lookupInvoice: {
-        replay: true,
         inputSchema: z.object({
           invoiceId: z.string(),
         }),
@@ -1521,6 +1522,53 @@ test("records and replays opt-in tools in auto mode", async () => {
   });
 });
 
+test("does not opt into replay from tool definitions", async () => {
+  replayDir = mkdtempSync(join(process.cwd(), ".tmp-ai-sdk-replay-"));
+  vi.stubEnv("VITEST_EVALS_REPLAY_MODE", "auto");
+  vi.stubEnv("VITEST_EVALS_REPLAY_DIR", replayDir);
+  // Replay mode is on, but the harness below is given no `toolReplay` option.
+  const execute = vi.fn(async ({ invoiceId }: { invoiceId: string }) => ({
+    invoiceId,
+    refundable: true,
+  }));
+  // `replay` is not part of the tool type anymore, hence the cast on the toolset.
+  const harness = aiSdkHarness({
+    prompt: judgePrompt,
+    tools: {
+      lookupInvoice: {
+        replay: true,
+        inputSchema: z.object({
+          invoiceId: z.string(),
+        }),
+        execute,
+      },
+    } as unknown as AiSdkToolset<string, DemoMetadata>,
+    task: async ({ runtime }) => {
+      await runtime.tools.lookupInvoice.execute?.(
+        {
+          invoiceId: "inv_123",
+        },
+        {
+          toolCallId: "call_lookup",
+          messages: [],
+        } satisfies ToolExecutionOptions,
+      );
+
+      return {
+        text: '{"status":"approved"}',
+      };
+    },
+  });
+
+  const run = await harness.run(
+    "Refund invoice inv_123",
+    createHarnessContext({}),
+  );
+  // The tool ran exactly once and its recorded call carries no replay metadata.
+  expect(execute).toHaveBeenCalledTimes(1);
+  expect(toolCalls(run.session)[0].metadata?.replay).toBeUndefined();
+});
+
 test("rejects async iterable replay outputs after awaiting execute", async () => {
   replayDir = mkdtempSync(join(process.cwd(), ".tmp-ai-sdk-replay-"));
   vi.stubEnv("VITEST_EVALS_REPLAY_MODE", "auto");
@@ -1532,9 +1580,11 @@ test("rejects async iterable replay outputs after awaiting execute", async () =>
 
   const replayHarness = aiSdkHarness({
     prompt: judgePrompt,
+    toolReplay: {
+      streamRefund: true,
+    },
     tools: {
       streamRefund: {
-        replay: true,
         inputSchema: z.object({
           invoiceId: z.string(),
         }),
@@ -1578,9 +1628,11 @@ test("errors when strict mode is missing a recording", async () => {
 
   const replayHarness = aiSdkHarness({
     prompt: judgePrompt,
+    toolReplay: {
+      lookupInvoice: true,
+    },
     tools: {
       lookupInvoice: {
-        replay: true,
         inputSchema: z.object({
           invoiceId: z.string(),
         }),
diff --git a/packages/harness-ai-sdk/src/index.ts b/packages/harness-ai-sdk/src/index.ts
index 1d812cb..16f8a6d 100644
--- a/packages/harness-ai-sdk/src/index.ts
+++ b/packages/harness-ai-sdk/src/index.ts
@@ -109,11 +109,19 @@ export type AiSdkToolReplayConfig<
 export type AiSdkToolDefinition<
   TArgs extends JsonValue = JsonValue,
   TResult extends JsonValue = JsonValue,
+  _TInput = string,
+  _TMetadata extends HarnessMetadata = HarnessMetadata,
+> = Tool<TArgs, TResult>;
+
+export type AiSdkToolReplayPolicy<
   TInput = string,
   TMetadata extends HarnessMetadata = HarnessMetadata,
-> = Tool<TArgs, TResult> & {
-  replay?: boolean | AiSdkToolReplayConfig<TArgs, TResult, TInput, TMetadata>;
-};
+> = boolean | AiSdkToolReplayConfig<JsonValue, JsonValue, TInput, TMetadata>;
+
+export type AiSdkToolReplayPolicies<
+  TInput = string,
+  TMetadata extends HarnessMetadata = HarnessMetadata,
+> = Record<string, AiSdkToolReplayPolicy<TInput, TMetadata>>;
 
 export type AiSdkToolset<
   TInput = string,
@@ -199,6 +207,7 @@ interface AiSdkHarnessBaseOptions<
   >,
 > {
   tools?: TTools;
+  toolReplay?: AiSdkToolReplayPolicies<TInput, TMetadata>;
   session?: (
     args: AiSdkHarnessResultArgs<TAgent, TInput, TMetadata, TResult, TTools>,
   ) => MaybePromise<NormalizedSession>;
@@ -295,6 +304,7 @@ async function runAiSdkHarness<
     input,
     context,
     tools: options.tools,
+    toolReplay: options.toolReplay,
     replayMetadataByToolCallId,
     runtimeToolCalls,
   });
@@ -534,18 +544,22 @@ function createToolset<
   input,
   context,
   tools,
+  toolReplay,
   replayMetadataByToolCallId,
   runtimeToolCalls,
 }: {
   input: TInput;
   context: HarnessContext<TMetadata>;
   tools: TTools | undefined;
+  toolReplay: AiSdkToolReplayPolicies<TInput, TMetadata> | undefined;
   replayMetadataByToolCallId: Map<string, ReplayMetadata>;
   runtimeToolCalls: ToolCallRecord[];
 }) {
   return Object.fromEntries(
     Object.entries(tools ?? {}).map(([toolName, tool]) => {
-      if (tool.replay && !tool.execute) {
+      const replay = toolReplay?.[toolName];
+
+      if (replay && !tool.execute) {
         throw new Error(
           `Tool replay requires execute() for ${toolName}. Provider-executed tools cannot be recorded automatically.`,
         );
@@ -573,14 +587,14 @@ function createToolset<
           } satisfies AiSdkToolContext<TInput, TMetadata>;
 
           try {
-            const executionResult = tool.replay
+            const executionResult = replay
               ? await executeToolWithReplay({
                   toolName,
                   toolInput,
                   execute,
                   execution,
                   context: replayContext,
-                  replay: tool.replay,
+                  replay,
                 })
               : {
                   result: await execute(toolInput, execution),
@@ -659,14 +673,15 @@ async function executeToolWithReplay<
   execute: NonNullable<TTool["execute"]>;
   execution: ToolExecutionOptions;
   context: AiSdkToolContext<TInput, TMetadata>;
-  replay: NonNullable<TTool["replay"]>;
+  replay: AiSdkToolReplayPolicy<TInput, TMetadata>;
 }) {
-  const replayInput = toReplayJsonValue(
-    toolInput,
-    `${toolName} tool input`,
-  ) as InferToolInput<TTool> & JsonValue;
+  const replayInput = toReplayJsonValue(toolInput, `${toolName} tool input`);
 
-  return executeWithReplay({
+  return executeWithReplay<
+    JsonValue,
+    JsonValue,
+    AiSdkToolContext<TInput, TMetadata>
+  >({
     toolName,
     args: replayInput,
     context,
@@ -682,10 +697,7 @@ async function executeToolWithReplay<
         );
       }
 
-      return toReplayJsonValue(
-        output,
-        `${toolName} tool output`,
-      ) as InferToolOutput<TTool> & JsonValue;
+      return toReplayJsonValue(output, `${toolName} tool output`);
     },
     replay,
   });
diff --git a/packages/harness-openai-agents/README.md b/packages/harness-openai-agents/README.md
new file mode 100644
index 0000000..051dfc3
--- /dev/null
+++ b/packages/harness-openai-agents/README.md
@@ -0,0 +1,126 @@
+# @vitest-evals/harness-openai-agents
+
+`@openai/agents`-focused harness adapter for `vitest-evals`.
+
+## Install
+
+```sh
+npm install -D @openai/agents vitest-evals @vitest-evals/harness-openai-agents
+```
+
+## Usage
+
+```ts
+import { expect } from "vitest";
+import { Runner } from "@openai/agents";
+import { openaiAgentsHarness } from "@vitest-evals/harness-openai-agents";
+import { describeEval, toolCalls } from "vitest-evals";
+
+const harness = openaiAgentsHarness({
+  createAgent: () => createClassifierAgent(),
+  createRunner: () =>
+    new Runner({
+      modelProvider,
+      tracingDisabled: true,
+    }),
+  prompt: sharedJudgePrompt,
+});
+
+describeEval("classifier agent", { harness }, (it) => {
+  it("classifies a bottle", async ({ run }) => {
+    const result = await run("Classify bottle bt_123");
+
+    expect(result.output).toMatchObject({
+      label: "bourbon",
+    });
+    expect(toolCalls(result.session).map((call) => call.name)).toContain(
+      "lookup_bottle",
+    );
+  });
+});
+```
+
+The adapter calls `runner.run(agent, input, options)` by default. It forwards
+the eval metadata, artifact helpers, and abort signal through the run options,
+then normalizes the `RunResult` into the standard `HarnessRun` shape.
+
+If your application has a custom entrypoint, wire it directly:
+
+```ts
+const harness = openaiAgentsHarness({
+  createAgent: () => createClassifierAgent(),
+  createRunner: () => new Runner({ modelProvider, tracingDisabled: true }),
+  prompt: sharedJudgePrompt,
+  run: ({ agent, input, runner, runOptions }) =>
+    runBottleClassifier({ agent, runner, input, runOptions }),
+  normalize: {
+    output: ({ result }) => result.classification,
+    outputText: ({ output }) => JSON.stringify(output),
+  },
+});
+```
+
+The required `prompt` callback is passed to harness-backed judges as
+`JudgeContext.harness.prompt`, so rubric or factuality judges can share the
+same provider/model setup as the suite harness.
+
+The adapter provides:
+
+- native `Runner.run(agent, input, options)` execution
+- support for existing agents or per-test `createAgent()` factories
+- a `run` escape hatch for app-specific entrypoints
+- normalized assistant output, messages, tool calls, tool results, usage,
+  timings, errors, and replay-friendly metadata
+- app-facing `run.output` plus a deliberate `session.outputText` for judges
+- opt-in replay metadata for local function tools configured with `toolReplay`
+
+## Tool Replay
+
+Replay is configured globally in Vitest via environment variables:
+
+```ts
+import { defineConfig } from "vitest/config";
+
+export default defineConfig({
+  test: {
+    env: {
+      VITEST_EVALS_REPLAY_MODE: "auto",
+      VITEST_EVALS_REPLAY_DIR: ".vitest-evals/recordings",
+    },
+  },
+});
+```
+
+Then opt local function tools into replay by name:
+
+```ts
+import { Agent, Runner, tool } from "@openai/agents";
+import { openaiAgentsHarness } from "@vitest-evals/harness-openai-agents";
+
+const lookupBottle = tool({
+  name: "lookup_bottle",
+  description: "Look up bottle facts.",
+  parameters: lookupBottleSchema,
+  async execute({ bottleId }) {
+    return fetchBottleFacts(bottleId);
+  },
+});
+
+const harness = openaiAgentsHarness({
+  createAgent: () => new Agent({ name: "classifier", tools: [lookupBottle] }),
+  createRunner: () => new Runner({ modelProvider, tracingDisabled: true }),
+  prompt: sharedJudgePrompt,
+  toolReplay: {
+    lookup_bottle: true,
+  },
+});
+```
+
+`toolReplay` is keyed by the OpenAI tool name. Values can be `true` or the
+standard replay config object with `key`, `sanitize`, and `version` callbacks.
+
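+Hosted OpenAI tools are still normalized from the SDK run items when they are present in `newItems`, but replay
+recording is only automatic for local function tools that execute in the application process. Listing a hosted tool
+(one without `invoke()`) or an unknown tool name in `toolReplay` rejects the run before the runner is called.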
+
+See the workspace demo app in `apps/demo-openai-agents`.
diff --git a/packages/harness-openai-agents/package.json b/packages/harness-openai-agents/package.json
new file mode 100644
index 0000000..8d13d23
--- /dev/null
+++ b/packages/harness-openai-agents/package.json
@@ -0,0 +1,31 @@
+{
+  "name": "@vitest-evals/harness-openai-agents",
+  "version": "0.9.0-beta.1",
+  "sideEffects": false,
+  "types": "./dist/index.d.ts",
+  "main": "./dist/index.js",
+  "module": "./dist/index.mjs",
+  "files": ["dist"],
+  "publishConfig": {
+    "access": "public"
+  },
+  "exports": {
+    ".": {
+      "source": "./src/index.ts",
+      "types": "./dist/index.d.ts",
+      "require": "./dist/index.js",
+      "import": "./dist/index.mjs"
+    }
+  },
+  "scripts": {
+    "build": "tsup --config ./tsup.config.ts"
+  },
+  "peerDependencies": {
+    "@openai/agents": ">=0.8 <1",
+    "vitest-evals": "*"
+  },
+  "devDependencies": {
+    "@openai/agents": "^0.8.5",
+    "vitest-evals": "workspace:*"
+  }
+}
diff --git a/packages/harness-openai-agents/src/index.test.ts b/packages/harness-openai-agents/src/index.test.ts
new file mode 100644
index 0000000..c26ac07
--- /dev/null
+++ b/packages/harness-openai-agents/src/index.test.ts
@@ -0,0 +1,920 @@
+import { mkdtempSync, readFileSync, rmSync } from "node:fs";
+import { join } from "node:path";
+import { Agent, tool } from "@openai/agents";
+import { afterEach, expect, test, vi } from "vitest";
+import { describeEval, getHarnessRunFromError, toolCalls } from "vitest-evals";
+import type { JsonValue } from "vitest-evals/harness";
+import { openaiAgentsHarness, type OpenAiAgentsTool } from "./index";
+
+type DemoMetadata = {
+  scenario?: string; // read back by tests through runOptions.context.metadata
+};
+// Minimal agent shape used by the stub runners: a name, a model, optional tools.
+type DemoAgent = {
+  name: string;
+  model: string;
+  tools?: OpenAiAgentsTool<string, DemoMetadata>[];
+};
+
+let replayDir: string | undefined; // temp replay dir; removed in afterEach
+// Identity judge prompt: resolves to its input unchanged.
+const judgePrompt = async (input: string) => input;
+
+afterEach(() => {
+  // Undo env stubs first, then delete the replay dir if a test created one.
+  vi.unstubAllEnvs();
+  if (!replayDir) return;
+  rmSync(replayDir, { recursive: true, force: true });
+  replayDir = undefined;
+});
+
+function createHarnessContext<TMetadata extends Record<string, unknown>>(
+  metadata: TMetadata,
+) {
+  const context = {
+    metadata,
+    task: {
+      meta: {},
+    },
+    artifacts: {} as Record<string, JsonValue>,
+    setArtifact: vi.fn((name: string, value: JsonValue) => {
+      context.artifacts[name] = value;
+    }),
+  };
+  // Stub harness context: the setArtifact spy writes into context.artifacts.
+  return context;
+}
+
+const runResult = { // fixture spread into the mocked runner's return value
+  finalOutput: {
+    status: "classified",
+    category: "bourbon",
+  },
+  state: {
+    usage: {
+      requests: 1,
+      inputTokens: 13,
+      outputTokens: 8,
+      totalTokens: 21,
+    },
+  },
+  lastAgent: {
+    name: "classifier",
+    model: "gpt-4.1-mini",
+  },
+  rawResponses: [
+    {
+      id: "resp_123",
+    },
+  ],
+  newItems: [ // one assistant message, then a function call and its result
+    {
+      type: "message_output_item",
+      rawItem: {
+        type: "message",
+        role: "assistant",
+        content: [
+          {
+            type: "output_text",
+            text: '{"status":"classified","category":"bourbon"}',
+          },
+        ],
+        status: "completed",
+      },
+      agent: {
+        name: "classifier",
+      },
+    },
+    {
+      type: "tool_call_item",
+      rawItem: {
+        type: "function_call",
+        callId: "call_lookup",
+        name: "lookupBottle",
+        arguments: JSON.stringify({
+          bottleId: "bt_123",
+        }),
+        status: "completed",
+      },
+    },
+    {
+      type: "tool_call_output_item",
+      output: {
+        bottleId: "bt_123",
+        family: "bourbon",
+      },
+      rawItem: {
+        type: "function_call_result",
+        callId: "call_lookup",
+        name: "lookupBottle",
+        output: {
+          bottleId: "bt_123",
+          family: "bourbon",
+        },
+        status: "completed",
+      },
+    },
+  ],
+} as const;
+
+describeEval(
+  "openai agents harness adapter",
+  {
+    harness: openaiAgentsHarness({
+      prompt: judgePrompt,
+      agent: {
+        name: "classifier",
+        model: "gpt-4.1-mini",
+      },
+      runner: {
+        run: vi.fn(async (_agent: DemoAgent, _input: string, options) => {
+          expect(options?.context).toMatchObject({
+            metadata: {
+              scenario: "peated",
+            },
+          });
+          expect(options?.stream).toBe(false);
+          return {
+            ...runResult,
+            output: runResult.newItems,
+          };
+        }),
+      },
+    }),
+  },
+  (it) => {
+    it("normalizes native run results", async ({ run }) => {
+      const result = await run("Classify bottle bt_123", {
+        metadata: {
+          scenario: "peated",
+        },
+      });
+      // Output, usage, model, and the message/tool-call transcript are checked.
+      expect(result.output).toEqual({
+        status: "classified",
+        category: "bourbon",
+      });
+      expect(result.session.outputText).toBe(
+        '{"status":"classified","category":"bourbon"}',
+      );
+      expect(result.usage).toMatchObject({
+        model: "gpt-4.1-mini",
+        inputTokens: 13,
+        outputTokens: 8,
+        totalTokens: 21,
+        toolCalls: 1,
+      });
+      expect(result.session.model).toBe("gpt-4.1-mini");
+      expect(result.session.messages).toMatchObject([
+        {
+          role: "user",
+          content: "Classify bottle bt_123",
+        },
+        {
+          role: "assistant",
+          content: '{"status":"classified","category":"bourbon"}',
+        },
+        {
+          role: "assistant",
+          toolCalls: [
+            {
+              id: "call_lookup",
+              name: "lookupBottle",
+              arguments: {
+                bottleId: "bt_123",
+              },
+              result: {
+                bottleId: "bt_123",
+                family: "bourbon",
+              },
+            },
+          ],
+        },
+        {
+          role: "tool",
+          content: {
+            bottleId: "bt_123",
+            family: "bourbon",
+          },
+          metadata: {
+            name: "lookupBottle",
+            toolCallId: "call_lookup",
+            isError: false,
+          },
+        },
+      ]);
+    });
+  },
+);
+
+test("exposes prompt and supports custom app output mapping", async () => {
+  const prompt = vi.fn(async (input: string) => `judge: ${input}`);
+  const harness = openaiAgentsHarness({
+    prompt,
+    createAgent: () => ({
+      name: "classifier",
+      model: "gpt-4.1-mini",
+    }),
+    run: async ({ context, runOptions }) => {
+      context.setArtifact("entrypoint", "custom");
+      expect(runOptions.context).toMatchObject({
+        metadata: {
+          scenario: "domain",
+        },
+      });
+
+      return {
+        classification: {
+          label: "bourbon",
+          confidence: 0.92,
+        },
+      };
+    },
+    normalize: {
+      output: ({ result }) =>
+        (result as { classification: { label: string; confidence: number } })
+          .classification,
+      outputText: ({ output }) => JSON.stringify(output),
+    },
+  });
+  // The harness exposes the configured prompt callback directly.
+  await expect(harness.prompt("score this")).resolves.toBe("judge: score this");
+
+  const result = await harness.run(
+    "Classify bottle bt_123",
+    createHarnessContext({
+      scenario: "domain",
+    }),
+  );
+  // normalize.output picks the app value; normalize.outputText stringifies it.
+  expect(prompt).toHaveBeenCalledWith("score this");
+  expect(result.output).toEqual({
+    label: "bourbon",
+    confidence: 0.92,
+  });
+  expect(result.session.outputText).toBe(
+    '{"label":"bourbon","confidence":0.92}',
+  );
+  expect(result.artifacts).toEqual({
+    entrypoint: "custom",
+  });
+});
+
+test("wraps OpenAI Agents function tools with replay metadata", async () => {
+  replayDir = mkdtempSync(join(process.cwd(), ".tmp-openai-agents-replay-"));
+  vi.stubEnv("VITEST_EVALS_REPLAY_MODE", "auto");
+  vi.stubEnv("VITEST_EVALS_REPLAY_DIR", replayDir);
+
+  const invoke = vi.fn(async (...args: unknown[]) => {
+    const rawInput = args[1];
+    if (typeof rawInput !== "string") {
+      throw new Error("Expected JSON tool input");
+    }
+
+    const input = JSON.parse(rawInput) as { bottleId: string };
+    return {
+      bottleId: input.bottleId,
+      family: "bourbon",
+    };
+  });
+  const lookupBottle = {
+    type: "function",
+    name: "lookupBottle",
+    invoke,
+  } satisfies OpenAiAgentsTool<string, DemoMetadata>;
+  const originalInvoke = lookupBottle.invoke;
+  const agent = {
+    name: "classifier",
+    model: "gpt-4.1-mini",
+    tools: [lookupBottle],
+  } satisfies DemoAgent;
+  const runner = {
+    run: vi.fn(async (runAgent: DemoAgent, _input: string, runOptions) => {
+      expect(runAgent).not.toBe(agent);
+      expect(runAgent.tools).not.toBe(agent.tools);
+      expect(runAgent.tools?.[0]).not.toBe(lookupBottle);
+      const evidence = await runAgent.tools?.[0].invoke?.(
+        runOptions?.context,
+        JSON.stringify({
+          bottleId: "bt_123",
+        }),
+        {
+          toolCallId: "call_lookup",
+        },
+      );
+
+      return {
+        finalOutput: {
+          label: "bourbon",
+          evidence,
+        },
+      };
+    }),
+  };
+  const harness = openaiAgentsHarness({
+    prompt: judgePrompt,
+    agent,
+    runner,
+    toolReplay: {
+      lookupBottle: true,
+    },
+  });
+  // First run: the replay dir is fresh, so the tool executes and is recorded.
+  const firstRun = await harness.run(
+    "Classify bottle bt_123",
+    createHarnessContext({}),
+  );
+
+  expect(invoke).toHaveBeenCalledTimes(1);
+  expect(agent.tools?.[0]).toBe(lookupBottle);
+  expect(agent.tools?.[0].invoke).toBe(originalInvoke);
+  expect(toolCalls(firstRun.session)).toMatchObject([
+    {
+      id: "call_lookup",
+      name: "lookupBottle",
+      arguments: {
+        bottleId: "bt_123",
+      },
+      result: {
+        bottleId: "bt_123",
+        family: "bourbon",
+      },
+      metadata: {
+        replay: {
+          status: "recorded",
+        },
+      },
+    },
+  ]);
+  // The recording on disk holds the parsed tool input and the tool output.
+  const recordingPath = (
+    toolCalls(firstRun.session)[0].metadata?.replay as { recordingPath: string }
+  ).recordingPath;
+  const recording = JSON.parse(
+    readFileSync(join(process.cwd(), recordingPath), "utf8"),
+  ) as {
+    input: { bottleId: string };
+    output: { bottleId: string; family: string };
+  };
+  expect(recording.input).toEqual({
+    bottleId: "bt_123",
+  });
+  expect(recording.output).toEqual({
+    bottleId: "bt_123",
+    family: "bourbon",
+  });
+  // Make real execution fail so a passing second run proves replay was used.
+  invoke.mockImplementation(async () => {
+    throw new Error("tool should not execute after recording exists");
+  });
+
+  const secondRun = await harness.run(
+    "Classify bottle bt_123",
+    createHarnessContext({}),
+  );
+
+  expect(invoke).toHaveBeenCalledTimes(1);
+  expect(agent.tools?.[0]).toBe(lookupBottle);
+  expect(agent.tools?.[0].invoke).toBe(originalInvoke);
+  expect(toolCalls(secondRun.session)).toMatchObject([
+    {
+      id: "call_lookup",
+      name: "lookupBottle",
+      result: {
+        bottleId: "bt_123",
+        family: "bourbon",
+      },
+      metadata: {
+        replay: {
+          status: "replayed",
+        },
+      },
+    },
+  ]);
+});
+
+test("prefers captured local tool results over model-visible output wrappers", async () => {
+  const lookupBottle = {
+    type: "function",
+    name: "lookupBottle",
+    invoke: vi.fn(async () => ({
+      bottleId: "bt_123",
+      family: "bourbon",
+    })),
+  } satisfies OpenAiAgentsTool<string, DemoMetadata>;
+  const harness = openaiAgentsHarness({
+    prompt: judgePrompt,
+    agent: {
+      name: "classifier",
+      model: "gpt-4.1-mini",
+      tools: [lookupBottle],
+    } satisfies DemoAgent,
+    runner: {
+      run: async (agent: DemoAgent, _input: string, runOptions) => {
+        const evidence = await agent.tools?.[0].invoke?.(
+          runOptions?.context,
+          JSON.stringify({
+            bottleId: "bt_123",
+          }),
+          {
+            toolCallId: "call_lookup",
+          },
+        );
+        // The output item below wraps the tool result as a text payload.
+        return {
+          finalOutput: "classified",
+          newItems: [
+            {
+              type: "tool_call_item",
+              rawItem: {
+                type: "function_call",
+                callId: "call_lookup",
+                name: "lookupBottle",
+                arguments: JSON.stringify({
+                  bottleId: "bt_123",
+                }),
+                status: "completed",
+              },
+            },
+            {
+              type: "tool_call_output_item",
+              rawItem: {
+                type: "function_call_result",
+                callId: "call_lookup",
+                name: "lookupBottle",
+                status: "completed",
+                output: {
+                  type: "text",
+                  text: JSON.stringify(evidence),
+                },
+              },
+            },
+          ],
+        };
+      },
+    },
+  });
+
+  const result = await harness.run(
+    "Classify bottle bt_123",
+    createHarnessContext({}),
+  );
+  // Tool call keeps the captured result; the tool message keeps the wrapper.
+  expect(toolCalls(result.session)).toMatchObject([
+    {
+      id: "call_lookup",
+      name: "lookupBottle",
+      result: {
+        bottleId: "bt_123",
+        family: "bourbon",
+      },
+    },
+  ]);
+  expect(result.session.messages).toContainEqual(
+    expect.objectContaining({
+      role: "tool",
+      content: {
+        type: "text",
+        text: '{"bottleId":"bt_123","family":"bourbon"}',
+      },
+    }),
+  );
+});
+
+test("preserves explicit null captured local tool results", async () => {
+  const lookupBottle = {
+    type: "function",
+    name: "lookupBottle",
+    invoke: vi.fn(async () => null),
+  } satisfies OpenAiAgentsTool<string, DemoMetadata>;
+  const harness = openaiAgentsHarness({
+    prompt: judgePrompt,
+    agent: {
+      name: "classifier",
+      model: "gpt-4.1-mini",
+      tools: [lookupBottle],
+    } satisfies DemoAgent,
+    runner: {
+      run: async (agent: DemoAgent, _input: string, runOptions) => {
+        await agent.tools?.[0].invoke?.(
+          runOptions?.context,
+          JSON.stringify({
+            bottleId: "bt_unknown",
+          }),
+          {
+            toolCallId: "call_lookup",
+          },
+        );
+
+        return {
+          finalOutput: "classified",
+          newItems: [
+            {
+              type: "tool_call_item",
+              rawItem: {
+                type: "function_call",
+                callId: "call_lookup",
+                name: "lookupBottle",
+                arguments: JSON.stringify({
+                  bottleId: "bt_unknown",
+                }),
+                status: "completed",
+              },
+            },
+            {
+              type: "tool_call_output_item",
+              rawItem: {
+                type: "function_call_result",
+                callId: "call_lookup",
+                name: "lookupBottle",
+                status: "completed",
+                output: {
+                  type: "text",
+                  text: "null",
+                },
+              },
+            },
+          ],
+        };
+      },
+    },
+  });
+
+  const result = await harness.run(
+    "Classify bottle bt_unknown",
+    createHarnessContext({}),
+  );
+  const [call] = toolCalls(result.session);
+  // A null result stays on the call as null and is not reported as an error.
+  expect(call).toHaveProperty("result", null);
+  expect(call.error).toBeUndefined();
+});
+
+test("errors when replay is configured for unknown OpenAI Agents tools", async () => {
+  const lookupBottle = {
+    type: "function",
+    name: "lookupBottle",
+    invoke: vi.fn(),
+  } satisfies OpenAiAgentsTool<string, DemoMetadata>;
+  const runner = {
+    run: vi.fn(),
+  };
+  const harness = openaiAgentsHarness({
+    prompt: judgePrompt,
+    agent: {
+      name: "classifier",
+      model: "gpt-4.1-mini",
+      tools: [lookupBottle],
+    },
+    runner,
+    toolReplay: {
+      misspelledLookup: true,
+    },
+  });
+  // The run rejects, and neither the runner nor the tool is ever called.
+  await expect(
+    harness.run("Classify bottle bt_123", createHarnessContext({})),
+  ).rejects.toThrow(
+    "Tool replay configured for unknown OpenAI Agents tool(s): misspelledLookup.",
+  );
+  expect(runner.run).not.toHaveBeenCalled();
+  expect(lookupBottle.invoke).not.toHaveBeenCalled();
+});
+
+test("errors when replay is configured for OpenAI Agents tools without invoke", async () => {
+  const hostedTool = {
+    type: "web_search_preview",
+    name: "web_search_preview",
+  } satisfies OpenAiAgentsTool<string, DemoMetadata>;
+  const runner = {
+    run: vi.fn(),
+  };
+  const harness = openaiAgentsHarness({
+    prompt: judgePrompt,
+    agent: {
+      name: "classifier",
+      model: "gpt-4.1-mini",
+      tools: [hostedTool],
+    },
+    runner,
+    toolReplay: {
+      web_search_preview: true,
+    },
+  });
+  // The tool has no invoke(), so the run rejects and the runner is never called.
+  await expect(
+    harness.run("Search for bottle facts", createHarnessContext({})),
+  ).rejects.toThrow(
+    "Tool replay requires invoke() for web_search_preview. Hosted or provider-executed OpenAI Agents tools cannot be recorded automatically.",
+  );
+  expect(runner.run).not.toHaveBeenCalled();
+});
+
+test("instruments real OpenAI Agent tools without mutating the caller's agent", async () => {
+  const lookupBottle = tool({
+    name: "lookupBottle",
+    description: "Look up bottle facts.",
+    parameters: {
+      type: "object",
+      properties: {
+        bottleId: {
+          type: "string",
+        },
+      },
+      required: ["bottleId"],
+      additionalProperties: false,
+    } as const,
+    execute: async (input: unknown) => {
+      const { bottleId } = input as { bottleId: string };
+
+      return {
+        bottleId,
+        family: "bourbon",
+      };
+    },
+  });
+  const agent = new Agent({
+    name: "classifier",
+    model: "gpt-4.1-mini",
+    tools: [lookupBottle],
+  });
+  const originalTool = agent.tools[0];
+  const harness = openaiAgentsHarness({
+    prompt: judgePrompt,
+    agent,
+    runner: {
+      run: async (runAgent, _input, runOptions) => {
+        expect(runAgent).not.toBe(agent);
+        expect(runAgent.tools[0]).not.toBe(originalTool);
+        // Invoke the runtime tool by hand with JSON-encoded arguments.
+        const runtimeTool = runAgent.tools[0] as OpenAiAgentsTool<
+          string,
+          DemoMetadata
+        >;
+        const evidence = await runtimeTool.invoke?.(
+          runOptions?.context,
+          JSON.stringify({
+            bottleId: "bt_123",
+          }),
+          {
+            toolCallId: "call_lookup",
+          },
+        );
+
+        return {
+          finalOutput: evidence,
+        };
+      },
+    },
+  });
+
+  const result = await harness.run(
+    "Classify bottle bt_123",
+    createHarnessContext({}),
+  );
+  // The caller's agent still holds its original tool; the call was captured.
+  expect(agent.tools[0]).toBe(originalTool);
+  expect(toolCalls(result.session)).toMatchObject([
+    {
+      id: "call_lookup",
+      name: "lookupBottle",
+      arguments: {
+        bottleId: "bt_123",
+      },
+      result: {
+        bottleId: "bt_123",
+        family: "bourbon",
+      },
+    },
+  ]);
+});
+
+test("rejects implicit agent and runner factories", () => {
+  expect(() =>
+    openaiAgentsHarness({
+      prompt: judgePrompt,
+      agent: (() => ({
+        name: "classifier",
+        model: "gpt-4.1-mini",
+      })) as unknown as DemoAgent,
+      runner: {
+        run: async () => ({}),
+      },
+    }),
+  ).toThrow("Use createAgent() for agent factories");
+  // Same guard for `runner`: a function is rejected in favor of createRunner().
+  expect(() =>
+    openaiAgentsHarness({
+      prompt: judgePrompt,
+      agent: {
+        name: "classifier",
+        model: "gpt-4.1-mini",
+      },
+      runner: (() => ({
+        run: async () => ({}),
+      })) as unknown as { run: () => Promise<unknown> },
+    }),
+  ).toThrow("Use createRunner() for runner factories");
+});
+
+test("keeps tool capture isolated across overlapping runs", async () => {
+  const invoke = vi.fn(async (_runContext: unknown, rawInput: unknown) => {
+    if (typeof rawInput !== "string") {
+      throw new Error("Expected JSON tool input");
+    }
+
+    const input = JSON.parse(rawInput) as { bottleId: string };
+    return {
+      bottleId: input.bottleId,
+    };
+  });
+  const lookupBottle = {
+    type: "function",
+    name: "lookupBottle",
+    invoke,
+  } satisfies OpenAiAgentsTool<string, DemoMetadata>;
+  const originalInvoke = lookupBottle.invoke;
+  const agent = {
+    name: "classifier",
+    model: "gpt-4.1-mini",
+    tools: [lookupBottle],
+  } satisfies DemoAgent;
+  const harness = openaiAgentsHarness({
+    prompt: judgePrompt,
+    agent,
+    runner: {
+      run: async (runAgent: DemoAgent, _input: string, runOptions) => {
+        const runtimeContext = runOptions?.context as
+          | { metadata: DemoMetadata }
+          | undefined;
+        const scenario = runtimeContext?.metadata.scenario ?? "unknown";
+        await new Promise((resolve) => setTimeout(resolve, 1));
+        const evidence = await runAgent.tools?.[0].invoke?.(
+          runOptions?.context,
+          JSON.stringify({
+            bottleId: `bt_${scenario}`,
+          }),
+          {
+            toolCallId: `call_${scenario}`,
+          },
+        );
+
+        return {
+          finalOutput: evidence,
+        };
+      },
+    },
+  });
+  // Start both runs together so their tool calls overlap in time.
+  const [firstRun, secondRun] = await Promise.all([
+    harness.run(
+      "Classify first bottle",
+      createHarnessContext({ scenario: "first" }),
+    ),
+    harness.run(
+      "Classify second bottle",
+      createHarnessContext({ scenario: "second" }),
+    ),
+  ]);
+  // The shared agent is untouched and each session holds only its own call.
+  expect(agent.tools?.[0]).toBe(lookupBottle);
+  expect(agent.tools?.[0].invoke).toBe(originalInvoke);
+  expect(toolCalls(firstRun.session)).toMatchObject([
+    {
+      id: "call_first",
+      arguments: {
+        bottleId: "bt_first",
+      },
+      result: {
+        bottleId: "bt_first",
+      },
+    },
+  ]);
+  expect(toolCalls(secondRun.session)).toMatchObject([
+    {
+      id: "call_second",
+      arguments: {
+        bottleId: "bt_second",
+      },
+      result: {
+        bottleId: "bt_second",
+      },
+    },
+  ]);
+});
+
+test("marks failed tool output items as tool call errors", async () => {
+  const harness = openaiAgentsHarness({
+    prompt: judgePrompt,
+    agent: {
+      name: "editor",
+      model: "gpt-4.1-mini",
+    },
+    runner: {
+      run: async () => ({
+        finalOutput: "patch failed",
+        newItems: [
+          {
+            type: "tool_call_item",
+            rawItem: {
+              type: "apply_patch_call",
+              callId: "call_patch",
+              status: "completed",
+              operation: {
+                type: "update_file",
+                path: "README.md",
+                diff: "...",
+              },
+            },
+          },
+          {
+            type: "tool_call_output_item",
+            output: "patch rejected",
+            rawItem: {
+              type: "apply_patch_call_output",
+              callId: "call_patch",
+              status: "failed",
+              output: "patch rejected",
+            },
+          },
+        ],
+      }),
+    },
+  });
+  // The stubbed runner above pairs an apply_patch call with an output item whose status is "failed".
+  const result = await harness.run("Patch README", createHarnessContext({}));
+  const [call] = toolCalls(result.session);
+  // The failed output must surface as an error on the call, not as its result.
+  expect(call).toMatchObject({
+    id: "call_patch",
+    name: "apply_patch_call",
+    error: {
+      message: "patch rejected",
+    },
+    metadata: {
+      outputStatus: "failed",
+    },
+  });
+  expect(call.result).toBeUndefined();
+});
+
+test("attaches partial tool calls when Runner.run errors", async () => {
+  const lookupBottle = {
+    type: "function",
+    name: "lookupBottle",
+    invoke: async () => ({
+      bottleId: "bt_missing",
+      family: "unknown",
+    }),
+  } satisfies OpenAiAgentsTool<string, DemoMetadata>;
+  const harness = openaiAgentsHarness({
+    prompt: judgePrompt,
+    agent: {
+      name: "classifier",
+      model: "gpt-4.1-mini",
+      tools: [lookupBottle],
+    } satisfies DemoAgent,
+    runner: {
+      run: async (agent: DemoAgent, _input: string, runOptions) => {
+        await agent.tools?.[0].invoke?.(
+          runOptions?.context,
+          JSON.stringify({
+            bottleId: "bt_missing",
+          }),
+          {
+            toolCallId: "call_lookup",
+          },
+        );
+        // Fail only after the tool has run, so one call has already been captured.
+        throw new Error("classifier failed after lookup");
+      },
+    },
+  });
+  // Capture the rejection as a value instead of letting it fail the test.
+  const error = await harness
+    .run("Classify bottle bt_missing", createHarnessContext({}))
+    .catch((caughtError) => caughtError);
+  const run = getHarnessRunFromError(error);
+  // The partial run attached to the error keeps the tool call and the serialized error.
+  expect(run).toBeDefined();
+  expect(run?.usage.toolCalls).toBe(1);
+  expect(run?.errors).toEqual([
+    {
+      type: "Error",
+      message: "classifier failed after lookup",
+    },
+  ]);
+  expect(toolCalls(run!.session)).toMatchObject([
+    {
+      id: "call_lookup",
+      name: "lookupBottle",
+      arguments: {
+        bottleId: "bt_missing",
+      },
+      result: {
+        bottleId: "bt_missing",
+        family: "unknown",
+      },
+    },
+  ]);
+});
diff --git a/packages/harness-openai-agents/src/index.ts b/packages/harness-openai-agents/src/index.ts
new file mode 100644
index 0000000..60e68b2
--- /dev/null
+++ b/packages/harness-openai-agents/src/index.ts
@@ -0,0 +1,1805 @@
+import type {
+  Harness,
+  HarnessContext,
+  HarnessMetadata,
+  HarnessPrompt,
+  HarnessRun,
+  JsonValue,
+  NormalizedMessage,
+  NormalizedSession,
+  TimingSummary,
+  ToolCallRecord,
+  UsageSummary,
+} from "vitest-evals/harness";
+import {
+  attachHarnessRunToError,
+  hasCallableMethod,
+  isHarnessRun,
+  isNormalizedSession,
+  normalizeContent,
+  normalizeMetadata,
+  normalizeRecord,
+  resolveHarnessRunErrors,
+  serializeError,
+  toJsonValue,
+} from "vitest-evals/harness";
+import {
+  executeWithReplay,
+  getReplayMetadataFromError,
+  normalizeReplayMetadata,
+} from "vitest-evals/replay";
+import type {
+  ReplayMode,
+  ToolRecording,
+  ToolReplayConfig,
+} from "vitest-evals/replay";
+
+type MaybePromise<T> = T | Promise<T>; // a value, or a promise resolving to that value
+
+export type OpenAiAgentsReplayMode = ReplayMode; // alias of ReplayMode from vitest-evals/replay
+
+export interface OpenAiAgentsRuntimeContext<
+  TMetadata extends HarnessMetadata = HarnessMetadata,
+> { // shape of the default run context the harness builds from its HarnessContext
+  metadata: Readonly<TMetadata>;
+  artifacts: HarnessContext<TMetadata>["artifacts"];
+  setArtifact: HarnessContext<TMetadata>["setArtifact"];
+}
+
+export type OpenAiAgentsRunOptions<TContext = unknown> = Record<
+  string,
+  unknown
+> & {
+  context?: TContext; // when the key is absent, the harness supplies its runtime context
+  signal?: AbortSignal; // when the key is absent, the harness context's signal is used
+  stream?: boolean; // resolved by the harness to false unless truthy
+};
+
+export interface OpenAiAgentsRunner<TAgent, TInput, TContext, TResult> {
+  run: (
+    agent: TAgent,
+    input: TInput,
+    options?: OpenAiAgentsRunOptions<TContext>,
+  ) => MaybePromise<TResult | HarnessRun>; // may yield a ready HarnessRun instead of a raw result
+}
+
+export interface OpenAiAgentsRuntime<
+  TInput = string,
+  TMetadata extends HarnessMetadata = HarnessMetadata,
+  TContext = OpenAiAgentsRuntimeContext<TMetadata>,
+> {
+  context: TContext; // the harness sets this to runOptions.context
+  runOptions: OpenAiAgentsRunOptions<TContext>;
+  signal?: AbortSignal; // the harness sets this to runOptions.signal
+  tools: OpenAiAgentsTool<TInput, TMetadata>[]; // instrumented copies of the agent's tools
+}
+
+export interface OpenAiAgentsHarnessRunArgs<
+  TAgent,
+  TInput,
+  TMetadata extends HarnessMetadata,
+  TRunner,
+  TResult, // not referenced by any member of this interface
+  TContext,
+> {
+  agent: TAgent; // the instrumented agent, which may be a copy of the caller's agent
+  input: TInput;
+  context: HarnessContext<TMetadata>;
+  runtime: OpenAiAgentsRuntime<TInput, TMetadata, TContext>;
+  runner: TRunner | undefined; // undefined when neither runner nor createRunner is configured
+  runOptions: OpenAiAgentsRunOptions<TContext>;
+}
+
+export interface OpenAiAgentsHarnessResultArgs<
+  TAgent,
+  TInput,
+  TMetadata extends HarnessMetadata,
+  TRunner,
+  TResult,
+  TContext,
+> extends OpenAiAgentsHarnessRunArgs<
+    TAgent,
+    TInput,
+    TMetadata,
+    TRunner,
+    TResult,
+    TContext
+  > {
+  result: TResult; // the runner result after any `completed` promise has settled
+  output: JsonValue | undefined; // from normalize.output when set, otherwise the harness default
+}
+
+export interface OpenAiAgentsToolContext<
+  TInput = string,
+  TMetadata extends HarnessMetadata = HarnessMetadata,
+> {
+  input: TInput; // the harness run input, not the tool's own arguments
+  metadata: HarnessContext<TMetadata>["metadata"];
+  signal?: AbortSignal;
+  setArtifact: HarnessContext<TMetadata>["setArtifact"];
+  runContext: unknown; // first argument the tool's invoke() was called with
+  details: unknown; // third argument the tool's invoke() was called with
+  tool: OpenAiAgentsTool<TInput, TMetadata>; // the original, uninstrumented tool
+}
+
+export type OpenAiAgentsToolRecording<
+  TArgs extends JsonValue = JsonValue,
+  TResult extends JsonValue = JsonValue,
+> = ToolRecording<TArgs, TResult>; // plain alias of ToolRecording from vitest-evals/replay
+
+export type OpenAiAgentsToolReplayConfig<
+  TArgs extends JsonValue = JsonValue,
+  TResult extends JsonValue = JsonValue,
+  TInput = string,
+  TMetadata extends HarnessMetadata = HarnessMetadata,
+> = ToolReplayConfig<
+  TArgs,
+  TResult,
+  OpenAiAgentsToolContext<TInput, TMetadata>
+>; // ToolReplayConfig specialised to this harness's tool context
+
+export type OpenAiAgentsToolReplayPolicy<
+  TInput = string,
+  TMetadata extends HarnessMetadata = HarnessMetadata,
+> =
+  | boolean // falsy policies leave the tool executing directly, without replay
+  | OpenAiAgentsToolReplayConfig<JsonValue, JsonValue, TInput, TMetadata>;
+
+export type OpenAiAgentsToolReplayPolicies<
+  TInput = string,
+  TMetadata extends HarnessMetadata = HarnessMetadata,
+> = Record<string, OpenAiAgentsToolReplayPolicy<TInput, TMetadata>>; // keyed by resolved tool name
+
+type OpenAiAgentsInvoke = (...args: unknown[]) => unknown; // the harness calls it as (runContext, rawInput, details)
+
+export type OpenAiAgentsTool<
+  TInput = string, // not referenced by the type body
+  TMetadata extends HarnessMetadata = HarnessMetadata, // not referenced by the type body
+> = Record<string, unknown> & {
+  name?: string; // first choice when the harness resolves the tool's name
+  toolName?: string; // consulted when `name` yields no name
+  type?: string;
+  invoke?: OpenAiAgentsInvoke; // tools without invoke() are passed through uninstrumented
+};
+
+export interface OpenAiAgentsHarnessNormalizeOptions<
+  TAgent,
+  TInput = string,
+  TMetadata extends HarnessMetadata = HarnessMetadata,
+  TRunner = unknown,
+  TResult = unknown,
+  TContext = OpenAiAgentsRuntimeContext<TMetadata>,
+> { // configuring any hook here disables pass-through of a runner-returned HarnessRun
+  session?: ( // replaces the harness's default session resolution
+    args: OpenAiAgentsHarnessResultArgs<
+      TAgent,
+      TInput,
+      TMetadata,
+      TRunner,
+      TResult,
+      TContext
+    >,
+  ) => MaybePromise<NormalizedSession>;
+  output?: ( // resolved first; its value reaches the other hooks as `args.output`
+    args: Omit<
+      OpenAiAgentsHarnessResultArgs<
+        TAgent,
+        TInput,
+        TMetadata,
+        TRunner,
+        TResult,
+        TContext
+      >,
+      "output"
+    >,
+  ) => MaybePromise<JsonValue | undefined>;
+  outputText?: ( // replaces the harness's default output text resolution
+    args: OpenAiAgentsHarnessResultArgs<
+      TAgent,
+      TInput,
+      TMetadata,
+      TRunner,
+      TResult,
+      TContext
+    >,
+  ) => MaybePromise<string | undefined>;
+  usage?: ( // replaces the harness's default usage resolution
+    args: OpenAiAgentsHarnessResultArgs<
+      TAgent,
+      TInput,
+      TMetadata,
+      TRunner,
+      TResult,
+      TContext
+    >,
+  ) => MaybePromise<UsageSummary>;
+  timings?: ( // default is { totalMs } measured by the harness around the run
+    args: OpenAiAgentsHarnessResultArgs<
+      TAgent,
+      TInput,
+      TMetadata,
+      TRunner,
+      TResult,
+      TContext
+    >,
+  ) => MaybePromise<TimingSummary | undefined>;
+  errors?: ( // default is resolveHarnessRunErrors(result)
+    args: OpenAiAgentsHarnessResultArgs<
+      TAgent,
+      TInput,
+      TMetadata,
+      TRunner,
+      TResult,
+      TContext
+    >,
+  ) => MaybePromise<Array<Record<string, JsonValue>>>;
+}
+
+export interface OpenAiAgentsHarnessOptions<
+  TAgent,
+  TInput = string,
+  TMetadata extends HarnessMetadata = HarnessMetadata,
+  TRunner = OpenAiAgentsRunner<
+    TAgent,
+    TInput,
+    OpenAiAgentsRuntimeContext<TMetadata>,
+    unknown
+  >,
+  TResult = unknown,
+  TContext = OpenAiAgentsRuntimeContext<TMetadata>,
+> {
+  agent?: TAgent; // mutually exclusive with createAgent; must not be a function
+  createAgent?: () => MaybePromise<TAgent>; // invoked once per harness run
+  runner?: TRunner; // mutually exclusive with createRunner
+  createRunner?: ( // invoked once per harness run, after run options are resolved
+    args: Omit<
+      OpenAiAgentsHarnessRunArgs<
+        TAgent,
+        TInput,
+        TMetadata,
+        TRunner,
+        TResult,
+        TContext
+      >,
+      "runner"
+    >,
+  ) => MaybePromise<TRunner>;
+  run?: ( // custom entrypoint; when set it is used instead of runner.run()
+    args: OpenAiAgentsHarnessRunArgs<
+      TAgent,
+      TInput,
+      TMetadata,
+      TRunner,
+      TResult,
+      TContext
+    >,
+  ) => MaybePromise<TResult | HarnessRun>;
+  runOptions?: // static options, or a function evaluated for each run
+    | OpenAiAgentsRunOptions<TContext>
+    | ((
+        args: Omit<
+          OpenAiAgentsHarnessRunArgs<
+            TAgent,
+            TInput,
+            TMetadata,
+            TRunner,
+            TResult,
+            TContext
+          >,
+          "runner" | "runtime" | "runOptions"
+        >,
+      ) => MaybePromise<OpenAiAgentsRunOptions<TContext> | undefined>);
+  toolReplay?: OpenAiAgentsToolReplayPolicies<TInput, TMetadata>; // enabled entries must name a tool on the agent
+  normalize?: OpenAiAgentsHarnessNormalizeOptions<
+    TAgent,
+    TInput,
+    TMetadata,
+    TRunner,
+    TResult,
+    TContext
+  >;
+  prompt: HarnessPrompt;
+  name?: string; // harness name; defaults to "openai-agents"
+}
+
+type RuntimeToolCapture = {
+  calls: ToolCallRecord[]; // appended to by every instrumented tool invocation of one run
+};
+
+/** Wraps an `@openai/agents` Runner workflow as a normalized `Harness`; options are validated eagerly. */
+export function openaiAgentsHarness<
+  TAgent,
+  TInput = string,
+  TMetadata extends HarnessMetadata = HarnessMetadata,
+  TRunner = OpenAiAgentsRunner<
+    TAgent,
+    TInput,
+    OpenAiAgentsRuntimeContext<TMetadata>,
+    unknown
+  >,
+  TResult = unknown,
+  TContext = OpenAiAgentsRuntimeContext<TMetadata>,
+>(
+  options: OpenAiAgentsHarnessOptions<
+    TAgent,
+    TInput,
+    TMetadata,
+    TRunner,
+    TResult,
+    TContext
+  >,
+): Harness<TInput, TMetadata> {
+  validateOptions(options);
+  // The agent is resolved inside run(), so createAgent() is invoked once per run.
+  const run: Harness<TInput, TMetadata>["run"] = async (input, context) =>
+    executeOpenAiAgentsHarness(
+      options,
+      await resolveAgent(options),
+      input,
+      context,
+    );
+  return { name: options.name ?? "openai-agents", prompt: options.prompt, run };
+}
+
+async function executeOpenAiAgentsHarness<
+  TAgent,
+  TInput,
+  TMetadata extends HarnessMetadata,
+  TRunner,
+  TResult,
+  TContext,
+>(
+  options: OpenAiAgentsHarnessOptions<
+    TAgent,
+    TInput,
+    TMetadata,
+    TRunner,
+    TResult,
+    TContext
+  >,
+  agent: TAgent,
+  input: TInput,
+  context: HarnessContext<TMetadata>,
+): Promise<HarnessRun> {
+  const startedAt = Date.now();
+  const capture: RuntimeToolCapture = {
+    calls: [],
+  };
+  // Run against an instrumented copy of the agent so tool invocations land in `capture`.
+  return withInstrumentedAgentTools(
+    agent,
+    {
+      input,
+      context,
+      capture,
+      toolReplay: options.toolReplay,
+    },
+    async (instrumentedAgent, runtimeTools) => {
+      const defaultRuntimeContext = {
+        metadata: context.metadata,
+        artifacts: context.artifacts,
+        setArtifact: context.setArtifact,
+      } satisfies OpenAiAgentsRuntimeContext<TMetadata>;
+      const runOptions = await resolveRunOptions<
+        TAgent,
+        TInput,
+        TMetadata,
+        TRunner,
+        TResult,
+        TContext
+      >(
+        options,
+        instrumentedAgent,
+        input,
+        context,
+        defaultRuntimeContext as TContext,
+      );
+      const runtime = {
+        context: runOptions.context as TContext,
+        runOptions,
+        signal: runOptions.signal,
+        tools: runtimeTools,
+      } satisfies OpenAiAgentsRuntime<TInput, TMetadata, TContext>;
+      const runner = await resolveRunner(options, {
+        agent: instrumentedAgent,
+        input,
+        context,
+        runtime,
+        runOptions,
+      });
+      // Failures from here on are rethrown with a partial HarnessRun attached.
+      try {
+        const result = await runAgent(options, {
+          agent: instrumentedAgent,
+          input,
+          context,
+          runtime,
+          runner,
+          runOptions,
+        });
+        const settledResult = await settleRunResult(result);
+        // A result that already is a HarnessRun is passed through unless normalize hooks are set.
+        if (isHarnessRun(settledResult) && !hasResultOverrides(options)) {
+          if (
+            Object.keys(context.artifacts).length > 0 &&
+            !settledResult.artifacts
+          ) {
+            settledResult.artifacts = context.artifacts;
+          }
+          return settledResult;
+        }
+        // Otherwise derive each HarnessRun field, preferring the normalize.* hooks.
+        const normalizeResult = settledResult as TResult;
+        const baseResultArgs = {
+          agent: instrumentedAgent,
+          input,
+          context,
+          runtime,
+          runner,
+          runOptions,
+          result: normalizeResult,
+        };
+        const output = options.normalize?.output
+          ? await options.normalize.output(baseResultArgs)
+          : resolveOutput(normalizeResult);
+        const resultArgs = {
+          ...baseResultArgs,
+          output,
+        } satisfies OpenAiAgentsHarnessResultArgs<
+          TAgent,
+          TInput,
+          TMetadata,
+          TRunner,
+          TResult,
+          TContext
+        >;
+        const usage = options.normalize?.usage
+          ? await options.normalize.usage(resultArgs)
+          : resolveUsage(normalizeResult, capture.calls.length);
+        const outputText = options.normalize?.outputText
+          ? await options.normalize.outputText(resultArgs)
+          : resolveOutputText(normalizeResult, output);
+        const session = options.normalize?.session
+          ? await options.normalize.session(resultArgs)
+          : resolveSession(input, normalizeResult, output, outputText, usage, {
+              runtimeToolCalls: capture.calls,
+            });
+        // Timings, artifacts, and errors are resolved while assembling the final run.
+        return {
+          session,
+          output,
+          usage,
+          timings: options.normalize?.timings
+            ? await options.normalize.timings(resultArgs)
+            : { totalMs: Date.now() - startedAt },
+          artifacts:
+            Object.keys(context.artifacts).length > 0
+              ? context.artifacts
+              : undefined,
+          errors: options.normalize?.errors
+            ? await options.normalize.errors(resultArgs)
+            : resolveHarnessRunErrors(normalizeResult),
+        };
+      } catch (error) {
+        const usage =
+          capture.calls.length > 0 ? { toolCalls: capture.calls.length } : {};
+        const run = {
+          session: resolveSession(
+            input,
+            undefined,
+            undefined,
+            undefined,
+            usage,
+            {
+              runtimeToolCalls: capture.calls,
+            },
+          ),
+          output: undefined,
+          usage,
+          timings: { totalMs: Date.now() - startedAt },
+          artifacts:
+            Object.keys(context.artifacts).length > 0
+              ? context.artifacts
+              : undefined,
+          errors: [serializeError(error)],
+        } satisfies HarnessRun;
+        // Rethrow with the partial run (captured tool calls, timings, error) attached.
+        throw attachHarnessRunToError(error, run);
+      }
+    },
+  );
+}
+
+/**
+ * Rejects inconsistent harness configuration before any run starts.
+ * @param options - The options object passed to `openaiAgentsHarness`.
+ * @throws Error when both or neither of agent/createAgent are given, when both
+ *   runner and createRunner are given, when a factory function is passed as
+ *   `agent` or `runner`, or when no run()/runner/createRunner is configured.
+ */
+function validateOptions<
+  TAgent,
+  TInput,
+  TMetadata extends HarnessMetadata,
+  TRunner,
+  TResult,
+  TContext,
+>(
+  options: OpenAiAgentsHarnessOptions<
+    TAgent,
+    TInput,
+    TMetadata,
+    TRunner,
+    TResult,
+    TContext
+  >,
+) {
+  const { agent, createAgent, runner, createRunner, run } = options;
+  const agentGiven = agent !== undefined;
+  const factoryGiven = typeof createAgent === "function";
+  // Rules are listed in priority order; the first one violated is reported.
+  const rules: Array<[violated: boolean, message: string]> = [
+    [
+      agentGiven && factoryGiven,
+      "openaiAgentsHarness accepts either agent or createAgent(), not both.",
+    ],
+    [
+      !agentGiven && !factoryGiven,
+      "openaiAgentsHarness requires either an agent instance or createAgent().",
+    ],
+    [
+      Boolean(runner) && Boolean(createRunner),
+      "openaiAgentsHarness accepts either runner or createRunner(), not both.",
+    ],
+    [
+      typeof agent === "function",
+      "openaiAgentsHarness agent must be an Agent instance. Use createAgent() for agent factories.",
+    ],
+    [
+      typeof runner === "function" && !hasCallableMethod(runner, "run"),
+      "openaiAgentsHarness runner must be a Runner instance. Use createRunner() for runner factories.",
+    ],
+    [
+      !run && !runner && !createRunner,
+      "openaiAgentsHarness requires runner/createRunner for Runner.run(agent, input, options), or run() for a custom entrypoint.",
+    ],
+  ];
+  const broken = rules.find(([violated]) => violated);
+  if (broken) {
+    throw new Error(broken[1]);
+  }
+}
+
+async function resolveAgent<
+  TAgent,
+  TInput,
+  TMetadata extends HarnessMetadata,
+  TRunner,
+  TResult,
+  TContext,
+>(
+  options: OpenAiAgentsHarnessOptions<
+    TAgent,
+    TInput,
+    TMetadata,
+    TRunner,
+    TResult,
+    TContext
+  >,
+) {
+  // A factory wins when present; validateOptions() rejects configuring both.
+  if (options.createAgent) {
+    return options.createAgent();
+  }
+  const { agent } = options;
+  if (agent === undefined) {
+    throw new Error(
+      "openaiAgentsHarness requires either an agent instance or createAgent().",
+    );
+  }
+  return agent;
+}
+
+/**
+ * Picks the runner for one run: `createRunner(args)` when configured,
+ * otherwise the shared `runner` instance (possibly undefined).
+ */
+async function resolveRunner<
+  TAgent,
+  TInput,
+  TMetadata extends HarnessMetadata,
+  TRunner,
+  TResult,
+  TContext,
+>(
+  options: OpenAiAgentsHarnessOptions<
+    TAgent,
+    TInput,
+    TMetadata,
+    TRunner,
+    TResult,
+    TContext
+  >,
+  args: Omit<
+    OpenAiAgentsHarnessRunArgs<
+      TAgent,
+      TInput,
+      TMetadata,
+      TRunner,
+      TResult,
+      TContext
+    >,
+    "runner"
+  >,
+) {
+  // A missing runner is legal here: runAgent() only needs one when no custom
+  // run() entrypoint is configured.
+  return options.createRunner
+    ? options.createRunner(args)
+    : options.runner;
+}
+
+/**
+ * Resolves the options for one run, defaulting context, signal, and stream.
+ */
+async function resolveRunOptions<
+  TAgent,
+  TInput,
+  TMetadata extends HarnessMetadata,
+  TRunner,
+  TResult,
+  TContext,
+>(
+  options: OpenAiAgentsHarnessOptions<
+    TAgent,
+    TInput,
+    TMetadata,
+    TRunner,
+    TResult,
+    TContext
+  >,
+  agent: TAgent,
+  input: TInput,
+  context: HarnessContext<TMetadata>,
+  defaultRuntimeContext: TContext,
+): Promise<OpenAiAgentsRunOptions<TContext>> {
+  let supplied: OpenAiAgentsRunOptions<TContext> | undefined;
+  if (typeof options.runOptions === "function") {
+    supplied = await options.runOptions({ agent, input, context });
+  } else {
+    supplied = options.runOptions;
+  }
+  const base: OpenAiAgentsRunOptions<TContext> = supplied ?? {};
+  // A key the caller provided (even as undefined) always beats the default.
+  const provided = (key: string): boolean => key in base;
+  const resolved: OpenAiAgentsRunOptions<TContext> = { ...base };
+  resolved.context = provided("context")
+    ? (base.context as TContext)
+    : defaultRuntimeContext;
+  resolved.signal = provided("signal")
+    ? (base.signal as AbortSignal | undefined)
+    : context.signal;
+  // Streaming is opt-in: anything but a truthy `stream` resolves to false.
+  resolved.stream = Boolean(base.stream);
+  return resolved;
+}
+
+async function runAgent<
+  TAgent,
+  TInput,
+  TMetadata extends HarnessMetadata,
+  TRunner,
+  TResult,
+  TContext,
+>(
+  options: OpenAiAgentsHarnessOptions<
+    TAgent,
+    TInput,
+    TMetadata,
+    TRunner,
+    TResult,
+    TContext
+  >,
+  args: OpenAiAgentsHarnessRunArgs<
+    TAgent,
+    TInput,
+    TMetadata,
+    TRunner,
+    TResult,
+    TContext
+  >,
+): Promise<TResult | HarnessRun> {
+  // A custom entrypoint always wins over the default Runner.run path.
+  if (options.run) {
+    return options.run(args);
+  }
+  const { runner, agent, input, runOptions } = args;
+  if (!hasRunnerRunMethod<TAgent, TInput, TContext, TResult>(runner)) {
+    throw new Error(
+      "openaiAgentsHarness requires runner/createRunner for the default Runner.run path, or run() for a custom entrypoint.",
+    );
+  }
+  return runner.run(agent, input, runOptions);
+}
+
+function hasRunnerRunMethod<TAgent, TInput, TContext, TResult>(
+  runner: unknown,
+): runner is OpenAiAgentsRunner<TAgent, TInput, TContext, TResult> {
+  return hasCallableMethod(runner, "run"); // structural check only; the type arguments are not verified
+}
+
+/** Waits for a result's `completed` promise (when it has one) before returning the result. */
+async function settleRunResult(result: unknown) {
+  // Only object results can carry a `completed` promise.
+  const completion =
+    result && typeof result === "object" && "completed" in result
+      ? (result as { completed?: unknown }).completed
+      : undefined;
+  if (isPromiseLike(completion)) {
+    await completion;
+  }
+  return result;
+}
+
+/** Reports whether a `normalize.*` hook is configured on the harness options. */
+function hasResultOverrides<
+  TAgent,
+  TInput,
+  TMetadata extends HarnessMetadata,
+  TRunner,
+  TResult,
+  TContext,
+>(
+  options: OpenAiAgentsHarnessOptions<
+    TAgent,
+    TInput,
+    TMetadata,
+    TRunner,
+    TResult,
+    TContext
+  >,
+) {
+  // Equivalent to a `??` chain over the hooks: only the first non-nullish
+  // entry is coerced to a boolean.
+  const { output, outputText, session, usage, timings, errors } =
+    options.normalize ?? {};
+  const hooks = [output, outputText, session, usage, timings, errors];
+  const firstConfigured = hooks.find((hook) => hook != null);
+  return Boolean(firstConfigured);
+}
+
+/** Runs `callback` with an agent whose invocable tools are wrapped to record their calls. */
+async function withInstrumentedAgentTools<
+  TAgent,
+  TInput,
+  TMetadata extends HarnessMetadata,
+  TResult,
+>(
+  agent: TAgent,
+  args: {
+    input: TInput;
+    context: HarnessContext<TMetadata>;
+    capture: RuntimeToolCapture;
+    toolReplay: OpenAiAgentsToolReplayPolicies<TInput, TMetadata> | undefined;
+  },
+  callback: (
+    agent: TAgent,
+    runtimeTools: OpenAiAgentsTool<TInput, TMetadata>[],
+  ) => Promise<TResult>,
+) {
+  const declaredTools = getAgentTools<TInput, TMetadata>(agent) ?? [];
+  // Validate first so replay policies naming unknown tools fail even when the
+  // agent declares no tools at all.
+  validateToolReplayPolicies(declaredTools, args.toolReplay);
+  const wrappedTools = declaredTools.map((tool) => instrumentTool(tool, args));
+  // With nothing to wrap, hand the caller's agent through untouched.
+  const runAgainst =
+    wrappedTools.length > 0 ? cloneAgentWithTools(agent, wrappedTools) : agent;
+  return callback(runAgainst, wrappedTools);
+}
+
+function getAgentTools<TInput, TMetadata extends HarnessMetadata>(
+  agent: unknown,
+): OpenAiAgentsTool<TInput, TMetadata>[] | undefined {
+  // Entries are trusted to be tool-shaped; only the array-ness is verified.
+  const candidate = getObjectProperty(agent, "tools");
+  if (!Array.isArray(candidate)) return undefined;
+  return candidate as OpenAiAgentsTool<TInput, TMetadata>[];
+}
+
+/** Wraps a tool's invoke() so each call is recorded and, when a policy applies, replayed. */
+function instrumentTool<TInput, TMetadata extends HarnessMetadata>(
+  tool: OpenAiAgentsTool<TInput, TMetadata>,
+  args: {
+    input: TInput;
+    context: HarnessContext<TMetadata>;
+    capture: RuntimeToolCapture;
+    toolReplay: OpenAiAgentsToolReplayPolicies<TInput, TMetadata> | undefined;
+  },
+): OpenAiAgentsTool<TInput, TMetadata> {
+  const toolName = resolveToolName(tool);
+  // Own-key lookup: a tool named "toString" must not match Object.prototype.
+  const replay =
+    args.toolReplay !== undefined &&
+    Object.prototype.hasOwnProperty.call(args.toolReplay, toolName)
+      ? args.toolReplay[toolName]
+      : undefined;
+  const originalInvoke = tool.invoke;
+  if (typeof originalInvoke !== "function") {
+    if (replay) {
+      throw new Error(
+        `Tool replay requires invoke() for ${toolName}. Hosted or provider-executed OpenAI Agents tools cannot be recorded automatically.`,
+      );
+    }
+    return tool;
+  }
+  const instrumentedInvoke = (async (runContext, rawInput, details) =>
+    executeInstrumentedTool({
+      tool,
+      toolName,
+      replay,
+      rawInput,
+      runContext,
+      details,
+      input: args.input,
+      context: args.context,
+      capture: args.capture,
+      // Call through the tool so `this`-dependent invoke() methods keep working.
+      execute: () => originalInvoke.call(tool, runContext, rawInput, details),
+    })) as OpenAiAgentsInvoke;
+  return { ...tool, invoke: instrumentedInvoke };
+}
+
+function validateToolReplayPolicies<TInput, TMetadata extends HarnessMetadata>(
+  tools: OpenAiAgentsTool<TInput, TMetadata>[],
+  toolReplay: OpenAiAgentsToolReplayPolicies<TInput, TMetadata> | undefined,
+) {
+  if (!toolReplay) {
+    return;
+  }
+  // Disabled policies (false/undefined) are exempt from the check.
+  const known = new Set(tools.map(resolveToolName));
+  const unknownToolNames: string[] = [];
+  for (const [toolName, policy] of Object.entries(toolReplay)) {
+    if (policy && !known.has(toolName)) {
+      unknownToolNames.push(toolName);
+    }
+  }
+  if (unknownToolNames.length > 0) {
+    throw new Error(
+      `Tool replay configured for unknown OpenAI Agents tool(s): ${unknownToolNames.join(", ")}.`,
+    );
+  }
+}
+
+/** Returns a copy of `agent` that carries `tools` in place of its original tools. */
+function cloneAgentWithTools<TAgent, TInput, TMetadata extends HarnessMetadata>(
+  agent: TAgent,
+  tools: OpenAiAgentsTool<TInput, TMetadata>[],
+): TAgent {
+  type Cloneable = {
+    clone: (config: { tools: OpenAiAgentsTool<TInput, TMetadata>[] }) => TAgent;
+  };
+  // Prefer the agent's own clone() when it exposes one.
+  if (hasCallableMethod(agent, "clone")) {
+    const cloneable = agent as Cloneable;
+    return cloneable.clone({ tools });
+  }
+  // Object agents get a shallow copy, so the caller's agent is not mutated;
+  // anything else is returned unchanged.
+  const isObjectAgent = typeof agent === "object" && agent !== null;
+  return isObjectAgent
+    ? (Object.assign({}, agent, { tools }) as TAgent)
+    : agent;
+}
+
+/** Executes one tool call (via replay when configured) and records it in `capture.calls`. */
+async function executeInstrumentedTool<
+  TInput,
+  TMetadata extends HarnessMetadata,
+>({
+  tool,
+  toolName,
+  replay,
+  rawInput,
+  runContext,
+  details,
+  input,
+  context,
+  capture,
+  execute,
+}: {
+  tool: OpenAiAgentsTool<TInput, TMetadata>;
+  toolName: string;
+  replay: OpenAiAgentsToolReplayPolicy<TInput, TMetadata> | undefined;
+  rawInput: unknown;
+  runContext: unknown;
+  details: unknown;
+  input: TInput;
+  context: HarnessContext<TMetadata>;
+  capture: RuntimeToolCapture;
+  execute: () => MaybePromise<unknown>;
+}) {
+  const startedAt = new Date();
+  const toolCallId = resolveToolCallId(runContext, rawInput, details);
+  const normalizedArgs = normalizeArguments(rawInput);
+  // Fields common to success and failure records, kept in their original order.
+  const identity = {
+    ...(toolCallId ? { id: toolCallId } : {}),
+    name: toolName,
+    ...(normalizedArgs !== undefined ? { arguments: normalizedArgs } : {}),
+  };
+  const timing = (finishedAt: Date) => ({
+    startedAt: startedAt.toISOString(),
+    finishedAt: finishedAt.toISOString(),
+    durationMs: finishedAt.getTime() - startedAt.getTime(),
+  });
+  const replayContext = {
+    input,
+    metadata: context.metadata,
+    signal: context.signal,
+    setArtifact: context.setArtifact,
+    runContext,
+    details,
+    tool,
+  } satisfies OpenAiAgentsToolContext<TInput, TMetadata>;
+  try {
+    const execution = replay
+      ? await executeWithReplay({
+          toolName,
+          args: normalizeReplayToolInput(rawInput),
+          context: replayContext,
+          execute: async () =>
+            toReplayJsonValue(await execute(), `${toolName} tool output`),
+          replay,
+        })
+      : {
+          result: await execute(),
+          replay: undefined,
+        };
+    const finishedAt = new Date();
+    const normalizedResult = normalizeToolResult(execution.result);
+    capture.calls.push({
+      ...identity,
+      ...(normalizedResult !== undefined ? { result: normalizedResult } : {}),
+      ...timing(finishedAt),
+      metadata: normalizeReplayMetadata(execution.replay),
+    } satisfies ToolCallRecord);
+    return execution.result;
+  } catch (error) {
+    // Also reached when record-building above throws; the call is then logged as failed.
+    const finishedAt = new Date();
+    const replayMetadata = getReplayMetadataFromError(error);
+    capture.calls.push({
+      ...identity,
+      error: normalizeError(error),
+      ...timing(finishedAt),
+      metadata: normalizeReplayMetadata(replayMetadata),
+    } satisfies ToolCallRecord);
+    throw error;
+  }
+}
+
+/** Resolves the name used for a tool in records and replay lookups. */
+function resolveToolName(tool: unknown) {
+  // Checked in order: `name`, `toolName`, then the nested `function.name`;
+  // "unknown" is used when none of them yields a name.
+  const direct = stringProperty(tool, "name") ?? stringProperty(tool, "toolName");
+  const nested = () => stringProperty(getObjectProperty(tool, "function"), "name");
+  return direct ?? nested() ?? "unknown";
+}
+
+/** Finds a tool call id, probing `details` first, then `runContext`, then `rawInput`. */
+function resolveToolCallId(
+  runContext: unknown,
+  rawInput: unknown,
+  details: unknown,
+) {
+  const idPaths = [["toolCallId"], ["tool_call_id"]];
+  const callPaths = (...prefix: string[]) => [[...prefix, "callId"], [...prefix, "call_id"]];
+  const probes: Array<[source: unknown, paths: string[][]]> = [
+    [details, [...idPaths, ...callPaths(), ...callPaths("toolCall"), ...callPaths("rawItem")]],
+    [runContext, [...idPaths, ["toolCall", "callId"]]],
+    [rawInput, idPaths],
+  ];
+  for (const [source, paths] of probes) {
+    for (const path of paths) {
+      const found = findStringAtPath(source, path);
+      if (found != null) return found;
+    }
+  }
+  return undefined;
+}
+
+/**
+ * Picks the output value from a raw runner result: non-objects are converted
+ * directly; for objects the first well-known key that converts to a defined
+ * JSON value wins, and a string `output` property is the last resort.
+ */
+function resolveOutput(result: unknown): JsonValue | undefined {
+  if (result === null || typeof result !== "object") {
+    return toJsonValue(result);
+  }
+  const record = result as Record<string, unknown>;
+  const outputKeys = [
+    "finalOutput",
+    "final_output",
+    "object",
+    "result",
+    "decision",
+    "text",
+  ];
+  // First key with a defined JSON conversion wins.
+  for (const key of outputKeys) {
+    const converted = toJsonValue(record[key]);
+    if (converted !== undefined) {
+      return converted;
+    }
+  }
+  // Unlike the keys above, `output` is only accepted when it is a plain string.
+  const { output } = record;
+  return typeof output === "string" ? output : undefined;
+}
+
+/**
+ * Derives the run's text: a direct string field on the result, else whatever
+ * text is extracted from its items, else the output (stringified if needed).
+ */
+function resolveOutputText(
+  result: unknown,
+  output: JsonValue | undefined,
+): string | undefined {
+  const fromOutput = () =>
+    typeof output === "string" ? output : stringifyJson(output);
+  if (!result || typeof result !== "object") {
+    return fromOutput();
+  }
+  const direct =
+    stringProperty(result, "finalOutput") ??
+    stringProperty(result, "final_output") ??
+    stringProperty(result, "text");
+  if (direct !== undefined) {
+    return direct;
+  }
+  // Next, text gathered from run items; an empty string counts as missing.
+  const items =
+    arrayProperty(result, "newItems") ?? arrayProperty(result, "output") ?? [];
+  return resolveAssistantTextFromItems(items) || fromOutput();
+}
+
+/**
+ * Builds a usage summary from a raw result. Usage is read from `state.usage`,
+ * `runContext.usage`, or `usage` (first non-nullish); the tool-call count
+ * prefers the result's own count over the runtime-captured one.
+ */
+function resolveUsage(result: unknown, runtimeToolCallCount: number) {
+  const rawUsage =
+    getObjectProperty(getObjectProperty(result, "state"), "usage") ??
+    getObjectProperty(getObjectProperty(result, "runContext"), "usage") ??
+    getObjectProperty(result, "usage");
+  // A zero count is reported as "absent" rather than as 0.
+  const toolCalls =
+    countToolCallsFromResult(result) || runtimeToolCallCount || undefined;
+  if (!rawUsage || typeof rawUsage !== "object") {
+    return toolCalls ? { toolCalls } : {};
+  }
+  const record = rawUsage as Record<string, unknown>;
+  const { requests, requestUsageEntries, raw } = record;
+  // Token counts come straight from the usage record; provider and model
+  // are resolved from the result as a whole.
+  return {
+    provider: resolveProvider(result),
+    model: resolveModel(result),
+    inputTokens: numberProperty(record, "inputTokens"),
+    outputTokens: numberProperty(record, "outputTokens"),
+    reasoningTokens: numberProperty(record, "reasoningTokens"),
+    totalTokens: numberProperty(record, "totalTokens"),
+    toolCalls,
+    retries: numberProperty(record, "retries"),
+    metadata: normalizeMetadata({ requests, requestUsageEntries, raw }),
+  } satisfies UsageSummary;
+}
+
+/**
+ * Produces the normalized session for a run.
+ *
+ * A result that already exposes a normalized session under `session` or
+ * `trace` is returned as-is. Otherwise the message list is assembled from the
+ * input (or `history`), the run items, any runtime tool calls that the items
+ * did not account for, and finally the output when no assistant message
+ * carries content.
+ *
+ * @param input - Input originally handed to the run.
+ * @param result - Raw value returned by the run.
+ * @param output - Normalized output of the run.
+ * @param outputText - Text form of the output, stored on the session.
+ * @param usage - Usage summary; supplies provider/model fallbacks.
+ * @param options - Carries the tool calls recorded at runtime.
+ * @returns The normalized session.
+ */
+function resolveSession(
+  input: unknown,
+  result: unknown,
+  output: JsonValue | undefined,
+  outputText: string | undefined,
+  usage: UsageSummary,
+  options: {
+    runtimeToolCalls: ToolCallRecord[];
+  },
+): NormalizedSession {
+  // A pre-normalized session on the result wins outright.
+  const embedded = result as Record<string, unknown> | undefined;
+  for (const key of ["session", "trace"]) {
+    const candidate = embedded?.[key];
+    if (isNormalizedSession(candidate)) {
+      return candidate as NormalizedSession;
+    }
+  }
+
+  const { runtimeToolCalls } = options;
+  const newItems = arrayProperty(result, "newItems");
+  const outputItems = arrayProperty(result, "output");
+
+  let messages: NormalizedMessage[];
+  if (newItems && newItems.length > 0) {
+    // Run items are present: seed with the input, then append the items.
+    messages = normalizeInputMessages(
+      getObjectProperty(result, "input") ?? input,
+    );
+    messages.push(...normalizeRunItems(newItems, runtimeToolCalls));
+  } else {
+    // No run items: start from `history` and fall back to `output` items.
+    messages = normalizeHistoryMessages(result, input);
+    if (outputItems && outputItems.length > 0) {
+      messages.push(...normalizeRunItems(outputItems, runtimeToolCalls));
+    }
+  }
+
+  appendUnmatchedRuntimeToolCalls(messages, runtimeToolCalls);
+
+  if (output !== undefined) {
+    const hasAssistantContent = messages.some(
+      (message) =>
+        message.role === "assistant" && message.content !== undefined,
+    );
+    // Make sure the final answer shows up in the transcript at least once.
+    if (!hasAssistantContent) {
+      messages.push({
+        role: "assistant",
+        content: output,
+      });
+    }
+  }
+
+  const fromResult = (key: string) => getObjectProperty(result, key);
+
+  return {
+    messages,
+    outputText,
+    provider: resolveProvider(result) ?? usage.provider,
+    model: resolveModel(result) ?? usage.model,
+    metadata: normalizeMetadata({
+      lastResponseId: fromResult("lastResponseId"),
+      interruptions: fromResult("interruptions"),
+      rawResponses: fromResult("rawResponses"),
+      inputGuardrailResults: fromResult("inputGuardrailResults"),
+      outputGuardrailResults: fromResult("outputGuardrailResults"),
+      toolInputGuardrailResults: fromResult("toolInputGuardrailResults"),
+      toolOutputGuardrailResults: fromResult("toolOutputGuardrailResults"),
+      activeAgent: normalizeAgentMetadata(fromResult("activeAgent")),
+      lastAgent: normalizeAgentMetadata(fromResult("lastAgent")),
+    }),
+  };
+}
+
+/**
+ * Normalizes `result.history` into messages.
+ *
+ * Falls back to the input messages (`result.input`, else `fallbackInput`)
+ * when `history` is missing, empty, or yields no recognizable message.
+ *
+ * @param result - Raw value returned by the run.
+ * @param fallbackInput - Input to use when the result has no `input`.
+ * @returns A non-empty list of normalized messages.
+ */
+function normalizeHistoryMessages(
+  result: unknown,
+  fallbackInput: unknown,
+): NormalizedMessage[] {
+  const fromInput = (): NormalizedMessage[] =>
+    normalizeInputMessages(getObjectProperty(result, "input") ?? fallbackInput);
+
+  const history = arrayProperty(result, "history");
+  if (!history || history.length === 0) {
+    return fromInput();
+  }
+
+  const recognized = history
+    .map((entry) => normalizeModelMessage(entry))
+    .filter((message): message is NormalizedMessage => Boolean(message));
+
+  return recognized.length > 0 ? recognized : fromInput();
+}
+
+/**
+ * Converts the run input into normalized messages.
+ *
+ * An array input is mapped entry by entry; entries without a recognized role
+ * are dropped. If nothing usable remains, or the input is not an array, the
+ * whole input becomes the content of a single `user` message.
+ *
+ * @param input - Input handed to the run.
+ * @returns A non-empty list of normalized messages.
+ */
+function normalizeInputMessages(input: unknown): NormalizedMessage[] {
+  if (Array.isArray(input)) {
+    const recognized: NormalizedMessage[] = [];
+    for (const entry of input) {
+      const message = normalizeModelMessage(entry);
+      if (message) {
+        recognized.push(message);
+      }
+    }
+
+    if (recognized.length > 0) {
+      return recognized;
+    }
+  }
+
+  return [
+    {
+      role: "user",
+      content: normalizeContent(input),
+    },
+  ];
+}
+
+/**
+ * Turns run items into normalized messages, preserving item order.
+ *
+ * Assistant message items become assistant messages, tool call items become
+ * assistant messages holding one tool call (merged with the matching output
+ * item and runtime call when their ids line up), tool output items become
+ * `tool` messages, and any other item is kept only if it yields metadata.
+ *
+ * @param items - Run items (or raw output items) to normalize.
+ * @param runtimeToolCalls - Tool calls recorded at runtime, matched by id.
+ * @returns The normalized messages.
+ */
+function normalizeRunItems(
+  items: unknown[],
+  runtimeToolCalls: ToolCallRecord[],
+): NormalizedMessage[] {
+  // Index runtime calls by id; calls without an id cannot be matched here.
+  const runtimeCallsById = new Map<string, ToolCallRecord>();
+  for (const call of runtimeToolCalls) {
+    if (call.id) {
+      runtimeCallsById.set(call.id, call);
+    }
+  }
+
+  // First pass: remember every tool output so calls can pick up their result.
+  const outputItemsByCallId = new Map<string, unknown>();
+  for (const item of items) {
+    const rawItem = getRunItemRawItem(item);
+    const callId = resolveRawToolCallId(rawItem);
+    if (callId && isToolCallOutputItem(item, rawItem)) {
+      outputItemsByCallId.set(callId, item);
+    }
+  }
+
+  // Second pass: emit one message per item.
+  const messages: NormalizedMessage[] = [];
+  for (const item of items) {
+    const rawItem = getRunItemRawItem(item);
+
+    if (isAssistantMessageItem(item, rawItem)) {
+      messages.push({
+        role: "assistant",
+        content: normalizeMessageContent(rawItem, item),
+        metadata: normalizeRunItemMetadata(item, rawItem),
+      });
+    } else if (isToolCallItem(item, rawItem)) {
+      const callId = resolveRawToolCallId(rawItem);
+      const call = normalizeToolCallItem(
+        item,
+        rawItem,
+        outputItemsByCallId.get(callId ?? ""),
+        callId ? runtimeCallsById.get(callId) : undefined,
+      );
+      messages.push({
+        role: "assistant",
+        toolCalls: [call],
+        metadata: normalizeRunItemMetadata(item, rawItem),
+      });
+    } else if (isToolCallOutputItem(item, rawItem)) {
+      messages.push(normalizeToolResultMessage(item, rawItem));
+    } else {
+      const metadata = normalizeRunItemMetadata(item, rawItem);
+      if (metadata) {
+        messages.push({
+          role: "assistant",
+          metadata,
+        });
+      }
+    }
+  }
+
+  return messages;
+}
+
+/**
+ * Appends runtime tool calls that no existing message already carries.
+ *
+ * A runtime call counts as matched only when it has an id that appears on a
+ * tool call in `messages`; calls without an id are always appended. Each
+ * appended call is followed by a `tool` message when it has a result or an
+ * error.
+ *
+ * @param messages - Message list to extend in place.
+ * @param runtimeToolCalls - Tool calls recorded at runtime.
+ */
+function appendUnmatchedRuntimeToolCalls(
+  messages: NormalizedMessage[],
+  runtimeToolCalls: ToolCallRecord[],
+) {
+  // Collected up front so messages appended below do not affect matching.
+  const seenIds = new Set<string>();
+  for (const message of messages) {
+    for (const existing of message.toolCalls ?? []) {
+      if (existing.id) {
+        seenIds.add(existing.id);
+      }
+    }
+  }
+
+  for (const call of runtimeToolCalls) {
+    if (call.id && seenIds.has(call.id)) {
+      continue;
+    }
+
+    messages.push({
+      role: "assistant",
+      toolCalls: [call],
+    });
+
+    if (call.result === undefined && !call.error) {
+      continue;
+    }
+
+    // The result wins; otherwise a non-empty error message is the content.
+    const content =
+      call.result !== undefined
+        ? call.result
+        : call.error && call.error.message.length > 0
+          ? call.error.message
+          : undefined;
+
+    messages.push({
+      role: "tool",
+      ...(content !== undefined ? { content } : {}),
+      metadata: normalizeMetadata({
+        name: call.name,
+        toolCallId: call.id,
+        isError: Boolean(call.error),
+      }),
+    });
+  }
+}
+
+/**
+ * Normalizes one model/history item into a message.
+ *
+ * @param item - Candidate item; its `rawItem` is used when present.
+ * @returns The message, or `undefined` when the item is not an object or its
+ *   role is not one of `system`, `user`, `assistant` or `tool`.
+ */
+function normalizeModelMessage(item: unknown): NormalizedMessage | undefined {
+  if (!item || typeof item !== "object") {
+    return undefined;
+  }
+
+  const rawItem = getRunItemRawItem(item);
+  const role = stringProperty(rawItem, "role");
+
+  switch (role) {
+    case "system":
+    case "user":
+    case "assistant":
+    case "tool": {
+      const content = normalizeMessageContent(rawItem, item);
+      return {
+        role,
+        // Leave `content` off entirely rather than setting it to undefined.
+        ...(content !== undefined ? { content } : {}),
+        metadata: normalizeRunItemMetadata(item, rawItem),
+      };
+    }
+    default:
+      return undefined;
+  }
+}
+
+/**
+ * Builds a tool call record from a tool call item and its output item.
+ *
+ * The outcome is an `error` when the output item's raw status is `failed`,
+ * a `result` when an output value exists, and neither otherwise. The record
+ * is then merged with the runtime call, if one was matched.
+ *
+ * @param item - The tool call run item.
+ * @param rawItem - Raw item underlying `item`.
+ * @param outputItem - Matching tool output item, if any.
+ * @param runtimeCall - Matching runtime-recorded call, if any.
+ * @returns The merged tool call record.
+ */
+function normalizeToolCallItem(
+  item: unknown,
+  rawItem: unknown,
+  outputItem: unknown,
+  runtimeCall: ToolCallRecord | undefined,
+): ToolCallRecord {
+  const rawOutputItem = getRunItemRawItem(outputItem);
+  const output =
+    getObjectProperty(outputItem, "output") ??
+    getObjectProperty(rawOutputItem, "output");
+  const outputStatus = stringProperty(rawOutputItem, "status");
+  const normalizedResult =
+    output !== undefined ? normalizeToolResult(output) : undefined;
+
+  // A failed output is reported as an error, never as a result.
+  const outcome =
+    outputStatus === "failed"
+      ? { error: normalizeToolOutputError(output) }
+      : normalizedResult !== undefined
+        ? { result: normalizedResult }
+        : {};
+
+  const fromRaw = (key: string) => getObjectProperty(rawItem, key);
+
+  const call = {
+    id: resolveRawToolCallId(rawItem),
+    name: resolveRawToolName(rawItem),
+    arguments: normalizeArguments(fromRaw("arguments")),
+    ...outcome,
+    metadata: normalizeMetadata({
+      status: fromRaw("status"),
+      outputStatus,
+      namespace: fromRaw("namespace"),
+      providerData: fromRaw("providerData"),
+      itemType: getObjectProperty(item, "type"),
+      rawType: fromRaw("type"),
+    }),
+  } satisfies ToolCallRecord;
+
+  return mergeToolCalls(call, runtimeCall);
+}
+
+/**
+ * Builds the `tool` message for a tool output item.
+ *
+ * @param item - The tool output run item.
+ * @param rawItem - Raw item underlying `item`.
+ * @returns A `tool` message; `content` is present only when the item has an
+ *   output, and `isError` is set when the raw status is `failed`.
+ */
+function normalizeToolResultMessage(
+  item: unknown,
+  rawItem: unknown,
+): NormalizedMessage {
+  const fromRaw = (key: string) => getObjectProperty(rawItem, key);
+
+  const output = getObjectProperty(item, "output") ?? fromRaw("output");
+  const status = stringProperty(rawItem, "status");
+  const contentPart =
+    output === undefined ? {} : { content: normalizeContent(output) };
+
+  return {
+    role: "tool",
+    ...contentPart,
+    metadata: normalizeMetadata({
+      name: resolveRawToolName(rawItem),
+      toolCallId: resolveRawToolCallId(rawItem),
+      isError: status === "failed",
+      status,
+      namespace: fromRaw("namespace"),
+      providerData: fromRaw("providerData"),
+      itemType: getObjectProperty(item, "type"),
+      rawType: fromRaw("type"),
+    }),
+  };
+}
+
+/**
+ * Merges a tool call derived from run items with the call recorded at
+ * runtime.
+ *
+ * Identity fields (`id`, `name`, `arguments`) and metadata prefer the
+ * item-derived `call`; the outcome (`error` / `result`) prefers the runtime
+ * call. The returned record never carries both an error and a result.
+ *
+ * @param call - Tool call built from the run items.
+ * @param runtimeCall - Runtime-recorded call with the same id, if any.
+ * @returns `call` unchanged when there is no runtime call, else the merge.
+ */
+function mergeToolCalls(
+  call: ToolCallRecord,
+  runtimeCall: ToolCallRecord | undefined,
+): ToolCallRecord {
+  if (!runtimeCall) {
+    return call;
+  }
+
+  const error = runtimeCall.error ?? call.error;
+  // Own-property checks, not value checks: a `result` key that is present
+  // but `undefined` still counts as "has a result".
+  const hasRuntimeResult = hasOwnObjectProperty(runtimeCall, "result");
+  const hasCallResult = hasOwnObjectProperty(call, "result");
+  const result = hasRuntimeResult ? runtimeCall.result : call.result;
+
+  // Later spreads win, so item-derived fields override runtime fields.
+  const merged = {
+    ...runtimeCall,
+    ...call,
+    id: call.id ?? runtimeCall.id,
+    name: call.name ?? runtimeCall.name,
+    arguments: call.arguments ?? runtimeCall.arguments,
+    metadata: normalizeMetadata({
+      ...(runtimeCall.metadata ?? {}),
+      ...(call.metadata ?? {}),
+    }),
+  };
+
+  if (error) {
+    // An error outcome drops any result picked up by the spreads above.
+    const { result: _result, ...withoutResult } = merged;
+    return {
+      ...withoutResult,
+      error,
+    };
+  }
+
+  // No error: strip both outcome keys, then re-add `result` only when one of
+  // the two inputs actually had that key.
+  const { error: _error, result: _result, ...withoutOutcome } = merged;
+  if (hasRuntimeResult || hasCallResult) {
+    return {
+      ...withoutOutcome,
+      result,
+    };
+  }
+
+  return withoutOutcome;
+}
+
+/**
+ * Resolves the content of a message item.
+ *
+ * A non-empty string `content` on the item itself is used first, then text
+ * extracted from the raw item's `content`, then the raw content normalized
+ * as-is.
+ *
+ * @param rawItem - Raw item underlying `item`.
+ * @param item - The run item.
+ * @returns The content, or `undefined` when the raw item has no `content`.
+ */
+function normalizeMessageContent(
+  rawItem: unknown,
+  item: unknown,
+): JsonValue | undefined {
+  const itemContent = getObjectProperty(item, "content");
+  if (typeof itemContent === "string" && itemContent.length > 0) {
+    return itemContent;
+  }
+
+  const rawContent = getObjectProperty(rawItem, "content");
+  const flattened = extractText(rawContent);
+  if (flattened) {
+    return flattened;
+  }
+
+  if (rawContent === undefined) {
+    return undefined;
+  }
+
+  return normalizeContent(rawContent);
+}
+
+/**
+ * Joins the text of every assistant message item.
+ *
+ * @param items - Run items to scan.
+ * @returns The texts joined by a blank line; an empty string when no
+ *   assistant item carries text.
+ */
+function resolveAssistantTextFromItems(items: unknown[]) {
+  return items
+    .flatMap((item) => {
+      const rawItem = getRunItemRawItem(item);
+      if (!isAssistantMessageItem(item, rawItem)) {
+        return [];
+      }
+
+      const text = extractText(getObjectProperty(rawItem, "content"));
+      return text ? [text] : [];
+    })
+    .join("\n\n");
+}
+
+/**
+ * Tells whether an item is an assistant message: either the item type is
+ * `message_output_item` or the raw item's role is `assistant`.
+ */
+function isAssistantMessageItem(item: unknown, rawItem: unknown) {
+  if (getObjectProperty(item, "type") === "message_output_item") {
+    return true;
+  }
+
+  return stringProperty(rawItem, "role") === "assistant";
+}
+
+/**
+ * Tells whether an item represents a tool call, judged by the item type
+ * (`tool_call_item`) or by one of the known raw call types.
+ */
+function isToolCallItem(item: unknown, rawItem: unknown) {
+  const toolCallRawTypes: unknown[] = [
+    "function_call",
+    "hosted_tool_call",
+    "tool_search_call",
+    "shell_call",
+    "computer_call",
+    "apply_patch_call",
+  ];
+
+  if (getObjectProperty(item, "type") === "tool_call_item") {
+    return true;
+  }
+
+  return toolCallRawTypes.includes(getObjectProperty(rawItem, "type"));
+}
+
+/**
+ * Tells whether an item represents a tool call output, judged by the item
+ * type (`tool_call_output_item`) or by one of the known raw output types.
+ */
+function isToolCallOutputItem(item: unknown, rawItem: unknown) {
+  const toolOutputRawTypes: unknown[] = [
+    "function_call_result",
+    "tool_search_output",
+    "shell_call_output",
+    "computer_call_result",
+    "apply_patch_call_output",
+  ];
+
+  if (getObjectProperty(item, "type") === "tool_call_output_item") {
+    return true;
+  }
+
+  return toolOutputRawTypes.includes(getObjectProperty(rawItem, "type"));
+}
+
+/**
+ * Returns the item's `rawItem` when it has one, otherwise the item itself.
+ */
+function getRunItemRawItem(item: unknown) {
+  const rawItem = getObjectProperty(item, "rawItem");
+  return rawItem === undefined || rawItem === null ? item : rawItem;
+}
+
+/**
+ * Collects per-item metadata: `id`, `status`, `providerData` and `type` from
+ * the raw item, plus the item's own `type` and agent.
+ */
+function normalizeRunItemMetadata(item: unknown, rawItem: unknown) {
+  const fromRaw = (key: string) => getObjectProperty(rawItem, key);
+  const agent = normalizeAgentMetadata(getObjectProperty(item, "agent"));
+  const itemType = getObjectProperty(item, "type");
+
+  return normalizeMetadata({
+    id: fromRaw("id"),
+    status: fromRaw("status"),
+    providerData: fromRaw("providerData"),
+    agent,
+    itemType,
+    rawType: fromRaw("type"),
+  });
+}
+
+/**
+ * Finds the tool call id on a raw item, trying `callId`, `call_id` and `id`
+ * in that order and accepting only string values.
+ */
+function resolveRawToolCallId(rawItem: unknown) {
+  for (const key of ["callId", "call_id", "id"]) {
+    const id = stringProperty(rawItem, key);
+    if (id !== undefined) {
+      return id;
+    }
+  }
+
+  return undefined;
+}
+
+/**
+ * Resolves a display name for a raw tool item.
+ *
+ * Tool-search items are always named `tool_search`. Otherwise the first
+ * string among `name`, `toolName` and `namespace` is used, then the raw
+ * `type`, then the literal `unknown`.
+ */
+function resolveRawToolName(rawItem: unknown) {
+  const rawType = stringProperty(rawItem, "type");
+
+  switch (rawType) {
+    case "tool_search_call":
+    case "tool_search_output":
+      return "tool_search";
+    default:
+      break;
+  }
+
+  for (const key of ["name", "toolName", "namespace"]) {
+    const named = stringProperty(rawItem, key);
+    if (named !== undefined) {
+      return named;
+    }
+  }
+
+  return rawType ?? "unknown";
+}
+
+/**
+ * Counts the tool calls present in a run result.
+ *
+ * Items come from `newItems` when it is a non-empty array, otherwise from
+ * `output`. Calls sharing an id are counted once; calls without an id are
+ * each counted.
+ *
+ * @param result - Raw value returned by the run.
+ * @returns Number of distinct tool calls found.
+ */
+function countToolCallsFromResult(result: unknown): number {
+  const newItems = arrayProperty(result, "newItems");
+  const items =
+    newItems && newItems.length > 0
+      ? newItems
+      : (arrayProperty(result, "output") ?? []);
+
+  const countedIds = new Set<string>();
+  let total = 0;
+
+  for (const item of items) {
+    const rawItem = getRunItemRawItem(item);
+    if (!isToolCallItem(item, rawItem)) {
+      continue;
+    }
+
+    const callId = resolveRawToolCallId(rawItem);
+    if (callId && countedIds.has(callId)) {
+      continue;
+    }
+
+    if (callId) {
+      countedIds.add(callId);
+    }
+    total += 1;
+  }
+
+  return total;
+}
+
+/**
+ * Normalizes tool call arguments into a JSON record.
+ *
+ * String values are parsed as JSON when possible. A plain object is
+ * normalized field by field; any other defined value (array, primitive,
+ * `null`, unparseable string) is wrapped as `{ input: value }`.
+ *
+ * @param value - Raw arguments from the tool call item.
+ * @returns The argument record, or `undefined` when there are no arguments.
+ */
+function normalizeArguments(
+  value: unknown,
+): Record<string, JsonValue> | undefined {
+  const parsed = parseMaybeJson(value);
+  if (parsed === undefined) {
+    return undefined;
+  }
+
+  const isPlainRecord =
+    Boolean(parsed) && typeof parsed === "object" && !Array.isArray(parsed);
+
+  return isPlainRecord
+    ? normalizeRecord(parsed as Record<string, unknown>)
+    : { input: normalizeContent(parsed) };
+}
+
+/**
+ * Converts a tool input into the JSON value used for replay, parsing string
+ * inputs as JSON first. Throws when the input cannot be represented as JSON.
+ */
+function normalizeReplayToolInput(value: unknown): JsonValue {
+  return toReplayJsonValue(parseMaybeJson(value), "OpenAI Agents tool input");
+}
+
+/**
+ * Normalizes a tool output into a JSON value, falling back to `String(value)`
+ * for defined values that have no JSON form.
+ */
+function normalizeToolResult(value: unknown): JsonValue | undefined {
+  const asJson = toJsonValue(value);
+
+  if (asJson === undefined && value !== undefined) {
+    return String(value);
+  }
+
+  return asJson;
+}
+
+/**
+ * Wraps the output of a failed tool call as an error record whose `message`
+ * is derived from that output.
+ */
+function normalizeToolOutputError(
+  output: unknown,
+): NonNullable<ToolCallRecord["error"]> {
+  const message = resolveToolOutputErrorMessage(output);
+  return { message };
+}
+
+/**
+ * Derives a non-empty error message from the output of a failed tool call.
+ *
+ * A string output is used directly. For other outputs the first non-empty
+ * string among the `message`, `error` and `text` properties is used, then
+ * text extracted from the output itself, then the output serialized as JSON.
+ * `Tool call failed` is the fallback when nothing else is available.
+ *
+ * @param output - Output attached to the failed tool call.
+ * @returns The error message.
+ */
+function resolveToolOutputErrorMessage(output: unknown) {
+  const fallbackMessage = "Tool call failed";
+
+  if (typeof output === "string") {
+    return output.length > 0 ? output : fallbackMessage;
+  }
+
+  // Check each candidate for emptiness individually. Chaining them with `??`
+  // would stop at an empty-string `message` and hide a usable `error`/`text`.
+  for (const key of ["message", "error", "text"]) {
+    const candidate = stringProperty(output, key);
+    if (candidate) {
+      return candidate;
+    }
+  }
+
+  const extracted = extractText(output);
+  if (extracted) {
+    return extracted;
+  }
+
+  const normalized = toJsonValue(output);
+  return normalized === undefined
+    ? fallbackMessage
+    : JSON.stringify(normalized);
+}
+
+/**
+ * Parses a string as JSON when possible.
+ *
+ * @param value - Any value.
+ * @returns The parsed JSON for a parseable string; otherwise `value` itself
+ *   (non-strings and unparseable strings are returned unchanged).
+ */
+function parseMaybeJson(value: unknown) {
+  if (typeof value === "string") {
+    try {
+      return JSON.parse(value) as unknown;
+    } catch {
+      // Not JSON: keep the original string.
+      return value;
+    }
+  }
+
+  return value;
+}
+
+/**
+ * Converts a thrown value into a tool call error record.
+ *
+ * The serialized error's extra fields are kept, `message` is coerced to a
+ * string, and `type` is included only when it is a string.
+ */
+function normalizeError(error: unknown): NonNullable<ToolCallRecord["error"]> {
+  const { message, type, ...details } = serializeError(error);
+  const text = typeof message === "string" ? message : String(message);
+
+  if (typeof type === "string") {
+    return { ...details, message: text, type };
+  }
+
+  return { ...details, message: text };
+}
+
+/**
+ * Converts a value to JSON for tool replay.
+ *
+ * @param value - Value to convert.
+ * @param label - Description of the value, used in the error message.
+ * @returns The JSON value.
+ * @throws Error when the value has no JSON representation.
+ */
+function toReplayJsonValue(value: unknown, label: string): JsonValue {
+  const asJson = toJsonValue(value);
+  if (asJson !== undefined) {
+    return asJson;
+  }
+
+  throw new Error(
+    `Tool replay only supports JSON-serializable values. ${label} could not be normalized.`,
+  );
+}
+
+/**
+ * Extracts plain text from message content.
+ *
+ * A string is returned as-is. For an array, each object entry contributes
+ * its `text`, else `refusal`, else `transcript` string; non-empty parts are
+ * concatenated without a separator.
+ *
+ * @param value - Message content.
+ * @returns The text, or `undefined` when the value is neither a string nor
+ *   an array with at least one non-empty text part.
+ */
+function extractText(value: unknown): string | undefined {
+  if (typeof value === "string") {
+    return value;
+  }
+
+  if (!Array.isArray(value)) {
+    return undefined;
+  }
+
+  let combined = "";
+  let foundPart = false;
+
+  for (const entry of value) {
+    if (!entry || typeof entry !== "object") {
+      continue;
+    }
+
+    const part =
+      stringProperty(entry, "text") ??
+      stringProperty(entry, "refusal") ??
+      stringProperty(entry, "transcript");
+    if (part) {
+      combined += part;
+      foundPart = true;
+    }
+  }
+
+  return foundPart ? combined : undefined;
+}
+
+/**
+ * Finds the provider name for a result: a string `provider` on the result,
+ * on `result.model`, on `result.lastAgent`, or on `result.lastAgent.model`,
+ * in that order.
+ */
+function resolveProvider(result: unknown) {
+  const lastAgent = getObjectProperty(result, "lastAgent");
+  const holders = [
+    result,
+    getObjectProperty(result, "model"),
+    lastAgent,
+    getObjectProperty(lastAgent, "model"),
+  ];
+
+  for (const holder of holders) {
+    const provider = stringProperty(holder, "provider");
+    if (provider !== undefined) {
+      return provider;
+    }
+  }
+
+  return undefined;
+}
+
+/**
+ * Finds the model identifier for a result.
+ *
+ * `result.model` is consulted first, then `result.lastAgent.model`. Each may
+ * be a string, or an object exposing a string `modelId` or `id`.
+ */
+function resolveModel(result: unknown) {
+  const identify = (model: unknown): string | undefined =>
+    typeof model === "string"
+      ? model
+      : (stringProperty(model, "modelId") ?? stringProperty(model, "id"));
+
+  const directModel = getObjectProperty(result, "model");
+  const lastAgentModel = getObjectProperty(
+    getObjectProperty(result, "lastAgent"),
+    "model",
+  );
+
+  return identify(directModel) ?? identify(lastAgentModel);
+}
+
+/**
+ * Summarizes an agent as metadata holding its `name` and model identifier;
+ * returns `undefined` for anything that is not an object.
+ */
+function normalizeAgentMetadata(agent: unknown) {
+  if (agent && typeof agent === "object") {
+    return normalizeMetadata({
+      name: getObjectProperty(agent, "name"),
+      // Reuse the result-level lookup by presenting the agent as `lastAgent`.
+      model: resolveModel({ lastAgent: agent }),
+    });
+  }
+
+  return undefined;
+}
+
+/**
+ * Reads `key` from `value` when `value` is a non-null object (arrays
+ * included); returns `undefined` for every other kind of value.
+ */
+function getObjectProperty(value: unknown, key: string): unknown {
+  if (!value || typeof value !== "object") {
+    return undefined;
+  }
+
+  return (value as Record<string, unknown>)[key];
+}
+
+/**
+ * Tells whether `key` is an own property of `value`, even when the stored
+ * value is `undefined`. Inherited properties do not count.
+ */
+function hasOwnObjectProperty(value: object, key: keyof ToolCallRecord) {
+  return Object.getOwnPropertyDescriptor(value, key) !== undefined;
+}
+
+/**
+ * Reads `key` from `value` and returns it only when it is a string.
+ */
+function stringProperty(value: unknown, key: string): string | undefined {
+  const candidate = getObjectProperty(value, key);
+  if (typeof candidate !== "string") {
+    return undefined;
+  }
+
+  return candidate;
+}
+
+/**
+ * Reads `key` from `value` and returns it only when it is a number.
+ */
+function numberProperty(value: unknown, key: string): number | undefined {
+  const candidate = getObjectProperty(value, key);
+  if (typeof candidate !== "number") {
+    return undefined;
+  }
+
+  return candidate;
+}
+
+/**
+ * Reads `key` from `value` and returns it only when it is an array.
+ */
+function arrayProperty(value: unknown, key: string): unknown[] | undefined {
+  const candidate = getObjectProperty(value, key);
+  if (!Array.isArray(candidate)) {
+    return undefined;
+  }
+
+  return candidate;
+}
+
+/**
+ * Walks `path` through nested objects and returns the value found there when
+ * it is a string; any missing or non-object step yields `undefined`.
+ */
+function findStringAtPath(value: unknown, path: string[]) {
+  const leaf = path.reduce<unknown>(
+    (current, key) => getObjectProperty(current, key),
+    value,
+  );
+
+  return typeof leaf === "string" ? leaf : undefined;
+}
+
+/**
+ * Serializes a JSON value, passing `undefined` through unchanged.
+ */
+function stringifyJson(value: JsonValue | undefined) {
+  if (value === undefined) {
+    return undefined;
+  }
+
+  return JSON.stringify(value);
+}
+
+/**
+ * Tells whether a value is thenable, i.e. truthy with a callable `then`.
+ */
+function isPromiseLike(value: unknown): value is Promise<unknown> {
+  if (!value) {
+    return false;
+  }
+
+  return typeof (value as { then?: unknown }).then === "function";
+}
diff --git a/packages/harness-openai-agents/tsconfig.json b/packages/harness-openai-agents/tsconfig.json
new file mode 100644
index 0000000..9e25e6e
--- /dev/null
+++ b/packages/harness-openai-agents/tsconfig.json
@@ -0,0 +1,4 @@
+{
+  "extends": "../../tsconfig.base.json",
+  "include": ["src/**/*.ts"]
+}
diff --git a/packages/harness-openai-agents/tsup.config.ts b/packages/harness-openai-agents/tsup.config.ts
new file mode 100644
index 0000000..3d247e5
--- /dev/null
+++ b/packages/harness-openai-agents/tsup.config.ts
@@ -0,0 +1,11 @@
+import { defineConfig } from "tsup";
+
+// Every source module is an entry point, except test files.
+const entry = ["src/**/*.ts", "!src/**/*.test.ts", "!src/**/*.test.*.ts"];
+
+// Left as imports in the output instead of being bundled.
+const external = ["@openai/agents", "vitest-evals"];
+
+// Emits CommonJS and ESM builds with type declarations and source maps,
+// cleaning the output directory first.
+export default defineConfig({
+  entry,
+  format: ["cjs", "esm"],
+  dts: true,
+  splitting: false,
+  sourcemap: true,
+  clean: true,
+  external,
+});
diff --git a/packages/harness-pi-ai/README.md b/packages/harness-pi-ai/README.md
index 622fe6e..3f1e24b 100644
--- a/packages/harness-pi-ai/README.md
+++ b/packages/harness-pi-ai/README.md
@@ -11,12 +11,31 @@ npm install -D vitest-evals @vitest-evals/harness-pi-ai
 ## Usage
 
 ```ts
+import { expect } from "vitest";
 import { piAiHarness } from "@vitest-evals/harness-pi-ai";
+import { describeEval, toolCalls } from "vitest-evals";
 
 const harness = piAiHarness({
   createAgent: () => createRefundAgent(),
+  toolReplay: {
+    lookupInvoice: true,
+  },
   prompt: sharedJudgePrompt,
 });
+
+describeEval("refund agent", { harness }, (it) => {
+  it("approves a refundable invoice", async ({ run }) => {
+    const result = await run("Refund invoice inv_123");
+
+    expect(result.output).toMatchObject({
+      status: "approved",
+    });
+    expect(toolCalls(result.session).map((call) => call.name)).toEqual([
+      "lookupInvoice",
+      "createRefund",
+    ]);
+  });
+});
 ```
 
 `prompt` gives rubric or factuality judges the same provider/model setup
@@ -74,7 +93,7 @@ The adapter provides:
 - a required prompt seam for LLM-backed judges
 - normalized session capture from emitted events and wrapped tool calls
 - usage/output inference for common `pi-ai`-style result objects
-- opt-in tool replay/recording when the tool definition sets `replay: true`
+- opt-in tool replay/recording from harness-level `toolReplay`
 
 See the workspace demo in `apps/demo-pi`.
 
@@ -95,17 +114,24 @@ export default defineConfig({
 });
 ```
 
-Then opt individual tools into recording/replay:
+Then opt individual tools into recording/replay from the harness:
 
 ```ts
-const tools = {
-  lookupInvoice: {
-    replay: true,
-    execute: async ({ invoiceId }) => fetchInvoice(invoiceId),
+const harness = piAiHarness({
+  createAgent: () => createRefundAgent(),
+  toolReplay: {
+    lookupInvoice: true,
   },
-};
+  prompt: sharedJudgePrompt,
+});
 ```
 
+When an agent exposes both a native Pi tool and a runtime tool with the same
+name, a native tool call records in its own cassette namespace. Runtime calls of
+that same name are treated as implementation details while the native tool is
+executing, so delegated runtime calls do not create duplicate trace entries or
+overwrite the native recording.
+
 Supported modes:
 
 - `off`: never read or write recordings
diff --git a/packages/harness-pi-ai/src/index.test.ts b/packages/harness-pi-ai/src/index.test.ts
index 70ee5c7..c9e0dc1 100644
--- a/packages/harness-pi-ai/src/index.test.ts
+++ b/packages/harness-pi-ai/src/index.test.ts
@@ -494,6 +494,151 @@ describeEval(
   },
 );
 
+// Covers a native Pi tool and a runtime tool that share the name
+// `lookupInvoice`, where the native tool delegates to the runtime tool.
+// Expectations: one traced call per run, a recording written under a
+// `lookupInvoice.native` path on the first run, and a second run that is
+// replayed without executing either implementation.
+test("lets native Pi tools own replay when they delegate to a runtime tool of the same name", async () => {
+  replayDir = mkdtempSync(join(process.cwd(), ".tmp-pi-overlap-replay-"));
+  vi.stubEnv("VITEST_EVALS_REPLAY_MODE", "auto");
+  vi.stubEnv("VITEST_EVALS_REPLAY_DIR", replayDir);
+
+  // Runtime-side implementation of the tool.
+  const lookupInvoice = vi.fn(async ({ invoiceId }: { invoiceId: string }) => ({
+    invoiceId,
+    refundable: true,
+  }));
+  // Captured in `run` so the native tool can reach the runtime tools.
+  let activeRuntime: DemoRuntime | undefined;
+  // Native-side implementation: delegates to the runtime tool of the same name.
+  const nativeExecute = vi.fn(
+    async (_toolCallId: string, args: { invoiceId: string }) => {
+      if (!activeRuntime) {
+        throw new Error("Expected runtime before native tool execution");
+      }
+
+      const invoice = await activeRuntime.tools.lookupInvoice({
+        invoiceId: args.invoiceId,
+      });
+
+      return {
+        content: [{ type: "text", text: JSON.stringify(invoice) }],
+        details: invoice,
+      };
+    },
+  );
+
+  const replayHarness = piAiHarness({
+    prompt: judgePrompt,
+    toolReplay: {
+      lookupInvoice: true,
+    },
+    createAgent: () => {
+      const nativeTools = [
+        {
+          name: "lookupInvoice",
+          execute: nativeExecute,
+        },
+      ];
+
+      return {
+        toolset: {
+          lookupInvoice: {
+            execute: lookupInvoice,
+          },
+        } satisfies PiAiToolset<string, DemoMetadata>,
+        agent: {
+          state: {
+            tools: nativeTools,
+          },
+        },
+        async run(_input: string, runtime: DemoRuntime) {
+          activeRuntime = runtime;
+          const toolResult = await nativeTools[0].execute("lookupInvoice", {
+            invoiceId: "inv_123",
+          });
+
+          runtime.events.assistant(toolResult.content[0].text);
+
+          return {
+            decision: toolResult.details.refundable
+              ? { status: "approved" as const }
+              : { status: "denied" as const, reason: "not refundable" },
+          };
+        },
+      };
+    },
+  });
+
+  // First run: nothing recorded yet, so both implementations execute once.
+  const firstRun = await replayHarness.run("Refund invoice inv_123", {
+    metadata: {},
+    task: {
+      meta: {},
+    },
+    artifacts: {},
+    setArtifact: vi.fn(),
+  });
+
+  expect(nativeExecute).toHaveBeenCalledTimes(1);
+  expect(lookupInvoice).toHaveBeenCalledTimes(1);
+  // The delegated runtime call must not show up as a second trace entry.
+  const firstCalls = toolCalls(firstRun.session);
+  expect(firstCalls).toHaveLength(1);
+  expect(firstCalls[0]).toMatchObject({
+    name: "lookupInvoice",
+    result: {
+      invoiceId: "inv_123",
+      refundable: true,
+    },
+    metadata: {
+      replay: {
+        status: "recorded",
+      },
+    },
+  });
+  const recordingPath = (
+    firstCalls[0].metadata?.replay as { recordingPath: string }
+  ).recordingPath;
+  expect(recordingPath).toContain("lookupInvoice.native");
+  const recording = JSON.parse(
+    readFileSync(join(process.cwd(), recordingPath), "utf8"),
+  ) as {
+    output: {
+      __vitestEvals: { kind: string };
+      normalizedResult: { invoiceId: string; refundable: boolean };
+    };
+  };
+  expect(recording.output).toMatchObject({
+    __vitestEvals: {
+      kind: "pi-ai-native-tool-result",
+    },
+    normalizedResult: {
+      invoiceId: "inv_123",
+      refundable: true,
+    },
+  });
+
+  // From here on any real execution would throw, so the second run can only
+  // pass if the call is served from the recording.
+  nativeExecute.mockImplementation(async () => {
+    throw new Error("native tool should not execute after recording exists");
+  });
+  lookupInvoice.mockImplementation(async () => {
+    throw new Error("runtime tool should not execute after recording exists");
+  });
+
+  const secondRun = await replayHarness.run("Refund invoice inv_123", {
+    metadata: {},
+    task: {
+      meta: {},
+    },
+    artifacts: {},
+    setArtifact: vi.fn(),
+  });
+
+  // Call counts are unchanged from the first run: nothing executed again.
+  expect(nativeExecute).toHaveBeenCalledTimes(1);
+  expect(lookupInvoice).toHaveBeenCalledTimes(1);
+  expect(toolCalls(secondRun.session)).toHaveLength(1);
+  expect(toolCalls(secondRun.session)[0]).toMatchObject({
+    name: "lookupInvoice",
+    metadata: {
+      replay: {
+        status: "replayed",
+      },
+    },
+  });
+});
+
 describeEval(
   "pi-ai harness infers runtime toolsets from existing agents",
   {
@@ -790,11 +935,13 @@ test("replays native agent tools without breaking the agent-facing result", asyn
 
   const replayHarness = piAiHarness({
     prompt: judgePrompt,
+    toolReplay: {
+      lookupInvoice: true,
+    },
     createAgent: () => {
       const nativeTools = [
         {
           name: "lookupInvoice",
-          replay: true,
           execute,
         },
       ];
@@ -932,6 +1079,76 @@ test("replays native agent tools without breaking the agent-facing result", asyn
   ]);
 });
 
+// A `replay: true` flag on a native tool object must be ignored: with no
+// harness-level `toolReplay`, the tool executes and its trace entry carries
+// no replay metadata.
+test("does not opt native agent tools into replay from tool objects", async () => {
+  replayDir = mkdtempSync(join(process.cwd(), ".tmp-pi-native-replay-"));
+  vi.stubEnv("VITEST_EVALS_REPLAY_MODE", "auto");
+  vi.stubEnv("VITEST_EVALS_REPLAY_DIR", replayDir);
+
+  const execute = vi.fn(
+    async (_toolCallId: string, args: { invoiceId: string }) => ({
+      content: [
+        {
+          type: "text",
+          text: JSON.stringify({
+            invoiceId: args.invoiceId,
+            refundable: true,
+          }),
+        },
+      ],
+      details: {
+        invoiceId: args.invoiceId,
+        refundable: true,
+      },
+    }),
+  );
+
+  // Note: no `toolReplay` option is passed to the harness here.
+  const harness = piAiHarness({
+    prompt: judgePrompt,
+    createAgent: () => {
+      const nativeTools = [
+        {
+          name: "lookupInvoice",
+          // Tool-level flag that the harness is expected to ignore.
+          replay: true,
+          execute,
+        },
+      ];
+
+      return {
+        agent: {
+          state: {
+            tools: nativeTools,
+          },
+        },
+        async run(_input: string, runtime: { events: DemoRuntime["events"] }) {
+          const toolResult = await nativeTools[0].execute("lookupInvoice", {
+            invoiceId: "inv_123",
+          });
+
+          runtime.events.assistant(toolResult.content[0].text);
+
+          return {
+            decision: {
+              status: "approved" as const,
+            },
+          };
+        },
+      };
+    },
+  });
+
+  const run = await harness.run("Refund invoice inv_123", {
+    metadata: {},
+    task: {
+      meta: {},
+    },
+    artifacts: {},
+    setArtifact: vi.fn(),
+  });
+
+  expect(execute).toHaveBeenCalledTimes(1);
+  expect(toolCalls(run.session)[0].metadata?.replay).toBeUndefined();
+});
+
 test("records and replays opt-in tools in auto mode", async () => {
   replayDir = mkdtempSync(join(process.cwd(), ".tmp-pi-replay-"));
   vi.stubEnv("VITEST_EVALS_REPLAY_MODE", "auto");
@@ -944,10 +1161,12 @@ test("records and replays opt-in tools in auto mode", async () => {
 
   const replayHarness = piAiHarness({
     prompt: judgePrompt,
+    toolReplay: {
+      lookupInvoice: true,
+    },
     createAgent: () => ({ id: "refund-agent" }),
     tools: {
       lookupInvoice: {
-        replay: true,
         execute,
       },
     } satisfies PiAiToolset<string, DemoMetadata>,
@@ -1016,6 +1235,51 @@ test("records and replays opt-in tools in auto mode", async () => {
   });
 });
 
+// A `replay: true` flag on a runtime tool definition must be ignored: with no
+// harness-level `toolReplay`, the tool executes and its trace entry carries
+// no replay metadata.
+test("does not opt runtime tools into replay from tool definitions", async () => {
+  replayDir = mkdtempSync(join(process.cwd(), ".tmp-pi-replay-"));
+  vi.stubEnv("VITEST_EVALS_REPLAY_MODE", "auto");
+  vi.stubEnv("VITEST_EVALS_REPLAY_DIR", replayDir);
+
+  const execute = vi.fn(async ({ invoiceId }: { invoiceId: string }) => ({
+    invoiceId,
+    refundable: true,
+  }));
+
+  const harness = piAiHarness<{ id: string }, string, DemoMetadata>({
+    prompt: judgePrompt,
+    createAgent: () => ({ id: "refund-agent" }),
+    // The double cast lets the test pass a `replay` field even though the
+    // tool definition type does not declare one.
+    tools: {
+      lookupInvoice: {
+        replay: true,
+        execute,
+      },
+    } as unknown as PiAiToolset<string, DemoMetadata>,
+    run: async ({ runtime }) => {
+      await runtime.tools.lookupInvoice({
+        invoiceId: "inv_123",
+      });
+
+      return {
+        decision: {
+          status: "approved",
+        },
+      };
+    },
+  });
+
+  const run = await harness.run("Refund invoice inv_123", {
+    metadata: {},
+    task: {
+      meta: {},
+    },
+    artifacts: {},
+    setArtifact: vi.fn(),
+  });
+
+  expect(execute).toHaveBeenCalledTimes(1);
+  expect(toolCalls(run.session)[0].metadata?.replay).toBeUndefined();
+});
+
 test("errors when strict mode is missing a recording", async () => {
   replayDir = mkdtempSync(join(process.cwd(), ".tmp-pi-replay-"));
   vi.stubEnv("VITEST_EVALS_REPLAY_MODE", "strict");
@@ -1028,10 +1292,12 @@ test("errors when strict mode is missing a recording", async () => {
 
   const replayHarness = piAiHarness({
     prompt: judgePrompt,
+    toolReplay: {
+      lookupInvoice: true,
+    },
     createAgent: () => ({ id: "refund-agent" }),
     tools: {
       lookupInvoice: {
-        replay: true,
         execute,
       },
     } satisfies PiAiToolset<string, DemoMetadata>,
diff --git a/packages/harness-pi-ai/src/index.ts b/packages/harness-pi-ai/src/index.ts
index 4e4c374..d93d738 100644
--- a/packages/harness-pi-ai/src/index.ts
+++ b/packages/harness-pi-ai/src/index.ts
@@ -49,14 +49,6 @@ type PiAgentToolLike<
   TMetadata extends HarnessMetadata = HarnessMetadata,
 > = {
   name: string;
-  replay?:
-    | boolean
-    | PiAiToolReplayConfig<
-        Record<string, JsonValue>,
-        JsonValue,
-        TInput,
-        TMetadata
-      >;
   execute: (toolCallId: string, args: Record<string, JsonValue>) => unknown;
 };
 
@@ -105,6 +97,23 @@ export type PiAiToolReplayConfig<
   TMetadata extends HarnessMetadata = HarnessMetadata,
 > = ToolReplayConfig<TArgs, TResult, PiAiToolContext<TInput, TMetadata>>;
 
+export type PiAiToolReplayPolicy<
+  TInput = string,
+  TMetadata extends HarnessMetadata = HarnessMetadata,
+> =
+  | boolean
+  | PiAiToolReplayConfig<
+      Record<string, JsonValue>,
+      JsonValue,
+      TInput,
+      TMetadata
+    >; // Replay policy for a single tool: a boolean switch, or a full replay config over JSON args/results.
+
+export type PiAiToolReplayPolicies<
+  TInput = string,
+  TMetadata extends HarnessMetadata = HarnessMetadata,
+> = Record<string, PiAiToolReplayPolicy<TInput, TMetadata>>; // Keyed by tool name; the harness looks policies up as `toolReplay[toolName]`.
+
 export interface PiAiToolDefinition<
   TArgs extends Record<string, JsonValue> = Record<string, JsonValue>,
   TResult extends JsonValue = JsonValue,
@@ -112,7 +121,6 @@ export interface PiAiToolDefinition<
   TMetadata extends HarnessMetadata = HarnessMetadata,
 > {
   description?: string;
-  replay?: boolean | PiAiToolReplayConfig<TArgs, TResult, TInput, TMetadata>;
   execute: (
     args: TArgs,
     context: PiAiToolContext<TInput, TMetadata>,
@@ -190,6 +198,7 @@ interface PiAiHarnessBaseOptions<
 > {
   agent?: TAgent;
   createAgent?: () => MaybePromise<TAgent>;
+  toolReplay?: PiAiToolReplayPolicies<TInput, TMetadata>;
   normalize?: PiAiHarnessNormalizeOptions<
     TAgent,
     TInput,
@@ -309,6 +318,10 @@ type InferredToolSurfaces<TInput, TMetadata extends HarnessMetadata> = {
   nativeToolsets?: Array<PiAgentToolLike<TInput, TMetadata>[]>;
 };
 
+type PiToolExecutionState = { // Bookkeeping created once per harness run and handed to both the runtime and the instrumented native tools.
+  activeNativeToolNames: Map<string, number>; // tool name -> number of native executions of that tool currently in flight
+};
+
 /** Adapts a Pi agent runtime into a normalized vitest-evals harness. */
 export function piAiHarness<
   TAgent,
@@ -403,10 +416,13 @@ async function executePiHarnessRun<
   runtimeTools: TTools | undefined,
   nativeToolsets?: Array<PiAgentToolLike<TInput, TMetadata>[]>,
 ): Promise<HarnessRun> {
+  const executionState = createPiToolExecutionState();
   const runtime = createRuntime({
     input,
     context,
     tools: runtimeTools,
+    toolReplay: options.toolReplay,
+    executionState,
     messages,
   });
 
@@ -419,6 +435,8 @@ async function executePiHarnessRun<
         context,
         messages,
         toolCalls: runtime.toolCalls,
+        toolReplay: options.toolReplay,
+        executionState,
       },
       () =>
         runAgent(options, {
@@ -725,6 +743,8 @@ async function withInstrumentedAgentTools<
     context: HarnessContext<TMetadata>;
     messages: NormalizedMessage[];
     toolCalls: ToolCallRecord[];
+    toolReplay: PiAiToolReplayPolicies<TInput, TMetadata> | undefined;
+    executionState: PiToolExecutionState;
   },
   callback: () => Promise<TResult>,
 ) {
@@ -756,13 +776,17 @@ async function withInstrumentedAgentTools<
         signal: args.context.signal,
         setArtifact: args.context.setArtifact,
       } satisfies PiAiToolContext<TInput, TMetadata>;
+      const leaveNativeTool = enterNativeToolExecution(
+        args.executionState,
+        tool.name,
+      );
 
       try {
         const execution = await executeNativeToolWithReplay({
           toolName: tool.name,
           toolCallId,
           execute: originalExecute,
-          replay: tool.replay,
+          replay: args.toolReplay?.[tool.name],
           args: rawArgs,
           context: toolContext,
         });
@@ -806,6 +830,8 @@ async function withInstrumentedAgentTools<
           toolCalls: [call],
         });
         throw error;
+      } finally {
+        leaveNativeTool();
       }
     };
     instrumentedExecute[ORIGINAL_NATIVE_EXECUTE] = originalExecute;
@@ -912,6 +938,39 @@ function getNativeToolExecuteOrigin<TInput, TMetadata extends HarnessMetadata>(
   return nativeExecute[ORIGINAL_NATIVE_EXECUTE] ?? nativeExecute;
 }
 
+function createPiToolExecutionState(): PiToolExecutionState { // Returns fresh state with no native tool marked as executing.
+  return {
+    activeNativeToolNames: new Map(),
+  };
+}
+
+function enterNativeToolExecution(
+  state: PiToolExecutionState,
+  toolName: string,
+) { // Marks one native execution of `toolName` as in flight and returns the callback that unmarks it.
+  state.activeNativeToolNames.set(
+    toolName,
+    (state.activeNativeToolNames.get(toolName) ?? 0) + 1,
+  ); // a counter rather than a flag, so overlapping executions of the same tool are each tracked
+
+  return () => {
+    const nextCount = (state.activeNativeToolNames.get(toolName) ?? 1) - 1;
+    if (nextCount <= 0) {
+      state.activeNativeToolNames.delete(toolName); // nothing left in flight: drop the entry instead of storing 0
+      return;
+    }
+
+    state.activeNativeToolNames.set(toolName, nextCount);
+  }; // NOTE(review): not idempotent, every call decrements; the instrumented tool wrapper calls it once from `finally`
+}
+
+function hasActiveNativeToolExecution(
+  state: PiToolExecutionState,
+  toolName: string,
+) { // Reports whether at least one native execution of `toolName` is currently in flight.
+  return (state.activeNativeToolNames.get(toolName) ?? 0) > 0; // a missing entry counts as zero
+}
+
 async function executeNativeToolWithReplay<
   TInput,
   TMetadata extends HarnessMetadata,
@@ -926,7 +985,7 @@ async function executeNativeToolWithReplay<
   toolName: string;
   toolCallId: string;
   execute: PiAgentToolLike<TInput, TMetadata>["execute"];
-  replay: PiAgentToolLike<TInput, TMetadata>["replay"];
+  replay: PiAiToolReplayPolicy<TInput, TMetadata> | undefined;
   args: Record<string, JsonValue>;
   context: PiAiToolContext<TInput, TMetadata>;
 }) {
@@ -934,7 +993,7 @@ async function executeNativeToolWithReplay<
   let liveResult: unknown;
 
   const execution = await executeWithReplay({
-    toolName,
+    toolName: createNativeReplayToolName(toolName),
     args,
     context,
     execute: async (toolArgs) => {
@@ -959,6 +1018,10 @@ async function executeNativeToolWithReplay<
   };
 }
 
+function createNativeReplayToolName(toolName: string) { // Builds the name native tools are replayed under: `<toolName>.native`.
+  return `${toolName}.native`; // NOTE(review): presumably keeps native recordings apart from runtime-tool recordings, which use the bare name; confirm
+}
+
 function createRuntime<
   TInput,
   TMetadata extends HarnessMetadata,
@@ -967,11 +1030,15 @@ function createRuntime<
   input,
   context,
   tools,
+  toolReplay,
+  executionState,
   messages,
 }: {
   input: TInput;
   context: HarnessContext<TMetadata>;
   tools: TTools | undefined;
+  toolReplay: PiAiToolReplayPolicies<TInput, TMetadata> | undefined;
+  executionState: PiToolExecutionState;
   messages: NormalizedMessage[];
 }): PiAiRuntime<TTools, TInput, TMetadata> & {
   toolCalls: ToolCallRecord[];
@@ -1019,6 +1086,10 @@ function createRuntime<
       toolName,
       async (args: Record<string, JsonValue>) => {
         const startedAt = new Date();
+        const isNativeImplementationCall = hasActiveNativeToolExecution(
+          executionState,
+          toolName,
+        );
         const toolContext = {
           input,
           metadata: context.metadata,
@@ -1030,10 +1101,18 @@ function createRuntime<
           const execution = await executeToolWithReplay({
             toolName,
             tool,
+            replay: isNativeImplementationCall
+              ? undefined
+              : toolReplay?.[toolName],
             args,
             context: toolContext,
           });
           const finishedAt = new Date();
+
+          if (isNativeImplementationCall) {
+            return execution.result;
+          }
+
           const call = {
             name: toolName,
             arguments: args,
@@ -1058,6 +1137,10 @@ function createRuntime<
           return execution.result;
         } catch (error) {
           const finishedAt = new Date();
+          if (isNativeImplementationCall) {
+            throw error;
+          }
+
           const call = {
             name: toolName,
             arguments: args,
@@ -1319,19 +1402,26 @@ async function executeToolWithReplay<
 >({
   toolName,
   tool,
+  replay,
   args,
   context,
 }: {
   toolName: string;
   tool: PiAiToolDefinition<TArgs, TResult, TInput, TMetadata>;
+  replay: PiAiToolReplayPolicy<TInput, TMetadata> | undefined;
   args: TArgs;
   context: PiAiToolContext<TInput, TMetadata>;
 }) {
-  return executeWithReplay({
+  return executeWithReplay<
+    Record<string, JsonValue>,
+    JsonValue,
+    PiAiToolContext<TInput, TMetadata>
+  >({
     toolName,
     args,
     context,
-    execute: tool.execute,
-    replay: tool.replay,
+    execute: (toolArgs, toolContext) =>
+      tool.execute(toolArgs as TArgs, toolContext),
+    replay,
   });
 }
diff --git a/packages/vitest-evals/README.md b/packages/vitest-evals/README.md
index ab06535..7c5ef7d 100644
--- a/packages/vitest-evals/README.md
+++ b/packages/vitest-evals/README.md
@@ -14,6 +14,8 @@ Install a first-party harness package for the runtime you want to test:
 npm install -D @vitest-evals/harness-pi-ai
 # or
 npm install -D @vitest-evals/harness-ai-sdk
+# or
+npm install -D @vitest-evals/harness-openai-agents
 ```
 
 ## Core Model
@@ -146,10 +148,21 @@ The harness owns normalization, diagnostics, tool capture, replay plumbing, and
 reporter-facing artifacts. Your app just needs one runtime seam where those
 wrapped pieces can be injected.
 
+Opt tools into replay on the harness, via `toolReplay`; the replay mode and
+recording directory can live in Vitest environment config. Tool definitions
+should stay free of record/replay (VCR-style) policy.
+
 For the Pi-specific harness, output/session/usage normalization should usually
 be inferred automatically. Treat low-level normalization callbacks as an escape
 hatch, not part of the primary authoring path.
 
+For OpenAI Agents SDK apps, use
+`@vitest-evals/harness-openai-agents` with an existing `Agent` or
+`createAgent()` factory and a `Runner` / `createRunner()` callback. The harness
+calls `Runner.run(agent, input, options)` by default and exposes the same
+normalization and replay hooks when the app needs a custom entrypoint or
+structured domain output mapping.
+
 ## Custom App Harnesses
 
 First-party harness packages are conveniences, not the only supported path. If
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index 031cd0d..32da930 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -75,14 +75,29 @@ importers:
         specifier: ^4.3.6
         version: 4.3.6
 
+  apps/demo-openai-agents:
+    dependencies:
+      '@openai/agents':
+        specifier: ^0.8.5
+        version: 0.8.5(ws@8.20.0)(zod@4.3.6)
+      '@vitest-evals/harness-openai-agents':
+        specifier: workspace:*
+        version: link:../../packages/harness-openai-agents
+      vitest-evals:
+        specifier: workspace:*
+        version: link:../../packages/vitest-evals
+      zod:
+        specifier: ^4.3.6
+        version: 4.3.6
+
   apps/demo-pi:
     dependencies:
       '@mariozechner/pi-agent-core':
         specifier: 0.67.68
-        version: 0.67.68(ws@8.20.0)(zod@4.3.6)
+        version: 0.67.68(@modelcontextprotocol/sdk@1.29.0(zod@4.3.6))(ws@8.20.0)(zod@4.3.6)
       '@mariozechner/pi-ai':
         specifier: 0.67.68
-        version: 0.67.68(ws@8.20.0)(zod@4.3.6)
+        version: 0.67.68(@modelcontextprotocol/sdk@1.29.0(zod@4.3.6))(ws@8.20.0)(zod@4.3.6)
       '@vitest-evals/harness-pi-ai':
         specifier: workspace:*
         version: link:../../packages/harness-pi-ai
@@ -99,14 +114,23 @@ importers:
         specifier: workspace:*
         version: link:../vitest-evals
 
+  packages/harness-openai-agents:
+    devDependencies:
+      '@openai/agents':
+        specifier: ^0.8.5
+        version: 0.8.5(ws@8.20.0)(zod@4.3.6)
+      vitest-evals:
+        specifier: workspace:*
+        version: link:../vitest-evals
+
   packages/harness-pi-ai:
     devDependencies:
       '@mariozechner/pi-agent-core':
         specifier: ^0.67.68
-        version: 0.67.68(ws@8.20.0)(zod@4.3.6)
+        version: 0.67.68(@modelcontextprotocol/sdk@1.29.0(zod@4.3.6))(ws@8.20.0)(zod@4.3.6)
       '@mariozechner/pi-ai':
         specifier: ^0.67.68
-        version: 0.67.68(ws@8.20.0)(zod@4.3.6)
+        version: 0.67.68(@modelcontextprotocol/sdk@1.29.0(zod@4.3.6))(ws@8.20.0)(zod@4.3.6)
       vitest-evals:
         specifier: workspace:*
         version: link:../vitest-evals
@@ -351,24 +375,28 @@ packages:
     engines: {node: '>=14.21.3'}
     cpu: [arm64]
     os: [linux]
+    libc: [musl]
 
   '@biomejs/cli-linux-arm64@1.9.4':
     resolution: {integrity: sha512-fJIW0+LYujdjUgJJuwesP4EjIBl/N/TcOX3IvIHJQNsAqvV2CHIogsmA94BPG6jZATS4Hi+xv4SkBBQSt1N4/g==}
     engines: {node: '>=14.21.3'}
     cpu: [arm64]
     os: [linux]
+    libc: [glibc]
 
   '@biomejs/cli-linux-x64-musl@1.9.4':
     resolution: {integrity: sha512-gEhi/jSBhZ2m6wjV530Yy8+fNqG8PAinM3oV7CyO+6c3CEh16Eizm21uHVsyVBEB6RIM8JHIl6AGYCv6Q6Q9Tg==}
     engines: {node: '>=14.21.3'}
     cpu: [x64]
     os: [linux]
+    libc: [musl]
 
   '@biomejs/cli-linux-x64@1.9.4':
     resolution: {integrity: sha512-lRCJv/Vi3Vlwmbd6K+oQ0KhLHMAysN8lXoCI7XeHlxaajk06u7G+UsFSO01NAs5iYuWKmVZjmiOzJ0OJmGsMwg==}
     engines: {node: '>=14.21.3'}
     cpu: [x64]
     os: [linux]
+    libc: [glibc]
 
   '@biomejs/cli-win32-arm64@1.9.4':
     resolution: {integrity: sha512-tlbhLk+WXZmgwoIKwHIHEBZUwxml7bRJgk0X2sPyNR3S93cdRq6XulAZRQJ17FYGGzWne0fgrXBKpl7l4M87Hg==}
@@ -547,6 +575,12 @@ packages:
       '@modelcontextprotocol/sdk':
         optional: true
 
+  '@hono/node-server@1.19.14':
+    resolution: {integrity: sha512-GwtvgtXxnWsucXvbQXkRgqksiH2Qed37H9xHZocE5sA3N8O8O8/8FA3uclQXxXVzc9XBZuEOMK7+r02FmSpHtw==}
+    engines: {node: '>=18.14.1'}
+    peerDependencies:
+      hono: ^4
+
   '@isaacs/cliui@8.0.2':
     resolution: {integrity: sha512-O8jcjabXaleOG9DQ0+ARXWZBTfnP4WNAqzuiJK7ll44AmxGKv/J2M4TPjxjY3znBCfvBXFzucm1twdyFybFqEA==}
     engines: {node: '>=12'}
@@ -582,6 +616,39 @@ packages:
   '@mistralai/mistralai@2.2.0':
     resolution: {integrity: sha512-JQUGIXjFWnw/J9LpTSf/ZXwVW3Sh8FBAcfTo5QvAHqkl4CfSiIwnjRJhMoAFcP6ncCe84YPU1ncDGX+p3OXnfg==}
 
+  '@modelcontextprotocol/sdk@1.29.0':
+    resolution: {integrity: sha512-zo37mZA9hJWpULgkRpowewez1y6ML5GsXJPY8FI0tBBCd77HEvza4jDqRKOXgHNn867PVGCyTdzqpz0izu5ZjQ==}
+    engines: {node: '>=18'}
+    peerDependencies:
+      '@cfworker/json-schema': ^4.1.1
+      zod: ^3.25 || ^4.0
+    peerDependenciesMeta:
+      '@cfworker/json-schema':
+        optional: true
+
+  '@openai/agents-core@0.8.5':
+    resolution: {integrity: sha512-qs9mmN+D+UmqEZo3qrvhhIIXIOgSvJPic0v4a+ruq+eYgcQMk3PY8lLcsdQwJit6zf2Wyfv1q2cX5m3jzWZpKw==}
+    peerDependencies:
+      zod: ^4.0.0
+    peerDependenciesMeta:
+      zod:
+        optional: true
+
+  '@openai/agents-openai@0.8.5':
+    resolution: {integrity: sha512-cGYmyiVy8ecgf2Vch0L/ekeNo3xuZsuWnRsxyv+w9ai9dgxUifdEQ6G3dtsjMLtmXVHRVGoO7mVBr+tKcilntw==}
+    peerDependencies:
+      zod: ^4.0.0
+
+  '@openai/agents-realtime@0.8.5':
+    resolution: {integrity: sha512-JqKVsR33OvKtTxRp5Ylhw8WfNvJ49ZIhlhMZlSVKqwR2Ks6JuxqFJ0zM9p7JIbTQDSlAZnmnZJv1qlItaildiQ==}
+    peerDependencies:
+      zod: ^4.0.0
+
+  '@openai/agents@0.8.5':
+    resolution: {integrity: sha512-OFA7XVV1qXE8lzatvQj080KdSArt8utBExFXRfD5B/R7KT0D+AVaKwg6nLoW3Gxb30vRkIUQf+MaW/Wz+gO3Yg==}
+    peerDependencies:
+      zod: ^4.0.0
+
   '@opentelemetry/api@1.9.0':
     resolution: {integrity: sha512-3giAOQvZiH5F9bMlMiv8+GSPMeqg0dbaeo58/0SlA9sxSqZhnUtxzX9/2FzyhS9sWQf5S0GJE0AKBrFqjpeYcg==}
     engines: {node: '>=8.0.0'}
@@ -654,56 +721,67 @@ packages:
     resolution: {integrity: sha512-EtP8aquZ0xQg0ETFcxUbU71MZlHaw9MChwrQzatiE8U/bvi5uv/oChExXC4mWhjiqK7azGJBqU0tt5H123SzVA==}
     cpu: [arm]
     os: [linux]
+    libc: [glibc]
 
   '@rollup/rollup-linux-arm-musleabihf@4.46.2':
     resolution: {integrity: sha512-qO7F7U3u1nfxYRPM8HqFtLd+raev2K137dsV08q/LRKRLEc7RsiDWihUnrINdsWQxPR9jqZ8DIIZ1zJJAm5PjQ==}
     cpu: [arm]
     os: [linux]
+    libc: [musl]
 
   '@rollup/rollup-linux-arm64-gnu@4.46.2':
     resolution: {integrity: sha512-3dRaqLfcOXYsfvw5xMrxAk9Lb1f395gkoBYzSFcc/scgRFptRXL9DOaDpMiehf9CO8ZDRJW2z45b6fpU5nwjng==}
     cpu: [arm64]
     os: [linux]
+    libc: [glibc]
 
   '@rollup/rollup-linux-arm64-musl@4.46.2':
     resolution: {integrity: sha512-fhHFTutA7SM+IrR6lIfiHskxmpmPTJUXpWIsBXpeEwNgZzZZSg/q4i6FU4J8qOGyJ0TR+wXBwx/L7Ho9z0+uDg==}
     cpu: [arm64]
     os: [linux]
+    libc: [musl]
 
   '@rollup/rollup-linux-loongarch64-gnu@4.46.2':
     resolution: {integrity: sha512-i7wfGFXu8x4+FRqPymzjD+Hyav8l95UIZ773j7J7zRYc3Xsxy2wIn4x+llpunexXe6laaO72iEjeeGyUFmjKeA==}
     cpu: [loong64]
     os: [linux]
+    libc: [glibc]
 
   '@rollup/rollup-linux-ppc64-gnu@4.46.2':
     resolution: {integrity: sha512-B/l0dFcHVUnqcGZWKcWBSV2PF01YUt0Rvlurci5P+neqY/yMKchGU8ullZvIv5e8Y1C6wOn+U03mrDylP5q9Yw==}
     cpu: [ppc64]
     os: [linux]
+    libc: [glibc]
 
   '@rollup/rollup-linux-riscv64-gnu@4.46.2':
     resolution: {integrity: sha512-32k4ENb5ygtkMwPMucAb8MtV8olkPT03oiTxJbgkJa7lJ7dZMr0GCFJlyvy+K8iq7F/iuOr41ZdUHaOiqyR3iQ==}
     cpu: [riscv64]
     os: [linux]
+    libc: [glibc]
 
   '@rollup/rollup-linux-riscv64-musl@4.46.2':
     resolution: {integrity: sha512-t5B2loThlFEauloaQkZg9gxV05BYeITLvLkWOkRXogP4qHXLkWSbSHKM9S6H1schf/0YGP/qNKtiISlxvfmmZw==}
     cpu: [riscv64]
     os: [linux]
+    libc: [musl]
 
   '@rollup/rollup-linux-s390x-gnu@4.46.2':
     resolution: {integrity: sha512-YKjekwTEKgbB7n17gmODSmJVUIvj8CX7q5442/CK80L8nqOUbMtf8b01QkG3jOqyr1rotrAnW6B/qiHwfcuWQA==}
     cpu: [s390x]
     os: [linux]
+    libc: [glibc]
 
   '@rollup/rollup-linux-x64-gnu@4.46.2':
     resolution: {integrity: sha512-Jj5a9RUoe5ra+MEyERkDKLwTXVu6s3aACP51nkfnK9wJTraCC8IMe3snOfALkrjTYd2G1ViE1hICj0fZ7ALBPA==}
     cpu: [x64]
     os: [linux]
+    libc: [glibc]
 
   '@rollup/rollup-linux-x64-musl@4.46.2':
     resolution: {integrity: sha512-7kX69DIrBeD7yNp4A5b81izs8BqoZkCIaxQaOpumcJ1S/kmqNFjPhDu1LHeVXv0SexfHQv5cqHsxLOjETuqDuA==}
     cpu: [x64]
     os: [linux]
+    libc: [musl]
 
   '@rollup/rollup-win32-arm64-msvc@4.46.2':
     resolution: {integrity: sha512-wiJWMIpeaak/jsbaq2HMh/rzZxHVW1rU6coyeNNpMwk5isiPjSTx0a4YLSlYDwBH/WBvLz+EtsNqQScZTLJy3g==}
@@ -932,6 +1010,9 @@ packages:
   '@types/retry@0.12.0':
     resolution: {integrity: sha512-wWKOClTTiizcZhXnPY4wikVAwmdYHp8q6DmC+EJUzAMsycb7HB32Kh9RN4+0gExjmPmZSAQjgURXIGATPegAvA==}
 
+  '@types/ws@8.18.1':
+    resolution: {integrity: sha512-ThVF6DCVhA8kUGy+aazFQ4kXQ7E1Ty7A3ypFOe0IcJV8O/M511G99AW24irKrW56Wt44yG9+ij8FaqoBGkuBXg==}
+
   '@vercel/oidc@3.1.0':
     resolution: {integrity: sha512-Fw28YZpRnA3cAHHDlkt7xQHiJ0fcL+NRcIqsocZQUSmbzeIKRpwttJjik5ZGanXP+vlA4SbTg+AbA3bP363l+w==}
     engines: {node: '>= 20'}
@@ -974,6 +1055,10 @@ packages:
   '@vitest/utils@4.1.2':
     resolution: {integrity: sha512-xw2/TiX82lQHA06cgbqRKFb5lCAy3axQ4H4SoUFhUsg+wztiet+co86IAMDtF6Vm1hc7J6j09oh/rgDn+JdKIQ==}
 
+  accepts@2.0.0:
+    resolution: {integrity: sha512-5cvg6CtKwfgdmVqY1WIiXKc3Q1bkRqGLi+2W/6ao+6Y7gu/RCwRuAhGEzh5B4KlszSuTLgZYuqFqo5bImjNKng==}
+    engines: {node: '>= 0.6'}
+
   acorn@8.15.0:
     resolution: {integrity: sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==}
     engines: {node: '>=0.4.0'}
@@ -1052,6 +1137,10 @@ packages:
   binary-search@1.3.6:
     resolution: {integrity: sha512-nbE1WxOTTrUWIfsfZ4aHGYu5DOuNkbxGokjV6Z2kxfJK3uaAb8zNK1muzOeipoLHZjInT4Br88BHpzevc681xA==}
 
+  body-parser@2.2.2:
+    resolution: {integrity: sha512-oP5VkATKlNwcgvxi0vM0p/D3n2C3EReYVX+DNYs5TjZFn/oQt2j+4sVJtSMr18pdRr8wjTcBl6LoV+FUwzPmNA==}
+    engines: {node: '>=18'}
+
   bowser@2.14.1:
     resolution: {integrity: sha512-tzPjzCxygAKWFOJP011oxFHs57HzIhOEracIgAePE4pqB3LikALKnSzUyU4MGs9/iCEUuHlAJTjTc5M+u7YEGg==}
 
@@ -1071,10 +1160,22 @@ packages:
     peerDependencies:
       esbuild: '>=0.18'
 
+  bytes@3.1.2:
+    resolution: {integrity: sha512-/Nf7TyzTx6S3yRJObOAV7956r8cr2+Oj8AC5dt8wSP3BQAoeX58NoHyCU8P8zGkNXStjTSi6fzO6F0pBdcYbEg==}
+    engines: {node: '>= 0.8'}
+
   cac@6.7.14:
     resolution: {integrity: sha512-b6Ilus+c3RrdDk+JhLKUAQfzzgLEPy6wcXqS7f/xe1EETvsDP6GORG7SFuOs6cID5YkqchW/LXZbX5bc8j7ZcQ==}
     engines: {node: '>=8'}
 
+  call-bind-apply-helpers@1.0.2:
+    resolution: {integrity: sha512-Sp1ablJ0ivDkSzjcaJdxEunN5/XvksFJ2sMBFfq6x0ryhQV/2b/KwFe21cMpmHtPOSij8K99/wSfoEuTObmuMQ==}
+    engines: {node: '>= 0.4'}
+
+  call-bound@1.0.4:
+    resolution: {integrity: sha512-+ys997U96po4Kx/ABpBCqhA9EuxJaQWDQg7295H4hBphv3IZg0boBKuwYpt4YXp6MZ5AmZQnU/tyMTlRpaSejg==}
+    engines: {node: '>= 0.4'}
+
   chai@6.2.2:
     resolution: {integrity: sha512-NUPRluOfOiTKBKvWPtSD4PhFvWCqOi0BGStNWs57X9js7XGTprSmFoz5F0tWhR4WPjNeR9jXqdC7/UpSJTnlRg==}
     engines: {node: '>=18'}
@@ -1136,9 +1237,29 @@ packages:
     resolution: {integrity: sha512-5IKcdX0nnYavi6G7TtOhwkYzyjfJlatbjMjuLSfE2kYT5pMDOilZ4OvMhi637CcDICTmz3wARPoyhqyX1Y+XvA==}
     engines: {node: ^14.18.0 || >=16.10.0}
 
+  content-disposition@1.1.0:
+    resolution: {integrity: sha512-5jRCH9Z/+DRP7rkvY83B+yGIGX96OYdJmzngqnw2SBSxqCFPd0w2km3s5iawpGX8krnwSGmF0FW5Nhr0Hfai3g==}
+    engines: {node: '>=18'}
+
+  content-type@1.0.5:
+    resolution: {integrity: sha512-nTjqfcBFEipKdXCv4YDQWCfmcLZKm81ldF0pAopTvyrFGVbcR6P/VAAd5G7N+0tTr8QqiU0tFadD6FK4NtJwOA==}
+    engines: {node: '>= 0.6'}
+
   convert-source-map@2.0.0:
     resolution: {integrity: sha512-Kvp459HrV2FEJ1CAsi1Ku+MY3kasH19TFykTz2xWmMeq6bk2NU3XXvfJ+Q61m0xktWwt+1HSYf3JZsTms3aRJg==}
 
+  cookie-signature@1.2.2:
+    resolution: {integrity: sha512-D76uU73ulSXrD1UXF4KE2TMxVVwhsnCgfAyTg9k8P6KGZjlXKrOLe4dJQKI3Bxi5wjesZoFXJWElNWBjPZMbhg==}
+    engines: {node: '>=6.6.0'}
+
+  cookie@0.7.2:
+    resolution: {integrity: sha512-yki5XnKuf750l50uGTllt6kKILY4nQ1eNIQatoXEByZ5dWgnKqbnqmTrBE5B4N7lrMJKQ2ytWMiTO2o0v6Ew/w==}
+    engines: {node: '>= 0.6'}
+
+  cors@2.8.6:
+    resolution: {integrity: sha512-tJtZBBHA6vjIAaF6EnIaq6laBBP9aq/Y3ouVJjEfoHbRBcHBAHYcMh/w8LDrk2PvIMMq8gmopa5D4V8RmbrxGw==}
+    engines: {node: '>= 0.10'}
+
   cross-spawn@7.0.6:
     resolution: {integrity: sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==}
     engines: {node: '>= 8'}
@@ -1160,10 +1281,23 @@ packages:
       supports-color:
         optional: true
 
+  debug@4.4.3:
+    resolution: {integrity: sha512-RGwwWnwQvkVfavKVt22FGLw+xYSdzARwm0ru6DhTVA3umU5hZc28V3kO4stgYryrTlLpuvgI9GiijltAjNbcqA==}
+    engines: {node: '>=6.0'}
+    peerDependencies:
+      supports-color: '*'
+    peerDependenciesMeta:
+      supports-color:
+        optional: true
+
   degenerator@5.0.1:
     resolution: {integrity: sha512-TllpMR/t0M5sqCXfj85i4XaAzxmS5tVA16dqvdkMwGmzI+dXLXnw3J+3Vdv7VKw+ThlTMboK6i9rnZ6Nntj5CQ==}
     engines: {node: '>= 14'}
 
+  depd@2.0.0:
+    resolution: {integrity: sha512-g7nH6P6dyDioJogAAGprGpCtVImJhpPk/roCzdb3fIh61/s/nPsfR6onyMwkCAR/OlC3yBC0lESvUoQEAssIrw==}
+    engines: {node: '>= 0.8'}
+
   dotenv-cli@8.0.0:
     resolution: {integrity: sha512-aLqYbK7xKOiTMIRf1lDPbI+Y+Ip/wo5k3eyp6ePysVaSqbyxjyK3dK35BTxG+rmd7djf5q2UPs4noPNH+cj0Qw==}
     hasBin: true
@@ -1176,12 +1310,19 @@ packages:
     resolution: {integrity: sha512-uBq4egWHTcTt33a72vpSG0z3HnPuIl6NqYcTrKEg2azoEyl2hpW0zqlxysq2pK9HlDIHyHyakeYaYnSAwd8bow==}
     engines: {node: '>=12'}
 
+  dunder-proto@1.0.1:
+    resolution: {integrity: sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A==}
+    engines: {node: '>= 0.4'}
+
   eastasianwidth@0.2.0:
     resolution: {integrity: sha512-I88TYZWc9XiYHRQ4/3c5rjjfgkjhLyW2luGIheGERbNQ6OY7yTybanSpDXZa8y7VUP9YmDcYa+eyq4ca7iLqWA==}
 
   ecdsa-sig-formatter@1.0.11:
     resolution: {integrity: sha512-nagl3RYrbNv6kQkeJIpt6NJZy8twLB/2vtz6yN9Z4vRKHN4/QZJIEbqohALSgwKdnksuY3k5Addp5lg8sVoVcQ==}
 
+  ee-first@1.1.1:
+    resolution: {integrity: sha512-WMwm9LhRUo+WUaRN+vRuETqG89IgZphVSNkdFgeb6sS/E4OrDIN7t48CAewSHXc6C8lefD8KKfr5vY61brQlow==}
+
   emoji-regex@10.4.0:
     resolution: {integrity: sha512-EC+0oUMY1Rqm4O6LLrgjtYDvcVYTy7chDnM4Q7030tP4Kwj3u/pR6gP9ygnp2CJMK5Gq+9Q2oqmrFJAz01DXjw==}
 
@@ -1191,18 +1332,37 @@ packages:
   emoji-regex@9.2.2:
     resolution: {integrity: sha512-L18DaJsXSUk2+42pv8mLs5jJT2hqFkFE4j21wOmgbUqsZ2hL72NsUU785g9RXgo3s0ZNgVl42TiHp3ZtOv/Vyg==}
 
+  encodeurl@2.0.0:
+    resolution: {integrity: sha512-Q0n9HRi4m6JuGIV1eFlmvJB7ZEVxu93IrMyiMsGC0lrMJMWzRgx6WGquyfQgZVb31vhGgXnfmPNNXmxnOkRBrg==}
+    engines: {node: '>= 0.8'}
+
   environment@1.1.0:
     resolution: {integrity: sha512-xUtoPkMggbz0MPyPiIWr1Kp4aeWJjDZ6SMvURhimjdZgsRuDplF5/s9hcgGhyXMhs+6vpnuoiZ2kFiu3FMnS8Q==}
     engines: {node: '>=18'}
 
+  es-define-property@1.0.1:
+    resolution: {integrity: sha512-e3nRfgfUZ4rNGL232gUgX06QNyyez04KdjFrF+LTRoOXmrOgFKDg4BCdsjW8EnT69eqdYGmRpJwiPVYNrCaW3g==}
+    engines: {node: '>= 0.4'}
+
+  es-errors@1.3.0:
+    resolution: {integrity: sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==}
+    engines: {node: '>= 0.4'}
+
   es-module-lexer@2.0.0:
     resolution: {integrity: sha512-5POEcUuZybH7IdmGsD8wlf0AI55wMecM9rVBTI/qEAy2c1kTOm3DjFYjrBdI2K3BaJjJYfYFeRtM0t9ssnRuxw==}
 
+  es-object-atoms@1.1.1:
+    resolution: {integrity: sha512-FGgH2h8zKNim9ljj7dankFPcICIK9Cp5bm+c2gQSYePhpaG5+esrLODihIorn+Pe6FGJzWhXQotPv73jTaldXA==}
+    engines: {node: '>= 0.4'}
+
   esbuild@0.25.8:
     resolution: {integrity: sha512-vVC0USHGtMi8+R4Kz8rt6JhEWLxsv9Rnu/lGYbPR8u47B+DCBksq9JarW0zOO7bs37hyOK1l2/oqtbciutL5+Q==}
     engines: {node: '>=18'}
     hasBin: true
 
+  escape-html@1.0.3:
+    resolution: {integrity: sha512-NiSupZ4OeuGwr68lGIeym/ksIZMJodUGOSCZ/FSnTxcrekbvqrgdUxlJOMpijaKZVjAJrWrGs/6Jy8OMuyj9ow==}
+
   escodegen@2.1.0:
     resolution: {integrity: sha512-2NlIDTwUWJN0mRPQOdtQBzbUHvdGY2P1VXSyU83Q3xKxM7WHX2Ql8dKq782Q9TgQUNOLEzEYu9bzLNj1q88I5w==}
     engines: {node: '>=6.0'}
@@ -1224,6 +1384,10 @@ packages:
     resolution: {integrity: sha512-kVscqXk4OCp68SZ0dkgEKVi6/8ij300KBWTJq32P/dYeWTSwK41WyTxalN1eRmA5Z9UU/LX9D7FWSmV9SAYx6g==}
     engines: {node: '>=0.10.0'}
 
+  etag@1.8.1:
+    resolution: {integrity: sha512-aIL5Fx7mawVa300al2BnEE4iNvo1qETxLrPI/o05L7z6go7fCw1J6EQmbK4FmJ2AS7kgVF/KEZWufBfdClMcPg==}
+    engines: {node: '>= 0.6'}
+
   eventemitter3@5.0.1:
     resolution: {integrity: sha512-GWkBvjiSZK87ELrYOSESUYeVIc9mvLLf/nXalMOS5dYrgZq9o5OVkbZAVM06CVxYsCwH9BDZFPlQTlPA1j4ahA==}
 
@@ -1231,6 +1395,10 @@ packages:
     resolution: {integrity: sha512-Vo1ab+QXPzZ4tCa8SwIHJFaSzy4R6SHf7BY79rFBDf0idraZWAkYrDjDj8uWaSm3S2TK+hJ7/t1CEmZ7jXw+pg==}
     engines: {node: '>=18.0.0'}
 
+  eventsource@3.0.7:
+    resolution: {integrity: sha512-CRT1WTyuQoD771GW56XEZFQ/ZoSfWid1alKGDYMmkt2yl8UXrVR4pspqWNEcqKvVIzg6PAltWjxcSSPrboA4iA==}
+    engines: {node: '>=18.0.0'}
+
   execa@8.0.1:
     resolution: {integrity: sha512-VyhnebXciFV2DESc+p6B+y0LjSm0krU4OgJN44qFAhBY0TJ+1V61tYD2+wHusZ6F9n5K+vl8k0sTy7PEfV4qpg==}
     engines: {node: '>=16.17'}
@@ -1239,6 +1407,16 @@ packages:
     resolution: {integrity: sha512-knvyeauYhqjOYvQ66MznSMs83wmHrCycNEN6Ao+2AeYEfxUIkuiVxdEa1qlGEPK+We3n0THiDciYSsCcgW/DoA==}
     engines: {node: '>=12.0.0'}
 
+  express-rate-limit@8.4.1:
+    resolution: {integrity: sha512-NGVYwQSAyEQgzxX1iCM978PP9AdO/hW93gMcF6ZwQCm+rFvLsBH6w4xcXWTcliS8La5EPRN3p9wzItqBwJrfNw==}
+    engines: {node: '>= 16'}
+    peerDependencies:
+      express: '>= 4.11'
+
+  express@5.2.1:
+    resolution: {integrity: sha512-hIS4idWWai69NezIdRt2xFVofaF4j+6INOpJlVOLDO8zXGpUVEVzIYk12UUi2JzjEzWL3IOAxcTubgz9Po0yXw==}
+    engines: {node: '>= 18'}
+
   extend@3.0.2:
     resolution: {integrity: sha512-fjquC59cD7CyW6urNXK0FBufkZcoiGG80wTuPujX590cB5Ttln20E2UB4S/WARVqhXffZl2LNgS+gQdPIIim/g==}
 
@@ -1283,6 +1461,10 @@ packages:
     resolution: {integrity: sha512-YsGpe3WHLK8ZYi4tWDg2Jy3ebRz2rXowDxnld4bkQB00cc/1Zw9AWnC0i9ztDJitivtQvaI9KaLyKrc+hBW0yg==}
     engines: {node: '>=8'}
 
+  finalhandler@2.1.1:
+    resolution: {integrity: sha512-S8KoZgRZN+a5rNwqTxlZZePjT/4cnm0ROV70LedRHZ0p8u9fRID0hJUZQpkKLzro8LfmC8sx23bY6tVNxv8pQA==}
+    engines: {node: '>= 18.0.0'}
+
   fix-dts-default-cjs-exports@1.0.1:
     resolution: {integrity: sha512-pVIECanWFC61Hzl2+oOCtoJ3F17kglZC/6N94eRWycFgBH35hHx0Li604ZIzhseh97mf2p0cv7vVrOZGoqhlEg==}
 
@@ -1294,11 +1476,22 @@ packages:
     resolution: {integrity: sha512-buewHzMvYL29jdeQTVILecSaZKnt/RJWjoZCF5OW60Z67/GmSLBkOFM7qh1PI3zFNtJbaZL5eQu1vLfazOwj4g==}
     engines: {node: '>=12.20.0'}
 
+  forwarded@0.2.0:
+    resolution: {integrity: sha512-buRG0fpBtRHSTCOASe6hD258tEubFoRLb4ZNA6NxMVHNw2gOcwHo9wyablzMzOA5z9xA9L1KNjk/Nt6MT9aYow==}
+    engines: {node: '>= 0.6'}
+
+  fresh@2.0.0:
+    resolution: {integrity: sha512-Rx/WycZ60HOaqLKAi6cHRKKI7zxWbJ31MhntmtwMoaTeF7XFH9hhBp8vITaMidfljRQ6eYWCKkaTK+ykVJHP2A==}
+    engines: {node: '>= 0.8'}
+
   fsevents@2.3.3:
     resolution: {integrity: sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==}
     engines: {node: ^8.16.0 || ^10.6.0 || >=11.0.0}
     os: [darwin]
 
+  function-bind@1.1.2:
+    resolution: {integrity: sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==}
+
   gaxios@7.1.4:
     resolution: {integrity: sha512-bTIgTsM2bWn3XklZISBTQX7ZSddGW+IO3bMdGaemHZ3tbqExMENHLx6kKZ/KlejgrMtj8q7wBItt51yegqalrA==}
     engines: {node: '>=18'}
@@ -1311,6 +1504,14 @@ packages:
     resolution: {integrity: sha512-vpeMIQKxczTD/0s2CdEWHcb0eeJe6TFjxb+J5xgX7hScxqrGuyjmv4c1D4A/gelKfyox0gJJwIHF+fLjeaM8kQ==}
     engines: {node: '>=18'}
 
+  get-intrinsic@1.3.0:
+    resolution: {integrity: sha512-9fSjSaos/fRIVIp+xSJlE6lfwhES7LNtKaCBIamHsjr2na1BiABJPo0mOjjz8GJDURarmCPGqaiVg5mfjb98CQ==}
+    engines: {node: '>= 0.4'}
+
+  get-proto@1.0.1:
+    resolution: {integrity: sha512-sTSfBjoXBp89JvIKIefqw7U2CCebsc74kiY6awiGogKtoSGbgjYE/G/+l9sF3MWFPNc9IcoOC4ODfKHfxFmp0g==}
+    engines: {node: '>= 0.4'}
+
   get-stream@8.0.1:
     resolution: {integrity: sha512-VaUJspBffn/LMCJVoMvSAdmscJyS1auj5Zulnn5UoYcY531UWmdwhRWkcGKnGU93m5HSXP9LP2usOryrBtQowA==}
     engines: {node: '>=16'}
@@ -1335,13 +1536,33 @@ packages:
     resolution: {integrity: sha512-eAmLkjDjAFCVXg7A1unxHsLf961m6y17QFqXqAXGj/gVkKFrEICfStRfwUlGNfeCEjNRa32JEWOUTlYXPyyKvA==}
     engines: {node: '>=14'}
 
+  gopd@1.2.0:
+    resolution: {integrity: sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==}
+    engines: {node: '>= 0.4'}
+
   has-flag@4.0.0:
     resolution: {integrity: sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==}
     engines: {node: '>=8'}
 
+  has-symbols@1.1.0:
+    resolution: {integrity: sha512-1cDNdwJ2Jaohmb3sg4OmKaMBwuC48sYni5HUw2DvsC8LjGTLK9h+eb1X6RyuOHe4hT0ULCW68iomhjUoKUqlPQ==}
+    engines: {node: '>= 0.4'}
+
+  hasown@2.0.3:
+    resolution: {integrity: sha512-ej4AhfhfL2Q2zpMmLo7U1Uv9+PyhIZpgQLGT1F9miIGmiCJIoCgSmczFdrc97mWT4kVY72KA+WnnhJ5pghSvSg==}
+    engines: {node: '>= 0.4'}
+
+  hono@4.12.16:
+    resolution: {integrity: sha512-jN0ZewiNAWSe5khM3EyCmBb250+b40wWbwNILNfEvq84VREWwOIkuUsFONk/3i3nqkz7Oe1PcpM2mwQEK2L9Kg==}
+    engines: {node: '>=16.9.0'}
+
   html-escaper@2.0.2:
     resolution: {integrity: sha512-H2iMtd0I4Mt5eYiapRdIDjp+XzelXQ0tFE4JS7YFwFevXXMmOp9myNrUvCg0D6ws8iqkRPBfKHgbwig1SmlLfg==}
 
+  http-errors@2.0.1:
+    resolution: {integrity: sha512-4FbRdAX+bSdmo4AUFuS0WNiPz8NgFt+r8ThgNWmlrjQjt1Q7ZR9+zTlce2859x4KSXrwIsaeTqDoKQmtP8pLmQ==}
+    engines: {node: '>= 0.8'}
+
   http-proxy-agent@7.0.2:
     resolution: {integrity: sha512-T1gkAiYYDWYx3V5Bmyu7HcfcvL7mUrTWiM6yOfa3PIphViJ/gFPbvidQ+veqSOHci/PxBcDabeUNCzpOODJZig==}
     engines: {node: '>= 14'}
@@ -1354,6 +1575,13 @@ packages:
     resolution: {integrity: sha512-AXcZb6vzzrFAUE61HnN4mpLqd/cSIwNQjtNWR0euPm6y0iqx3G4gOXaIDdtdDwZmhwe82LA6+zinmW4UBWVePQ==}
     engines: {node: '>=16.17.0'}
 
+  iconv-lite@0.7.2:
+    resolution: {integrity: sha512-im9DjEDQ55s9fL4EYzOAv0yMqmMBSZp6G0VvFyTMPKWxiSBHUj9NW/qqLmXUwXrrM7AvqSlTCfvqRb0cM8yYqw==}
+    engines: {node: '>=0.10.0'}
+
+  inherits@2.0.4:
+    resolution: {integrity: sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==}
+
   install@0.13.0:
     resolution: {integrity: sha512-zDml/jzr2PKU9I8J/xyZBQn8rPCAY//UOYNmR01XwNwyfhEWObo2SWfSl1+0tm1u6PhxLwDnfsT/6jB7OUxqFA==}
     engines: {node: '>= 0.10'}
@@ -1362,6 +1590,10 @@ packages:
     resolution: {integrity: sha512-XXADHxXmvT9+CRxhXg56LJovE+bmWnEWB78LB83VZTprKTmaC5QfruXocxzTZ2Kl0DNwKuBdlIhjL8LeY8Sf8Q==}
     engines: {node: '>= 12'}
 
+  ipaddr.js@1.9.1:
+    resolution: {integrity: sha512-0KI/607xoxSToH7GjN1FfSbLoU0+btTicjsQSWQlh/hZykN8KpmMf7uYwPW3R+akZ6R/w18ZlXSHBYXiYUPO3g==}
+    engines: {node: '>= 0.10'}
+
   is-any-array@2.0.1:
     resolution: {integrity: sha512-UtilS7hLRu++wb/WBAw9bNuP1Eg04Ivn1vERJck8zJthEvXCBEBpGR/33u/xLKWEQf95803oalHrVDptcAvFdQ==}
 
@@ -1381,6 +1613,9 @@ packages:
     resolution: {integrity: sha512-41Cifkg6e8TylSpdtTpeLVMqvSBEVzTttHvERD741+pnZ8ANv0004MRL43QKPDlK9cGvNp6NZWZUBlbGXYxxng==}
     engines: {node: '>=0.12.0'}
 
+  is-promise@4.0.0:
+    resolution: {integrity: sha512-hvpoI6korhJMnej285dSg6nu1+e6uxs7zG3BYAm5byqDsgJNWwxzM6z6iZiAgQR4TJ30JmBTOwqZUw3WlyH3AQ==}
+
   is-stream@3.0.0:
     resolution: {integrity: sha512-LnQR4bZ9IADDRSkvpqMGvt/tEJWclzklNgSw48V5EAaAeDd6qGvN8ei6k5p0tvxSR171VmGyHuTiAOfxAbr8kA==}
     engines: {node: ^12.20.0 || ^14.13.1 || >=16.0.0}
@@ -1403,6 +1638,9 @@ packages:
   jackspeak@3.4.3:
     resolution: {integrity: sha512-OGlZQpz2yfahA/Rd1Y8Cd9SIEsqvXkLVoSw/cgwhnhFMDbsQFeZYoJJ7bIZBS9BcamUW96asq/npPWugM+RQBw==}
 
+  jose@6.2.3:
+    resolution: {integrity: sha512-YYVDInQKFJfR/xa3ojUTl8c2KoTwiL1R5Wg9YCydwH0x0B9grbzlg5HC7mMjCtUJjbQ/YnGEZIhI5tCgfTb4Hw==}
+
   joycon@3.1.1:
     resolution: {integrity: sha512-34wB/Y7MW7bzjKRjUKTa46I2Z7eV62Rkhva+KkopW7Qvv/OSWBqvkSY7vusOPrNuZcUG3tApvdVgNB8POj3SPw==}
     engines: {node: '>=10'}
@@ -1428,6 +1666,9 @@ packages:
   json-schema-traverse@1.0.0:
     resolution: {integrity: sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug==}
 
+  json-schema-typed@8.0.2:
+    resolution: {integrity: sha512-fQhoXdcvc3V28x7C7BMs4P5+kNlgUURe2jmUT1T//oBRMDrqy1QPelJimwZGo7Hg9VPV3EQV5Bnq4hbFy2vetA==}
+
   json-schema@0.4.0:
     resolution: {integrity: sha512-es94M3nTIfsEPisRafak+HDLfHXnKBhV3vU5eqPcS3flIWqcxJWgXHXiey3YrpaNsanY5ei1VoYEbOzijuq9BA==}
 
@@ -1490,6 +1731,18 @@ packages:
     resolution: {integrity: sha512-hXdUTZYIVOt1Ex//jAQi+wTZZpUpwBj/0QsOzqegb3rGMMeJiSEu5xLHnYfBrRV4RH2+OCSOO95Is/7x1WJ4bw==}
     engines: {node: '>=10'}
 
+  math-intrinsics@1.1.0:
+    resolution: {integrity: sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==}
+    engines: {node: '>= 0.4'}
+
+  media-typer@1.1.0:
+    resolution: {integrity: sha512-aisnrDP4GNe06UcKFnV5bfMNPBUw4jsLGaWwWfnH3v02GnBuXX2MCVn5RbrWo0j3pczUilYblq7fQ7Nw2t5XKw==}
+    engines: {node: '>= 0.8'}
+
+  merge-descriptors@2.0.0:
+    resolution: {integrity: sha512-Snk314V5ayFLhp3fkUREub6WtjBfPdCPY1Ln8/8munuLuiYhsABgBVWsozAG+MWMbVEvcdcpbi9R7ww22l9Q3g==}
+    engines: {node: '>=18'}
+
   merge-stream@2.0.0:
     resolution: {integrity: sha512-abv/qOcuPfk3URPfDzmZU1LKmuw8kT+0nIHvKrKgFrwifol/doWcdA4ZqsWQ8ENrFKkd67Mfpo/LovbIUsbt3w==}
 
@@ -1497,6 +1750,14 @@ packages:
     resolution: {integrity: sha512-PXwfBhYu0hBCPw8Dn0E+WDYb7af3dSLVWKi3HGv84IdF4TyFoC0ysxFd0Goxw7nSv4T/PzEJQxsYsEiFCKo2BA==}
     engines: {node: '>=8.6'}
 
+  mime-db@1.54.0:
+    resolution: {integrity: sha512-aU5EJuIN2WDemCcAp2vFBfp/m4EAhWJnUNSSw0ixs7/kXbd6Pg64EmwJkNdFhB8aWt1sH2CTXrLxo/iAGV3oPQ==}
+    engines: {node: '>= 0.6'}
+
+  mime-types@3.0.2:
+    resolution: {integrity: sha512-Lbgzdk0h4juoQ9fCKXW4by0UJqj+nOOrI9MJ1sSj4nI8aI2eo1qmvQEie4VD1glsS250n15LsWsYtCugiStS5A==}
+    engines: {node: '>=18'}
+
   mimic-fn@4.0.0:
     resolution: {integrity: sha512-vqiC06CuhBTUdZH+RYl8sFrL096vA45Ok5ISO6sE/Mr1jRbGH4Csnhi8f3wKVl7x8mO4Au7Ir9D3Oyv1VYMFJw==}
     engines: {node: '>=12'}
@@ -1552,6 +1813,10 @@ packages:
     engines: {node: ^10 || ^12 || ^13.7 || ^14 || >=15.0.1}
     hasBin: true
 
+  negotiator@1.0.0:
+    resolution: {integrity: sha512-8Ofs/AUQh8MaEcrlq5xOX0CQ9ypTF5dl78mjlMNfOK08fzpgTHQRQPBxcPlEtIw0yRpws+Zo/3r+5WRby7u3Gg==}
+    engines: {node: '>= 0.6'}
+
   netmask@2.1.1:
     resolution: {integrity: sha512-eonl3sLUha+S1GzTPxychyhnUzKyeQkZ7jLjKrBagJgPla13F+uQ71HgpFefyHgqrjEbCPkDArxYsjY8/+gLKA==}
     engines: {node: '>= 0.4.0'}
@@ -1573,9 +1838,20 @@ packages:
     resolution: {integrity: sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg==}
     engines: {node: '>=0.10.0'}
 
+  object-inspect@1.13.4:
+    resolution: {integrity: sha512-W67iLl4J2EXEGTbfeHCffrjDfitvLANg0UlX3wFUUSTx92KXRFegMHUVgSqE+wvhAbi4WqjGg9czysTV2Epbew==}
+    engines: {node: '>= 0.4'}
+
   obug@2.1.1:
     resolution: {integrity: sha512-uTqF9MuPraAQ+IsnPf366RG4cP9RtUi7MLO1N3KEc+wb0a6yKpeL0lmk2IB1jY5KHPAlTc6T/JRdC/YqxHNwkQ==}
 
+  on-finished@2.4.1:
+    resolution: {integrity: sha512-oVlzkg3ENAhCk2zdv7IJwd/QUD4z2RxRwpkcGY8psCVcCYZNq4wYnVWALHM+brtuJjePWiYF/ClmuDr8Ch5+kg==}
+    engines: {node: '>= 0.8'}
+
+  once@1.4.0:
+    resolution: {integrity: sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==}
+
   onetime@6.0.0:
     resolution: {integrity: sha512-1FlR+gjXK7X+AsAHso35MnyN5KqGwJRi/31ft6x0M194ht7S+rWAvd7PHss9xSKMzE0asv1pyIHaJYq+BbacAQ==}
     engines: {node: '>=12'}
@@ -1623,6 +1899,10 @@ packages:
   package-json-from-dist@1.0.1:
     resolution: {integrity: sha512-UEZIS3/by4OC8vL3P2dTXRETpebLI2NiI5vIrjaD/5UtrkFX/tNbwjTSRAGC/+7CAo2pIcBaRgWmcBBHcsaCIw==}
 
+  parseurl@1.3.3:
+    resolution: {integrity: sha512-CiyeOxFT/JZyN5m0z9PfXw4SCBJ6Sygz1Dpl0wqjlhDEGGBP1GnsUVEL0p63hoG1fcj3fHynXi9NYO4nWOL+qQ==}
+    engines: {node: '>= 0.8'}
+
   partial-json@0.1.7:
     resolution: {integrity: sha512-Njv/59hHaokb/hRUjce3Hdv12wd60MtM9Z5Olmn+nehe0QDAsRtRbJPvJ0Z91TusF0SuZRIvnM+S4l6EIP8leA==}
 
@@ -1642,6 +1922,9 @@ packages:
     resolution: {integrity: sha512-Xa4Nw17FS9ApQFJ9umLiJS4orGjm7ZzwUrwamcGQuHSzDyth9boKDaycYdDcZDuqYATXw4HFXgaqWTctW/v1HA==}
     engines: {node: '>=16 || 14 >=14.18'}
 
+  path-to-regexp@8.4.2:
+    resolution: {integrity: sha512-qRcuIdP69NPm4qbACK+aDogI5CBDMi1jKe0ry5rSQJz8JVLsC7jV8XpiJjGRLLol3N+R5ihGYcrPLTno6pAdBA==}
+
   pathe@2.0.3:
     resolution: {integrity: sha512-WUjGcAqP1gQacoQe+OBJsFA7Ld4DyXuUIjZ5cc75cLHvJ7dtNsTugphxIADwspS+AraAUePCKrSVtPLFj/F88w==}
 
@@ -1665,6 +1948,10 @@ packages:
     resolution: {integrity: sha512-TfySrs/5nm8fQJDcBDuUng3VOUKsd7S+zqvbOTiGXHfxX4wK31ard+hoNuvkicM/2YFzlpDgABOevKSsB4G/FA==}
     engines: {node: '>= 6'}
 
+  pkce-challenge@5.0.1:
+    resolution: {integrity: sha512-wQ0b/W4Fr01qtpHlqSqspcj3EhBvimsdh0KlHhH8HRZnMsEa0ea2fTULOXOS9ccQr3om+GcGRk4e+isrZWV8qQ==}
+    engines: {node: '>=16.20.0'}
+
   pkg-types@1.3.1:
     resolution: {integrity: sha512-/Jm5M4RvtBFVkKWRu2BLUTNP8/M2a+UwuAX+ae4770q1qVGtfjG+WTCupoZixokjmHiry8uI+dlY8KXYV5HVVQ==}
 
@@ -1694,6 +1981,10 @@ packages:
     resolution: {integrity: sha512-3wY1AxV+VBNW8Yypfd1yQY9pXnqTAN+KwQxL8iYm3/BjKYMNg4i0owhEe26PWDOMaIrzeeF98Lqd5NGz4omiIg==}
     engines: {node: '>=12.0.0'}
 
+  proxy-addr@2.0.7:
+    resolution: {integrity: sha512-llQsMLSUDUPT44jdrU/O37qlnifitDP+ZwrmmZcoSKyLKvtZxpyV0n2/bD/N4tBAAZ/gJEdZU7KMraoK1+XYAg==}
+    engines: {node: '>= 0.10'}
+
   proxy-agent@6.5.0:
     resolution: {integrity: sha512-TmatMXdr2KlRiA2CyDu8GqR8EjahTG3aY3nXjdzFyoZbmB8hrBsTyMezhULIXKnC0jpfjlmiZ3+EaCzoInSu/A==}
     engines: {node: '>= 14'}
@@ -1705,6 +1996,18 @@ packages:
     resolution: {integrity: sha512-vYt7UD1U9Wg6138shLtLOvdAu+8DsC/ilFtEVHcH+wydcSpNE20AfSOduf6MkRFahL5FY7X1oU7nKVZFtfq8Fg==}
     engines: {node: '>=6'}
 
+  qs@6.15.1:
+    resolution: {integrity: sha512-6YHEFRL9mfgcAvql/XhwTvf5jKcOiiupt2FiJxHkiX1z4j7WL8J/jRHYLluORvc1XxB5rV20KoeK00gVJamspg==}
+    engines: {node: '>=0.6'}
+
+  range-parser@1.2.1:
+    resolution: {integrity: sha512-Hrgsx+orqoygnmhFbKaHE6c296J+HTAQXoxEF6gNupROmmGJRoyzfG3ccAveqCBrwr/2yxQ5BVd/GTl5agOwSg==}
+    engines: {node: '>= 0.6'}
+
+  raw-body@3.0.2:
+    resolution: {integrity: sha512-K5zQjDllxWkf7Z5xJdV0/B0WTNqx6vxG70zJE4N0kBs4LovmEYWJzQGxC9bS9RAKu3bgM40lrd5zoLJ12MQ5BA==}
+    engines: {node: '>= 0.10'}
+
   readdirp@4.1.2:
     resolution: {integrity: sha512-GDhwkLfywWL2s6vEjyhri+eXmfH6j1L7JE27WhqLeYzoh/A3DBaYGEj2H/HFZCn/kMfim73FXxEJTw06WtxQwg==}
     engines: {node: '>= 14.18.0'}
@@ -1733,14 +2036,32 @@ packages:
     engines: {node: '>=18.0.0', npm: '>=8.0.0'}
     hasBin: true
 
+  router@2.2.0:
+    resolution: {integrity: sha512-nLTrUKm2UyiL7rlhapu/Zl45FwNgkZGaCpZbIHajDYgwlJCOzLSk+cIPAnsEqV955GjILJnKbdQC1nVPz+gAYQ==}
+    engines: {node: '>= 18'}
+
   safe-buffer@5.2.1:
     resolution: {integrity: sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==}
 
+  safer-buffer@2.1.2:
+    resolution: {integrity: sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==}
+
   semver@7.7.2:
     resolution: {integrity: sha512-RF0Fw+rO5AMf9MAyaRXI4AV0Ulj5lMHqVxxdSgiVbixSCXoEmmX/jk0CuJw4+3SqroYO9VoUh+HcuJivvtJemA==}
     engines: {node: '>=10'}
     hasBin: true
 
+  send@1.2.1:
+    resolution: {integrity: sha512-1gnZf7DFcoIcajTjTwjwuDjzuz4PPcY2StKPlsGAQ1+YH20IRVrBaXSWmdjowTJ6u8Rc01PoYOGHXfP1mYcZNQ==}
+    engines: {node: '>= 18'}
+
+  serve-static@2.2.1:
+    resolution: {integrity: sha512-xRXBn0pPqQTVQiC8wyQrKs2MOlX24zQ0POGaj0kultvoOCstBQM5yvOhAVSUwOMjQtTvsPWoNCHfPGwaaQJhTw==}
+    engines: {node: '>= 18'}
+
+  setprototypeof@1.2.0:
+    resolution: {integrity: sha512-E5LDX7Wrp85Kil5bhZv46j8jOeboKq5JMmYM3gVGdGH8xFpPWXUMsNrlODCrkoxMEeNi/XZIwuRvY4XNwYMJpw==}
+
   shebang-command@2.0.0:
     resolution: {integrity: sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA==}
     engines: {node: '>=8'}
@@ -1749,6 +2070,22 @@ packages:
     resolution: {integrity: sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==}
     engines: {node: '>=8'}
 
+  side-channel-list@1.0.1:
+    resolution: {integrity: sha512-mjn/0bi/oUURjc5Xl7IaWi/OJJJumuoJFQJfDDyO46+hBWsfaVM65TBHq2eoZBhzl9EchxOijpkbRC8SVBQU0w==}
+    engines: {node: '>= 0.4'}
+
+  side-channel-map@1.0.1:
+    resolution: {integrity: sha512-VCjCNfgMsby3tTdo02nbjtM/ewra6jPHmpThenkTYh8pG9ucZ/1P8So4u4FGBek/BjpOVsDCMoLA/iuBKIFXRA==}
+    engines: {node: '>= 0.4'}
+
+  side-channel-weakmap@1.0.2:
+    resolution: {integrity: sha512-WPS/HvHQTYnHisLo9McqBHOJk2FkHO/tlpvldyrnem4aeQp4hai3gythswg6p01oSoTl58rcpiFAjF2br2Ak2A==}
+    engines: {node: '>= 0.4'}
+
+  side-channel@1.1.0:
+    resolution: {integrity: sha512-ZX99e6tRweoUXqR+VBrslhda51Nh5MTQwou5tnUDgbtyM0dBgmhEDtWGP/xbKn6hqfPRHujUNwz5fy/wbbhnpw==}
+    engines: {node: '>= 0.4'}
+
   siginfo@2.0.0:
     resolution: {integrity: sha512-ybx0WO1/8bSBLEWXZvEd7gMW3Sn3JFlW3TvX1nREbDLRNQNaeNN8WK0meBwPdAaOI7TtRRRJn/Es1zhrrCHu7g==}
 
@@ -1796,6 +2133,10 @@ packages:
   stackback@0.0.2:
     resolution: {integrity: sha512-1XMJE5fQo1jGH6Y/7ebnwPOBEkIEnT4QF32d5R1+VXdXveM0IBMJt8zfaxX1P3QhVwrYe+576+jkANtSS2mBbw==}
 
+  statuses@2.0.2:
+    resolution: {integrity: sha512-DvEy55V3DB7uknRo+4iOGT5fP1slR8wQohVdknigZPMpMstaKJQWhwiYBACJE3Ul2pTnATihhBYnRhZQHGBiRw==}
+    engines: {node: '>= 0.8'}
+
   std-env@4.0.0:
     resolution: {integrity: sha512-zUMPtQ/HBY3/50VbpkupYHbRroTRZJPRLvreamgErJVys0ceuzMkD44J/QjqhHjOzK42GQ3QZIeFG1OYfOtKqQ==}
 
@@ -1872,6 +2213,10 @@ packages:
     resolution: {integrity: sha512-65P7iz6X5yEr1cwcgvQxbbIw7Uk3gOy5dIdtZ4rDveLqhrdJP+Li/Hx6tyK0NEb+2GCyneCMJiGqrADCSNk8sQ==}
     engines: {node: '>=8.0'}
 
+  toidentifier@1.0.1:
+    resolution: {integrity: sha512-o5sSPKEkg/DIQNmH43V0/uerLrpzVedkUh8tGNvaeXpfpuwjKenlSox/2O/BTlZUtEe+JG7s5YhEz608PlAHRA==}
+    engines: {node: '>=0.6'}
+
   tr46@1.0.1:
     resolution: {integrity: sha512-dTpowEjclQ7Kgx5SdBkqRzVhERQXov8/l9Ft9dVM9fmg0W0KQSVaXX9T4i6twCPNtYiZM53lpSSUAwJbFPOHxA==}
 
@@ -1917,6 +2262,10 @@ packages:
       typescript:
         optional: true
 
+  type-is@2.0.1:
+    resolution: {integrity: sha512-OZs6gsjF4vMp32qrCbiVSkrFmXtG/AZhY3t0iAMrMBiAZyV9oALtXO8hsrHbMXF9x6L3grlFuwW2oAz7cav+Gw==}
+    engines: {node: '>= 0.6'}
+
   typescript@5.8.3:
     resolution: {integrity: sha512-p1diW6TqL9L07nNxvRMM7hMMw4c5XOo/1ibL4aAIGmSAt9slTE1Xgw5KWuof2uTOvCg9BY7ZRi+GaF+7sfgPeQ==}
     engines: {node: '>=14.17'}
@@ -1932,12 +2281,20 @@ packages:
     resolution: {integrity: sha512-xXnp4kTyor2Zq+J1FfPI6Eq3ew5h6Vl0F/8d9XU5zZQf1tX9s2Su1/3PiMmUANFULpmksxkClamIZcaUqryHsQ==}
     engines: {node: '>=20.18.1'}
 
+  unpipe@1.0.0:
+    resolution: {integrity: sha512-pjy2bYhSsufwWlKwPc+l3cN7+wuJlK6uz0YdJEOlQDbl6jo/YlPi4mb8agUkVC8BF7V8NuzeyPNqRksA3hztKQ==}
+    engines: {node: '>= 0.8'}
+
   validate.io-array@1.0.6:
     resolution: {integrity: sha512-DeOy7CnPEziggrOO5CZhVKJw6S3Yi7e9e65R1Nl/RTN1vTQKnzjfvks0/8kQ40FP/dsjRAOd4hxmJ7uLa6vxkg==}
 
   validate.io-function@1.0.2:
     resolution: {integrity: sha512-LlFybRJEriSuBnUhQyG5bwglhh50EpTL2ul23MPIuR1odjO7XaMLFV8vHGwp7AZciFxtYOeiSCT5st+XSPONiQ==}
 
+  vary@1.1.2:
+    resolution: {integrity: sha512-BNGbWLfd0eUPabhkXUVm0j8uuvREyTh5ovRa/dyow/BqAbZJyC+5fU+IzQOzmAKzYqYRAISoRhdQr3eIZ/PXqg==}
+    engines: {node: '>= 0.8'}
+
   vite-tsconfig-paths@6.1.1:
     resolution: {integrity: sha512-2cihq7zliibCCZ8P9cKJrQBkfgdvcFkOOc3Y02o3GWUDLgqjWsZudaoiuOwO/gzTzy17cS5F7ZPo4bsnS4DGkg==}
     peerDependencies:
@@ -2050,6 +2407,9 @@ packages:
     resolution: {integrity: sha512-G8ura3S+3Z2G+mkgNRq8dqaFZAuxfsxpBB8OCTGRTCtp+l/v9nbFNmCUP1BZMts3G1142MsZfn6eeUKrr4PD1Q==}
     engines: {node: '>=18'}
 
+  wrappy@1.0.2:
+    resolution: {integrity: sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==}
+
   ws@8.20.0:
     resolution: {integrity: sha512-sAt8BhgNbzCtgGbt2OxmpuryO63ZoDk/sqaB/znQm94T4fCEsy/yV+7CdC1kJhOU9lboAEU7R3kquuycDoibVA==}
     engines: {node: '>=10.0.0'}
@@ -2634,17 +2994,24 @@ snapshots:
   '@esbuild/win32-x64@0.25.8':
     optional: true
 
-  '@google/genai@1.50.1':
+  '@google/genai@1.50.1(@modelcontextprotocol/sdk@1.29.0(zod@4.3.6))':
     dependencies:
       google-auth-library: 10.6.2
       p-retry: 4.6.2
       protobufjs: 7.5.5
       ws: 8.20.0
+    optionalDependencies:
+      '@modelcontextprotocol/sdk': 1.29.0(zod@4.3.6)
     transitivePeerDependencies:
       - bufferutil
       - supports-color
       - utf-8-validate
 
+  '@hono/node-server@1.19.14(hono@4.12.16)':
+    dependencies:
+      hono: 4.12.16
+    optional: true
+
   '@isaacs/cliui@8.0.2':
     dependencies:
       string-width: 5.1.2
@@ -2675,9 +3042,9 @@ snapshots:
       '@jridgewell/resolve-uri': 3.1.2
       '@jridgewell/sourcemap-codec': 1.5.4
 
-  '@mariozechner/pi-agent-core@0.67.68(ws@8.20.0)(zod@4.3.6)':
+  '@mariozechner/pi-agent-core@0.67.68(@modelcontextprotocol/sdk@1.29.0(zod@4.3.6))(ws@8.20.0)(zod@4.3.6)':
     dependencies:
-      '@mariozechner/pi-ai': 0.67.68(ws@8.20.0)(zod@4.3.6)
+      '@mariozechner/pi-ai': 0.67.68(@modelcontextprotocol/sdk@1.29.0(zod@4.3.6))(ws@8.20.0)(zod@4.3.6)
     transitivePeerDependencies:
       - '@modelcontextprotocol/sdk'
       - aws-crt
@@ -2687,11 +3054,11 @@ snapshots:
       - ws
       - zod
 
-  '@mariozechner/pi-ai@0.67.68(ws@8.20.0)(zod@4.3.6)':
+  '@mariozechner/pi-ai@0.67.68(@modelcontextprotocol/sdk@1.29.0(zod@4.3.6))(ws@8.20.0)(zod@4.3.6)':
     dependencies:
       '@anthropic-ai/sdk': 0.90.0(zod@4.3.6)
       '@aws-sdk/client-bedrock-runtime': 3.1032.0
-      '@google/genai': 1.50.1
+      '@google/genai': 1.50.1(@modelcontextprotocol/sdk@1.29.0(zod@4.3.6))
       '@mistralai/mistralai': 2.2.0
       '@sinclair/typebox': 0.34.49
       ajv: 8.17.1
@@ -2720,6 +3087,80 @@ snapshots:
       - bufferutil
       - utf-8-validate
 
+  '@modelcontextprotocol/sdk@1.29.0(zod@4.3.6)':
+    dependencies:
+      '@hono/node-server': 1.19.14(hono@4.12.16)
+      ajv: 8.17.1
+      ajv-formats: 3.0.1(ajv@8.17.1)
+      content-type: 1.0.5
+      cors: 2.8.6
+      cross-spawn: 7.0.6
+      eventsource: 3.0.7
+      eventsource-parser: 3.0.6
+      express: 5.2.1
+      express-rate-limit: 8.4.1(express@5.2.1)
+      hono: 4.12.16
+      jose: 6.2.3
+      json-schema-typed: 8.0.2
+      pkce-challenge: 5.0.1
+      raw-body: 3.0.2
+      zod: 4.3.6
+      zod-to-json-schema: 3.25.2(zod@4.3.6)
+    transitivePeerDependencies:
+      - supports-color
+    optional: true
+
+  '@openai/agents-core@0.8.5(ws@8.20.0)(zod@4.3.6)':
+    dependencies:
+      debug: 4.4.3
+      openai: 6.33.0(ws@8.20.0)(zod@4.3.6)
+    optionalDependencies:
+      '@modelcontextprotocol/sdk': 1.29.0(zod@4.3.6)
+      zod: 4.3.6
+    transitivePeerDependencies:
+      - '@cfworker/json-schema'
+      - supports-color
+      - ws
+
+  '@openai/agents-openai@0.8.5(ws@8.20.0)(zod@4.3.6)':
+    dependencies:
+      '@openai/agents-core': 0.8.5(ws@8.20.0)(zod@4.3.6)
+      debug: 4.4.3
+      openai: 6.33.0(ws@8.20.0)(zod@4.3.6)
+      zod: 4.3.6
+    transitivePeerDependencies:
+      - '@cfworker/json-schema'
+      - supports-color
+      - ws
+
+  '@openai/agents-realtime@0.8.5(zod@4.3.6)':
+    dependencies:
+      '@openai/agents-core': 0.8.5(ws@8.20.0)(zod@4.3.6)
+      '@types/ws': 8.18.1
+      debug: 4.4.3
+      ws: 8.20.0
+      zod: 4.3.6
+    transitivePeerDependencies:
+      - '@cfworker/json-schema'
+      - bufferutil
+      - supports-color
+      - utf-8-validate
+
+  '@openai/agents@0.8.5(ws@8.20.0)(zod@4.3.6)':
+    dependencies:
+      '@openai/agents-core': 0.8.5(ws@8.20.0)(zod@4.3.6)
+      '@openai/agents-openai': 0.8.5(ws@8.20.0)(zod@4.3.6)
+      '@openai/agents-realtime': 0.8.5(zod@4.3.6)
+      debug: 4.4.3
+      openai: 6.33.0(ws@8.20.0)(zod@4.3.6)
+      zod: 4.3.6
+    transitivePeerDependencies:
+      - '@cfworker/json-schema'
+      - bufferutil
+      - supports-color
+      - utf-8-validate
+      - ws
+
   '@opentelemetry/api@1.9.0': {}
 
   '@pkgjs/parseargs@0.11.0':
@@ -3128,6 +3569,10 @@ snapshots:
 
   '@types/retry@0.12.0': {}
 
+  '@types/ws@8.18.1':
+    dependencies:
+      '@types/node': 25.5.0
+
   '@vercel/oidc@3.1.0': {}
 
   '@vitest/coverage-v8@4.1.2(vitest@4.1.2(@opentelemetry/api@1.9.0)(@types/node@25.5.0)(vite@7.0.6(@types/node@25.5.0)(yaml@2.8.0)))':
@@ -3185,6 +3630,12 @@ snapshots:
       convert-source-map: 2.0.0
       tinyrainbow: 3.1.0
 
+  accepts@2.0.0:
+    dependencies:
+      mime-types: 3.0.2
+      negotiator: 1.0.0
+    optional: true
+
   acorn@8.15.0: {}
 
   agent-base@7.1.4: {}
@@ -3260,6 +3711,21 @@ snapshots:
 
   binary-search@1.3.6: {}
 
+  body-parser@2.2.2:
+    dependencies:
+      bytes: 3.1.2
+      content-type: 1.0.5
+      debug: 4.4.3
+      http-errors: 2.0.1
+      iconv-lite: 0.7.2
+      on-finished: 2.4.1
+      qs: 6.15.1
+      raw-body: 3.0.2
+      type-is: 2.0.1
+    transitivePeerDependencies:
+      - supports-color
+    optional: true
+
   bowser@2.14.1: {}
 
   brace-expansion@2.0.2:
@@ -3277,8 +3743,23 @@ snapshots:
       esbuild: 0.25.8
       load-tsconfig: 0.2.5
 
+  bytes@3.1.2:
+    optional: true
+
   cac@6.7.14: {}
 
+  call-bind-apply-helpers@1.0.2:
+    dependencies:
+      es-errors: 1.3.0
+      function-bind: 1.1.2
+    optional: true
+
+  call-bound@1.0.4:
+    dependencies:
+      call-bind-apply-helpers: 1.0.2
+      get-intrinsic: 1.3.0
+    optional: true
+
   chai@6.2.2: {}
 
   chalk@5.4.1: {}
@@ -3333,8 +3814,26 @@ snapshots:
 
   consola@3.4.2: {}
 
+  content-disposition@1.1.0:
+    optional: true
+
+  content-type@1.0.5:
+    optional: true
+
   convert-source-map@2.0.0: {}
 
+  cookie-signature@1.2.2:
+    optional: true
+
+  cookie@0.7.2:
+    optional: true
+
+  cors@2.8.6:
+    dependencies:
+      object-assign: 4.1.1
+      vary: 1.1.2
+    optional: true
+
   cross-spawn@7.0.6:
     dependencies:
       path-key: 3.1.1
@@ -3349,12 +3848,19 @@ snapshots:
     dependencies:
       ms: 2.1.3
 
+  debug@4.4.3:
+    dependencies:
+      ms: 2.1.3
+
   degenerator@5.0.1:
     dependencies:
       ast-types: 0.13.4
       escodegen: 2.1.0
       esprima: 4.0.1
 
+  depd@2.0.0:
+    optional: true
+
   dotenv-cli@8.0.0:
     dependencies:
       cross-spawn: 7.0.6
@@ -3366,22 +3872,46 @@ snapshots:
 
   dotenv@16.6.1: {}
 
+  dunder-proto@1.0.1:
+    dependencies:
+      call-bind-apply-helpers: 1.0.2
+      es-errors: 1.3.0
+      gopd: 1.2.0
+    optional: true
+
   eastasianwidth@0.2.0: {}
 
   ecdsa-sig-formatter@1.0.11:
     dependencies:
       safe-buffer: 5.2.1
 
+  ee-first@1.1.1:
+    optional: true
+
   emoji-regex@10.4.0: {}
 
   emoji-regex@8.0.0: {}
 
   emoji-regex@9.2.2: {}
 
+  encodeurl@2.0.0:
+    optional: true
+
   environment@1.1.0: {}
 
+  es-define-property@1.0.1:
+    optional: true
+
+  es-errors@1.3.0:
+    optional: true
+
   es-module-lexer@2.0.0: {}
 
+  es-object-atoms@1.1.1:
+    dependencies:
+      es-errors: 1.3.0
+    optional: true
+
   esbuild@0.25.8:
     optionalDependencies:
       '@esbuild/aix-ppc64': 0.25.8
@@ -3411,6 +3941,9 @@ snapshots:
       '@esbuild/win32-ia32': 0.25.8
       '@esbuild/win32-x64': 0.25.8
 
+  escape-html@1.0.3:
+    optional: true
+
   escodegen@2.1.0:
     dependencies:
       esprima: 4.0.1
@@ -3429,10 +3962,18 @@ snapshots:
 
   esutils@2.0.3: {}
 
+  etag@1.8.1:
+    optional: true
+
   eventemitter3@5.0.1: {}
 
   eventsource-parser@3.0.6: {}
 
+  eventsource@3.0.7:
+    dependencies:
+      eventsource-parser: 3.0.6
+    optional: true
+
   execa@8.0.1:
     dependencies:
       cross-spawn: 7.0.6
@@ -3447,6 +3988,46 @@ snapshots:
 
   expect-type@1.3.0: {}
 
+  express-rate-limit@8.4.1(express@5.2.1):
+    dependencies:
+      express: 5.2.1
+      ip-address: 10.1.0
+    optional: true
+
+  express@5.2.1:
+    dependencies:
+      accepts: 2.0.0
+      body-parser: 2.2.2
+      content-disposition: 1.1.0
+      content-type: 1.0.5
+      cookie: 0.7.2
+      cookie-signature: 1.2.2
+      debug: 4.4.3
+      depd: 2.0.0
+      encodeurl: 2.0.0
+      escape-html: 1.0.3
+      etag: 1.8.1
+      finalhandler: 2.1.1
+      fresh: 2.0.0
+      http-errors: 2.0.1
+      merge-descriptors: 2.0.0
+      mime-types: 3.0.2
+      on-finished: 2.4.1
+      once: 1.4.0
+      parseurl: 1.3.3
+      proxy-addr: 2.0.7
+      qs: 6.15.1
+      range-parser: 1.2.1
+      router: 2.2.0
+      send: 1.2.1
+      serve-static: 2.2.1
+      statuses: 2.0.2
+      type-is: 2.0.1
+      vary: 1.1.2
+    transitivePeerDependencies:
+      - supports-color
+    optional: true
+
   extend@3.0.2: {}
 
   fast-deep-equal@3.1.3: {}
@@ -3482,6 +4063,18 @@ snapshots:
     dependencies:
       to-regex-range: 5.0.1
 
+  finalhandler@2.1.1:
+    dependencies:
+      debug: 4.4.3
+      encodeurl: 2.0.0
+      escape-html: 1.0.3
+      on-finished: 2.4.1
+      parseurl: 1.3.3
+      statuses: 2.0.2
+    transitivePeerDependencies:
+      - supports-color
+    optional: true
+
   fix-dts-default-cjs-exports@1.0.1:
     dependencies:
       magic-string: 0.30.17
@@ -3497,9 +4090,18 @@ snapshots:
     dependencies:
       fetch-blob: 3.2.0
 
+  forwarded@0.2.0:
+    optional: true
+
+  fresh@2.0.0:
+    optional: true
+
   fsevents@2.3.3:
     optional: true
 
+  function-bind@1.1.2:
+    optional: true
+
   gaxios@7.1.4:
     dependencies:
       extend: 3.0.2
@@ -3518,6 +4120,26 @@ snapshots:
 
   get-east-asian-width@1.3.0: {}
 
+  get-intrinsic@1.3.0:
+    dependencies:
+      call-bind-apply-helpers: 1.0.2
+      es-define-property: 1.0.1
+      es-errors: 1.3.0
+      es-object-atoms: 1.1.1
+      function-bind: 1.1.2
+      get-proto: 1.0.1
+      gopd: 1.2.0
+      has-symbols: 1.1.0
+      hasown: 2.0.3
+      math-intrinsics: 1.1.0
+    optional: true
+
+  get-proto@1.0.1:
+    dependencies:
+      dunder-proto: 1.0.1
+      es-object-atoms: 1.1.1
+    optional: true
+
   get-stream@8.0.1: {}
 
   get-uri@6.0.5:
@@ -3552,10 +4174,33 @@ snapshots:
 
   google-logging-utils@1.1.3: {}
 
+  gopd@1.2.0:
+    optional: true
+
   has-flag@4.0.0: {}
 
+  has-symbols@1.1.0:
+    optional: true
+
+  hasown@2.0.3:
+    dependencies:
+      function-bind: 1.1.2
+    optional: true
+
+  hono@4.12.16:
+    optional: true
+
   html-escaper@2.0.2: {}
 
+  http-errors@2.0.1:
+    dependencies:
+      depd: 2.0.0
+      inherits: 2.0.4
+      setprototypeof: 1.2.0
+      statuses: 2.0.2
+      toidentifier: 1.0.1
+    optional: true
+
   http-proxy-agent@7.0.2:
     dependencies:
       agent-base: 7.1.4
@@ -3572,10 +4217,21 @@ snapshots:
 
   human-signals@5.0.0: {}
 
+  iconv-lite@0.7.2:
+    dependencies:
+      safer-buffer: 2.1.2
+    optional: true
+
+  inherits@2.0.4:
+    optional: true
+
   install@0.13.0: {}
 
   ip-address@10.1.0: {}
 
+  ipaddr.js@1.9.1:
+    optional: true
+
   is-any-array@2.0.1: {}
 
   is-fullwidth-code-point@3.0.0: {}
@@ -3588,6 +4244,9 @@ snapshots:
 
   is-number@7.0.0: {}
 
+  is-promise@4.0.0:
+    optional: true
+
   is-stream@3.0.0: {}
 
   isexe@2.0.0: {}
@@ -3611,6 +4270,9 @@ snapshots:
     optionalDependencies:
       '@pkgjs/parseargs': 0.11.0
 
+  jose@6.2.3:
+    optional: true
+
   joycon@3.1.1: {}
 
   js-levenshtein@1.1.6: {}
@@ -3632,6 +4294,9 @@ snapshots:
 
   json-schema-traverse@1.0.0: {}
 
+  json-schema-typed@8.0.2:
+    optional: true
+
   json-schema@0.4.0: {}
 
   jwa@2.0.1:
@@ -3716,6 +4381,15 @@ snapshots:
     dependencies:
       semver: 7.7.2
 
+  math-intrinsics@1.1.0:
+    optional: true
+
+  media-typer@1.1.0:
+    optional: true
+
+  merge-descriptors@2.0.0:
+    optional: true
+
   merge-stream@2.0.0: {}
 
   micromatch@4.0.8:
@@ -3723,6 +4397,14 @@ snapshots:
       braces: 3.0.3
       picomatch: 2.3.1
 
+  mime-db@1.54.0:
+    optional: true
+
+  mime-types@3.0.2:
+    dependencies:
+      mime-db: 1.54.0
+    optional: true
+
   mimic-fn@4.0.0: {}
 
   mimic-function@5.0.1: {}
@@ -3784,6 +4466,9 @@ snapshots:
 
   nanoid@3.3.11: {}
 
+  negotiator@1.0.0:
+    optional: true
+
   netmask@2.1.1: {}
 
   node-domexception@1.0.0: {}
@@ -3800,8 +4485,21 @@ snapshots:
 
   object-assign@4.1.1: {}
 
+  object-inspect@1.13.4:
+    optional: true
+
   obug@2.1.1: {}
 
+  on-finished@2.4.1:
+    dependencies:
+      ee-first: 1.1.1
+    optional: true
+
+  once@1.4.0:
+    dependencies:
+      wrappy: 1.0.2
+    optional: true
+
   onetime@6.0.0:
     dependencies:
       mimic-fn: 4.0.0
@@ -3850,6 +4548,9 @@ snapshots:
 
   package-json-from-dist@1.0.1: {}
 
+  parseurl@1.3.3:
+    optional: true
+
   partial-json@0.1.7: {}
 
   path-expression-matcher@1.5.0: {}
@@ -3863,6 +4564,9 @@ snapshots:
       lru-cache: 10.4.3
       minipass: 7.1.2
 
+  path-to-regexp@8.4.2:
+    optional: true
+
   pathe@2.0.3: {}
 
   picocolors@1.1.1: {}
@@ -3875,6 +4579,9 @@ snapshots:
 
   pirates@4.0.7: {}
 
+  pkce-challenge@5.0.1:
+    optional: true
+
   pkg-types@1.3.1:
     dependencies:
       confbox: 0.1.8
@@ -3909,6 +4616,12 @@ snapshots:
       '@types/node': 25.5.0
       long: 5.3.2
 
+  proxy-addr@2.0.7:
+    dependencies:
+      forwarded: 0.2.0
+      ipaddr.js: 1.9.1
+    optional: true
+
   proxy-agent@6.5.0:
     dependencies:
       agent-base: 7.1.4
@@ -3926,6 +4639,22 @@ snapshots:
 
   punycode@2.3.1: {}
 
+  qs@6.15.1:
+    dependencies:
+      side-channel: 1.1.0
+    optional: true
+
+  range-parser@1.2.1:
+    optional: true
+
+  raw-body@3.0.2:
+    dependencies:
+      bytes: 3.1.2
+      http-errors: 2.0.1
+      iconv-lite: 0.7.2
+      unpipe: 1.0.0
+    optional: true
+
   readdirp@4.1.2: {}
 
   require-from-string@2.0.2: {}
@@ -3967,16 +4696,92 @@ snapshots:
       '@rollup/rollup-win32-x64-msvc': 4.46.2
       fsevents: 2.3.3
 
+  router@2.2.0:
+    dependencies:
+      debug: 4.4.3
+      depd: 2.0.0
+      is-promise: 4.0.0
+      parseurl: 1.3.3
+      path-to-regexp: 8.4.2
+    transitivePeerDependencies:
+      - supports-color
+    optional: true
+
   safe-buffer@5.2.1: {}
 
+  safer-buffer@2.1.2:
+    optional: true
+
   semver@7.7.2: {}
 
+  send@1.2.1:
+    dependencies:
+      debug: 4.4.3
+      encodeurl: 2.0.0
+      escape-html: 1.0.3
+      etag: 1.8.1
+      fresh: 2.0.0
+      http-errors: 2.0.1
+      mime-types: 3.0.2
+      ms: 2.1.3
+      on-finished: 2.4.1
+      range-parser: 1.2.1
+      statuses: 2.0.2
+    transitivePeerDependencies:
+      - supports-color
+    optional: true
+
+  serve-static@2.2.1:
+    dependencies:
+      encodeurl: 2.0.0
+      escape-html: 1.0.3
+      parseurl: 1.3.3
+      send: 1.2.1
+    transitivePeerDependencies:
+      - supports-color
+    optional: true
+
+  setprototypeof@1.2.0:
+    optional: true
+
   shebang-command@2.0.0:
     dependencies:
       shebang-regex: 3.0.0
 
   shebang-regex@3.0.0: {}
 
+  side-channel-list@1.0.1:
+    dependencies:
+      es-errors: 1.3.0
+      object-inspect: 1.13.4
+    optional: true
+
+  side-channel-map@1.0.1:
+    dependencies:
+      call-bound: 1.0.4
+      es-errors: 1.3.0
+      get-intrinsic: 1.3.0
+      object-inspect: 1.13.4
+    optional: true
+
+  side-channel-weakmap@1.0.2:
+    dependencies:
+      call-bound: 1.0.4
+      es-errors: 1.3.0
+      get-intrinsic: 1.3.0
+      object-inspect: 1.13.4
+      side-channel-map: 1.0.1
+    optional: true
+
+  side-channel@1.1.0:
+    dependencies:
+      es-errors: 1.3.0
+      object-inspect: 1.13.4
+      side-channel-list: 1.0.1
+      side-channel-map: 1.0.1
+      side-channel-weakmap: 1.0.2
+    optional: true
+
   siginfo@2.0.0: {}
 
   signal-exit@4.1.0: {}
@@ -4019,6 +4824,9 @@ snapshots:
 
   stackback@0.0.2: {}
 
+  statuses@2.0.2:
+    optional: true
+
   std-env@4.0.0: {}
 
   string-argv@0.3.2: {}
@@ -4097,6 +4905,9 @@ snapshots:
     dependencies:
       is-number: 7.0.0
 
+  toidentifier@1.0.1:
+    optional: true
+
   tr46@1.0.1:
     dependencies:
       punycode: 2.3.1
@@ -4141,6 +4952,13 @@ snapshots:
       - tsx
       - yaml
 
+  type-is@2.0.1:
+    dependencies:
+      content-type: 1.0.5
+      media-typer: 1.1.0
+      mime-types: 3.0.2
+    optional: true
+
   typescript@5.8.3: {}
 
   ufo@1.6.1: {}
@@ -4149,10 +4967,16 @@ snapshots:
 
   undici@7.25.0: {}
 
+  unpipe@1.0.0:
+    optional: true
+
   validate.io-array@1.0.6: {}
 
   validate.io-function@1.0.2: {}
 
+  vary@1.1.2:
+    optional: true
+
   vite-tsconfig-paths@6.1.1(typescript@5.8.3)(vite@7.0.6(@types/node@25.5.0)(yaml@2.8.0)):
     dependencies:
       debug: 4.4.1
@@ -4241,6 +5065,9 @@ snapshots:
       string-width: 7.2.0
       strip-ansi: 7.1.0
 
+  wrappy@1.0.2:
+    optional: true
+
   ws@8.20.0: {}
 
   yaml@2.8.0: {}
diff --git a/scripts/bump-release-versions.mjs b/scripts/bump-release-versions.mjs
index afda7ab..d245d10 100644
--- a/scripts/bump-release-versions.mjs
+++ b/scripts/bump-release-versions.mjs
@@ -11,6 +11,7 @@ if (!newVersion) {
 const files = [
   "packages/vitest-evals/package.json",
   "packages/harness-ai-sdk/package.json",
+  "packages/harness-openai-agents/package.json",
   "packages/harness-pi-ai/package.json",
 ];
 
diff --git a/scripts/eval-cli.mjs b/scripts/eval-cli.mjs
index 7616cc7..06da697 100644
--- a/scripts/eval-cli.mjs
+++ b/scripts/eval-cli.mjs
@@ -31,6 +31,9 @@ export function parseEvalCliArgs(args) {
 export function createEvalEnv(baseEnv, toolDetailLevel) {
   return {
     ...baseEnv,
+    VITEST_EVALS_REPLAY_MODE: baseEnv.VITEST_EVALS_REPLAY_MODE ?? "auto",
+    VITEST_EVALS_REPLAY_DIR:
+      baseEnv.VITEST_EVALS_REPLAY_DIR ?? ".vitest-evals/recordings",
     ...(toolDetailLevel > 0
       ? {
           VITEST_EVALS_TOOL_DETAILS: "1",
diff --git a/scripts/eval-cli.test.mjs b/scripts/eval-cli.test.mjs
new file mode 100644
index 0000000..ee7ecd4
--- /dev/null
+++ b/scripts/eval-cli.test.mjs
@@ -0,0 +1,34 @@
+import { describe, expect, test } from "vitest";
+import { createEvalEnv, parseEvalCliArgs } from "./eval-cli.mjs";
+
+describe("eval CLI helpers", () => {
+  test("defaults demo evals to replay auto mode", () => {
+    expect(createEvalEnv({}, 0)).toMatchObject({
+      VITEST_EVALS_REPLAY_MODE: "auto",
+      VITEST_EVALS_REPLAY_DIR: ".vitest-evals/recordings",
+    });
+  });
+
+  test("preserves explicit replay overrides", () => {
+    expect(
+      createEvalEnv(
+        {
+          VITEST_EVALS_REPLAY_MODE: "strict",
+          VITEST_EVALS_REPLAY_DIR: "/tmp/replay",
+        },
+        0,
+      ),
+    ).toMatchObject({
+      VITEST_EVALS_REPLAY_MODE: "strict",
+      VITEST_EVALS_REPLAY_DIR: "/tmp/replay",
+    });
+  });
+
+  test("keeps verbose flags separate from forwarded Vitest args", () => {
+    expect(parseEvalCliArgs(["--", "-vv", "--pool=forks"])).toEqual({
+      failMode: false,
+      forwardedArgs: ["--pool=forks"],
+      toolDetailLevel: 2,
+    });
+  });
+});
diff --git a/tsconfig.base.json b/tsconfig.base.json
index 31e6d3f..64692ff 100644
--- a/tsconfig.base.json
+++ b/tsconfig.base.json
@@ -11,6 +11,9 @@
       "vitest-evals": ["packages/vitest-evals/src/index.ts"],
       "vitest-evals/*": ["packages/vitest-evals/src/*"],
       "@vitest-evals/harness-ai-sdk": ["packages/harness-ai-sdk/src/index.ts"],
+      "@vitest-evals/harness-openai-agents": [
+        "packages/harness-openai-agents/src/index.ts"
+      ],
       "@vitest-evals/harness-pi-ai": ["packages/harness-pi-ai/src/index.ts"]
     }
   }
diff --git a/vitest.config.ts b/vitest.config.ts
index 10976f9..c824002 100644
--- a/vitest.config.ts
+++ b/vitest.config.ts
@@ -9,6 +9,7 @@ export default defineConfig({
       "packages/**/*.eval.ts",
       "apps/**/*.test.ts",
       "apps/**/*.eval.ts",
+      "scripts/**/*.test.mjs",
     ],
   },
 });