Joshkop · Joshkop · May 21, 2026 · May 21, 2026
diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json
@@ -8,7 +8,7 @@
     {
       "name": "claude-code-ai-observability",
       "description": "Realtime Sentry traces with per-turn tokens, cost, subagent and error instrumentation, and rich auto-tagging (session / git / host). Forked from sergical/claude-code-sentry-monitor (MIT).",
-      "version": "0.2.10",
+      "version": "0.2.11",
       "author": {
         "name": "Joshkop"
       },

diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json
@@ -1,7 +1,7 @@
 {
   "name": "claude-code-ai-observability",
   "description": "Comprehensive AI Agent Observability plugin for Claude Code — realtime Sentry traces with per-turn tokens, cost, subagent and error instrumentation, and rich auto-tagging (session / git / host). Forked from sergical/claude-code-sentry-monitor (MIT).",
-  "version": "0.2.10",
+  "version": "0.2.11",
   "author": {
     "name": "Joshkop"
   },

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -4,6 +4,14 @@ All notable changes to this project will be documented in this file.
 
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
 
+## [0.2.11] - 2026-05-21
+
+### Fixed
+
+- **Sentry AI Conversations now shows every assistant output, not just the first.** Two bugs combined to drop most assistant messages from the Conversations view:
+  - `closeTurnSpan` previously ran only on the *next* `UserPromptSubmit` (or `SessionEnd`), racing the transcript JSONL flush. If assistant text wasn't on disk at close time, `tokens.response` was `null` and the `gen_ai.output.messages` attribute was silently skipped — and the existing 200 ms retry only triggered when usage was zero, not when text was missing. The `Stop` hook now closes the current turn (it fires right after assistant text lands, so the transcript is freshest). `closeCurrentTurn` is idempotent, so the follow-up close from UserPromptSubmit / SessionEnd is a no-op. The late-flush retry now also triggers when `recordOutputs` is on and `response` is null.
+  - When the collector spawned (or self-healed) mid-session, its local `turnIndex` no longer aligned with the transcript's real-turn ordinal. `selectTurn`'s ordinal fallback would silently attribute an earlier turn's response to the current turn (the "how are you" turn would surface turn 0's "Hi! How can I help today?" output). Synthesized sessions now refuse ordinal matches and record `claude_code.usage_extraction.status = no_matching_turn_synthesized_ordinal` so the gap is observable in Sentry.
+
 ## [0.2.10] - 2026-05-21
 
 ### Fixed

diff --git a/package.json b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "claude-code-ai-observability",
-  "version": "0.2.10",
+  "version": "0.2.11",
   "description": "Comprehensive AI Agent Observability plugin for Claude Code — realtime Sentry traces with per-turn tokens, cost, subagent and error instrumentation, and rich auto-tagging (session / git / host).",
   "type": "module",
   "scripts": {

diff --git a/scripts/server.js b/scripts/server.js
@@ -169,26 +169,47 @@ export function startServer(sentry, config, baseAutoTags) {
             // UserPromptSubmit corresponds to exactly one real (non-sidechain,
             // non-tool_result) user line.
             const selected = selectTurn(result, record.currentPromptId, record.turnIndex);
-            if (!selected.turn) {
-                tokenExtractionStatus = "no_matching_turn";
+            // Synthesized sessions start record.turnIndex at -1 even when the
+            // transcript already holds prior real turns (collector spawned mid-
+            // session or self-healed after a restart). The local ordinal no longer
+            // aligns with the transcript's real-turn index, so an ordinal fallback
+            // can silently attach an earlier turn's response to a later turn.
+            // Better to drop one turn's data than to attribute the wrong assistant
+            // text to it — refuse ordinal matches under synthesis.
+            const unsafeOrdinal = record.synthesized && selected.matchedBy === "ordinal";
+            if (!selected.turn || unsafeOrdinal) {
+                tokenExtractionStatus = unsafeOrdinal
+                    ? "no_matching_turn_synthesized_ordinal"
+                    : "no_matching_turn";
             }
             else {
                 tokens = selected.turn;
-                if (tokens.inputTokens + tokens.outputTokens === 0) {
-                    // Late-flush hypothesis: assistant usage may not yet be on disk.
-                    // Sleep briefly and try once more.
+                // Late-flush hypothesis: assistant usage OR assistant text may not yet
+                // be on disk when the prior turn closes. Retry once if either is
+                // missing — without the response retry, turns whose usage flushed
+                // before text silently drop gen_ai.output.messages, leaving Sentry AI
+                // Conversations with only the inputs.
+                const usageMissing = tokens.inputTokens + tokens.outputTokens === 0;
+                const responseMissing = config.recordOutputs && !tokens.response;
+                if (usageMissing || responseMissing) {
                     await new Promise((r) => setTimeout(r, 200));
                     const retry = readTranscript(record.transcriptPath);
                     parseDegraded = retry.degraded;
                     sessionDims = retry.session;
                     const retrySelected = selectTurn(retry, record.currentPromptId, record.turnIndex);
-                    if (retrySelected.turn && (retrySelected.turn.inputTokens + retrySelected.turn.outputTokens) > 0) {
-                        tokens = retrySelected.turn;
+                    const retryTurn = retrySelected.turn;
+                    const retryUsageOk = retryTurn && (retryTurn.inputTokens + retryTurn.outputTokens) > 0;
+                    const retryResponseOk = retryTurn && (!config.recordOutputs || retryTurn.response);
+                    if (retryTurn && (usageMissing ? retryUsageOk : true) && (responseMissing ? retryResponseOk : true)) {
+                        tokens = retryTurn;
                         tokenExtractionStatus = "ok|matched_after_retry";
                     }
-                    else {
+                    else if (usageMissing) {
                         tokenExtractionStatus = "turn_had_no_usage";
                     }
+                    else {
+                        tokenExtractionStatus = "turn_had_no_response";
+                    }
                 }
                 else {
                     tokenExtractionStatus = "ok";
@@ -424,6 +445,21 @@ export function startServer(sentry, config, baseAutoTags) {
         span.end();
         record.pendingTools.delete(key);
     };
+    // Stop fires right after the assistant text lands in the transcript — the
+    // freshest moment to harvest the response. Without this, closeCurrentTurn
+    // only runs on the next UserPromptSubmit (or SessionEnd) and races the
+    // transcript flush, silently dropping gen_ai.output.messages for any turn
+    // where text wasn't on disk yet. closeCurrentTurn is idempotent (clears
+    // currentTurnSpans), so a follow-up close from UserPromptSubmit is a no-op.
+    const handleStop = async (event) => {
+        const record = sessions.get(event.session_id);
+        if (!record)
+            return;
+        if (event.transcript_path && !record.transcriptPath) {
+            record.transcriptPath = event.transcript_path;
+        }
+        await closeCurrentTurn(record);
+    };
     const handleSessionEnd = async (event) => {
         const record = sessions.get(event.session_id);
         if (!record)
@@ -500,6 +536,8 @@ export function startServer(sentry, config, baseAutoTags) {
                     await handleSessionEnd(event);
                     return;
                 case "Stop":
+                    await handleStop(event);
+                    return;
                 case "PreCompact":
                     return;
             }

diff --git a/src/server.ts b/src/server.ts
@@ -10,6 +10,7 @@ import type {
   ResolvedPluginConfig,
   SessionEndEvent,
   SessionStartEvent,
+  StopEvent,
   UserPromptSubmitEvent,
 } from "./types.js";
 import { reportPluginError } from "./sentry-errors.js";
@@ -225,23 +226,43 @@ export function startServer(
       // UserPromptSubmit corresponds to exactly one real (non-sidechain,
       // non-tool_result) user line.
       const selected = selectTurn(result, record.currentPromptId, record.turnIndex);
-      if (!selected.turn) {
-        tokenExtractionStatus = "no_matching_turn";
+      // Synthesized sessions start record.turnIndex at -1 even when the
+      // transcript already holds prior real turns (collector spawned mid-
+      // session or self-healed after a restart). The local ordinal no longer
+      // aligns with the transcript's real-turn index, so an ordinal fallback
+      // can silently attach an earlier turn's response to a later turn.
+      // Better to drop one turn's data than to attribute the wrong assistant
+      // text to it — refuse ordinal matches under synthesis.
+      const unsafeOrdinal = record.synthesized && selected.matchedBy === "ordinal";
+      if (!selected.turn || unsafeOrdinal) {
+        tokenExtractionStatus = unsafeOrdinal
+          ? "no_matching_turn_synthesized_ordinal"
+          : "no_matching_turn";
       } else {
         tokens = selected.turn;
-        if (tokens.inputTokens + tokens.outputTokens === 0) {
-          // Late-flush hypothesis: assistant usage may not yet be on disk.
-          // Sleep briefly and try once more.
+        // Late-flush hypothesis: assistant usage OR assistant text may not yet
+        // be on disk when the prior turn closes. Retry once if either is
+        // missing — without the response retry, turns whose usage flushed
+        // before text silently drop gen_ai.output.messages, leaving Sentry AI
+        // Conversations with only the inputs.
+        const usageMissing = tokens.inputTokens + tokens.outputTokens === 0;
+        const responseMissing = config.recordOutputs && !tokens.response;
+        if (usageMissing || responseMissing) {
           await new Promise((r) => setTimeout(r, 200));
           const retry = readTranscript(record.transcriptPath);
           parseDegraded = retry.degraded;
           sessionDims = retry.session;
           const retrySelected = selectTurn(retry, record.currentPromptId, record.turnIndex);
-          if (retrySelected.turn && (retrySelected.turn.inputTokens + retrySelected.turn.outputTokens) > 0) {
-            tokens = retrySelected.turn;
+          const retryTurn = retrySelected.turn;
+          const retryUsageOk = retryTurn && (retryTurn.inputTokens + retryTurn.outputTokens) > 0;
+          const retryResponseOk = retryTurn && (!config.recordOutputs || retryTurn.response);
+          if (retryTurn && (usageMissing ? retryUsageOk : true) && (responseMissing ? retryResponseOk : true)) {
+            tokens = retryTurn;
             tokenExtractionStatus = "ok|matched_after_retry";
-          } else {
+          } else if (usageMissing) {
             tokenExtractionStatus = "turn_had_no_usage";
+          } else {
+            tokenExtractionStatus = "turn_had_no_response";
           }
         } else {
           tokenExtractionStatus = "ok";
@@ -499,6 +520,21 @@ export function startServer(
     record.pendingTools.delete(key);
   };
 
+  // Stop fires right after the assistant text lands in the transcript — the
+  // freshest moment to harvest the response. Without this, closeCurrentTurn
+  // only runs on the next UserPromptSubmit (or SessionEnd) and races the
+  // transcript flush, silently dropping gen_ai.output.messages for any turn
+  // where text wasn't on disk yet. closeCurrentTurn is idempotent (clears
+  // currentTurnSpans), so a follow-up close from UserPromptSubmit is a no-op.
+  const handleStop = async (event: StopEvent): Promise<void> => {
+    const record = sessions.get(event.session_id);
+    if (!record) return;
+    if (event.transcript_path && !record.transcriptPath) {
+      record.transcriptPath = event.transcript_path;
+    }
+    await closeCurrentTurn(record);
+  };
+
   const handleSessionEnd = async (event: SessionEndEvent): Promise<void> => {
     const record = sessions.get(event.session_id);
     if (!record) return;
@@ -566,6 +602,8 @@ export function startServer(
           await handleSessionEnd(event);
           return;
         case "Stop":
+          await handleStop(event);
+          return;
         case "PreCompact":
           return;
       }

diff --git a/src/types.ts b/src/types.ts
@@ -153,6 +153,7 @@ export interface PreCompactEvent extends AiobsEnvelope {
 export interface StopEvent extends AiobsEnvelope {
   hook_event_name: "Stop";
   session_id: string;
+  transcript_path?: string;
 }
 
 export type HookEvent =

diff --git a/tests/server-lifecycle.test.ts b/tests/server-lifecycle.test.ts
@@ -269,6 +269,52 @@ describe("server: reader integration (C1/C6)", () => {
       rmSync(dir, { recursive: true, force: true });
     }
   });
+
+});
+
+describe("server: Stop hook closes the turn with response", () => {
+  let port: number;
+  let sentry: ReturnType<typeof makeFakeSentry>;
+  let close: () => Promise<void>;
+
+  beforeEach(async () => {
+    port = await findFreePort();
+    process.env.SENTRY_COLLECTOR_PORT = String(port);
+    sentry = makeFakeSentry();
+    // Stop's whole job is to make output capture reliable, so recordOutputs
+    // must be on for the assertion to mean anything.
+    const cfg: ResolvedPluginConfig = { ...baseConfig, recordOutputs: true, recordInputs: true };
+    const server = startServer(sentry as never, cfg, baseTags);
+    close = server.close;
+    for (let i = 0; i < 25; i++) {
+      try { const r = await fetch(`http://127.0.0.1:${port}/health`); if (r.ok) break; } catch { /* ignore */ }
+      await new Promise((r) => setTimeout(r, 50));
+    }
+  });
+  afterEach(async () => { await close(); delete process.env.SENTRY_COLLECTOR_PORT; });
+
+  it("Stop captures gen_ai.output.messages and SessionEnd is idempotent", async () => {
+    const dir = mkdtempSync(join(tmpdir(), "srv-stop-"));
+    const tx = join(dir, "s.jsonl");
+    writeFileSync(tx, [
+      JSON.stringify({ type: "user", promptId: "P1", message: { content: "hello" } }),
+      JSON.stringify({ type: "assistant", message: { model: "claude-opus-4-7", usage: { input_tokens: 5, output_tokens: 7 }, content: [{ type: "text", text: "hi there" }] } }),
+    ].join("\n"), "utf8");
+    try {
+      await postHook(port, { hook_event_name: "SessionStart", session_id: "stop1", transcript_path: tx });
+      await postHook(port, { hook_event_name: "UserPromptSubmit", session_id: "stop1", prompt: "hello", prompt_id: "P1" });
+      await postHook(port, { hook_event_name: "Stop", session_id: "stop1", transcript_path: tx });
+      const chat = sentry.spans.find((s) => s.op === "gen_ai.chat");
+      expect(chat).toBeTruthy();
+      expect(chat!.attrs["gen_ai.output.messages"]).toBe(
+        JSON.stringify([{ role: "assistant", content: "hi there" }]),
+      );
+      await postHook(port, { hook_event_name: "SessionEnd", session_id: "stop1", transcript_path: tx });
+      expect(sentry.spans.filter((s) => s.op === "gen_ai.chat")).toHaveLength(1);
+    } finally {
+      rmSync(dir, { recursive: true, force: true });
+    }
+  });
 });
 
 describe("server: per-session git cwd (C4)", () => {
@@ -355,6 +401,36 @@ describe("server: lazy session synthesis (R2)", () => {
       rmSync(dir, { recursive: true, force: true });
     }
   });
+
+  it("refuses ordinal fallback when the session is synthesized to avoid wrong-turn attribution", async () => {
+    const dir = mkdtempSync(join(tmpdir(), "srv-r2-ord-"));
+    const tx = join(dir, "s.jsonl");
+    try {
+      // Transcript already holds two prior real turns from before the
+      // collector spawned. Synthesized session's local turnIndex starts at
+      // 0 on its first UserPromptSubmit, which would ordinal-match the
+      // FIRST transcript turn's response — exactly the bug this guards.
+      writeFileSync(tx, [
+        JSON.stringify({ type: "user", promptId: "OLD1", message: { content: "old1" } }),
+        JSON.stringify({ type: "assistant", message: { model: "m", usage: { input_tokens: 5, output_tokens: 5 }, content: [{ type: "text", text: "OLD RESPONSE 1" }] } }),
+        JSON.stringify({ type: "user", promptId: "OLD2", message: { content: "old2" } }),
+        JSON.stringify({ type: "assistant", message: { model: "m", usage: { input_tokens: 5, output_tokens: 5 }, content: [{ type: "text", text: "OLD RESPONSE 2" }] } }),
+      ].join("\n"), "utf8");
+      // Skip SessionStart; UserPromptSubmit with a prompt_id that does NOT
+      // exist in the transcript forces ordinal fallback.
+      await postHook(port, { hook_event_name: "UserPromptSubmit", session_id: "syn-ord", prompt: "new", prompt_id: "NEW_NOT_IN_TRANSCRIPT", _aiobs: { context: { cwd: dir } } });
+      await postHook(port, { hook_event_name: "SessionEnd", session_id: "syn-ord", transcript_path: tx });
+      const chat = sentry.spans.find((s) => s.op === "gen_ai.chat");
+      expect(chat).toBeTruthy();
+      // We must NOT have inherited OLD1's response.
+      expect(chat!.attrs["gen_ai.output.messages"]).toBeUndefined();
+      expect(chat!.attrs["claude_code.usage_extraction.status"]).toBe(
+        "no_matching_turn_synthesized_ordinal",
+      );
+    } finally {
+      rmSync(dir, { recursive: true, force: true });
+    }
+  });
 });
 
 describe("server: dropped attr + heartbeat (R3/R4)", () => {