diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index 9b3adc8..2925bdb 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -8,7 +8,7 @@ { "name": "claude-code-ai-observability", "description": "Realtime Sentry traces with per-turn tokens, cost, subagent and error instrumentation, and rich auto-tagging (session / git / host). Forked from sergical/claude-code-sentry-monitor (MIT).", - "version": "0.2.10", + "version": "0.2.11", "author": { "name": "Joshkop" }, diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json index 3be6f2e..d09307a 100644 --- a/.claude-plugin/plugin.json +++ b/.claude-plugin/plugin.json @@ -1,7 +1,7 @@ { "name": "claude-code-ai-observability", "description": "Comprehensive AI Agent Observability plugin for Claude Code — realtime Sentry traces with per-turn tokens, cost, subagent and error instrumentation, and rich auto-tagging (session / git / host). Forked from sergical/claude-code-sentry-monitor (MIT).", - "version": "0.2.10", + "version": "0.2.11", "author": { "name": "Joshkop" }, diff --git a/CHANGELOG.md b/CHANGELOG.md index 4817583..fe05226 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,14 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). +## [0.2.11] - 2026-05-21 + +### Fixed + +- **Sentry AI Conversations now shows every assistant output, not just the first.** Two bugs combined to drop most assistant messages from the Conversations view: + - `closeTurnSpan` previously ran only on the *next* `UserPromptSubmit` (or `SessionEnd`), racing the transcript JSONL flush. If assistant text wasn't on disk at close time, `tokens.response` was `null` and the `gen_ai.output.messages` attribute was silently skipped — and the existing 200 ms retry only triggered when usage was zero, not when text was missing. The `Stop` hook now closes the current turn (it fires right after assistant text lands, so the transcript is freshest). `closeCurrentTurn` is idempotent, so the follow-up close from UserPromptSubmit / SessionEnd is a no-op. The late-flush retry now also triggers when `recordOutputs` is on and `response` is null. + - When the collector spawned (or self-healed) mid-session, its local `turnIndex` no longer aligned with the transcript's real-turn ordinal. `selectTurn`'s ordinal fallback would silently attribute an earlier turn's response to the current turn (the "how are you" turn would surface turn 0's "Hi! How can I help today?" output). Synthesized sessions now refuse ordinal matches and record `claude_code.usage_extraction.status = no_matching_turn_synthesized_ordinal` so the gap is observable in Sentry. + ## [0.2.10] - 2026-05-21 ### Fixed diff --git a/package.json b/package.json index 1a41e45..5a33a67 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "claude-code-ai-observability", - "version": "0.2.10", + "version": "0.2.11", "description": "Comprehensive AI Agent Observability plugin for Claude Code — realtime Sentry traces with per-turn tokens, cost, subagent and error instrumentation, and rich auto-tagging (session / git / host).", "type": "module", "scripts": { diff --git a/scripts/server.js b/scripts/server.js index 3538df9..4941e61 100644 --- a/scripts/server.js +++ b/scripts/server.js @@ -169,26 +169,47 @@ export function startServer(sentry, config, baseAutoTags) { // UserPromptSubmit corresponds to exactly one real (non-sidechain, // non-tool_result) user line. const selected = selectTurn(result, record.currentPromptId, record.turnIndex); - if (!selected.turn) { - tokenExtractionStatus = "no_matching_turn"; + // Synthesized sessions start record.turnIndex at -1 even when the + // transcript already holds prior real turns (collector spawned mid- + // session or self-healed after a restart). The local ordinal no longer + // aligns with the transcript's real-turn index, so an ordinal fallback + // can silently attach an earlier turn's response to a later turn. + // Better to drop one turn's data than to attribute the wrong assistant + // text to it — refuse ordinal matches under synthesis. + const unsafeOrdinal = record.synthesized && selected.matchedBy === "ordinal"; + if (!selected.turn || unsafeOrdinal) { + tokenExtractionStatus = unsafeOrdinal + ? "no_matching_turn_synthesized_ordinal" + : "no_matching_turn"; } else { tokens = selected.turn; - if (tokens.inputTokens + tokens.outputTokens === 0) { - // Late-flush hypothesis: assistant usage may not yet be on disk. - // Sleep briefly and try once more. + // Late-flush hypothesis: assistant usage OR assistant text may not yet + // be on disk when the prior turn closes. Retry once if either is + // missing — without the response retry, turns whose usage flushed + // before text silently drop gen_ai.output.messages, leaving Sentry AI + // Conversations with only the inputs. + const usageMissing = tokens.inputTokens + tokens.outputTokens === 0; + const responseMissing = config.recordOutputs && !tokens.response; + if (usageMissing || responseMissing) { await new Promise((r) => setTimeout(r, 200)); const retry = readTranscript(record.transcriptPath); parseDegraded = retry.degraded; sessionDims = retry.session; const retrySelected = selectTurn(retry, record.currentPromptId, record.turnIndex); - if (retrySelected.turn && (retrySelected.turn.inputTokens + retrySelected.turn.outputTokens) > 0) { - tokens = retrySelected.turn; + const retryTurn = retrySelected.turn; + const retryUsageOk = retryTurn && (retryTurn.inputTokens + retryTurn.outputTokens) > 0; + const retryResponseOk = retryTurn && (!config.recordOutputs || retryTurn.response); + if (retryTurn && (usageMissing ? retryUsageOk : true) && (responseMissing ? retryResponseOk : true)) { + tokens = retryTurn; tokenExtractionStatus = "ok|matched_after_retry"; } - else { + else if (usageMissing) { tokenExtractionStatus = "turn_had_no_usage"; } + else { + tokenExtractionStatus = "turn_had_no_response"; + } } else { tokenExtractionStatus = "ok"; @@ -424,6 +445,21 @@ export function startServer(sentry, config, baseAutoTags) { span.end(); record.pendingTools.delete(key); }; + // Stop fires right after the assistant text lands in the transcript — the + // freshest moment to harvest the response. Without this, closeCurrentTurn + // only runs on the next UserPromptSubmit (or SessionEnd) and races the + // transcript flush, silently dropping gen_ai.output.messages for any turn + // where text wasn't on disk yet. closeCurrentTurn is idempotent (clears + // currentTurnSpans), so a follow-up close from UserPromptSubmit is a no-op. + const handleStop = async (event) => { + const record = sessions.get(event.session_id); + if (!record) + return; + if (event.transcript_path && !record.transcriptPath) { + record.transcriptPath = event.transcript_path; + } + await closeCurrentTurn(record); + }; const handleSessionEnd = async (event) => { const record = sessions.get(event.session_id); if (!record) @@ -500,6 +536,8 @@ export function startServer(sentry, config, baseAutoTags) { await handleSessionEnd(event); return; case "Stop": + await handleStop(event); + return; case "PreCompact": return; } diff --git a/src/server.ts b/src/server.ts index ed84f17..ce4e915 100644 --- a/src/server.ts +++ b/src/server.ts @@ -10,6 +10,7 @@ import type { ResolvedPluginConfig, SessionEndEvent, SessionStartEvent, + StopEvent, UserPromptSubmitEvent, } from "./types.js"; import { reportPluginError } from "./sentry-errors.js"; @@ -225,23 +226,43 @@ export function startServer( // UserPromptSubmit corresponds to exactly one real (non-sidechain, // non-tool_result) user line. const selected = selectTurn(result, record.currentPromptId, record.turnIndex); - if (!selected.turn) { - tokenExtractionStatus = "no_matching_turn"; + // Synthesized sessions start record.turnIndex at -1 even when the + // transcript already holds prior real turns (collector spawned mid- + // session or self-healed after a restart). The local ordinal no longer + // aligns with the transcript's real-turn index, so an ordinal fallback + // can silently attach an earlier turn's response to a later turn. + // Better to drop one turn's data than to attribute the wrong assistant + // text to it — refuse ordinal matches under synthesis. + const unsafeOrdinal = record.synthesized && selected.matchedBy === "ordinal"; + if (!selected.turn || unsafeOrdinal) { + tokenExtractionStatus = unsafeOrdinal + ? "no_matching_turn_synthesized_ordinal" + : "no_matching_turn"; } else { tokens = selected.turn; - if (tokens.inputTokens + tokens.outputTokens === 0) { - // Late-flush hypothesis: assistant usage may not yet be on disk. - // Sleep briefly and try once more. + // Late-flush hypothesis: assistant usage OR assistant text may not yet + // be on disk when the prior turn closes. Retry once if either is + // missing — without the response retry, turns whose usage flushed + // before text silently drop gen_ai.output.messages, leaving Sentry AI + // Conversations with only the inputs. + const usageMissing = tokens.inputTokens + tokens.outputTokens === 0; + const responseMissing = config.recordOutputs && !tokens.response; + if (usageMissing || responseMissing) { await new Promise((r) => setTimeout(r, 200)); const retry = readTranscript(record.transcriptPath); parseDegraded = retry.degraded; sessionDims = retry.session; const retrySelected = selectTurn(retry, record.currentPromptId, record.turnIndex); - if (retrySelected.turn && (retrySelected.turn.inputTokens + retrySelected.turn.outputTokens) > 0) { - tokens = retrySelected.turn; + const retryTurn = retrySelected.turn; + const retryUsageOk = retryTurn && (retryTurn.inputTokens + retryTurn.outputTokens) > 0; + const retryResponseOk = retryTurn && (!config.recordOutputs || retryTurn.response); + if (retryTurn && (usageMissing ? retryUsageOk : true) && (responseMissing ? retryResponseOk : true)) { + tokens = retryTurn; tokenExtractionStatus = "ok|matched_after_retry"; - } else { + } else if (usageMissing) { tokenExtractionStatus = "turn_had_no_usage"; + } else { + tokenExtractionStatus = "turn_had_no_response"; } } else { tokenExtractionStatus = "ok"; @@ -499,6 +520,21 @@ export function startServer( record.pendingTools.delete(key); }; + // Stop fires right after the assistant text lands in the transcript — the + // freshest moment to harvest the response. Without this, closeCurrentTurn + // only runs on the next UserPromptSubmit (or SessionEnd) and races the + // transcript flush, silently dropping gen_ai.output.messages for any turn + // where text wasn't on disk yet. closeCurrentTurn is idempotent (clears + // currentTurnSpans), so a follow-up close from UserPromptSubmit is a no-op. + const handleStop = async (event: StopEvent): Promise => { + const record = sessions.get(event.session_id); + if (!record) return; + if (event.transcript_path && !record.transcriptPath) { + record.transcriptPath = event.transcript_path; + } + await closeCurrentTurn(record); + }; + const handleSessionEnd = async (event: SessionEndEvent): Promise => { const record = sessions.get(event.session_id); if (!record) return; @@ -566,6 +602,8 @@ export function startServer( await handleSessionEnd(event); return; case "Stop": + await handleStop(event); + return; case "PreCompact": return; } diff --git a/src/types.ts b/src/types.ts index 733ff62..b76db86 100644 --- a/src/types.ts +++ b/src/types.ts @@ -153,6 +153,7 @@ export interface PreCompactEvent extends AiobsEnvelope { export interface StopEvent extends AiobsEnvelope { hook_event_name: "Stop"; session_id: string; + transcript_path?: string; } export type HookEvent = diff --git a/tests/server-lifecycle.test.ts b/tests/server-lifecycle.test.ts index 7ded350..af0be11 100644 --- a/tests/server-lifecycle.test.ts +++ b/tests/server-lifecycle.test.ts @@ -269,6 +269,52 @@ describe("server: reader integration (C1/C6)", () => { rmSync(dir, { recursive: true, force: true }); } }); + +}); + +describe("server: Stop hook closes the turn with response", () => { + let port: number; + let sentry: ReturnType; + let close: () => Promise; + + beforeEach(async () => { + port = await findFreePort(); + process.env.SENTRY_COLLECTOR_PORT = String(port); + sentry = makeFakeSentry(); + // Stop's whole job is to make output capture reliable, so recordOutputs + // must be on for the assertion to mean anything. + const cfg: ResolvedPluginConfig = { ...baseConfig, recordOutputs: true, recordInputs: true }; + const server = startServer(sentry as never, cfg, baseTags); + close = server.close; + for (let i = 0; i < 25; i++) { + try { const r = await fetch(`http://127.0.0.1:${port}/health`); if (r.ok) break; } catch { /* ignore */ } + await new Promise((r) => setTimeout(r, 50)); + } + }); + afterEach(async () => { await close(); delete process.env.SENTRY_COLLECTOR_PORT; }); + + it("Stop captures gen_ai.output.messages and SessionEnd is idempotent", async () => { + const dir = mkdtempSync(join(tmpdir(), "srv-stop-")); + const tx = join(dir, "s.jsonl"); + writeFileSync(tx, [ + JSON.stringify({ type: "user", promptId: "P1", message: { content: "hello" } }), + JSON.stringify({ type: "assistant", message: { model: "claude-opus-4-7", usage: { input_tokens: 5, output_tokens: 7 }, content: [{ type: "text", text: "hi there" }] } }), + ].join("\n"), "utf8"); + try { + await postHook(port, { hook_event_name: "SessionStart", session_id: "stop1", transcript_path: tx }); + await postHook(port, { hook_event_name: "UserPromptSubmit", session_id: "stop1", prompt: "hello", prompt_id: "P1" }); + await postHook(port, { hook_event_name: "Stop", session_id: "stop1", transcript_path: tx }); + const chat = sentry.spans.find((s) => s.op === "gen_ai.chat"); + expect(chat).toBeTruthy(); + expect(chat!.attrs["gen_ai.output.messages"]).toBe( + JSON.stringify([{ role: "assistant", content: "hi there" }]), + ); + await postHook(port, { hook_event_name: "SessionEnd", session_id: "stop1", transcript_path: tx }); + expect(sentry.spans.filter((s) => s.op === "gen_ai.chat")).toHaveLength(1); + } finally { + rmSync(dir, { recursive: true, force: true }); + } + }); }); describe("server: per-session git cwd (C4)", () => { @@ -355,6 +401,36 @@ describe("server: lazy session synthesis (R2)", () => { rmSync(dir, { recursive: true, force: true }); } }); + + it("refuses ordinal fallback when the session is synthesized to avoid wrong-turn attribution", async () => { + const dir = mkdtempSync(join(tmpdir(), "srv-r2-ord-")); + const tx = join(dir, "s.jsonl"); + try { + // Transcript already holds two prior real turns from before the + // collector spawned. Synthesized session's local turnIndex starts at + // 0 on its first UserPromptSubmit, which would ordinal-match the + // FIRST transcript turn's response — exactly the bug this guards. + writeFileSync(tx, [ + JSON.stringify({ type: "user", promptId: "OLD1", message: { content: "old1" } }), + JSON.stringify({ type: "assistant", message: { model: "m", usage: { input_tokens: 5, output_tokens: 5 }, content: [{ type: "text", text: "OLD RESPONSE 1" }] } }), + JSON.stringify({ type: "user", promptId: "OLD2", message: { content: "old2" } }), + JSON.stringify({ type: "assistant", message: { model: "m", usage: { input_tokens: 5, output_tokens: 5 }, content: [{ type: "text", text: "OLD RESPONSE 2" }] } }), + ].join("\n"), "utf8"); + // Skip SessionStart; UserPromptSubmit with a prompt_id that does NOT + // exist in the transcript forces ordinal fallback. + await postHook(port, { hook_event_name: "UserPromptSubmit", session_id: "syn-ord", prompt: "new", prompt_id: "NEW_NOT_IN_TRANSCRIPT", _aiobs: { context: { cwd: dir } } }); + await postHook(port, { hook_event_name: "SessionEnd", session_id: "syn-ord", transcript_path: tx }); + const chat = sentry.spans.find((s) => s.op === "gen_ai.chat"); + expect(chat).toBeTruthy(); + // We must NOT have inherited OLD1's response. + expect(chat!.attrs["gen_ai.output.messages"]).toBeUndefined(); + expect(chat!.attrs["claude_code.usage_extraction.status"]).toBe( + "no_matching_turn_synthesized_ordinal", + ); + } finally { + rmSync(dir, { recursive: true, force: true }); + } + }); }); describe("server: dropped attr + heartbeat (R3/R4)", () => {