Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .claude-plugin/marketplace.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
{
"name": "claude-code-ai-observability",
"description": "Realtime Sentry traces with per-turn tokens, cost, subagent and error instrumentation, and rich auto-tagging (session / git / host). Forked from sergical/claude-code-sentry-monitor (MIT).",
"version": "0.2.10",
"version": "0.2.11",
"author": {
"name": "Joshkop"
},
Expand Down
2 changes: 1 addition & 1 deletion .claude-plugin/plugin.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"name": "claude-code-ai-observability",
"description": "Comprehensive AI Agent Observability plugin for Claude Code — realtime Sentry traces with per-turn tokens, cost, subagent and error instrumentation, and rich auto-tagging (session / git / host). Forked from sergical/claude-code-sentry-monitor (MIT).",
"version": "0.2.10",
"version": "0.2.11",
"author": {
"name": "Joshkop"
},
Expand Down
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,14 @@ All notable changes to this project will be documented in this file.

The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).

## [0.2.11] - 2026-05-21

### Fixed

- **Sentry AI Conversations now shows every assistant output, not just the first.** Two bugs combined to drop most assistant messages from the Conversations view:
  - `closeTurnSpan` previously ran only on the *next* `UserPromptSubmit` (or `SessionEnd`), racing the transcript JSONL flush. If assistant text wasn't on disk at close time, `tokens.response` was `null` and the `gen_ai.output.messages` attribute was silently skipped — and the existing 200 ms retry only triggered when usage was zero, not when text was missing. The `Stop` hook now closes the current turn (it fires right after assistant text lands, so the transcript is freshest). `closeCurrentTurn` is idempotent, so the follow-up close from `UserPromptSubmit` / `SessionEnd` is a no-op. The late-flush retry now also triggers when `recordOutputs` is on and `response` is null.
  - When the collector spawned (or self-healed) mid-session, its local `turnIndex` no longer aligned with the transcript's real-turn ordinal. `selectTurn`'s ordinal fallback would silently attribute an earlier turn's response to the current turn (the "how are you" turn would surface turn 0's "Hi! How can I help today?" output). Synthesized sessions now refuse ordinal matches and record `claude_code.usage_extraction.status = no_matching_turn_synthesized_ordinal` so the gap is observable in Sentry.

## [0.2.10] - 2026-05-21

### Fixed
Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "claude-code-ai-observability",
"version": "0.2.10",
"version": "0.2.11",
"description": "Comprehensive AI Agent Observability plugin for Claude Code — realtime Sentry traces with per-turn tokens, cost, subagent and error instrumentation, and rich auto-tagging (session / git / host).",
"type": "module",
"scripts": {
Expand Down
54 changes: 46 additions & 8 deletions scripts/server.js
Original file line number Diff line number Diff line change
Expand Up @@ -169,26 +169,47 @@ export function startServer(sentry, config, baseAutoTags) {
// UserPromptSubmit corresponds to exactly one real (non-sidechain,
// non-tool_result) user line.
const selected = selectTurn(result, record.currentPromptId, record.turnIndex);
if (!selected.turn) {
tokenExtractionStatus = "no_matching_turn";
// Synthesized sessions start record.turnIndex at -1 even when the
// transcript already holds prior real turns (collector spawned mid-
// session or self-healed after a restart). The local ordinal no longer
// aligns with the transcript's real-turn index, so an ordinal fallback
// can silently attach an earlier turn's response to a later turn.
// Better to drop one turn's data than to attribute the wrong assistant
// text to it — refuse ordinal matches under synthesis.
const unsafeOrdinal = record.synthesized && selected.matchedBy === "ordinal";
if (!selected.turn || unsafeOrdinal) {
tokenExtractionStatus = unsafeOrdinal
? "no_matching_turn_synthesized_ordinal"
: "no_matching_turn";
}
else {
tokens = selected.turn;
if (tokens.inputTokens + tokens.outputTokens === 0) {
// Late-flush hypothesis: assistant usage may not yet be on disk.
// Sleep briefly and try once more.
// Late-flush hypothesis: assistant usage OR assistant text may not yet
// be on disk when the prior turn closes. Retry once if either is
// missing — without the response retry, turns whose usage flushed
// before text silently drop gen_ai.output.messages, leaving Sentry AI
// Conversations with only the inputs.
const usageMissing = tokens.inputTokens + tokens.outputTokens === 0;
const responseMissing = config.recordOutputs && !tokens.response;
if (usageMissing || responseMissing) {
await new Promise((r) => setTimeout(r, 200));
const retry = readTranscript(record.transcriptPath);
parseDegraded = retry.degraded;
sessionDims = retry.session;
const retrySelected = selectTurn(retry, record.currentPromptId, record.turnIndex);
if (retrySelected.turn && (retrySelected.turn.inputTokens + retrySelected.turn.outputTokens) > 0) {
tokens = retrySelected.turn;
const retryTurn = retrySelected.turn;
const retryUsageOk = retryTurn && (retryTurn.inputTokens + retryTurn.outputTokens) > 0;
const retryResponseOk = retryTurn && (!config.recordOutputs || retryTurn.response);
if (retryTurn && (usageMissing ? retryUsageOk : true) && (responseMissing ? retryResponseOk : true)) {
tokens = retryTurn;
tokenExtractionStatus = "ok|matched_after_retry";
}
else {
else if (usageMissing) {
tokenExtractionStatus = "turn_had_no_usage";
}
else {
tokenExtractionStatus = "turn_had_no_response";
}
}
else {
tokenExtractionStatus = "ok";
Expand Down Expand Up @@ -424,6 +445,21 @@ export function startServer(sentry, config, baseAutoTags) {
span.end();
record.pendingTools.delete(key);
};
// Handler for the "Stop" hook event.
//
// Stop is delivered right after the assistant's text has been written to the
// transcript, which makes it the best moment to close the open turn and
// harvest the response. If the turn were only closed on the following
// UserPromptSubmit (or SessionEnd), the close would race the transcript
// flush and could drop gen_ai.output.messages whenever the text was not yet
// on disk. closeCurrentTurn is idempotent (it clears currentTurnSpans), so
// the later close issued by UserPromptSubmit / SessionEnd does nothing.
//
// Events for sessions that are not being tracked are ignored.
const handleStop = async (event) => {
    const session = sessions.get(event.session_id);
    if (session) {
        const hookTranscriptPath = event.transcript_path;
        // Only backfill the path; one that is already recorded is never overwritten.
        if (hookTranscriptPath && !session.transcriptPath) {
            session.transcriptPath = hookTranscriptPath;
        }
        await closeCurrentTurn(session);
    }
};
const handleSessionEnd = async (event) => {
const record = sessions.get(event.session_id);
if (!record)
Expand Down Expand Up @@ -500,6 +536,8 @@ export function startServer(sentry, config, baseAutoTags) {
await handleSessionEnd(event);
return;
case "Stop":
await handleStop(event);
return;
case "PreCompact":
return;
}
Expand Down
54 changes: 46 additions & 8 deletions src/server.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import type {
ResolvedPluginConfig,
SessionEndEvent,
SessionStartEvent,
StopEvent,
UserPromptSubmitEvent,
} from "./types.js";
import { reportPluginError } from "./sentry-errors.js";
Expand Down Expand Up @@ -225,23 +226,43 @@ export function startServer(
// UserPromptSubmit corresponds to exactly one real (non-sidechain,
// non-tool_result) user line.
const selected = selectTurn(result, record.currentPromptId, record.turnIndex);
if (!selected.turn) {
tokenExtractionStatus = "no_matching_turn";
// Synthesized sessions start record.turnIndex at -1 even when the
// transcript already holds prior real turns (collector spawned mid-
// session or self-healed after a restart). The local ordinal no longer
// aligns with the transcript's real-turn index, so an ordinal fallback
// can silently attach an earlier turn's response to a later turn.
// Better to drop one turn's data than to attribute the wrong assistant
// text to it — refuse ordinal matches under synthesis.
const unsafeOrdinal = record.synthesized && selected.matchedBy === "ordinal";
if (!selected.turn || unsafeOrdinal) {
tokenExtractionStatus = unsafeOrdinal
? "no_matching_turn_synthesized_ordinal"
: "no_matching_turn";
} else {
tokens = selected.turn;
if (tokens.inputTokens + tokens.outputTokens === 0) {
// Late-flush hypothesis: assistant usage may not yet be on disk.
// Sleep briefly and try once more.
// Late-flush hypothesis: assistant usage OR assistant text may not yet
// be on disk when the prior turn closes. Retry once if either is
// missing — without the response retry, turns whose usage flushed
// before text silently drop gen_ai.output.messages, leaving Sentry AI
// Conversations with only the inputs.
const usageMissing = tokens.inputTokens + tokens.outputTokens === 0;
const responseMissing = config.recordOutputs && !tokens.response;
if (usageMissing || responseMissing) {
await new Promise((r) => setTimeout(r, 200));
const retry = readTranscript(record.transcriptPath);
parseDegraded = retry.degraded;
sessionDims = retry.session;
const retrySelected = selectTurn(retry, record.currentPromptId, record.turnIndex);
if (retrySelected.turn && (retrySelected.turn.inputTokens + retrySelected.turn.outputTokens) > 0) {
tokens = retrySelected.turn;
const retryTurn = retrySelected.turn;
const retryUsageOk = retryTurn && (retryTurn.inputTokens + retryTurn.outputTokens) > 0;
const retryResponseOk = retryTurn && (!config.recordOutputs || retryTurn.response);
if (retryTurn && (usageMissing ? retryUsageOk : true) && (responseMissing ? retryResponseOk : true)) {
tokens = retryTurn;
tokenExtractionStatus = "ok|matched_after_retry";
} else {
} else if (usageMissing) {
tokenExtractionStatus = "turn_had_no_usage";
} else {
tokenExtractionStatus = "turn_had_no_response";
}
} else {
tokenExtractionStatus = "ok";
Expand Down Expand Up @@ -499,6 +520,21 @@ export function startServer(
record.pendingTools.delete(key);
};

// Handler for the "Stop" hook event.
//
// Stop is delivered right after the assistant's text has been written to the
// transcript, which makes it the best moment to close the open turn and
// harvest the response. If the turn were only closed on the following
// UserPromptSubmit (or SessionEnd), the close would race the transcript
// flush and could drop gen_ai.output.messages whenever the text was not yet
// on disk. closeCurrentTurn is idempotent (it clears currentTurnSpans), so
// the later close issued by UserPromptSubmit / SessionEnd does nothing.
//
// Events for sessions that are not being tracked are ignored.
const handleStop = async (event: StopEvent): Promise<void> => {
  const session = sessions.get(event.session_id);
  if (session) {
    const hookTranscriptPath = event.transcript_path;
    // Only backfill the path; one that is already recorded is never overwritten.
    if (hookTranscriptPath && !session.transcriptPath) {
      session.transcriptPath = hookTranscriptPath;
    }
    await closeCurrentTurn(session);
  }
};

const handleSessionEnd = async (event: SessionEndEvent): Promise<void> => {
const record = sessions.get(event.session_id);
if (!record) return;
Expand Down Expand Up @@ -566,6 +602,8 @@ export function startServer(
await handleSessionEnd(event);
return;
case "Stop":
await handleStop(event);
return;
case "PreCompact":
return;
}
Expand Down
1 change: 1 addition & 0 deletions src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,7 @@ export interface PreCompactEvent extends AiobsEnvelope {
export interface StopEvent extends AiobsEnvelope {
hook_event_name: "Stop";
session_id: string;
transcript_path?: string;
}

export type HookEvent =
Expand Down
76 changes: 76 additions & 0 deletions tests/server-lifecycle.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,52 @@ describe("server: reader integration (C1/C6)", () => {
rmSync(dir, { recursive: true, force: true });
}
});

});

// Suite: a Stop hook must close the open turn and put the assistant's text on
// the gen_ai.chat span, and a later SessionEnd must not emit a second span.
describe("server: Stop hook closes the turn with response", () => {
// Per-test fixtures, re-created in beforeEach.
let port: number;
let sentry: ReturnType<typeof makeFakeSentry>;
let close: () => Promise<void>;

beforeEach(async () => {
port = await findFreePort();
// NOTE(review): presumably startServer reads the port from this env var — confirm.
process.env.SENTRY_COLLECTOR_PORT = String(port);
sentry = makeFakeSentry();
// Stop's whole job is to make output capture reliable, so recordOutputs
// must be on for the assertion to mean anything.
const cfg: ResolvedPluginConfig = { ...baseConfig, recordOutputs: true, recordInputs: true };
const server = startServer(sentry as never, cfg, baseTags);
close = server.close;
// Poll /health up to 25 times, 50 ms apart, and stop as soon as it answers
// OK. Fetch errors are ignored; if the server never becomes healthy the
// loop simply runs out and the test proceeds anyway.
for (let i = 0; i < 25; i++) {
try { const r = await fetch(`http://127.0.0.1:${port}/health`); if (r.ok) break; } catch { /* ignore */ }
await new Promise((r) => setTimeout(r, 50));
}
});
// Shut the server down and remove the env var so it does not leak into other tests.
afterEach(async () => { await close(); delete process.env.SENTRY_COLLECTOR_PORT; });

it("Stop captures gen_ai.output.messages and SessionEnd is idempotent", async () => {
const dir = mkdtempSync(join(tmpdir(), "srv-stop-"));
const tx = join(dir, "s.jsonl");
// Transcript fixture: one user line (promptId P1) followed by one assistant
// line that carries both usage and the text "hi there".
writeFileSync(tx, [
JSON.stringify({ type: "user", promptId: "P1", message: { content: "hello" } }),
JSON.stringify({ type: "assistant", message: { model: "claude-opus-4-7", usage: { input_tokens: 5, output_tokens: 7 }, content: [{ type: "text", text: "hi there" }] } }),
].join("\n"), "utf8");
try {
await postHook(port, { hook_event_name: "SessionStart", session_id: "stop1", transcript_path: tx });
await postHook(port, { hook_event_name: "UserPromptSubmit", session_id: "stop1", prompt: "hello", prompt_id: "P1" });
await postHook(port, { hook_event_name: "Stop", session_id: "stop1", transcript_path: tx });
// The chat span must already exist and carry the assistant text right
// after Stop — before any SessionEnd or follow-up prompt is sent.
const chat = sentry.spans.find((s) => s.op === "gen_ai.chat");
expect(chat).toBeTruthy();
expect(chat!.attrs["gen_ai.output.messages"]).toBe(
JSON.stringify([{ role: "assistant", content: "hi there" }]),
);
// Closing again via SessionEnd must not record a second gen_ai.chat span.
await postHook(port, { hook_event_name: "SessionEnd", session_id: "stop1", transcript_path: tx });
expect(sentry.spans.filter((s) => s.op === "gen_ai.chat")).toHaveLength(1);
} finally {
// Always remove the temporary transcript directory, even on assertion failure.
rmSync(dir, { recursive: true, force: true });
}
});
});

describe("server: per-session git cwd (C4)", () => {
Expand Down Expand Up @@ -355,6 +401,36 @@ describe("server: lazy session synthesis (R2)", () => {
rmSync(dir, { recursive: true, force: true });
}
});

// Regression test: with no SessionStart and a prompt_id that is absent from
// the transcript, the chat span must carry no assistant output and must
// report the no_matching_turn_synthesized_ordinal extraction status.
it("refuses ordinal fallback when the session is synthesized to avoid wrong-turn attribution", async () => {
const dir = mkdtempSync(join(tmpdir(), "srv-r2-ord-"));
const tx = join(dir, "s.jsonl");
try {
// Transcript already holds two prior real turns from before the
// collector spawned. Synthesized session's local turnIndex starts at
// 0 on its first UserPromptSubmit, which would ordinal-match the
// FIRST transcript turn's response — exactly the bug this guards.
writeFileSync(tx, [
JSON.stringify({ type: "user", promptId: "OLD1", message: { content: "old1" } }),
JSON.stringify({ type: "assistant", message: { model: "m", usage: { input_tokens: 5, output_tokens: 5 }, content: [{ type: "text", text: "OLD RESPONSE 1" }] } }),
JSON.stringify({ type: "user", promptId: "OLD2", message: { content: "old2" } }),
JSON.stringify({ type: "assistant", message: { model: "m", usage: { input_tokens: 5, output_tokens: 5 }, content: [{ type: "text", text: "OLD RESPONSE 2" }] } }),
].join("\n"), "utf8");
// Skip SessionStart; UserPromptSubmit with a prompt_id that does NOT
// exist in the transcript forces ordinal fallback.
// NOTE(review): the _aiobs.context.cwd payload presumably feeds session
// synthesis — confirm against the server's envelope handling.
await postHook(port, { hook_event_name: "UserPromptSubmit", session_id: "syn-ord", prompt: "new", prompt_id: "NEW_NOT_IN_TRANSCRIPT", _aiobs: { context: { cwd: dir } } });
// The transcript path is first supplied here, on SessionEnd; the
// UserPromptSubmit above did not include one.
await postHook(port, { hook_event_name: "SessionEnd", session_id: "syn-ord", transcript_path: tx });
const chat = sentry.spans.find((s) => s.op === "gen_ai.chat");
expect(chat).toBeTruthy();
// We must NOT have inherited OLD1's response.
expect(chat!.attrs["gen_ai.output.messages"]).toBeUndefined();
expect(chat!.attrs["claude_code.usage_extraction.status"]).toBe(
"no_matching_turn_synthesized_ordinal",
);
} finally {
// Always remove the temporary transcript directory, even on assertion failure.
rmSync(dir, { recursive: true, force: true });
}
});
});

describe("server: dropped attr + heartbeat (R3/R4)", () => {
Expand Down
Loading