diff --git a/CHANGELOG.md b/CHANGELOG.md index 8df5ac2744..2ff8d48482 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,10 @@ ## [Unreleased] +### Fixed + +- **Chat transcript no longer renders blank (only date separators) for a session full of empty-content recovered turns (#3875).** Two compounding bugs. **Render side:** the per-message render extracted a Thinking trace only from inline ``/channel/turn tags inside `content` — it never read a message's separate `reasoning` field (kept that way deliberately by #2565). An assistant turn with *empty visible content* but a populated `reasoning` field (e.g. a run-journal-recovered anchor: empty `content` + `reasoning` + `_recovered_from_run_journal`) therefore rendered no Thinking card and collapsed to an empty hidden anchor; a session made entirely of such rows painted as nothing but `SUNDAY`/`SATURDAY` date dividers. In legacy (Compact-tool-activity-off) mode, `renderMessages()` now falls back to the message's `reasoning`/`reasoning_content` field (via `_assistantReasoningPayloadText`) when the inline pass finds nothing AND the turn has no visible content, so the reasoning surfaces as a Thinking card and the turn is never blank — scoped to empty-content turns so normal answer-bearing messages are unchanged, and legacy-only because the simplified/Worklog path already derives reasoning (with an exact-answer echo-strip) higher up. **Data side:** run-journal recovery created an empty assistant "anchor" to host recovered tool cards for a tool-first stream, but the lazy read-side retry path re-ran recovery on every `get_session()` and a tool-first stream has no text to dedup on — so each retry, and each distinct interrupted stream over a session's life, appended another empty anchor, letting a heavily-interrupted session accumulate thousands of empty rows. `ensure_assistant_anchor` now reuses an existing empty recovered anchor for the same stream instead of appending a fresh one (one anchor per stream). (#3875) + ## [v0.51.346] — 2026-06-09 — Release LJ (PWA notification controls) ### Added diff --git a/api/models.py b/api/models.py index 3abc89d762..a358e4c45a 100644 --- a/api/models.py +++ b/api/models.py @@ -1421,6 +1421,30 @@ def ensure_assistant_anchor(created_at: float | None = None) -> int: # A stream can start with tools before any text. Keep those tools # visible after restart with an empty recovered assistant anchor instead # of inventing synthetic progress prose. + # + # Dedup guard (#3875): reuse an existing empty recovered anchor for THIS + # stream instead of appending a fresh one. The lazy read-side retry path + # (_retry_journal_recovery_in_place) re-runs this recovery on repeated + # get_session() calls, and a tool-first stream that never emitted text + # has no content to dedup on (flush_assistant() returns early on empty), + # so without this guard each retry — and each distinct interrupted stream + # over the session's life — appends another empty anchor. A session that + # was interrupted-and-recovered many times then accumulates thousands of + # empty content-less assistant rows, bloating the file and (combined with + # the render path) painting the transcript blank. One anchor per stream + # is all that's needed to host its recovered tool cards. + for _existing_idx in range(len(session.messages) - 1, -1, -1): + _m = session.messages[_existing_idx] + if not isinstance(_m, dict): + continue + if ( + _m.get('_recovered_from_run_journal') + and _m.get('_recovered_stream_id') == stream_id + and _m.get('role') == 'assistant' + and not str(_m.get('content') or '').strip() + ): + current_assistant_idx = _existing_idx + return _existing_idx session.messages.append({ 'role': 'assistant', 'content': '', diff --git a/static/ui.js b/static/ui.js index 8033eb5622..31354ca6c4 100644 --- a/static/ui.js +++ b/static/ui.js @@ -8240,6 +8240,29 @@ function renderMessages(options){ seg.setAttribute('data-live-assistant','1'); } if(_ERR_MSG_RE.test(String(content||'').trim())) seg.dataset.error='1'; + // A turn whose visible content is empty but which carries a separate + // `reasoning` field (e.g. a run-journal-recovered anchor: empty content + + // reasoning + `_recovered_from_run_journal`) extracts NO inline thinkingText + // and would render no Thinking Card at all — collapsing to an empty hidden + // anchor. A session made entirely of such rows then paints blank (only date + // separators) — the #3875 reporter's exact case (Compact tool activity OFF, + // i.e. legacy mode). Surface the message's reasoning payload as the Thinking + // Card source for these empty-content turns so the turn is never blank. + // + // LEGACY-MODE ONLY (!isSimplifiedToolCalling()): the simplified/Worklog path + // already derives reasoning above (line ~8149 via + // _worklogReasoningTextFromMessage, which strips an exact visible-answer echo + // so reasoning duplicating a sibling answer is not re-shown). Repopulating the + // raw reasoning here would bypass that echo-strip and re-render the duplicate + // as a Worklog Thinking card (Codex gate catch). In legacy mode there is no + // Worklog folding, so the raw payload is the correct Thinking-card source. + // Stays OUT of the inline-content `thinkingText` extraction block (#2565) and + // only fires for empty-content/no-inline-thinking turns, so answer-bearing + // messages are unchanged. + if(!isUser&&!isSimplifiedToolCalling()&&!thinkingText&&!String(content||'').trim()&&!filesHtml&&!statusHtml){ + const _reasoningPayload=_assistantReasoningPayloadText(m); + if(_reasoningPayload) thinkingText=_reasoningPayload; + } if(thinkingText&&window._showThinking!==false){ if(isSimplifiedToolCalling()&&_assistantThinkingBelongsInWorklog(m, rawIdx, toolCallAssistantIdxs)) assistantThinking.set(rawIdx, thinkingText); else if(window._showThinking!==false) seg.insertAdjacentHTML('beforeend', _thinkingCardHtml(thinkingText)); diff --git a/tests/test_issue3875_blank_transcript_failsafe.py b/tests/test_issue3875_blank_transcript_failsafe.py index 550150d374..876e31cf86 100644 --- a/tests/test_issue3875_blank_transcript_failsafe.py +++ b/tests/test_issue3875_blank_transcript_failsafe.py @@ -87,3 +87,60 @@ def test_failsafe_preserves_collapsed_worklog_when_visible_answer_exists(): assert "assistant-segment-anchor" in failsafe # The live turn drives its own state and must be excluded from the sweep. assert "liveAssistantTurn" in failsafe + + +def test_render_surfaces_reasoning_field_for_empty_content_turn(): + """#3875 (real reporter case): an assistant turn with empty visible content but + a separate `reasoning` field must surface that reasoning as a Thinking card. + + The per-segment inline thinkingText extraction only mines /channel/turn + tags out of `content`; it must NOT read `m.reasoning` (that constraint is + enforced by #2565 — reasoning metadata stays low-priority Worklog detail, never + inline-content extraction). So a run-journal-recovered anchor (empty content + + reasoning + `_recovered_from_run_journal`) extracted no thinkingText, rendered no + Thinking card, and collapsed to an empty hidden anchor — a session of such rows + painted blank (only date separators). The fix, at the segment-emission point + (AFTER the #2565-guarded inline extraction block, in LEGACY mode only), falls + back to `_assistantReasoningPayloadText(m)` and reuses `thinkingText` ONLY when + there is no inline thinkingText AND no visible content/files/status — keeping + reasoning out of the forbidden extraction block while ensuring the turn is never + blank. Legacy-only because the simplified/Worklog path already derives reasoning + (with an exact-visible-answer echo-strip) higher up. + """ + body = _function_body(UI_JS, "renderMessages") + # The fallback exists at emission time (after the #2565-guarded inline + # extraction block), reusing thinkingText but sourced from the reasoning payload. + assert "_assistantReasoningPayloadText(m)" in body, ( + "the empty-turn path must fall back to the message's reasoning payload" + ) + # It is scoped to LEGACY mode (simplified path already derives reasoning with + # echo-strip) AND empty-content turns with no inline thinkingText, so an + # answer-bearing message's rendering is unchanged and a Worklog echo is not + # double-rendered (Codex gate catch). + assert "!isUser&&!isSimplifiedToolCalling()&&!thinkingText&&!String(content||'').trim()&&!filesHtml&&!statusHtml" in body, ( + "the reasoning fallback must be scoped to legacy-mode empty-content/no-inline-thinking turns" + ) + + +def test_reasoning_fallback_stays_out_of_inline_extraction_block_2565(): + """Guard against regressing #2565: the reasoning fallback must NOT live in the + inline-content `thinkingText` extraction block (between `let thinkingText='';` + and `const isUser=...`). That block must never reference `m.reasoning`.""" + src = UI_JS + extraction = src.split("let thinkingText='';", 1)[1].split("const isUser=m.role==='user';", 1)[0] + assert "m.reasoning" not in extraction + assert "m.reasoning_content" not in extraction + assert "_assistantReasoningPayloadText" not in extraction, ( + "the reasoning fallback must live at the segment-emission point, not inside " + "the inline-content extraction block (#2565)" + ) + + +def test_assistant_reasoning_payload_reads_reasoning_fields(): + """The fallback relies on `_assistantReasoningPayloadText` reading the message's + `reasoning` / `reasoning_content` fields (not just inline content tags).""" + payload_fn = _function_body(UI_JS, "_assistantReasoningPayloadText") + # Reads the direct reasoning fields off the message object. + assert "m.reasoning_content||m.reasoning||m.thinking||m._reasoning" in payload_fn + + diff --git a/tests/test_issue3875_recovery_anchor_dedup.py b/tests/test_issue3875_recovery_anchor_dedup.py new file mode 100644 index 0000000000..c880efaceb --- /dev/null +++ b/tests/test_issue3875_recovery_anchor_dedup.py @@ -0,0 +1,130 @@ +"""Regression tests for #3875 — blank transcript from accumulated empty recovery anchors. + +A session that was interrupted-and-recovered many times accumulated thousands of +empty-content assistant rows tagged ``_recovered_from_run_journal``. Each was an +"anchor" created by ``_append_journaled_partial_output`` / ``ensure_assistant_anchor`` +to host recovered tool cards for a tool-first stream (one that emitted tools before +any visible text). Because a tool-first stream has no text to dedup on, the read-side +lazy-retry path re-created a fresh empty anchor on every retry, and every distinct +interrupted stream added its own — so the session filled with empty rows. Combined +with the render path (which dropped empty-content reasoning-only rows), the transcript +painted blank (only date separators). + +This file covers the DATA side: the anchor-dedup guard so recovery reuses a single +empty anchor per stream instead of appending an unbounded run of them. The render-side +fix (surface the message's `reasoning` field so an empty-content turn never paints +blank) is covered by tests/test_issue3875_blank_transcript_failsafe.py. +""" +from __future__ import annotations + +import pytest + +import api.profiles as profiles +from api.models import Session, _append_journaled_partial_output +from api.run_journal import append_run_event + + +@pytest.fixture +def hermes_home(tmp_path, monkeypatch): + # Mirror tests/test_session_lost_response_regression.py — isolate HERMES_HOME + # so Session.save() + run-journal writes land in a throwaway sandbox. + home = tmp_path / "hermes_home" + home.mkdir() + (home / "sessions").mkdir() + monkeypatch.setenv("HERMES_HOME", str(home)) + monkeypatch.setattr(profiles, "_DEFAULT_HERMES_HOME", home) + return home + + +def _tool_first_journal(session_id: str, stream_id: str) -> None: + """Write a run journal for a stream that emitted a tool BEFORE any text — + the shape that forces an empty assistant anchor on recovery.""" + append_run_event(session_id, stream_id, "tool", {"name": "search_files", "preview": "q=x"}) + append_run_event(session_id, stream_id, "tool_complete", {"name": "search_files", "preview": "done"}) + + +def test_tool_first_recovery_creates_single_empty_anchor(hermes_home): + sid = "issue3875_anchor" + stream_id = "stream-A" + _tool_first_journal(sid, stream_id) + s = Session(session_id=sid, title="repro", messages=[{"role": "user", "content": "go"}]) + + # First recovery: one empty anchor is created to host the recovered tool card. + assert _append_journaled_partial_output(s, stream_id, dedupe_existing=True) is True + anchors = [ + m for m in s.messages + if isinstance(m, dict) + and m.get("_recovered_from_run_journal") + and m.get("role") == "assistant" + and not str(m.get("content") or "").strip() + ] + assert len(anchors) == 1, "first recovery should create exactly one empty anchor" + + +def test_repeated_recovery_does_not_accumulate_empty_anchors(hermes_home): + """The #3875 root cause: re-running recovery for the SAME stream must reuse the + existing empty anchor, not pile up a fresh one each time.""" + sid = "issue3875_repeat" + stream_id = "stream-B" + _tool_first_journal(sid, stream_id) + s = Session(session_id=sid, title="repro", messages=[{"role": "user", "content": "go"}]) + + for _ in range(20): + _append_journaled_partial_output(s, stream_id, dedupe_existing=True) + + anchors = [ + m for m in s.messages + if isinstance(m, dict) + and m.get("_recovered_from_run_journal") + and m.get("role") == "assistant" + and not str(m.get("content") or "").strip() + ] + assert len(anchors) == 1, ( + f"repeated recovery for one stream must reuse a single empty anchor, " + f"got {len(anchors)} (the unbounded-accumulation bug)" + ) + + +def test_distinct_streams_get_distinct_anchors(hermes_home): + """Dedup is scoped per stream — two genuinely different interrupted streams + each keep their own anchor (we are not collapsing unrelated turns).""" + sid = "issue3875_multi" + s = Session(session_id=sid, title="repro", messages=[{"role": "user", "content": "go"}]) + for stream_id in ("stream-X", "stream-Y", "stream-Z"): + _tool_first_journal(sid, stream_id) + # run each twice to prove per-stream reuse on top of per-stream distinctness + _append_journaled_partial_output(s, stream_id, dedupe_existing=True) + _append_journaled_partial_output(s, stream_id, dedupe_existing=True) + + anchors = [ + m for m in s.messages + if isinstance(m, dict) + and m.get("_recovered_from_run_journal") + and m.get("role") == "assistant" + and not str(m.get("content") or "").strip() + ] + stream_ids = {m.get("_recovered_stream_id") for m in anchors} + assert len(anchors) == 3, f"expected one anchor per distinct stream, got {len(anchors)}" + assert stream_ids == {"stream-X", "stream-Y", "stream-Z"} + + +def test_text_bearing_recovery_still_appends_real_content(hermes_home): + """The dedup guard must not suppress recovery of genuine visible text — a + stream that emitted tokens still produces a content-bearing recovered row.""" + sid = "issue3875_text" + stream_id = "stream-T" + append_run_event(sid, stream_id, "token", {"text": "Hello "}) + append_run_event(sid, stream_id, "token", {"text": "world"}) + append_run_event(sid, stream_id, "done", {}) + s = Session(session_id=sid, title="repro", messages=[{"role": "user", "content": "go"}]) + + assert _append_journaled_partial_output(s, stream_id, dedupe_existing=True) is True + recovered_text = [ + m for m in s.messages + if isinstance(m, dict) + and m.get("_recovered_from_run_journal") + and str(m.get("content") or "").strip() + ] + assert any("Hello world" in m["content"] for m in recovered_text), ( + "token-bearing recovery must still append the visible assistant text" + )