diff --git a/CHANGELOG.md b/CHANGELOG.md
index 11ccf0829f..de579c4bcb 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -26,6 +26,15 @@
## [v0.51.289] — 2026-06-06 — Release JE (hotfix — sidebar ReferenceError #3696 + scope-undef prevention gate)
### Fixed
+- **Live stream turns show the bottom timer immediately after starting.** The
+ first chat-start path now places the live footer timer as soon as the backend
+ returns `stream_id` and `pending_started_at`, instead of only restoring it
+ after a session switch or reconnect. Reloading a currently running session
+ also recreates the live worklog shell after the transcript DOM rebuild, so the
+ stream no longer stays invisible until you switch away and back. Live replay
+ cursors now stay tied to each queued SSE frame instead of the latest global
+ stream event, so reconnects cannot skip queued live output. (#3401,
+ @franksong2702)
- **Sidebar no longer crashes with `ReferenceError: _sessionAttentionState is not defined`.** The session-attention helper was declared *inside* `renderSessionListFromCache()` and relied on function hoisting, but the top-level `_sidebarRowHasVisibleMessages` (reached via `renderSessionListFromCache` → `_partitionSidebarSessionRows`) called it bare — and hoisting is scoped to the enclosing function, so every sidebar cache-render threw and the session list went blank. `_sessionAttentionState` is now a top-level function reachable by both call sites. Regressed in #3672 (v0.51.269). (#3696)
- **Stale-stream terminal events no longer risk a `ReferenceError: source is not defined`.** `_bailOutOfTerminalEventsFromStaleStream` (declared inside `attachLiveStream`) called `_closeSource(source)` against a `source` that was not in its lexical scope — it would have thrown on the late-finalizing-stream path when the user is back in an active session. `source` is now threaded as an explicit parameter. Found by the new scope gate below during review. (#3696)
diff --git a/DESIGN.md b/DESIGN.md
index bafbb6bd69..81593d6fb4 100644
--- a/DESIGN.md
+++ b/DESIGN.md
@@ -150,6 +150,14 @@ A tool card is a debug event row, not a chat message. Show icon, name, short tar
Same visual family as tool-call metadata. They should be quieter than assistant prose and should not use bright tinted full cards unless the user expands them.
+Automatic compression follows a quiet live-only divider treatment rather than a
+tool-card row. Use `Compressing context` while the barrier is active and
+`Context auto-compressed` once the agent continues past it or a completion event
+arrives; render both as centered, non-interactive text with horizontal rules.
+Do not give it a caret, click target, distinct accent color, special leading
+dot, or separate card identity. Once the final answer is settled, omit the
+live-only compression row unless it is needed to explain a visible recovery or
+error state.
+
### Composer
The composer is the command surface. Keep it legible and focused: modest radius, subtle border, transparent inactive chips, no theatrical hover scaling.
diff --git a/api/config.py b/api/config.py
index b1acc0c812..0b3230c7ef 100644
--- a/api/config.py
+++ b/api/config.py
@@ -4871,8 +4871,13 @@ def __init__(self):
self._lock = threading.Lock()
self._subscribers: list[queue.Queue] = []
self._offline_buffer: list[tuple[str, object]] = []
+ self._last_event_id: str | None = None
def subscribe(self) -> queue.Queue:
+ q, _snapshot = self.subscribe_with_snapshot()
+ return q
+
+ def subscribe_with_snapshot(self) -> tuple[queue.Queue, dict[str, object]]:
q: queue.Queue = queue.Queue()
with self._lock:
# Replay buffered events to the new subscriber INSIDE the lock so a
@@ -4882,8 +4887,12 @@ def subscribe(self) -> queue.Queue:
# is safe. Per Opus advisor on stage-292.
for item in self._offline_buffer:
q.put_nowait(item)
+ snapshot = {
+ "offline_buffered_events": len(self._offline_buffer),
+ "last_event_id": self._last_event_id,
+ }
self._subscribers.append(q)
- return q
+ return q, snapshot
def unsubscribe(self, q: queue.Queue) -> None:
with self._lock:
@@ -4892,8 +4901,18 @@ def unsubscribe(self, q: queue.Queue) -> None:
except ValueError:
pass
- def put_nowait(self, item: tuple[str, object]) -> None:
+ def note_last_event_id(self, event_id: str | None) -> None:
+ """Record the latest journal event id without changing the queue shape."""
+ if not event_id:
+ return
+ with self._lock:
+ self._last_event_id = event_id
+
+ def put_nowait(self, item: tuple[str, object] | tuple[str, object, str | None]) -> None:
+ event_id = item[2] if len(item) >= 3 else None
with self._lock:
+ if event_id:
+ self._last_event_id = event_id
subscribers = list(self._subscribers)
if not subscribers:
self._offline_buffer.append(item)
@@ -4902,7 +4921,7 @@ def put_nowait(self, item: tuple[str, object]) -> None:
for q in subscribers:
q.put_nowait(item)
- def diagnostic_snapshot(self) -> dict[str, int]:
+ def diagnostic_snapshot(self) -> dict[str, object]:
"""Return non-sensitive stream observation counters for health checks."""
with self._lock:
return {
diff --git a/api/gateway_chat.py b/api/gateway_chat.py
index 7b5a2497d5..342dbc0dfb 100644
--- a/api/gateway_chat.py
+++ b/api/gateway_chat.py
@@ -224,6 +224,7 @@ def _run_gateway_chat_streaming(
def put_gateway_event(event, data):
if cancel_event.is_set() and event not in ("cancel", "error", "apperror"):
return
+ event_id = None
if run_journal is not None:
try:
journaled = run_journal.append_sse_event(event, data)
@@ -232,8 +233,14 @@ def put_gateway_event(event, data):
STREAM_LAST_EVENT_ID[stream_id] = event_id
except Exception:
logger.debug("Failed to append gateway event %s for stream %s", event, stream_id, exc_info=True)
+ if event_id and hasattr(q, "note_last_event_id"):
+ try:
+ q.note_last_event_id(event_id)
+ except Exception:
+ logger.debug("Failed to note gateway event_id %s for stream %s", event_id, stream_id, exc_info=True)
try:
- q.put_nowait((event, data))
+ queue_item = (event, data, event_id) if event_id and hasattr(q, "subscribe_with_snapshot") else (event, data)
+ q.put_nowait(queue_item)
except Exception:
logger.debug("Failed to put gateway event to queue")
diff --git a/api/routes.py b/api/routes.py
index ece4b8bddf..deac7e825f 100644
--- a/api/routes.py
+++ b/api/routes.py
@@ -2987,8 +2987,8 @@ def _keep_latest_messaging_session_per_source(
get_state_db_session_messages,
get_state_db_session_summary,
merge_session_messages_append_only,
- _session_message_merge_key,
_active_stream_ids,
+ _session_message_merge_key,
_is_empty_partial_activity_message,
_hide_from_default_sidebar,
prune_session_from_index,
@@ -8516,7 +8516,14 @@ def _parse_run_journal_after_seq(qs: dict, stream_id: str | None = None) -> int
return 0
-def _replay_run_journal(handler, stream_id: str, after_seq: int | None) -> bool:
+def _replay_run_journal(
+ handler,
+ stream_id: str,
+ after_seq: int | None,
+ *,
+ max_seq: int | None = None,
+ include_stale: bool = True,
+) -> bool:
summary = find_run_summary(stream_id)
if not summary:
return False
@@ -8524,6 +8531,7 @@ def _replay_run_journal(handler, stream_id: str, after_seq: int | None) -> bool:
str(summary.get("session_id") or ""),
stream_id,
after_seq=after_seq,
+ max_seq=max_seq,
)
for entry in journal.get("events") or []:
_sse_with_id(
@@ -8532,7 +8540,7 @@ def _replay_run_journal(handler, stream_id: str, after_seq: int | None) -> bool:
entry.get("payload"),
entry.get("event_id"),
)
- if not summary.get("terminal"):
+ if include_stale and not summary.get("terminal"):
stale = stale_interrupted_event(
str(summary.get("session_id") or ""),
stream_id,
@@ -8543,6 +8551,13 @@ def _replay_run_journal(handler, stream_id: str, after_seq: int | None) -> bool:
return True
+def _run_journal_same_run_seq(event_id: str | None, stream_id: str) -> int | None:
+ event_run_id, event_seq = _parse_run_journal_event_id(event_id)
+ if event_run_id != stream_id:
+ return None
+ return event_seq
+
+
def _runner_stream_cursor_from_query(qs: dict) -> str | None:
cursor = str(qs.get("cursor", [""])[0] or "").strip()
if cursor:
@@ -8660,26 +8675,58 @@ def _handle_sse_stream(handler, parsed):
except _CLIENT_DISCONNECT_ERRORS:
pass
return True
- subscriber = stream.subscribe() if hasattr(stream, "subscribe") else stream
+ if hasattr(stream, "subscribe_with_snapshot"):
+ subscriber, stream_snapshot = stream.subscribe_with_snapshot()
+ else:
+ subscriber = stream.subscribe() if hasattr(stream, "subscribe") else stream
+ stream_snapshot = {}
handler.send_response(200)
handler.send_header("Content-Type", "text/event-stream; charset=utf-8")
handler.send_header("Cache-Control", "no-cache")
handler.send_header("X-Accel-Buffering", "no")
handler.send_header("Connection", "close")
handler.end_headers()
+ replay_cutoff_seq = None
+ if qs.get("replay", [""])[0] or qs.get("after_seq", [None])[0] not in (None, "") or qs.get("after_event_id", [None])[0]:
+ snapshot_cutoff_seq = _run_journal_same_run_seq(
+ str(stream_snapshot.get("last_event_id") or ""),
+ stream_id,
+ )
+ try:
+ replayed = _replay_run_journal(
+ handler,
+ stream_id,
+ _parse_run_journal_after_seq(qs, stream_id),
+ max_seq=snapshot_cutoff_seq,
+ include_stale=False,
+ )
+ if replayed:
+ replay_cutoff_seq = snapshot_cutoff_seq
+ except _CLIENT_DISCONNECT_ERRORS:
+ raise
+ except Exception:
+ logger.debug("Failed to replay active run journal for stream %s", stream_id, exc_info=True)
try:
while True:
try:
- event, data = subscriber.get(timeout=_SSE_HEARTBEAT_INTERVAL_SECONDS)
+ item = subscriber.get(timeout=_SSE_HEARTBEAT_INTERVAL_SECONDS)
except queue.Empty:
handler.wfile.write(b": heartbeat\n\n")
handler.wfile.flush()
continue
+ if len(item) >= 3:
+ event, data, queued_event_id = item[0], item[1], item[2]
+ else:
+ event, data = item
+ queued_event_id = STREAM_LAST_EVENT_ID.get(stream_id)
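- # Stage-364: emit `id:` from STREAM_LAST_EVENT_ID side-channel so
- # the frontend's `_lastRunJournalSeq` cursor advances during live
- # streaming. Without this, mid-stream error→replay would arrive
- # with after_seq=0 and double-render every journaled event.
+ # Stage-364: emit `id:` on every live frame so the frontend's
+ # `_lastRunJournalSeq` cursor advances during live streaming. Prefer
+ # the event id carried by the queued item itself; fall back to the
+ # STREAM_LAST_EVENT_ID side-channel only when the item carries none.
+ # Without an id, mid-stream error→replay would arrive with
+ # after_seq=0 and double-render every journaled event.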
- event_id = STREAM_LAST_EVENT_ID.get(stream_id)
+ event_id = queued_event_id or STREAM_LAST_EVENT_ID.get(stream_id)
+ event_seq = _run_journal_same_run_seq(event_id, stream_id)
+ if replay_cutoff_seq is not None and event_seq is not None and event_seq <= replay_cutoff_seq:
+ continue
if event_id:
_sse_with_id(handler, event, data, event_id)
else:
@@ -10943,6 +10990,41 @@ def _is_hidden_empty_session(s) -> bool:
)
+def _active_stream_blocks_chat_start(session, stream_id: str | None) -> bool:
+ """Return whether an active_stream_id still owns this session's next turn.
+
+ ``active_stream_id`` is written before the SSE channel is registered, so a
+ very fresh pending turn must also block duplicate chat_start requests. If we
+ only check STREAMS here, a second request can race through the registration
+ gap and overwrite the sidecar owner.
+ """
+ if not stream_id:
+ return False
+ with STREAMS_LOCK:
+ if stream_id in STREAMS:
+ return True
+ try:
+ from api import config as _live_config
+ with _live_config.ACTIVE_RUNS_LOCK:
+ if stream_id in (_live_config.ACTIVE_RUNS or {}):
+ return True
+ except Exception:
+ pass
+ if getattr(session, "pending_user_message", None):
+ try:
+ from api.models import _REPAIR_STALE_PENDING_GRACE_SECONDS
+ grace_seconds = float(_REPAIR_STALE_PENDING_GRACE_SECONDS)
+ except Exception:
+ grace_seconds = 30.0
+ try:
+ pending_started_at = float(getattr(session, "pending_started_at", None) or 0)
+ except Exception:
+ pending_started_at = 0.0
+ if pending_started_at and time.time() - pending_started_at < grace_seconds:
+ return True
+ return False
+
+
def _start_chat_stream_for_session(
s,
*,
@@ -10963,10 +11045,7 @@ def _start_chat_stream_for_session(
diag.stage("active_stream_check") if diag else None
current_stream_id = getattr(s, "active_stream_id", None)
if current_stream_id:
- diag.stage("active_stream_lock_wait") if diag else None
- with STREAMS_LOCK:
- current_active = current_stream_id in STREAMS
- if current_active:
+ if _active_stream_blocks_chat_start(s, current_stream_id):
diag.stage("response_write") if diag else None
return {
"error": "session already has an active stream",
@@ -10984,21 +11063,45 @@ def _start_chat_stream_for_session(
goal_related = True
PENDING_GOAL_CONTINUATION.discard(s.session_id)
- stream_id = uuid.uuid4().hex
session_lock = _get_session_agent_lock(s.session_id)
diag.stage("session_lock_wait") if diag else None
- with session_lock:
- diag.stage("save_pending_state") if diag else None
- was_hidden_empty_session = _is_hidden_empty_session(s)
- _prepare_chat_start_session_for_stream(
- s,
- msg=msg,
- attachments=attachments,
- workspace=workspace,
- model=model,
- model_provider=model_provider,
- stream_id=stream_id,
- )
+ while True:
+ with session_lock:
+ locked_stream_id = getattr(s, "active_stream_id", None)
+ if locked_stream_id:
+ if _active_stream_blocks_chat_start(s, locked_stream_id):
+ diag.stage("response_write") if diag else None
+ return {
+ "error": "session already has an active stream",
+ "active_stream_id": locked_stream_id,
+ "_status": 409,
+ }
+ needs_stale_cleanup = True
+ else:
+ needs_stale_cleanup = False
+ stream_id = uuid.uuid4().hex
+ diag.stage("save_pending_state") if diag else None
+ was_hidden_empty_session = _is_hidden_empty_session(s)
+ _prepare_chat_start_session_for_stream(
+ s,
+ msg=msg,
+ attachments=attachments,
+ workspace=workspace,
+ model=model,
+ model_provider=model_provider,
+ stream_id=stream_id,
+ )
+ break
+ if needs_stale_cleanup:
+ diag.stage("stale_stream_cleanup") if diag else None
+ cleared = _clear_stale_stream_state(s)
+ if not cleared and getattr(s, "active_stream_id", None):
+ diag.stage("response_write") if diag else None
+ return {
+ "error": "session already has an active stream",
+ "active_stream_id": getattr(s, "active_stream_id", None),
+ "_status": 409,
+ }
if was_hidden_empty_session:
publish_session_list_changed("session_new", profile=getattr(s, "profile", None))
diag.stage("turn_journal_submitted") if diag else None
diff --git a/api/run_journal.py b/api/run_journal.py
index 9e1e9f0d7a..b7526797cc 100644
--- a/api/run_journal.py
+++ b/api/run_journal.py
@@ -199,12 +199,15 @@ def read_run_events(
run_id: str,
*,
after_seq: int | None = None,
+ max_seq: int | None = None,
session_dir: Path | None = None,
) -> dict:
path = _run_path(session_id, run_id, session_dir=session_dir)
events, malformed = _read_jsonl(path)
if after_seq is not None:
events = [event for event in events if int(event.get("seq") or 0) > int(after_seq)]
+ if max_seq is not None:
+ events = [event for event in events if int(event.get("seq") or 0) <= int(max_seq)]
return {
"session_id": str(session_id),
"run_id": str(run_id),
diff --git a/api/streaming.py b/api/streaming.py
index c3adda48ad..9f53d044d2 100644
--- a/api/streaming.py
+++ b/api/streaming.py
@@ -341,8 +341,12 @@ def _clarify_timeout_seconds(default: int = 120) -> int:
_WEBUI_PROGRESS_PROMPT = """
WebUI progress guidance:
-- Match the normal Hermes messaging style; do not add extra status updates solely because this is a browser session.
-- For long multi-step work that uses tools, you may provide brief user-visible progress updates before continuing with tool calls.
+- Match the normal Hermes messaging style, but do not let long tool-running WebUI turns appear silent.
+- For long multi-step work that uses tools, emit brief user-visible progress updates as normal assistant content, not only as hidden reasoning.
+- Before the first tool batch in a long task, say what you are about to inspect.
+- After each meaningful batch of tool calls, say what you just confirmed and what you will check next before continuing with more tools.
+- Do not run many independent tool batches back-to-back without visible assistant text between them when the task is still ongoing.
+- Do not keep progress only in reasoning, thinking, or tool-result channels; those are not a substitute for visible interim updates.
- Each update should say what you are about to check, what you just confirmed, or why the next tool call is needed.
- Keep updates concise, factual, and in the user's language. One or two short sentences are enough.
- Do not reveal hidden reasoning, chain-of-thought, private scratchpads, secrets, raw logs, or long tool output.
@@ -2983,6 +2987,37 @@ def _deduplicate_context_messages(messages):
return deduped
+def _prune_context_tool_results_after_compression(agent, context_messages):
+ """Run the active compressor's cheap tool-result pruning on model context.
+
+ Auto-compression can happen mid-turn and then the agent may run more tools
+ before producing the final answer. Those completed tail tool results are
+ model-facing context, but they were produced after the compression pass and
+ therefore did not go through the compressor's tool-output pruning. Apply the
+ same cheap pruning once more after a confirmed compression event. This keeps
+ the visible transcript untouched while preventing the next turn from seeing
+ raw post-compression tool dumps.
+ """
+ if not context_messages:
+ return context_messages
+ compressor = getattr(agent, 'context_compressor', None)
+ prune = getattr(compressor, '_prune_old_tool_results', None)
+ if not callable(prune):
+ return context_messages
+ try:
+ pruned_messages, pruned_count = prune(
+ copy.deepcopy(context_messages),
+ protect_tail_count=getattr(compressor, 'protect_last_n', 20),
+ protect_tail_tokens=getattr(compressor, 'tail_token_budget', None),
+ )
+ except Exception:
+ logger.debug("post-compression context tool-result pruning failed", exc_info=True)
+ return context_messages
+ if not pruned_count:
+ return context_messages
+ return _deduplicate_context_messages(pruned_messages)
+
+
def _restore_reasoning_metadata(previous_messages, updated_messages):
"""Carry forward display-only metadata lost during API-safe history sanitization.
@@ -4666,24 +4701,28 @@ def put(event, data):
# If cancelled, drop all further events except the cancel event itself
if cancel_event.is_set() and event not in ('cancel', 'error'):
return
+ event_id = None
if run_journal is not None:
try:
journaled = run_journal.append_sse_event(event, data)
- # Stage-364: propagate journal event_id via a side-channel dict
- # (STREAM_LAST_EVENT_ID) instead of changing the queue tuple
- # shape — keeping the 2-tuple shape preserves backward
- # compatibility for tests and any non-SSE queue consumer. The
- # SSE handler reads this dict at emit time to populate `id:`
- # on every live frame, which lets the frontend's cursor
- # advance during live streaming and prevents replay from
- # double-rendering tokens after a mid-stream error→reconnect.
+ # Carry the exact journal id for this queued frame. A global
+ # "latest event" side channel is still kept for legacy queues,
+ # but StreamChannel subscribers need the per-item id so a
+ # queued backlog cannot advance the browser cursor past an
+ # undelivered event.
event_id = (journaled or {}).get('event_id') if isinstance(journaled, dict) else None
if event_id:
STREAM_LAST_EVENT_ID[stream_id] = event_id
except Exception:
logger.debug("Failed to append run journal event %s for stream %s", event, stream_id, exc_info=True)
+ if event_id and hasattr(q, "note_last_event_id"):
+ try:
+ q.note_last_event_id(event_id)
+ except Exception:
+ logger.debug("Failed to note event_id %s for stream %s", event_id, stream_id, exc_info=True)
try:
- q.put_nowait((event, data))
+ queue_item = (event, data, event_id) if event_id and hasattr(q, "subscribe_with_snapshot") else (event, data)
+ q.put_nowait(queue_item)
except Exception:
logger.debug("Failed to put event to queue")
@@ -4711,7 +4750,7 @@ def _agent_status_callback(kind, message):
if _is_compression_start:
put('compressing', {
'session_id': session_id,
- 'message': 'Auto-compressing context to continue...',
+ 'message': 'Compressing context',
})
return
# Pass through rate-limit and fallback messages so the frontend can
@@ -6384,6 +6423,10 @@ def _periodic_checkpoint():
_compressed = True
# Notify the frontend that compression happened
if _compressed:
+ s.context_messages = _prune_context_tool_results_after_compression(
+ agent,
+ s.context_messages,
+ )
visible_after = visible_messages_for_anchor(s.messages, auto_compression=True)
# Find the LAST [CONTEXT COMPACTION] marker in s.messages
# and count visible messages before it. This is the correct
@@ -6447,7 +6490,7 @@ def _periodic_checkpoint():
'old_session_id': _compression_origin_session_id,
'new_session_id': _compression_continuation_session_id,
'continuation_session_id': _compression_continuation_session_id,
- 'message': 'Context auto-compressed to continue the conversation',
+ 'message': 'Compression finished',
'usage': _live_usage_snapshot(),
})
@@ -7869,6 +7912,12 @@ def cancel_stream(stream_id: str) -> bool:
logger.debug("Failed to clear session state on cancel for %s", _cancel_session_id)
if _emit_cancel_event and q:
+ _cancel_event_id = STREAM_LAST_EVENT_ID.get(stream_id)
+ if _cancel_event_id and hasattr(q, "note_last_event_id"):
+ try:
+ q.note_last_event_id(_cancel_event_id)
+ except Exception:
+ logger.debug("Failed to note cancel event_id %s for stream %s", _cancel_event_id, stream_id, exc_info=True)
try:
q.put_nowait(('cancel', {'message': 'Cancelled by user'}))
except Exception:
diff --git a/docs/UIUX-GUIDE.md b/docs/UIUX-GUIDE.md
index 63f66f9c0d..5791fe3bca 100644
--- a/docs/UIUX-GUIDE.md
+++ b/docs/UIUX-GUIDE.md
@@ -79,6 +79,15 @@ not raw debug detail. Compact Activity may collapse tool arguments, long tool
results, and low-level reasoning detail, but it must not make concise
user-visible progress text available only inside a collapsed disclosure.
+Automatic compression is a live-only context barrier, not a special branded
+tool card. Render it as a centered, non-interactive divider with quiet horizontal
+rules: `Compressing context` while the compression barrier is active and
+`Context auto-compressed` when the agent has continued or the compression
+completion event arrives. Do not give it a caret, click target, leading status
+dot, or standalone running badge. In settled final history, remove live-only
+automatic compression rows unless they explain a visible recovery or error
+state.
+
The existing two-stage proposal in `docs/ui-ux/two-stage-proposal.html` records a
compatible direction for long turns: live work can be grouped as a worklog, then
settled history can collapse while the final answer reads as the calm
diff --git a/docs/rfcs/webui-run-state-consistency-contract.md b/docs/rfcs/webui-run-state-consistency-contract.md
index 9fa365cdda..6a3c839fb6 100644
--- a/docs/rfcs/webui-run-state-consistency-contract.md
+++ b/docs/rfcs/webui-run-state-consistency-contract.md
@@ -89,6 +89,16 @@ while WebUI still has multiple overlapping state stores.
reference cards are recovery/handoff material. They must not be treated as a
new user request, active-turn content, or the default visible explanation for
the current answer.
+ Automatic compression may appear during a live turn only as a quiet,
+ non-interactive context divider in the Worklog timeline, not as a clickable
+ tool row. It should use action wording: `Compressing context` while active
+ and `Context auto-compressed` when the agent has continued past the
+ compression barrier or when a completion event arrives. The timer is
+ diagnostic detail, not the source of truth for the divider's running state.
+ Later tool, reasoning, or interim assistant events prove the compression
+ barrier has passed even if no explicit completion event was delivered.
+ Settled final history should omit live-only automatic-compression rows unless
+ there is a user-visible recovery or error state to explain.
7. **Observation has a degraded path.** Long-running or many-session observation
should expose enough heartbeat/degraded status that the UI does not appear
silent and ordinary APIs do not stall behind active streams.
diff --git a/static/index.html b/static/index.html
index dc8c8725e1..c91eac8015 100644
--- a/static/index.html
+++ b/static/index.html
@@ -419,6 +419,7 @@
`;
+ return row;
}
function _compressionCardsNode(state){
const wrap=document.createElement('div');
@@ -6384,6 +6985,37 @@ function appendLiveCompressionCard(state){
const inner=_assistantTurnBlocks(turn);
if(!inner) return false;
closeCurrentLiveActivityGroup();
+ if(state.automatic){
+ const group=ensureLiveWorklogContainer(inner,{activityKey:_activityKeyForLiveTurn()});
+ const list=_toolWorklogListEl(group);
+ if(!group||!list) return false;
+ const node=_autoCompressionWorklogNode(state);
+ node.setAttribute('data-live-compression-card','1');
+ node.setAttribute('data-compression-phase',String(state.phase||''));
+ if(state.phase==='running'){
+ const started=_compressionElapsedStartedAt(state)||Date.now()/1000;
+ node.setAttribute('data-compression-started-at',String(started));
+ node.setAttribute('data-compression-message',String(state.message||'Compressing context'));
+ _startCompressionElapsedTimer();
+ } else {
+ node.removeAttribute('data-compression-started-at');
+ node.removeAttribute('data-compression-message');
+ const _activeCompState = _compressionStateForCurrentSession();
+ if (!_activeCompState || !_activeCompState.automatic || _activeCompState.phase !== 'running') {
+ _clearCompressionElapsedTimer();
+ }
+ }
+ const existingRunning=group.querySelector('[data-live-compression-card="1"][data-compression-started-at]');
+ const existingDone=Array.from(group.querySelectorAll('[data-live-compression-card="1"][data-compression-phase="done"]')).pop();
+ const existing=state.phase==='running'?existingRunning:(existingRunning||existingDone);
+ if(existing) existing.replaceWith(node);
+ else list.appendChild(node);
+ _syncToolCallGroupSummary(group);
+ _moveLiveRunStatusToTurnEnd();
+ _restoreMessageScrollSnapshotSameFrame(scrollSnapshot);
+ if(typeof scrollIfPinned==='function') scrollIfPinned();
+ return true;
+ }
const node=_compressionCardsNode(state);
if(!node) return false;
node.setAttribute('data-live-compression-card','1');
@@ -6920,6 +7552,36 @@ function _cliToolCardHasDiffSnippet(resultSnippet, patchSnippet){
return !!patchSnippet || _cliLooksLikePatchDiff(resultSnippet);
}
+function _assistantToolAnchorIdxForMessage(messages, rawIdx){
+ const list=Array.isArray(messages)?messages:[];
+ const current=list[rawIdx];
+ if(_assistantMessageHasVisibleContent(current)) return rawIdx;
+ if(_assistantReasoningPayloadText(current)) return rawIdx;
+ for(let idx=rawIdx-1;idx>=0;idx--){
+ if(_assistantMessageHasVisibleContent(list[idx])) return idx;
+ }
+ return rawIdx;
+}
+function _toolArgsSnapshot(args, limit){
+ if(!args||typeof args!=='object'||Array.isArray(args)) return {};
+ const max=Math.max(1,Number(limit)||6);
+ const priority=[
+ 'query','search_query','searchQuery','pattern','q','keyword','keywords','term',
+ 'url','uri','command','cmd','path','file','file_path','filename','file_glob',
+ 'glob','offset','limit',
+ ];
+ const keys=[
+ ...priority.filter(k=>Object.prototype.hasOwnProperty.call(args,k)),
+ ...Object.keys(args).filter(k=>!priority.includes(k)),
+ ].slice(0,max);
+ const out={};
+ keys.forEach(k=>{
+ const v=String(args[k]);
+ out[k]=v.slice(0,120)+(v.length>120?'...':'');
+ });
+ return out;
+}
+
function _captureMessageScrollSnapshot(){
const el=$('messages');
if(!el) return null;
@@ -6974,8 +7636,8 @@ function _scrollAfterMessageRender(preserveScroll, scrollSnapshot){
// pinned users stay at bottom; users who manually scrolled up get their
// pre-render scrollTop restored after the DOM replacement.
if(preserveScroll){
- if(_scrollPinned) scrollIfPinned();
- else _restoreMessageScrollSnapshot(scrollSnapshot);
+ if(_followMessagesAfterDomReplace()) return;
+ _restoreMessageScrollSnapshot(scrollSnapshot);
return;
}
if(S.activeStreamId){
@@ -6991,6 +7653,10 @@ function renderMessages(options){
const inner=$('msgInner');
const sid=S.session?S.session.session_id:null;
const msgCount=S.messages.length;
+ // During session switch, S.messages is intentionally cleared while the full
+ // message fetch is still in flight. Other async updates can still call
+ // renderMessages() in this window. Keep the existing loading placeholder.
+ if(_loadingSessionId===sid&&msgCount===0&&inner) return;
if(sid!==_messageRenderWindowSid) _resetMessageRenderWindow(sid);
const renderWindowSize=_currentMessageRenderWindowSize();
let cachedRenderSignature=null;
@@ -7024,7 +7690,16 @@ function renderMessages(options){
}
}
- const compressionState=_compressionStateForCurrentSession();
+ const compressionState=(()=>{
+ let compressionState=_compressionStateForCurrentSession();
+ if(!S.busy && compressionState && compressionState.automatic){
+ window._compressionUi=null;
+ _clearCompressionElapsedTimer();
+ _setCompressionSessionLock(null);
+ compressionState=null;
+ }
+ return compressionState;
+ })();
if(window._compressionUi && !compressionState) clearCompressionUi();
const handoffState=_handoffStateForCurrentSession();
if(window._handoffUi && !handoffState) window._handoffUi=null;
@@ -7049,6 +7724,8 @@ function renderMessages(options){
const hasPartialTc=Array.isArray(m._partial_tool_calls)&&m._partial_tool_calls.length>0;
if(hasTc||hasTu||hasPartialTc||_messageHasReasoningPayload(m)) return true;
if(_assistantMessageHasVisibleContent(m)) return true;
+ const visibleText=_isAssistantEmptyPlaceholderContent(m,msgContent(m))?'':msgContent(m);
+ return m._statusCard||visibleText||m.attachments?.length;
}
return m._statusCard||msgContent(m)||m.attachments?.length;
});
@@ -7080,7 +7757,8 @@ function renderMessages(options){
const hasTc=Array.isArray(m.tool_calls)&&m.tool_calls.length>0;
const hasTu=Array.isArray(m.content)&&m.content.some(p=>p&&p.type==='tool_use');
const hasPartialTc=Array.isArray(m._partial_tool_calls)&&m._partial_tool_calls.length>0;
- if(msgContent(m)||m._statusCard||m.attachments?.length||(m.role==='assistant'&&(hasTc||hasTu||hasPartialTc||_messageHasReasoningPayload(m)||_assistantMessageHasVisibleContent(m)))) rebuilt.push({m,rawIdx:ri});
+ const visibleText=_isAssistantEmptyPlaceholderContent(m,msgContent(m))?'':msgContent(m);
+ if(visibleText||m._statusCard||m.attachments?.length||(m.role==='assistant'&&(hasTc||hasTu||hasPartialTc||_messageHasReasoningPayload(m)||_assistantMessageHasVisibleContent(m)))) rebuilt.push({m,rawIdx:ri});
ri++;
}
_visWithIdxCache=rebuilt;
@@ -7103,6 +7781,8 @@ function renderMessages(options){
const hiddenBeforeCount=windowStart;
const renderVisWithIdx=visWithIdx.slice(windowStart);
const firstRenderedRawIdx=renderVisWithIdx.length?renderVisWithIdx[0].rawIdx:Infinity;
+ const assistantTurnFinalVisibleContentByRawIdx=_assistantTurnFinalVisibleContentMap(visWithIdx);
+ const assistantTurnVisibleContentByRawIdx=_assistantTurnVisibleContentMap(visWithIdx);
const hasServerOlder=!!(typeof _messagesTruncated!=='undefined' && _messagesTruncated && S.messages.length>0);
const serverOlderCount=hasServerOlder&&Number.isFinite(Number(_oldestIdx))?Math.max(0,Number(_oldestIdx)):0;
if(typeof _applySessionNavigationPrefs==='function') _applySessionNavigationPrefs();
@@ -7232,21 +7912,17 @@ function renderMessages(options){
let content=m.content||'';
let thinkingText='';
if(Array.isArray(content)){
- thinkingText=content.filter(p=>p&&(p.type==='thinking'||p.type==='reasoning')).map(p=>p.thinking||p.reasoning||p.text||'').join('\n');
content=content.filter(p=>p&&p.type==='text').map(p=>p.text||p.content||'').join('\n');
}
- if(!thinkingText && (m.reasoning_content || m.reasoning)) thinkingText=m.reasoning_content || m.reasoning;
if(!thinkingText && typeof content==='string'){
const thinkMatch=content.match(/^\s*([\s\S]*?)<\/think>\s*/);
if(thinkMatch){
- thinkingText=thinkMatch[1].trim();
content=content.replace(/^\s*[\s\S]*?<\/think>\s*/,'').trimStart();
}
if(!thinkingText){
// Historical name "gemmaMatch" refers to MiniMax <|channel>thought format.
const gemmaMatch=content.match(/^\s*<\|channel\|?>thought\n?([\s\S]*?)\s*/);
if(gemmaMatch){
- thinkingText=gemmaMatch[1].trim();
content=content.replace(/^\s*<\|channel\|?>thought\n?[\s\S]*?\s*/,'').trimStart();
}
}
@@ -7254,7 +7930,6 @@ function renderMessages(options){
// Gemma 4 uses asymmetric <|turn|>thinking\n... delimiters.
const gemmaTurnMatch=content.match(/^\s*<\|turn\|>thinking\n([\s\S]*?)\s*/);
if(gemmaTurnMatch){
- thinkingText=gemmaTurnMatch[1].trim();
content=content.replace(/^\s*<\|turn\|>thinking\n[\s\S]*?\s*/,'').trimStart();
}
}
@@ -7264,15 +7939,13 @@ function renderMessages(options){
content='**Error:** No response received after context compression. Please retry.';
}
const displayContent=isUser?_stripAttachedFilesMarkerForDisplay(_stripWorkspaceDisplayPrefix(content)):content;
- if(thinkingText&&!isUser){
- thinkingText=_stripVisibleAssistantEchoFromThinking(thinkingText, displayContent);
- // #3709 (defect B): if this message's own visible body didn't strip the
- // echo (e.g. a thinking-only message whose answer prose lives on a sibling
- // message in the same turn), also strip against the turn's combined answer.
- const turnVisible=_turnVisibleTextByRawIdx.get(rawIdx);
- if(thinkingText&&turnVisible&&turnVisible!==displayContent){
- thinkingText=_stripVisibleAssistantEchoFromThinking(thinkingText, turnVisible);
- }
+ if(!isUser&&_isAssistantEmptyPlaceholderContent(m, displayContent)){
+ content='';
+ }
+ if(!isUser&&isSimplifiedToolCalling()&&!thinkingText){
+ const turnFinalVisibleContent=assistantTurnFinalVisibleContentByRawIdx.get(rawIdx)||'';
+ const turnVisibleContents=assistantTurnVisibleContentByRawIdx.get(rawIdx)||[];
+ thinkingText=_worklogReasoningTextFromMessage(m, rawIdx, toolCallAssistantIdxs, displayContent, turnFinalVisibleContent, turnVisibleContents);
}
const isLastAssistant=!isUser&&vi===renderVisWithIdx.length-1;
const nextRendered=renderVisWithIdx[vi+1];
@@ -7346,6 +8019,13 @@ function renderMessages(options){
seg.className='assistant-segment';
seg.dataset.msgIdx=rawIdx;
seg.dataset.rawText=String(content).trim();
+ if(m._activityBurstId!==undefined&&m._activityBurstId!==null) seg.setAttribute('data-activity-burst-id',String(m._activityBurstId));
+ if(Number.isFinite(Number(m._liveSegmentSeq))) seg.setAttribute('data-live-segment-seq',String(Number(m._liveSegmentSeq)));
+ const messageBelongsInWorklog=!S.busy&&isSimplifiedToolCalling()&&_assistantMessageBelongsInWorklog(m, rawIdx, toolCallAssistantIdxs, displayContent, {isTurnFinalAssistant});
+ if(messageBelongsInWorklog){
+ seg.classList.add('assistant-segment-worklog-source');
+ seg.setAttribute('aria-hidden','true');
+ }
if(m._live){
currentAssistantTurn.id='liveAssistantTurn';
// Stamp the session id on the live turn so finalizeThinkingCard()
@@ -7357,7 +8037,7 @@ function renderMessages(options){
}
if(_ERR_MSG_RE.test(String(content||'').trim())) seg.dataset.error='1';
if(thinkingText&&window._showThinking!==false){
- if(isSimplifiedToolCalling()) assistantThinking.set(rawIdx, thinkingText);
+ if(isSimplifiedToolCalling()&&_assistantThinkingBelongsInWorklog(m, rawIdx, toolCallAssistantIdxs)) assistantThinking.set(rawIdx, thinkingText);
else if(window._showThinking!==false) seg.insertAdjacentHTML('beforeend', _thinkingCardHtml(thinkingText));
}
const hasVisibleBody=!!(String(content||'').trim()||filesHtml||statusHtml);
@@ -7461,7 +8141,14 @@ function renderMessages(options){
// tracking, or runs that didn't go through the normal streaming path), build
// a display list from per-message tool_calls (OpenAI format) stored in each
// assistant message. This covers the reload case described in issue #140.
- if(!S.busy && (!S.toolCalls||!S.toolCalls.length)){
+ const hasMessageToolMetadata=!S.busy&&Array.isArray(S.messages)&&S.messages.some(m=>
+ m&&m.role==='assistant'&&(
+ (Array.isArray(m.tool_calls)&&m.tool_calls.length>0)||
+ (Array.isArray(m._partial_tool_calls)&&m._partial_tool_calls.length>0)||
+ (Array.isArray(m.content)&&m.content.some(p=>p&&typeof p==='object'&&p.type==='tool_use'))
+ )
+ );
+ if(!S.busy && (hasMessageToolMetadata||!S.toolCalls||!S.toolCalls.length)){
// Index tool outputs by tool_call_id / tool_use_id so the
// fallback-built cards carry their result snippet (not just the command).
// Without this step CLI-origin sessions reload with empty tool cards.
@@ -7489,13 +8176,39 @@ function renderMessages(options){
}
if(m.role==='assistant'){
const hasTopLevelToolCalls=Array.isArray(m.tool_calls)&&m.tool_calls.length>0;
- const hasContentToolUse=Array.isArray(m.content)&&m.content.some(p=>p&&typeof p==='object'&&p.type==='tool_use');
const hasPartialToolCalls=Array.isArray(m._partial_tool_calls)&&m._partial_tool_calls.length>0;
+ const hasContentToolUse=Array.isArray(m.content)&&m.content.some(p=>p&&typeof p==='object'&&p.type==='tool_use');
if(hasTopLevelToolCalls||hasContentToolUse||hasPartialToolCalls) fallbackToolSources.push({m,rawIdx});
}
});
const derived=[];
+ const liveToolMetadata=Array.isArray(S._settledLiveToolMetadata)
+ ? S._settledLiveToolMetadata
+ : (Array.isArray(S.toolCalls)?S.toolCalls:[]);
+ const liveMetadataByTid=new Map();
+ liveToolMetadata.forEach((tc,idx)=>{
+ if(!tc||typeof tc!=='object') return;
+ const tid=tc.tid||tc.id||tc.tool_call_id||tc.call_id||'';
+ if(tid&&!liveMetadataByTid.has(tid)) liveMetadataByTid.set(tid,{tc,idx});
+ });
+ const usedLiveToolMetadata=new Set();
+ const copyLiveToolMetadata=(next,name,tid)=>{
+ let matchEntry=tid?liveMetadataByTid.get(tid):null;
+ if(!matchEntry){
+ const matchIdx=liveToolMetadata.findIndex((tc,i)=>tc&&!usedLiveToolMetadata.has(i)&&(!name||tc.name===name));
+ if(matchIdx>=0) matchEntry={tc:liveToolMetadata[matchIdx],idx:matchIdx};
+ }
+ if(matchEntry){
+ usedLiveToolMetadata.add(matchEntry.idx);
+ const live=matchEntry.tc||{};
+ for(const key of ['activityBurstId','duration','started_at']){
+ if((next[key]===undefined||next[key]===null)&&live[key]!==undefined&&live[key]!==null) next[key]=live[key];
+ }
+ }
+ return next;
+ };
fallbackToolSources.forEach(({m,rawIdx})=>{
+ const assistantToolAnchorIdx=_assistantToolAnchorIdxForMessage(S.messages,rawIdx);
// OpenAI format: top-level tool_calls field on the assistant message
(m.tool_calls||[]).forEach(tc=>{
if(!tc||typeof tc!=='object') return;
@@ -7506,17 +8219,43 @@ function renderMessages(options){
const tid=tc.id||tc.call_id||'';
const patchSnippet=_cliPatchSnippetFromArgs(name,args);
const resultSnippet=resultsByTid[tid]||'';
- let argsSnap={};
- Object.keys(args).slice(0,4).forEach(k=>{ const v=String(args[k]); argsSnap[k]=v.slice(0,120)+(v.length>120?'...':''); });
- derived.push({
+ let argsSnap=_toolArgsSnapshot(args);
+ derived.push(copyLiveToolMetadata({
name,
snippet:_cliToolCardSnippet(resultSnippet,patchSnippet),
is_diff:_cliToolCardHasDiffSnippet(resultSnippet,patchSnippet),
tid,
- assistant_msg_idx:rawIdx,
+ assistant_msg_idx:assistantToolAnchorIdx,
args:argsSnap,
done:true,
- });
+ }, name, tid));
+ });
+ // WebUI partial/live format: _partial_tool_calls snapshots survive
+ // interrupted or adapter-shaped settles even when session.tool_calls is empty.
+ const partialToolCalls=Array.isArray(m._partial_tool_calls)?m._partial_tool_calls:[];
+ partialToolCalls.forEach(tc=>{
+ if(!tc||typeof tc!=='object') return;
+ const fn=tc.function||{};
+ const name=tc.name||fn.name||'tool';
+ let args=tc.args||tc.input||{};
+ if(!args||typeof args!=='object'){
+ try{ args=JSON.parse(fn.arguments||'{}'); }catch(e){ args={}; }
+ }else if(!Object.keys(args).length&&fn.arguments){
+ try{ args=JSON.parse(fn.arguments||'{}'); }catch(e){}
+ }
+ const tid=tc.tid||tc.id||tc.tool_call_id||tc.call_id||'';
+ const patchSnippet=_cliPatchSnippetFromArgs(name,args);
+ const resultSnippet=resultsByTid[tid]||tc.snippet||tc.preview||'';
+ const argsSnap=_toolArgsSnapshot(args);
+ derived.push(copyLiveToolMetadata({
+ name,
+ snippet:_cliToolCardSnippet(resultSnippet,patchSnippet),
+ is_diff:_cliToolCardHasDiffSnippet(resultSnippet,patchSnippet),
+ tid,
+ assistant_msg_idx:assistantToolAnchorIdx,
+ args:argsSnap,
+ done:true,
+ }, name, tid));
});
// Anthropic format: tool_use blocks inside assistant content array
if(Array.isArray(m.content)){
@@ -7527,19 +8266,16 @@ function renderMessages(options){
const tid=p.id||'';
const patchSnippet=_cliPatchSnippetFromArgs(name,args);
const resultSnippet=resultsByTid[tid]||'';
- const argsSnap={};
- if(args && typeof args==='object'){
- Object.keys(args).slice(0,4).forEach(k=>{ const v=String(args[k]); argsSnap[k]=v.slice(0,120)+(v.length>120?'...':''); });
- }
- derived.push({
+ const argsSnap=_toolArgsSnapshot(args);
+ derived.push(copyLiveToolMetadata({
name,
snippet:_cliToolCardSnippet(resultSnippet,patchSnippet),
is_diff:_cliToolCardHasDiffSnippet(resultSnippet,patchSnippet),
tid,
- assistant_msg_idx:rawIdx,
+ assistant_msg_idx:assistantToolAnchorIdx,
args:argsSnap,
done:true,
- });
+ }, name, tid));
});
}
// WebUI-internal partial tool calls captured on cancel/stop
@@ -7552,176 +8288,160 @@ function renderMessages(options){
const tid=tc.id||tc.call_id||tc.tool_call_id||tc.tid||'';
const patchSnippet=_cliPatchSnippetFromArgs(name,args);
const resultSnippet=_cliToolResultSnippet(tc.snippet||tc.result||tc.output||tc.preview||'');
- const argsSnap={};
- if(args && typeof args==='object'){
- Object.keys(args).slice(0,4).forEach(k=>{ const v=String(args[k]); argsSnap[k]=v.slice(0,120)+(v.length>120?'...':''); });
- }
- derived.push({
+ const argsSnap=_toolArgsSnapshot(args,4);
+ derived.push(copyLiveToolMetadata({
name,
snippet:_cliToolCardSnippet(resultSnippet,patchSnippet),
is_diff:_cliToolCardHasDiffSnippet(resultSnippet,patchSnippet),
tid,
- assistant_msg_idx:rawIdx,
+ assistant_msg_idx:assistantToolAnchorIdx,
args:argsSnap,
done:true,
- });
+ }, name, tid));
});
}
});
if(derived.length) S.toolCalls=derived;
+ if(S._settledLiveToolMetadata) S._settledLiveToolMetadata=null;
}
- // Render tool cards: allow during streaming when S.toolCalls is already
- // populated (e.g. from INFLIGHT restore or SSE events). Only the fallback
- // derivation above is blocked by S.busy — DOM insertion should proceed
- // whenever tool cards exist.
- if(!S.busy || (S.toolCalls&&S.toolCalls.length)){
- inner.querySelectorAll('.tool-call-group:not([data-compression-card]),.tool-card-row:not([data-compression-card]),.agent-activity-thinking:not([data-live-thinking="1"])').forEach(el=>el.remove());
- const byAssistant = {};
- for(const tc of (S.toolCalls||[])){
- const key = tc.assistant_msg_idx !== undefined ? tc.assistant_msg_idx : -1;
- if(!byAssistant[key]) byAssistant[key] = [];
- byAssistant[key].push(tc);
- }
+ if(!S.busy){
+ inner.querySelectorAll('.tool-worklog-group:not([data-compression-card]),.tool-call-group:not([data-compression-card]),.tool-card-row:not([data-compression-card]),.agent-activity-thinking:not([data-live-thinking="1"]),.wl-reason[data-worklog-reason-source="reasoning"]').forEach(el=>el.remove());
+ const byActivity = new Map();
const assistantIdxs=[...assistantSegments.keys()].sort((a,b)=>a-b);
- const anchorInsertAfter = new Map();
- if(isSimplifiedToolCalling()){
- // Shared anchor resolver: maps an activity index to the assistant segment
- // its Activity group will anchor on. The group-render path falls back to a
- // nearby earlier segment when an index has no directly-rendered segment
- // (legacy/rebased assistant_msg_idx). The inline-suppression precompute MUST
- // use the SAME resolution, or a fallback-anchored group's turn won't be in
- // turnsWithActivityGroup and the thinking renders twice (Codex re-gate).
- const _anchorRowForActivityIdx=(aIdx)=>{
- let row=assistantSegments.get(aIdx)||null;
- if(!row&&assistantIdxs.length){
- if(aIdxidx<=aIdx);
- row=fallbackIdx!==undefined?assistantSegments.get(fallbackIdx):assistantSegments.get(assistantIdxs[assistantIdxs.length-1]);
+ const _assistantAnchorForActivity=(aIdx,segmentSeq,burstId)=>{
+ if(segmentSeq){
+ for(const seg of assistantSegments.values()){
+ if(seg&&seg.getAttribute('data-live-segment-seq')===String(segmentSeq)) return seg;
}
- return row;
- };
- // #3709: a turn (one .assistant-turn, spanning every assistant segment
- // between two user messages) can contain BOTH a tool-bearing message and a
- // trailing thinking-only message. The tool message builds an Activity group
- // that already carries the turn's thinking at its top; if the thinking-only
- // sibling ALSO renders its thinking inline, the card shows twice. Precompute
- // the set of turn nodes that have any tool card so the inline branch can
- // skip turns that already own an Activity group.
- const turnsWithActivityGroup=new Set();
- // Per-turn merged thinking: a tool-bearing turn's Activity group must carry
- // ALL of that turn's thinking — including a trailing thinking-only sibling
- // whose inline card we suppress below (#3709 A1). Rendering only the tool
- // message's own assistantThinking entry would silently DROP a sibling's
- // distinct reasoning. Aggregate every assistantThinking entry by turn,
- // de-duped and in index order, and render that once in the group.
- const turnThinkingParts=new Map(); // turnNode -> [text,...]
- const _addTurnThinking=(idx)=>{
- if(!assistantThinking.has(idx)) return;
- const seg=_anchorRowForActivityIdx(idx);
- const turn=seg?seg.closest('.assistant-turn'):null;
- if(!turn) return;
- const txt=String(assistantThinking.get(idx)||'').trim();
- if(!txt) return;
- const arr=turnThinkingParts.get(turn)||[];
- if(!arr.includes(txt)) arr.push(txt);
- turnThinkingParts.set(turn, arr);
- };
- for(const tcIdx of Object.keys(byAssistant).map(k=>parseInt(k))){
- if(!(byAssistant[tcIdx]||[]).length) continue;
- const tcSeg=_anchorRowForActivityIdx(tcIdx);
- const tcTurn=tcSeg?tcSeg.closest('.assistant-turn'):null;
- if(tcTurn) turnsWithActivityGroup.add(tcTurn);
}
- // Aggregate thinking for every assistant idx that has it (tool-bearing or not).
- for(const tIdx of assistantThinking.keys()) _addTurnThinking(tIdx);
- const _renderedTurnThinking=new Set();
- const activityIdxs=[...new Set([...Object.keys(byAssistant).map(k=>parseInt(k)), ...assistantThinking.keys()])].sort((a,b)=>a-b);
- for(const aIdx of activityIdxs){
- const cards=byAssistant[aIdx]||[];
- if(!cards.length&&assistantThinking.has(aIdx)){
- // Thinking-only message. Render its thinking inline ONLY when the turn
- // has no Activity group at all (#3592 — a genuinely thinking-only turn
- // must not bury its thinking in a collapsed group). If a sibling
- // tool-message in the same turn built a group, that group carries the
- // turn's thinking (including THIS message's, via turnThinkingParts) —
- // don't emit a duplicate inline card here (#3709 A).
- const anchorRow=_anchorRowForActivityIdx(aIdx);
- const anchorTurn=anchorRow?anchorRow.closest('.assistant-turn'):null;
- if(anchorRow&&window._showThinking!==false&&!(anchorTurn&&turnsWithActivityGroup.has(anchorTurn))){
- // Insert the thinking card BEFORE the answer body + msg-foot footer
- // (the segment already has them appended), so it reads above the
- // answer rather than orphaned below the "Done in …" line (#3709 A2).
- const bodyEl=anchorRow.querySelector('.msg-body,.msg-foot');
- const cardHtml=_thinkingCardHtml(assistantThinking.get(aIdx));
- if(bodyEl) bodyEl.insertAdjacentHTML('beforebegin',cardHtml);
- else anchorRow.insertAdjacentHTML('beforeend',cardHtml);
- }
- continue;
- }
- let anchorRow=_anchorRowForActivityIdx(aIdx);
- if(!anchorRow) continue;
- const anchorParent=anchorRow.parentElement;
- let insertAfterNode = anchorInsertAfter.get(anchorRow) || anchorRow;
- const group=ensureActivityGroup(anchorParent,{collapsed:true,anchor:insertAfterNode,activityKey:`assistant:${aIdx}`});
- const sourceMsg=S.messages[aIdx]||{};
- if(sourceMsg._turnDuration!==undefined) group.setAttribute('data-turn-duration', String(sourceMsg._turnDuration));
- const body=group&&group.querySelector('.tool-call-group-body');
- if(!body) continue;
- // Render the TURN's merged thinking once (covers this message's own
- // thinking + any suppressed thinking-only sibling in the same turn, #3709).
- const groupTurn=anchorRow?anchorRow.closest('.assistant-turn'):null;
- const mergedThinking=(groupTurn&&turnThinkingParts.get(groupTurn)||[]).join('\n\n').trim();
- if(mergedThinking&&!(groupTurn&&_renderedTurnThinking.has(groupTurn))){
- body.appendChild(_thinkingActivityNode(mergedThinking, false));
- if(groupTurn) _renderedTurnThinking.add(groupTurn);
+ const wantedBurst=burstId!==undefined&&burstId!==null&&String(burstId)!==''&&String(burstId)!=='0'?String(burstId):'';
+ if(wantedBurst){
+ for(const seg of assistantSegments.values()){
+ if(seg&&seg.getAttribute('data-activity-burst-id')===wantedBurst) return seg;
}
- for(const tc of cards){
- body.appendChild(buildToolCard(tc));
- }
- _syncToolCallGroupSummary(group);
- if(anchorRow) anchorInsertAfter.set(anchorRow, group);
}
- }else if(S.toolCalls && S.toolCalls.length){
- for(const [key, cards] of Object.entries(byAssistant)){
- const aIdx = parseInt(key);
- let anchorRow=assistantSegments.get(aIdx)||null;
- if(!anchorRow&&assistantIdxs.length){
- if(aIdxidx<=aIdx);
- anchorRow=fallbackIdx!==undefined?assistantSegments.get(fallbackIdx):assistantSegments.get(assistantIdxs[assistantIdxs.length-1]);
- }
- if(!anchorRow) continue;
- const anchorParent=anchorRow.parentElement;
- const frag=document.createDocumentFragment();
- let lastInsertedNode=null;
- for(const tc of cards){
- const card=buildToolCard(tc);
- frag.appendChild(card);
- lastInsertedNode=card;
- }
- // Add expand/collapse toggle for groups with 2+ cards
- if(cards.length>=2){
- const toggle=document.createElement('div');
- toggle.className='tool-cards-toggle';
- // Collect card elements before they get moved to DOM
- const cardEls=Array.from(frag.querySelectorAll('.tool-card'));
- const expandBtn=document.createElement('button');
- expandBtn.textContent=t('expand_all');
- expandBtn.onclick=()=>cardEls.forEach(c=>c.classList.add('open'));
- const collapseBtn=document.createElement('button');
- collapseBtn.textContent=t('collapse_all');
- collapseBtn.onclick=()=>cardEls.forEach(c=>c.classList.remove('open'));
- toggle.appendChild(expandBtn);
- toggle.appendChild(collapseBtn);
- frag.insertBefore(toggle,frag.firstChild);
- }
- const insertAfterNode = anchorInsertAfter.get(anchorRow) || anchorRow;
- const refNode = insertAfterNode ? insertAfterNode.nextSibling : null;
- if(refNode) anchorParent.insertBefore(frag,refNode);
- else anchorParent.appendChild(frag);
- if(anchorRow&&lastInsertedNode) anchorInsertAfter.set(anchorRow, lastInsertedNode);
+ let anchorRow=assistantSegments.get(aIdx)||null;
+ if(!anchorRow&&assistantIdxs.length){
+ if(aIdxidx<=aIdx);
+ anchorRow=fallbackIdx!==undefined?assistantSegments.get(fallbackIdx):assistantSegments.get(assistantIdxs[assistantIdxs.length-1]);
+ }
+ return anchorRow;
+ };
+ const _turnDurationForAnchor=(anchorRow)=>{
+ if(!anchorRow) return undefined;
+ const turn=anchorRow.closest('.assistant-turn');
+ const blocks=_assistantTurnBlocks(turn);
+ if(!blocks) return undefined;
+ let duration;
+ for(const seg of blocks.querySelectorAll('.assistant-segment')){
+ const idx=Number(seg.dataset&&seg.dataset.msgIdx);
+ const msg=Number.isFinite(idx)?S.messages[idx]:null;
+ if(msg&&msg._turnDuration!==undefined) duration=msg._turnDuration;
+ }
+ return duration;
+ };
+ const durationAssignedTurns = new Set();
+ const activityByTurn = new Map();
+ const activityOrder = [];
+ const ensureActivityBucket=(key,aIdx,segmentSeq,burstId)=>{
+ if(!byActivity.has(key)){
+ const entry={key,aIdx,segmentSeq:segmentSeq||'',burstId:burstId||'',cards:[],thinkingIdx:null,includeAnchorReason:false};
+ byActivity.set(key,entry);
+ activityOrder.push(entry);
}
+ return byActivity.get(key);
+ };
+ const normalizeToken=(value)=>{
+ const hasValue=value!==undefined&&value!==null&&String(value)!==''&&String(value)!=='0';
+ return hasValue?String(value):'';
+ };
+ for(const tc of (S.toolCalls||[])){
+ if(!tc) continue;
+ const aIdx=tc.assistant_msg_idx!==undefined?parseInt(tc.assistant_msg_idx):-1;
+ const segmentSeq=normalizeToken(tc.activitySegmentSeq);
+ const burstId=normalizeToken(tc.activityBurstId);
+ const key=segmentSeq?`segment:${segmentSeq}`:(burstId?`burst:${burstId}`:`assistant:${aIdx}`);
+ const entry=ensureActivityBucket(key,aIdx,segmentSeq,burstId);
+ entry.cards.push(tc);
+ entry.includeAnchorReason=true;
+ }
+ for(const aIdx of assistantThinking.keys()){
+ const seg=assistantSegments.get(aIdx);
+ const segmentSeq=seg&&seg.getAttribute('data-live-segment-seq')||'';
+ const burstId=seg&&seg.getAttribute('data-activity-burst-id')||'';
+ const key=segmentSeq?`segment:${segmentSeq}`:(burstId?`burst:${burstId}`:`assistant:${aIdx}`);
+ const entry=ensureActivityBucket(key,aIdx,segmentSeq,burstId);
+ if(entry.thinkingIdx===null) entry.thinkingIdx=aIdx;
+ }
+ for(const [aIdx,seg] of assistantSegments){
+ if(!seg||!seg.classList||!seg.classList.contains('assistant-segment-worklog-source')) continue;
+ if(!_worklogReasonHtmlFromAnchor(seg)) continue;
+ const segmentSeq=seg&&seg.getAttribute('data-live-segment-seq')||'';
+ const burstId=seg&&seg.getAttribute('data-activity-burst-id')||'';
+ const key=segmentSeq?`segment:${segmentSeq}`:(burstId?`burst:${burstId}`:`assistant:${aIdx}`);
+ const entry=ensureActivityBucket(key,aIdx,segmentSeq,burstId);
+ entry.includeAnchorReason=true;
+ }
+ activityOrder.sort((a,b)=>{
+ const anchorA=_assistantAnchorForActivity(a.aIdx,a.segmentSeq,a.burstId);
+ const anchorB=_assistantAnchorForActivity(b.aIdx,b.segmentSeq,b.burstId);
+ const idxA=(anchorA&&anchorA.parentElement)?Array.prototype.indexOf.call(anchorA.parentElement.children,anchorA):Number.MAX_SAFE_INTEGER;
+ const idxB=(anchorB&&anchorB.parentElement)?Array.prototype.indexOf.call(anchorB.parentElement.children,anchorB):Number.MAX_SAFE_INTEGER;
+ if(idxA!==idxB) return idxA-idxB;
+ const seqA=a.segmentSeq!==''?Number(a.segmentSeq):Number.MAX_SAFE_INTEGER;
+ const seqB=b.segmentSeq!==''?Number(b.segmentSeq):Number.MAX_SAFE_INTEGER;
+ if(Number.isFinite(seqA)&&Number.isFinite(seqB)&&seqA!==seqB) return seqA-seqB;
+ const burstA=a.burstId!==''?Number(a.burstId):Number.MAX_SAFE_INTEGER;
+ const burstB=b.burstId!==''?Number(b.burstId):Number.MAX_SAFE_INTEGER;
+ if(Number.isFinite(burstA)&&Number.isFinite(burstB)&&burstA!==burstB) return burstA-burstB;
+ return a.aIdx-b.aIdx;
+ });
+ for(const entry of activityOrder){
+ const {aIdx,segmentSeq,burstId,cards,thinkingIdx,includeAnchorReason}=entry;
+ if(aIdx{
+ _syncToolCallGroupSummary(state.group);
+ });
}
// Render per-turn duration and optional token usage on assistant messages.
// Duration stays visible even when token usage is disabled, because it answers
@@ -7738,14 +8458,11 @@ function renderMessages(options){
const failoverText=_gatewayRoutingFailoverText(routing);
const modelWarningText=_gatewayModelWarningText(routing);
const hasTurnUsage=!!msg._turnUsage;
- // The activity-group summary owns the "Done in …" duration ONLY when a
- // group is actually created. A tool-call turn always builds one. A
- // thinking-only turn under Simplified Tool Calling now renders thinking
- // inline (no group — see the `continue` at the activityIdxs loop, #3592),
- // so it must keep its footer duration; suppressing it there would silently
- // drop "Done in …" for thinking-only turns (#3592 review).
- const compactActivityForMessage=isSimplifiedToolCalling()&&toolCallAssistantIdxs.has(mi);
- const durationText=compactActivityForMessage?'':_formatTurnDuration(msg._turnDuration);
+ // The Worklog summary owns the "Done in …" duration whenever this
+ // assistant message contributes tool or thinking detail to a folded
+ // Worklog above the final answer.
+ const compactWorklogForMessage=isSimplifiedToolCalling()&&(toolCallAssistantIdxs.has(mi)||assistantThinking.has(mi));
+ const durationText=compactWorklogForMessage?'':_formatTurnDuration(msg._turnDuration);
if(!hasTurnUsage&&!durationText&&!gatewayText&&!failoverText&&!modelWarningText) continue;
const seg=assistantSegments.get(mi);
const row=seg?seg.closest('.assistant-turn'):null;
@@ -7799,6 +8516,7 @@ function renderMessages(options){
// Only force-scroll when not actively streaming — mid-stream re-renders
// (tool completion, session switch) must not override the user's scroll position.
// scrollIfPinned() respects _scrollPinned, so it's a no-op if user scrolled up.
+ if(typeof _syncLiveRunStatusAfterRender==='function') _syncLiveRunStatusAfterRender();
_scrollAfterMessageRender(preserveScroll, scrollSnapshot);
// Apply syntax highlighting after DOM is built
requestAnimationFrame(()=>postProcessRenderedMessages(inner));
@@ -7849,6 +8567,239 @@ function _isSkillUpdate(tc){
if(!tc||tc.name!=='skill_manage'||tc.done===false||tc.is_error) return false;
return _SKILL_UPDATE_ACTIONS.has(_tcAction(tc));
}
+// ── Tool action label helpers ──────────────────────────────────────────────
+function _decodeToolLabelEntities(value){ // Decode the basic HTML entities (&quot; &#39;/&#x27;/&apos; &lt; &gt; &amp;) in a tool label; falsy input becomes ''.
+  return String(value||'')
+    .replace(/&quot;/g,'"')
+    .replace(/&#39;|&#x27;|&apos;/gi,"'")
+    .replace(/&lt;/g,'<')
+    .replace(/&gt;/g,'>')
+    .replace(/&amp;/g,'&'); // &amp; goes last so "&amp;lt;" decodes once to the literal text "&lt;", never on to "<"
+}
+function _redactToolTargetLabel(value){
+  // Mask password values (sshpass -p <pw>, --password <pw>, password=<pw>) so a secret never reaches a visible label.
+  const afterSshpass=String(value||'').replace(/\bsshpass\s+-p\s+(?:"[^"]*"|'[^']*'|\S+)/gi,'sshpass -p "[redacted]"');
+  const afterFlag=afterSshpass.replace(/(--password(?:=|\s+))(?:"[^"]*"|'[^']*'|\S+)/gi,'$1[redacted]');
+  return afterFlag.replace(/(password(?:=|\s+))(?:"[^"]*"|'[^']*'|\S+)/gi,'$1[redacted]');
+}
+function _shortToolLabel(value, limit){ // Collapse whitespace, then middle-truncate to at most `limit` characters (default 112) as head + "..." + tail.
+  const text=String(value||'').replace(/\s+/g,' ').trim(), max=limit||112;
+  if(text.length<=max) return text;
+  if(max<=3) return text.slice(0,Math.max(0,max)); // no room for an ellipsis plus any context
+  // The 24-char head / 12-char tail minimums used to exceed small limits (and repeat overlapping text); clamp both to the budget.
+  const head=Math.min(Math.max(24,Math.floor(max*.68)),max-3), tail=max-head-3;
+  return text.slice(0,head).trimEnd()+'...'+(tail>0?text.slice(-tail).trimStart():''); // slice(-0) would return the whole string
+}
+function _toolActionKind(tc){ // Classify a tool call into a coarse kind (delegate/shell/read/list/web/search/write/unknown) from its name.
+  const n=String(tc&&tc.name||'').toLowerCase().replace(/[^a-z0-9]+/g,'_');
+  const has=(...parts)=>parts.some(part=>n.includes(part));
+  // Checked top to bottom, first match wins (so "web_search" is 'web'); an empty name matches nothing and ends as 'unknown'.
+  if(n==='subagent_progress'||n==='delegate_task') return 'delegate';
+  if(has('terminal','shell','command','process')||n==='execute_code') return 'shell';
+  if(has('read','view','open')||n==='vision_analyze') return 'read';
+  if(has('list')||n==='todo') return 'list';
+  if(has('web','fetch','curl','extract','browse','navigate')) return 'web';
+  if(has('search','grep','find')) return 'search';
+  return has('write','patch','edit')?'write':'unknown';
+}
+function _toolTargetLabel(tc){ // First line of the call's most descriptive argument (or its preview), entity-decoded and password-redacted; '' when there is none.
+  const a=tc&&tc.args||{};
+  const raw=a.cmd||a.command||a.path||a.file||a.uri||a.url||a.query||a.pattern||a.dir||a.task||(tc&&tc.preview)||''; // tc may be null: args is guarded above, so preview is guarded too
+  return _redactToolTargetLabel(_decodeToolLabelEntities(String(raw).split('\n')[0].trim()));
+}
+function _toolVisibleTargetLabel(tc, opts){
+  // Display form of the call's target: shortened to opts.limit characters (112 when unset); '' when the call has no target.
+  const label=_toolTargetLabel(tc);
+  const limit=(opts||{}).limit||112;
+  return label?_shortToolLabel(label,limit):'';
+}
+function _toolCommandTitle(command){
+  // Short title for a shell command: well-known commands get a canned name, anything else is shortened to 72 characters.
+  const cmd=String(command||'').replace(/\s+/g,' ').trim();
+  if(!cmd) return '';
+  // Order matters: the first matching rule wins.
+  const gitTitle=/^git\s+fetch\b/i.test(cmd)?'git fetch':/^git\s+(?:status|rev-list|branch)\b/i.test(cmd)?'git ahead/behind':/^git\s+log\b/i.test(cmd)?'git log':'';
+  if(gitTitle) return gitTitle;
+  if(/\bcurl\b/i.test(cmd)&&/\/health\b/i.test(cmd)) return 'health check';
+  if(/\b(?:ps|pgrep)\b/i.test(cmd)) return 'process check';
+  const port=/\blsof\b.*(?:-i|:)(\d{2,5})\b/i.exec(cmd);
+  if(port) return `port ${port[1]} check`;
+  return /\blaunchctl\b/i.test(cmd)?'launchctl':_shortToolLabel(cmd,72);
+}
+function _toolQueryTitle(query){
+  // Title for a search or web query: whitespace collapsed and trimmed, then shortened to 72 characters.
+  return _shortToolLabel(String(query||'').replace(/\s+/g,' ').trim(),72);
+}
+function _toolActionLabelText(tc, opts){
+  // Plain-text action label for a tool call: "<verb> <subject>", where the subject is the call's target or, failing that, its display name.
+  // The verb is past tense when done, progressive while running, and "Failed <progressive>" on error; opts.generic drops the target.
+  opts=opts||{};
+  const verbs={
+    shell: {ing:'Running', ed:'Ran'},
+    read: {ing:'Reading', ed:'Read'},
+    list: {ing:'Listing', ed:'Listed'},
+    search: {ing:'Searching for',ed:'Searched for'},
+    web: {ing:'Checking', ed:'Checked'},
+    write: {ing:'Updating', ed:'Updated'},
+    delegate:{ing:'Delegating',ed:'Delegated'},
+    unknown: {ing:'Running', ed:'Ran'},
+  };
+  const kind=_toolActionKind(tc);
+  const verb=verbs[kind]||verbs.unknown;
+  // Shell commands and search/web queries get a condensed title instead of the raw target.
+  let target=opts.generic?'':_toolVisibleTargetLabel(tc, opts);
+  if(target&&kind==='shell') target=_toolCommandTitle(target);
+  else if(target&&(kind==='search'||kind==='web')) target=_toolQueryTitle(target);
+  const display=_toolDisplayName(tc);
+  const subject=target||display;
+  if(tc&&tc.is_error) return `Failed ${verb.ing.toLowerCase()} ${subject}`;
+  const finished=tc&&tc.done!==false;
+  return `${finished?verb.ed:verb.ing} ${subject}`;
+}
+function _toolActionLabel(tc){ // Action label for a tool call: the _toolActionLabelText result (target capped at 112 characters) passed through esc().
+ return esc(_toolActionLabelText(tc,{limit:112})); // NOTE(review): esc() is defined outside this chunk; presumably it HTML-escapes — confirm before using the result with textContent
+}
+const _toolWorklogSummaries={ // Summary phrases per tool kind; {n} is replaced with the call count. running/runningMany describe calls still in flight, so they stay in the progressive tense.
+  shell:{running:'Running a command',runningMany:'Running {n} commands',done:'Ran a command',doneMany:'Ran {n} commands'},
+  read:{running:'Reading a file',runningMany:'Reading {n} files',done:'Read a file',doneMany:'Read {n} files'},
+  list:{running:'Listing files',runningMany:'Listing {n} items',done:'Listed files',doneMany:'Listed {n} files'},
+  search:{running:'Searching workspace',runningMany:'Searching workspace {n} times',done:'Searched workspace',doneMany:'Searched workspace {n} times'},
+  web:{running:'Checking web',runningMany:'Checking web {n} times',done:'Checked the web',doneMany:'Checked the web {n} times'},
+  write:{running:'Updating a file',runningMany:'Updating {n} files',done:'Wrote a file',doneMany:'Wrote {n} files'},
+  delegate:{running:'Delegating a task',runningMany:'Delegating {n} tasks',done:'Delegated a task',doneMany:'Delegated {n} tasks'},
+  unknown:{running:'Running a tool',runningMany:'Running {n} tools',done:'Ran a tool',doneMany:'Ran {n} tools'},
+};
+function _toolWorklogActionParts(tc){ // Normalize a tool call (plain object) or a rendered tool-card DOM node into {kind,isDone,isErr,target,summary,actionLabel}.
+ if(tc&&tc.nodeType===1){ // nodeType 1 = DOM element: read state back from the rendered row/card instead of tool-call fields
+ const row=tc.classList&&tc.classList.contains('tool-card-row')?tc:tc.closest&&tc.closest('.tool-card-row'); // the node itself, or its nearest .tool-card-row ancestor
+ const card=tc.classList&&tc.classList.contains('tool-card')?tc:(row&&row.querySelector('.tool-card')); // the node itself, or the card inside the row
+ const actionLabel=(row&&row.dataset.toolActionLabel)||(card&&card.querySelector('.tool-card-name')&&card.querySelector('.tool-card-name').textContent.trim())||''; // data attribute first, then the visible card name
+ const kind=(row&&row.dataset.toolKind)||'unknown';
+ const isDone=!((row&&row.dataset.toolDone)==='false'||(card&&card.classList.contains('tool-card-running'))); // done unless the row says 'false' or the card is marked running
+ const isErr=(row&&row.dataset.toolError)==='true'||(card&&card.classList.contains('tool-card-error'));
+ return {kind,isDone,isErr,target:'',summary:_toolWorklogSummaries[kind]||_toolWorklogSummaries.unknown,actionLabel}; // no target is recovered from the DOM
+ }
+ const kind=_toolActionKind(tc); // plain tool-call object: derive everything from its fields
+ return {
+ kind,
+ isDone:tc&&tc.done!==false, // only an explicit done:false counts as still running
+ isErr:tc&&tc.is_error,
+ target:_toolTargetLabel(tc),
+ summary:_toolWorklogSummaries[kind]||_toolWorklogSummaries.unknown,
+ actionLabel:_toolActionLabelText(tc),
+ };
+}
+function _toolWorklogSummary(toolCalls, opts){
+  // One-line summary of a worklog's tool calls, e.g. "Running a command, read 2 files, 1 failed".
+  // Accepts tool-call objects or tool-card DOM nodes (whatever _toolWorklogActionParts accepts); falsy entries are ignored.
+  // With no entries the result is 'Running' when opts.live is set and 'Worklog' otherwise.
+  const cards=Array.from(toolCalls||[]).filter(Boolean);
+  if(cards.length===0) return (opts&&opts.live)?'Running':'Worklog';
+  const parts=cards.map(tc=>_toolWorklogActionParts(tc));
+  if(parts.length===1){
+    const [only]=parts;
+    const fmt=only.summary||_toolWorklogSummaries.unknown;
+    const text=only.isDone?fmt.done:fmt.running;
+    return only.isErr?`${text}, 1 failed`:text;
+  }
+  // Tally kinds separately for running and finished calls; failures are counted across both.
+  const tally={running:{},done:{}};
+  let failed=0;
+  parts.forEach(part=>{
+    const bucket=part.isDone?tally.done:tally.running;
+    bucket[part.kind]=(bucket[part.kind]||0)+1;
+    if(part.isErr) failed+=1;
+  });
+  const order=['shell','read','write','search','web','list','delegate','unknown'];
+  const phrases=(state)=>order.filter(kind=>tally[state][kind]).map(kind=>{
+    const n=tally[state][kind];
+    const fmt=_toolWorklogSummaries[kind]||_toolWorklogSummaries.unknown;
+    return n===1?fmt[state]:fmt[state+'Many'].replace('{n}',String(n));
+  });
+  const lines=[...phrases('running'),...phrases('done')];
+  if(failed) lines.push(`${failed} failed`);
+  if(!lines.length) return _toolActionLabel(cards[0]);
+  // Only the first phrase keeps its capital letter.
+  return lines.map((line,idx)=>idx===0?line:line.charAt(0).toLowerCase()+line.slice(1)).join(', ');
+}
+function _toolWorklogListEl(group){ // Element holding a group's rows: the first match among three known body selectors, or null.
+  if(!group) return null;
+  return ['.tool-worklog-list','.activity-body','.tool-call-group-body'].reduce((found,sel)=>found||group.querySelector(sel),null);
+}
+function _toolWorklogToolsEl(group){
+  // Return the tools step that is a direct child of the group's worklog list, creating and appending one when none exists yet.
+  const list=_toolWorklogListEl(group);
+  if(!list) return null;
+  const existing=list.querySelector(':scope > .wl-step-tools[data-worklog-tools="1"]');
+  if(existing) return existing;
+  const created=document.createElement('div');
+  created.className='wl-step-tools tool-worklog-tools';
+  created.setAttribute('data-worklog-tools','1');
+  list.appendChild(created);
+  return created;
+}
+function _liveToolStepEl(group){ // Tools step to append a tool row to: reused only when it is the LAST child of the worklog list, otherwise a new one is appended.
+ const list=_toolWorklogListEl(group);
+ if(!list) return null; // group has no worklog list element
+ const last=list.lastElementChild;
+ if(last&&last.classList&&last.classList.contains('wl-step-tools')&&last.getAttribute('data-worklog-tools')==='1') return last; // the trailing step is already a tools step
+ const tools=document.createElement('div'); // otherwise open a new tools step at the end of the list
+ tools.className='wl-step-tools tool-worklog-tools';
+ tools.setAttribute('data-worklog-tools','1');
+ list.appendChild(tools);
+ return tools;
+}
+function _directWorklogToolRows(list){ // Tool-card rows in child order: direct-child rows plus every row inside a direct-child tool group.
+  if(!list) return [];
+  return Array.from(list.children).flatMap(child=>{
+    const classes=child.classList;
+    if(!classes) return [];
+    if(classes.contains('tool-card-row')) return [child];
+    return (classes.contains('tool-worklog-tool-group')||classes.contains('tool-group'))?Array.from(child.querySelectorAll('.tool-card-row')):[];
+  });
+}
+function _unwrapNestedToolGroups(tools){ // Remove the nested tool-group wrappers that are direct children of `tools`.
+ if(!tools) return;
+ tools.querySelectorAll(':scope > .tool-worklog-tool-group,:scope > .tool-group').forEach(el=>el.remove()); // removes each wrapper together with its rows — callers must collect the rows first (_syncToolRowsContainer does, then re-appends them)
+}
+function _syncToolRowsContainer(tools, isLiveWorklog){
+ if(!tools) return;
+ const rows=_directWorklogToolRows(tools);
+ _unwrapNestedToolGroups(tools);
+ rows.forEach(row=>{ if(row.parentElement) row.remove(); });
+ tools.querySelectorAll(':scope > .tool-card-row').forEach(row=>row.remove());
+ const shouldGroup=tools.classList.contains('wl-step-tools') && rows.length>1;
+ if(!shouldGroup){
+ rows.forEach(row=>tools.appendChild(row));
+ return;
+ }
+ const hasRunning=rows.some(row=>row&&row.dataset&&row.dataset.toolDone==='false');
+ const shouldOpen=false;
+ const group=document.createElement('div');
+ group.className='tool-group'+(shouldOpen?' open':' tool-worklog-tool-group-collapsed');
+ group.setAttribute('data-tool-worklog-tool-group','1');
+ const summary=hasRunning?'Running':_toolWorklogSummary(rows,{live:isLiveWorklog, toolCount:rows.length});
+ group.innerHTML=`
`;
+ const body=group.querySelector('.tg-rows');
+ rows.forEach(row=>body.appendChild(row));
+ tools.appendChild(group);
+}
+function _syncToolWorklogToolGroup(group){
+  // Re-sync every tools step of a worklog group; when no step exists yet, loose tool rows are first moved into a fresh one.
+  const list=_toolWorklogListEl(group);
+  if(!list) return;
+  const isLiveWorklog=['data-live-tool-worklog-group','data-live-tool-call-group'].some(attr=>group.getAttribute(attr)==='1');
+  const steps=Array.from(list.querySelectorAll(':scope > .wl-step-tools[data-worklog-tools="1"]'));
+  if(steps.length){
+    steps.forEach(tools=>_syncToolRowsContainer(tools,isLiveWorklog));
+    return;
+  }
+  const pendingRows=_directWorklogToolRows(list);
+  const tools=pendingRows.length?_toolWorklogToolsEl(group):null;
+  if(!tools) return;
+  pendingRows.forEach(row=>tools.appendChild(row));
+  _syncToolRowsContainer(tools,isLiveWorklog);
+}
function toolIcon(name){
const icons={
terminal: li('terminal'),
@@ -7933,6 +8884,11 @@ function _toolCardPreviewText(tc, displaySnippet){
function buildToolCard(tc){
const row=document.createElement('div');
row.className='tool-card-row';
+ if(!row.dataset) row.dataset={};
+ row.dataset.toolKind=typeof _toolActionKind==='function'?_toolActionKind(tc):'unknown';
+ row.dataset.toolDone=String(tc&&tc.done!==false);
+ row.dataset.toolError=String(!!(tc&&tc.is_error));
+ row.dataset.toolActionLabel=typeof _toolActionLabelText==='function'?_toolActionLabelText(tc):_toolDisplayName(tc);
const icon=toolIcon(tc.name);
const hasDetail=(tc.snippet&&tc.snippet!==tc.preview)||(tc.args&&Object.keys(tc.args).length>0);
let displaySnippet='';
@@ -8028,34 +8984,54 @@ function _toggleToolDiff(btn){
function _syncToolCallGroupSummary(group){
if(!group) return;
- const cards=Array.from(group.querySelectorAll('.tool-card-row .tool-card'));
+ if(group.getAttribute('data-tool-worklog-group')==='1') _syncToolWorklogToolGroup(group);
+ const cards=Array.from((_toolWorklogListEl(group)||group).querySelectorAll('.tool-card-row .tool-card,.tool-card-row.tl'));
const toolCount=cards.length;
- const label=group.querySelector('.tool-call-group-label');
+ const label=group.querySelector('.tool-worklog-label') || group.querySelector('.tool-call-group-label');
+ const isWorklogGroup=!!(group.getAttribute('data-tool-worklog-group')==='1');
+ const isLiveWorklog=!!(group.getAttribute('data-live-tool-worklog-group')==='1' || group.getAttribute('data-live-tool-call-group')==='1');
+ const hasRunningTool=cards.some(card=>card.classList.contains('tool-card-running'));
+ if(isWorklogGroup){
+ if(hasRunningTool) group.setAttribute('data-tool-worklog-running','1');
+ else group.removeAttribute('data-tool-worklog-running');
+ }
const durationEl=group.querySelector('.tool-call-group-duration');
if(label){
- const rows=Array.from(group.querySelectorAll('.tool-card-row'));
- // Prefer the live _tcData classification; fall back to the durable data-*
- // flags for rows restored from an HTML snapshot (which drops JS properties).
- const isMem=r=>_isMemorySave(r._tcData)||r.getAttribute('data-memory-save')==='1';
- const isSkill=r=>_isSkillUpdate(r._tcData)||r.getAttribute('data-skill-update')==='1';
- const memCount=rows.filter(isMem).length;
- const skillCount=rows.filter(r=>!isMem(r)&&isSkill(r)).length;
- const otherCount=Math.max(0, toolCount-memCount-skillCount);
- let suffix='';
- if(memCount) suffix+=`, ${memCount} ${memCount===1?'memory':'memories'} saved`;
- if(skillCount) suffix+=`, ${skillCount} ${skillCount===1?'skill':'skills'} updated`;
- const toolsPart=otherCount?`${otherCount} tool${otherCount===1?'':'s'}`:'';
- if(group.getAttribute('data-live-tool-call-group')==='1'){
- if(toolsPart) label.textContent=`Activity: ${toolsPart}${suffix}`;
- else if(suffix) label.textContent=`Activity: ${suffix.slice(2)}`;
- else label.textContent='Activity · Running';
- }else if(toolsPart||suffix){
- label.textContent=toolsPart?`Activity: ${toolsPart}${suffix}`:`Activity: ${suffix.slice(2)}`;
- }else label.textContent='Activity';
+ if(group.getAttribute('data-run-activity-group')==='1'){
+ label.textContent=toolCount?_toolWorklogSummary(cards,{live:isLiveWorklog, toolCount}):'Running';
+ }else if(isWorklogGroup){
+ label.textContent=_toolWorklogSummary(cards,{live:isLiveWorklog, toolCount, labelOnly:!toolCount&&isLiveWorklog});
+ if(!label.textContent) label.textContent=isLiveWorklog?'Running':'Worklog';
+ }else{
+ const rows=Array.from(group.querySelectorAll('.tool-card-row'));
+ // Prefer the live _tcData classification; fall back to the durable data-*
+ // flags for rows restored from an HTML snapshot (which drops JS properties).
+ const isMem=r=>_isMemorySave(r._tcData)||r.getAttribute('data-memory-save')==='1';
+ const isSkill=r=>_isSkillUpdate(r._tcData)||r.getAttribute('data-skill-update')==='1';
+ const memCount=rows.filter(isMem).length;
+ const skillCount=rows.filter(r=>!isMem(r)&&isSkill(r)).length;
+ const otherCount=Math.max(0, toolCount-memCount-skillCount);
+ let suffix='';
+ if(memCount) suffix+=`, ${memCount} ${memCount===1?'memory':'memories'} saved`;
+ if(skillCount) suffix+=`, ${skillCount} ${skillCount===1?'skill':'skills'} updated`;
+ const toolsPart=otherCount?`${otherCount} tool${otherCount===1?'':'s'}`:'';
+ if(group.getAttribute('data-live-tool-call-group')==='1'){
+ if(toolsPart) label.textContent=`Activity: ${toolsPart}${suffix}`;
+ else if(suffix) label.textContent=`Activity: ${suffix.slice(2)}`;
+ else label.textContent='Running';
+ }else if(toolsPart||suffix){
+ label.textContent=toolsPart?`Activity: ${toolsPart}${suffix}`:`Activity: ${suffix.slice(2)}`;
+ }else label.textContent='Activity';
+ }
label.setAttribute('data-sweep-label', label.textContent);
}
if(durationEl){
- if(group.getAttribute('data-live-tool-call-group')==='1'){
+ if(group.getAttribute('data-run-activity-group')==='1'){
+ const durationText=_formatTurnDuration(group.dataset.turnDuration);
+ const label=durationText?'':_activityElapsedLabel(group);
+ durationEl.textContent=durationText?` Done in ${durationText}`:(label?` Working for ${label}`:'');
+ durationEl.style.display=durationEl.textContent?'':'none';
+ }else if(group.getAttribute('data-live-tool-call-group')==='1'){
const activeText=_activityElapsedLabel(group);
const progressText=_activityLiveProgressLabel(group);
if(activeText) group.setAttribute('data-active-turn-elapsed',activeText);
@@ -8064,7 +9040,7 @@ function _syncToolCallGroupSummary(group){
durationEl.style.display=durationEl.textContent?'':'none';
}else{
const durationText=_formatTurnDuration(group.dataset.turnDuration);
- durationEl.textContent=durationText?`Done in ${durationText}`:'';
+ durationEl.textContent=durationText?` Done in ${durationText}`:'';
durationEl.style.display=durationText?'':'none';
}
}
@@ -8124,6 +9100,9 @@ function appendLiveToolCard(tc){
// Guard: ignore if session was switched. Prevents stale tool events from
// a previous session's SSE stream from manipulating the new session's DOM.
if(!S.session||!S.activeStreamId) return;
+ const opts=arguments[1]||{};
+ if(opts.sessionId&&S.session.session_id!==opts.sessionId) return;
+ if(opts.streamId&&S.activeStreamId!==opts.streamId) return;
let turn=$('liveAssistantTurn');
if(!turn){
turn=_createAssistantTurn();
@@ -8134,88 +9113,93 @@ function appendLiveToolCard(tc){
const inner=_assistantTurnBlocks(turn);
if(!inner) return;
const tid=tc.tid||'';
- if(!isSimplifiedToolCalling()){
- // Update existing card in place (tool_complete after tool_start)
- if(tid){
- const existing=inner.querySelector(`.tool-card-row[data-live-tid="${CSS.escape(tid)}"]`);
- if(existing){
- const replacement=buildToolCard(tc);
- replacement.dataset.liveTid=tid;
- existing.replaceWith(replacement);
- // Keep #toolRunningRow alive — dots stay until text starts streaming
- // or the next tool fires (which replaces them). Removing here caused
- // a gap between tool completion and the first text token arriving.
- return;
- }
- }
- const row=buildToolCard(tc);
- if(tid) row.dataset.liveTid=tid;
- // Insert after whichever comes last: the current live assistant segment or
- // the last tool card. This handles both cases:
- // text → tool1 → tool2 (no text between tools: anchor is card1)
- // text1 → tool1 → text2 → tool2 (text between tools: anchor is text2)
- const children=Array.from(inner.children);
- // Include .thinking-card-row so tool cards land AFTER a finalized thinking
- // card, not between the text segment and thinking.
- const anchor=children.filter(el=>el.matches('[data-live-assistant="1"],.tool-card-row,.thinking-card-row')).pop();
- if(anchor) anchor.insertAdjacentElement('afterend', row);
- else inner.appendChild(row);
- // Add a 3-dot waiting indicator below the tool card so there's visual
- // feedback while the tool is running. Removed when text starts streaming
- // (ensureAssistantRow) or when tool_complete fires.
- const oldWait=$('toolRunningRow');if(oldWait)oldWait.remove();
- const waitRow=document.createElement('div');
- waitRow.id='toolRunningRow';
- waitRow.className='assistant-segment';
- waitRow.innerHTML='
';
- row.insertAdjacentElement('afterend', waitRow);
- if(typeof scrollIfPinned==='function') scrollIfPinned();
- return;
- }
const children=Array.from(inner.children);
- const anchor=children.filter(el=>el.matches('[data-live-assistant="1"],.tool-call-group,.tool-card-row,.agent-activity-thinking')).pop();
- const group=ensureActivityGroup(inner,{live:true,collapsed:true,anchor,activityKey:_activityKeyForLiveTurn()});
- const body=group.querySelector('.tool-call-group-body');
- const toolName=_toolDisplayName(tc);
- const toolEventId=tid?`tool-${tid}`:`tool-${String(tc.name||'tool').replace(/[^a-z0-9_-]/gi,'_')}`;
- const toolDone=tc.done!==false;
- _appendActivityEvent(group,{
- id:toolEventId,
- kind:'tool',
- label:toolDone?`Tool finished: ${toolName}`:`Running tool: ${toolName}`,
- detail:tc.preview||tc.snippet||'',
- status:toolDone?(tc.is_error?'error':'done'):'waiting',
- ts:_activityNowSeconds(),
+ const burstId=tc.activityBurstId!==undefined&&tc.activityBurstId!==null&&String(tc.activityBurstId)!=='0'?String(tc.activityBurstId):'';
+ const segmentSeq=tc.activitySegmentSeq!==undefined&&tc.activitySegmentSeq!==null&&String(tc.activitySegmentSeq)!=='0'?String(tc.activitySegmentSeq):'';
+ const segmentAnchor=segmentSeq?_findLiveAssistantAnchorForSegment(inner, segmentSeq):null;
+ const burstAnchor=burstId?_findLatestVisibleLiveAssistantByBurst(inner, burstId):null;
+ const anchor=segmentAnchor||burstAnchor||_findLatestVisibleLiveAssistant(inner)||children.filter(el=>el.matches('[data-live-assistant="1"]')).pop();
+ const effectiveSegmentSeq=anchor&&anchor.getAttribute?anchor.getAttribute('data-live-segment-seq')||segmentSeq:segmentSeq;
+ if(anchor) _removeEmptyLiveWorklogShells(inner);
+ const group=ensureLiveWorklogContainer(inner,{
+ anchor,
+ activityKey:_activityKeyForLiveTurn(),
+ segmentSeq:effectiveSegmentSeq,
+ burstId,
});
- const waiting=body.querySelector('.agent-activity-status[data-activity-event-id="thinking-placeholder"]');
- if(waiting&&!toolDone){
- const labelEl=waiting.querySelector('.agent-activity-status-label');
- const detailEl=waiting.querySelector('.agent-activity-status-detail');
- if(labelEl) labelEl.textContent='Waiting on tool result';
- if(detailEl) detailEl.textContent=`${_activityProgressLabelForToolName(toolName)}: ${toolName}. Results will appear here.`;
- }
- // Update existing card in place (tool_complete after tool_start)
+ const list=_liveToolStepEl(group);
+ if(!list) return;
+ // toolComplete can replace the existing live card with the same tid.
if(tid){
- const existing=body.querySelector(`.tool-card-row[data-live-tid="${CSS.escape(tid)}"]`);
+ const existing=group.querySelector(`.tool-card-row[data-live-tid="${CSS.escape(tid)}"]`);
if(existing){
const replacement=buildToolCard(tc);
replacement.dataset.liveTid=tid;
existing.replaceWith(replacement);
_syncToolCallGroupSummary(group);
+ _moveLiveRunStatusToTurnEnd();
+ if(typeof scrollIfPinned==='function') scrollIfPinned();
return;
}
}
+ const worklog=_toolWorklogListEl(group) || list;
+ const waiting=worklog.querySelector('.agent-activity-status[data-activity-event-id="thinking-placeholder"] .agent-activity-status-label');
+ if(waiting&&tc.done===false) waiting.textContent='Waiting on tool result';
const row=buildToolCard(tc);
if(tid) row.dataset.liveTid=tid;
- body.appendChild(row);
+ list.appendChild(row);
_syncToolCallGroupSummary(group);
+ _moveLiveRunStatusToTurnEnd();
if(typeof scrollIfPinned==='function') scrollIfPinned();
}
+// Return the last live assistant element under `inner` whose
+// data-activity-burst-id equals `burstId`, or null when `inner`/`burstId` is
+// falsy or nothing matches. Unlike the "Visible" variant, text content is not
+// checked here.
+function _findLatestLiveAssistantByBurst(inner, burstId){
+ if(!inner || !burstId) return null;
+ const candidates=Array.from(inner.querySelectorAll(`[data-live-assistant="1"][data-activity-burst-id="${CSS.escape(String(burstId))}"]`))
+ // Only drop elements whose isConnected is explicitly false.
+ .filter(el=>el.isConnected!==false);
+ return candidates[candidates.length-1] || null;
+}
+// Return the last live assistant element under `inner` whose
+// data-live-segment-seq equals `segmentSeq`, or null when `inner`/`segmentSeq`
+// is falsy or nothing matches. Elements with isConnected === false are ignored.
+function _findLatestLiveAssistantBySegment(inner, segmentSeq){
+ if(!inner || !segmentSeq) return null;
+ const candidates=Array.from(inner.querySelectorAll(`[data-live-assistant="1"][data-live-segment-seq="${CSS.escape(String(segmentSeq))}"]`)).filter(el=>el.isConnected!==false);
+ return candidates[candidates.length-1] || null;
+}
+// True when `el` is a live assistant element ([data-live-assistant="1"]) that
+// carries non-whitespace text. Returns false for anything else, including
+// null and objects without a `matches` method.
+function _liveAssistantHasVisibleText(el){
+ if(!el||!el.matches||!el.matches('[data-live-assistant="1"]')) return false;
+ const body=el.querySelector&&el.querySelector('.msg-body');
+ // Text source order: the .msg-body text (or the element's own text when
+ // there is no .msg-body), then dataset.rawText, then the empty string.
+ const text=(body?body.textContent:el.textContent)||el.dataset&&el.dataset.rawText||'';
+ return !!String(text||'').trim();
+}
+// Walk backwards through the element siblings that precede `beforeNode` and
+// return the first one that _liveAssistantHasVisibleText accepts. Returns null
+// when `inner` or `beforeNode` is falsy or no such sibling exists. `inner` is
+// only used for the guard; the walk itself starts from `beforeNode`.
+function _findPreviousVisibleLiveAssistant(inner, beforeNode){
+ if(!inner) return null;
+ let node=beforeNode&&beforeNode.previousElementSibling;
+ while(node){
+ if(_liveAssistantHasVisibleText(node)) return node;
+ node=node.previousElementSibling;
+ }
+ return null;
+}
+// Return the last live assistant element under `inner` that has visible text
+// (per _liveAssistantHasVisibleText) and is not explicitly disconnected, or
+// null when `inner` is falsy or there is no such element.
+function _findLatestVisibleLiveAssistant(inner){
+ if(!inner) return null;
+ const candidates=Array.from(inner.querySelectorAll('[data-live-assistant="1"]')).filter(el=>el.isConnected!==false&&_liveAssistantHasVisibleText(el));
+ return candidates[candidates.length-1] || null;
+}
+// Return the last live assistant element under `inner` that belongs to burst
+// `burstId` (data-activity-burst-id) and has visible text, or null when
+// `inner`/`burstId` is falsy or nothing qualifies.
+function _findLatestVisibleLiveAssistantByBurst(inner, burstId){
+ if(!inner || !burstId) return null;
+ const candidates=Array.from(inner.querySelectorAll(`[data-live-assistant="1"][data-activity-burst-id="${CSS.escape(String(burstId))}"]`))
+ .filter(el=>el.isConnected!==false&&_liveAssistantHasVisibleText(el));
+ return candidates[candidates.length-1] || null;
+}
+// Pick the anchor element for a segment. Preference order:
+//   1. the segment's own live assistant element, if it has visible text;
+//   2. the nearest preceding sibling of that element with visible text;
+//   3. the latest live assistant with visible text anywhere under `inner`;
+//   4. the segment's own element even though it has no visible text.
+// The result can be null when none of these exist.
+function _findLiveAssistantAnchorForSegment(inner, segmentSeq){
+ const exact=_findLatestLiveAssistantBySegment(inner, segmentSeq);
+ if(exact&&_liveAssistantHasVisibleText(exact)) return exact;
+ return _findPreviousVisibleLiveAssistant(inner, exact) || _findLatestVisibleLiveAssistant(inner) || exact;
+}
+
function clearLiveToolCards(){
if(typeof _clearActivityElapsedTimer==='function') _clearActivityElapsedTimer();
const inner=_assistantTurnBlocks($('liveAssistantTurn'));
- if(inner) inner.querySelectorAll('.tool-call-group[data-live-tool-call-group],.tool-card-row[data-live-tid]').forEach(el=>el.remove());
+ if(inner) inner.querySelectorAll('.live-worklog[data-live-worklog-shell],.tool-worklog-group[data-live-tool-call-group],.tool-call-group[data-live-tool-call-group],.tool-card-row[data-live-tid]').forEach(el=>el.remove());
// Reset the per-turn user expand intent so the next turn starts at the
// default collapsed state (#1298).
if(typeof _clearLiveActivityUserIntent==='function') _clearLiveActivityUserIntent();
@@ -8224,6 +9208,36 @@ function clearLiveToolCards(){
const container=$('liveToolCards');
if(container){container.innerHTML='';container.style.display='none';}
}
+// Remove live worklog shell elements (data-live-worklog-shell="1") under
+// `inner` that hold no content yet. A shell counts as empty when it contains
+// no .tool-card-row, .wl-reason, or .agent-activity-thinking descendant.
+// No-op when `inner` is falsy.
+function _removeEmptyLiveWorklogShells(inner){
+ if(!inner) return;
+ inner.querySelectorAll('.live-worklog[data-live-worklog-shell="1"],.tool-worklog-group[data-live-worklog-shell="1"],.tool-call-group[data-live-worklog-shell="1"]').forEach(group=>{
+ if(!group.querySelector('.tool-card-row,.wl-reason,.agent-activity-thinking')) group.remove();
+ });
+}
+function ensureLiveWorklogShell(){
+ if(!S.session||!S.activeStreamId) return null;
+ $('emptyState').style.display='none';
+ if(!isSimplifiedToolCalling()){
+ appendThinking();
+ return $('thinkingRow');
+ }
+ let turn=$('liveAssistantTurn');
+ if(!turn){
+ turn=_createAssistantTurn();
+ turn.id='liveAssistantTurn';
+ if(S.session) turn.dataset.sessionId=S.session.session_id;
+ $('msgInner').appendChild(turn);
+ }
+ const blocks=_assistantTurnBlocks(turn);
+ if(!blocks) return null;
+ const group=ensureLiveWorklogContainer(blocks,{
+ activityKey:_activityKeyForLiveTurn(),
+ });
+ if(!group) return null;
+ _moveLiveRunStatusToTurnEnd();
+ scrollIfPinned();
+ return group;
+}
// ── Edit + Regenerate ──
@@ -8980,19 +9994,14 @@ function finalizeThinkingCard(){
return;
}
const turn=$('liveAssistantTurn');
- const group=turn&&turn.querySelector('.tool-call-group[data-live-tool-call-group="1"]');
+ const group=turn&&turn.querySelector('.live-worklog[data-live-tool-call-group="1"],.tool-worklog-group[data-live-tool-call-group="1"],.tool-call-group[data-live-tool-call-group="1"]');
if(group){
- // Respect the user's explicit expand intent (#1298) — only force-collapse
- // when the user has not manually expanded this turn's activity group, or
- // has manually collapsed it. Otherwise the panel snaps shut whenever new
- // activity arrives, even mid-read.
- if(_liveActivityUserExpanded !== true && !(window._activityFeedExpandedDefault === true && _liveActivityUserExpanded !== false)){
- group.classList.add('tool-call-group-collapsed');
- const summary=group.querySelector('.tool-call-group-summary');
- if(summary) summary.setAttribute('aria-expanded','false');
- }
- const active=turn.querySelector('.agent-activity-thinking[data-thinking-active="1"]');
- if(active) active.removeAttribute('data-thinking-active');
+ const activeReason=turn.querySelector('.wl-reason[data-worklog-reason-active="1"]');
+ if(activeReason) activeReason.removeAttribute('data-worklog-reason-active');
+ turn.querySelectorAll('.agent-activity-thinking[data-thinking-active="1"]').forEach(active=>{
+ active.removeAttribute('data-thinking-active');
+ active.removeAttribute('data-live-thinking');
+ });
_syncToolCallGroupSummary(group);
}
}
@@ -9000,109 +10009,74 @@ function appendThinking(text='', options){
// Guard: ignore if session was switched during an async SSE stream.
// The old stream's reasoning events can still fire after switch;
// without this check they would pollute the new session's DOM.
+ options=options||{};
const allowPendingPlaceholder=!!(options&&options.pending===true);
if(!S.session||(!S.activeStreamId&&!allowPendingPlaceholder)) return;
- $('emptyState').style.display='none';
- let turn=$('liveAssistantTurn');
- if(!turn){
- turn=_createAssistantTurn();
- turn.id='liveAssistantTurn';
- if(S.session) turn.dataset.sessionId=S.session.session_id; // see #1366
- $('msgInner').appendChild(turn);
- }
- const blocks=_assistantTurnBlocks(turn);
- if(!blocks) return;
+ const empty=$('emptyState');
+ if(empty) empty.style.display='none';
if(!isSimplifiedToolCalling()){
let row=$('thinkingRow');
if(!row){
row=document.createElement('div');
- row.className='assistant-segment';
row.id='thinkingRow';
- row.setAttribute('data-thinking-active','1');
- // Insert after whichever comes last: a live assistant segment or a tool card.
- // This mirrors appendLiveToolCard's anchor logic so thinking always appears
- // in the right position in the interleaved sequence.
- // Also skip #toolRunningRow (dots) — thinking should go before dots, not after.
- const allChildren=Array.from(blocks.children);
- const anchor=allChildren.filter(el=>
- el.id!=='toolRunningRow' &&
- el.matches('[data-live-assistant="1"],.tool-card-row')
- ).pop();
- if(anchor) anchor.insertAdjacentElement('afterend', row);
- else blocks.appendChild(row);
- }
- const clean=_sanitizeThinkingDisplayText(text);
- const hasClean=!!String(clean||'').trim();
- row.className=hasClean?'assistant-segment thinking-card-row':'assistant-segment';
- _renderThinkingInto(row,text);
- scrollIfPinned();
- // Auto-scroll the thinking card body to bottom if the user is watching
- // (scroll pinned). If the user scrolled up to read history, leave it alone.
- if(_scrollPinned){
- const body=row&&row.querySelector('.thinking-card-body');
- if(body) body.scrollTop=body.scrollHeight;
- }
- return;
- }
- const thinkingText=String(text||'').trim()||'Thinking…';
- const cleanThinking=_sanitizeThinkingDisplayText(thinkingText);
- const allChildren=Array.from(blocks.children);
- const anchor=allChildren.filter(el=>
- el.id!=='toolRunningRow' &&
- el.matches('[data-live-assistant="1"],.tool-call-group,.tool-card-row')
- ).pop();
- const group=ensureActivityGroup(blocks,{live:true,collapsed:true,anchor,activityKey:_activityKeyForLiveTurn()});
- const body=group&&group.querySelector('.tool-call-group-body');
- if(!body) return;
- if(!cleanThinking||cleanThinking==='Thinking…'){
- const hasRunningTool=!!body.querySelector('.tool-card.tool-card-running');
- const hasToolCard=!!body.querySelector('.tool-card-row');
- let label;
- let detail;
- if(!S.activeStreamId && options && options.pending){
- label='Starting agent';
- detail='Creating the stream and sending your message…';
- }else if(hasRunningTool){
- label='Waiting on tool result';
- detail=_activityWaitingDetail(group,label);
- }else if(hasToolCard){
- label='Waiting on model';
- detail=_activityWaitingDetail(group,label);
- }else{
- label='Waiting for first model token';
- detail='Stream connected; no model output has arrived yet.';
+ row.className='thinking-card-row';
+ const inner=$('msgInner');
+ if(inner) inner.appendChild(row);
}
- _appendActivityEvent(group,{id:'thinking-placeholder',kind:'waiting',label,detail,status:'waiting',ts:_activityNowSeconds()});
- const active=body.querySelector('.agent-activity-thinking[data-thinking-active="1"]');
- if(active) active.removeAttribute('data-thinking-active');
- _syncToolCallGroupSummary(group);
- scrollIfPinned();
+ row.setAttribute('data-thinking-active','1');
+ _renderThinkingInto(row,text);
+ if(typeof scrollIfPinned==='function') scrollIfPinned();
return;
}
- const placeholder=body.querySelector('.agent-activity-status[data-activity-event-id="thinking-placeholder"]');
- if(placeholder) placeholder.remove();
- let row=body.querySelector('.agent-activity-thinking[data-thinking-active="1"]');
- if(!row){
- const thinkingCards=Array.from(body.querySelectorAll('.agent-activity-thinking'));
- row=thinkingCards.pop()||null;
- if(row) row.setAttribute('data-thinking-active','1');
- }
- if(!row){
- row=_thinkingActivityNode(thinkingText, false);
- row.setAttribute('data-thinking-active','1');
- body.appendChild(row);
- }else{
- _renderThinkingInto(row,thinkingText);
+ let turn=$('liveAssistantTurn');
+ if(!turn){
+ turn=_createAssistantTurn();
+ turn.id='liveAssistantTurn';
+ if(S.session) turn.dataset.sessionId=S.session.session_id;
+ const inner=$('msgInner');
+ if(inner) inner.appendChild(turn);
}
- _activityMarkObserved(group);
- _syncToolCallGroupSummary(group);
- scrollIfPinned();
- if(_scrollPinned){
- const body=row&&row.querySelector('.thinking-card-body');
- if(body) body.scrollTop=body.scrollHeight;
+ const blocks=_assistantTurnBlocks(turn);
+ if(!blocks) return;
+ const clean=_sanitizeThinkingDisplayText(text);
+ if(clean&&window._showThinking!==false){
+ const segmentSeq=options.segmentSeq!==undefined&&options.segmentSeq!==null?String(options.segmentSeq):'';
+ const burstId=options.burstId!==undefined&&options.burstId!==null?String(options.burstId):'';
+ const thinkingKey=String(options.thinkingKey||(
+ segmentSeq?`segment:${segmentSeq}`:
+ burstId?`burst:${burstId}`:
+ 'turn'
+ ));
+ const group=ensureLiveWorklogContainer(blocks,{
+ activityKey:options.activityKey||(S.activeStreamId?'live:'+S.activeStreamId:null),
+ });
+ const list=_toolWorklogListEl(group);
+ if(list){
+ let row=list.querySelector(`.agent-activity-thinking[data-live-thinking="1"][data-live-thinking-key="${CSS.escape(thinkingKey)}"]`);
+ if(!row){
+ row=_thinkingActivityNode(clean, false);
+ row.setAttribute('data-live-thinking','1');
+ row.setAttribute('data-live-thinking-key',thinkingKey);
+ if(segmentSeq) row.setAttribute('data-live-segment-seq',segmentSeq);
+ if(burstId) row.setAttribute('data-activity-burst-id',burstId);
+ list.querySelectorAll('.agent-activity-thinking[data-thinking-active="1"]').forEach(el=>{
+ if(el!==row){
+ el.removeAttribute('data-thinking-active');
+ el.removeAttribute('data-live-thinking');
+ }
+ });
+ row.setAttribute('data-thinking-active','1');
+ list.appendChild(row);
+ }else{
+ _renderThinkingInto(row, clean);
+ }
+ row.setAttribute('data-thinking-active','1');
+ _syncToolCallGroupSummary(group);
+ }
}
+ if(typeof scrollIfPinned==='function') scrollIfPinned();
}
-function updateThinking(text=''){appendThinking(text);}
+function updateThinking(text='', options){appendThinking(text, options);}
function removeThinking(){
if(!isSimplifiedToolCalling()){
const el=$('thinkingRow');
diff --git a/tests/test_auto_compression_card.py b/tests/test_auto_compression_card.py
index a1dd63fed7..f65a2d0cf9 100644
--- a/tests/test_auto_compression_card.py
+++ b/tests/test_auto_compression_card.py
@@ -2,7 +2,10 @@
from api.compression_anchor import visible_messages_for_anchor
from api.models import Session
-from api.streaming import _is_fallback_lifecycle_message
+from api.streaming import (
+ _is_fallback_lifecycle_message,
+ _prune_context_tool_results_after_compression,
+)
ROOT = Path(__file__).resolve().parents[1]
@@ -30,6 +33,46 @@ def _compressing_listener_block() -> str:
return src[start:end]
+def test_post_compression_context_prunes_tail_tool_results_with_active_compressor():
+ class FakeCompressor:
+ protect_last_n = 20
+ tail_token_budget = 4096
+
+ def __init__(self):
+ self.calls = []
+
+ def _prune_old_tool_results(self, messages, protect_tail_count, protect_tail_tokens=None):
+ self.calls.append(
+ {
+ "protect_tail_count": protect_tail_count,
+ "protect_tail_tokens": protect_tail_tokens,
+ }
+ )
+ out = []
+ pruned = 0
+ for msg in messages:
+ next_msg = dict(msg)
+ if next_msg.get("role") == "tool" and len(str(next_msg.get("content") or "")) > 200:
+ next_msg["content"] = "[browser_navigate] opened page (large snapshot summarized)"
+ pruned += 1
+ out.append(next_msg)
+ return out, pruned
+
+ compressor = FakeCompressor()
+ agent = type("Agent", (), {"context_compressor": compressor})()
+ context_messages = [
+ {"role": "assistant", "content": "", "tool_calls": [{"id": "call_big"}]},
+ {"role": "tool", "tool_call_id": "call_big", "content": "x" * 5000},
+ {"role": "assistant", "content": "Final answer"},
+ ]
+
+ pruned = _prune_context_tool_results_after_compression(agent, context_messages)
+
+ assert compressor.calls == [{"protect_tail_count": 20, "protect_tail_tokens": 4096}]
+ assert pruned[1]["content"] == "[browser_navigate] opened page (large snapshot summarized)"
+ assert context_messages[1]["content"] == "x" * 5000
+
+
def test_auto_compression_running_sse_uses_active_session_running_card():
block = _compressing_listener_block()
@@ -39,7 +82,8 @@ def test_auto_compression_running_sse_uses_active_session_running_card():
assert "setCompressionUi" in block
assert "phase:'running'" in block
assert "automatic:true" in block
- assert "message:d.message||'Auto-compressing context...'" in block
+ assert "message:'Compressing context'" in block
+ assert "message:d.message||'Compressing context'" not in block
def test_agent_status_callback_emits_compressing_and_warning_events():
@@ -53,7 +97,7 @@ def test_agent_status_callback_emits_compressing_and_warning_events():
# compressing events for compression lifecycle notices
assert "put('compressing'" in block
assert "'session_id': session_id" in block
- assert "'message': 'Auto-compressing context to continue...'" in block
+ assert "'message': 'Compressing context'" in block
assert "'preflight compression'" in block
assert "'compressing'" in block
assert "'compacting context'" in block
@@ -95,7 +139,7 @@ def test_fallback_lifecycle_message_predicate_matches_agent_emitters():
)
assert not _is_fallback_lifecycle_message(
"lifecycle",
- "Auto-compressing context to continue...",
+ "Compressing context",
)
@@ -105,7 +149,18 @@ def test_auto_compression_completion_transition_is_preserved_after_running_liste
compressed_idx = src.find("source.addEventListener('compressed'")
assert compressing_idx != -1 and compressed_idx != -1
assert compressing_idx < compressed_idx
+ assert "appendLiveCompressionCard({" in _compressed_listener_block()
assert "phase:'done'" in _compressed_listener_block()
+ assert "message:'Context auto-compressed'" in _compressed_listener_block()
+ assert "clearCompressionUi()" in _compressed_listener_block()
+
+
+def test_auto_compression_completion_ignores_legacy_payload_message():
+ block = _compressed_listener_block()
+
+ assert "d.message||'Compression finished'" not in block
+ assert "setCompressionUi" not in block
+ assert "message:'Context auto-compressed'" in block
def test_auto_compression_running_sse_stamps_elapsed_timer_start():
@@ -115,7 +170,7 @@ def test_auto_compression_running_sse_stamps_elapsed_timer_start():
assert block.index("startedAt:Date.now()/1000") < block.index("setCompressionUi(state)")
-def test_auto_compression_running_card_renders_elapsed_timer_and_caps_updates():
+def test_auto_compression_running_card_keeps_elapsed_timer_out_of_visible_copy():
src = _read("static/ui.js")
start = src.find("function _autoCompressionPreviewText")
assert start != -1, "auto compression preview helper not found"
@@ -126,8 +181,9 @@ def test_auto_compression_running_card_renders_elapsed_timer_and_caps_updates():
assert "const _COMPRESSION_ELAPSED_MAX_SECONDS=5*60;" in src
assert "function _compressionElapsedLabel(state)" in src
assert "_formatActiveElapsedTimer" in src
- assert "_compressionElapsedLabel(state)" in helper
- assert "elapsedLabel" in helper
+ assert "_compressionElapsedLabel(state)" not in helper
+ assert "elapsedLabel" not in helper
+ assert "`Elapsed: ${elapsedLabel}`" not in helper
assert "_autoCompressionPreviewText(state)" in helper
assert "_autoCompressionDetailText(state)" in helper
assert "function _startCompressionElapsedTimer()" in src
@@ -137,6 +193,104 @@ def test_auto_compression_running_card_renders_elapsed_timer_and_caps_updates():
assert "_clearCompressionElapsedTimer();" in src
+def test_auto_compression_uses_command_action_copy():
+ src = _read("static/ui.js")
+ start = src.find("function _autoCompressionPreviewText")
+ assert start != -1, "auto compression preview helper not found"
+ end = src.find("function _autoCompressionDetailText", start)
+ assert end != -1, "auto compression detail helper not found after preview helper"
+ helper = src[start:end]
+
+ assert "Compressing context" in helper
+ assert "Context auto-compressed" in helper
+ assert "Compression finished" not in helper
+ assert "return running?'Running':'Done';" not in helper
+
+
+def test_auto_compression_running_card_defaults_collapsed():
+ src = _read("static/ui.js")
+ start = src.find("function _autoCompressionCardsHtml")
+ assert start != -1, "auto compression card helper not found"
+ end = src.find("function _compressionCardsNode", start)
+ assert end != -1, "compression cards node helper not found after auto helper"
+ helper = src[start:end]
+
+ assert "auto-compression-divider" in helper
+ assert "open: false" not in helper
+ assert "open: running" not in helper
+
+
+def test_auto_compression_uses_centered_noninteractive_divider():
+ src = _read("static/style.css")
+
+ assert ".auto-compression-divider" in src
+ assert "grid-template-columns:minmax(32px,1fr) auto minmax(32px,1fr)" in src
+ assert "pointer-events:none" in src
+ override = src.split(".auto-compression-divider{", 1)[1].split("}", 1)[0]
+ assert "color:var(--muted)" in override
+ assert "user-select:none" in override
+
+
+def test_auto_compression_worklog_row_does_not_use_tool_card_affordances():
+ src = _read("static/ui.js")
+ start = src.find("function _autoCompressionWorklogNode")
+ assert start != -1, "auto compression worklog node helper not found"
+ end = src.find("function _compressionCardsNode", start)
+ assert end != -1, "compression cards node helper not found after worklog helper"
+ helper = src[start:end]
+
+ assert "tool-card-running-dot" not in helper
+ assert "auto_compress_label" not in helper
+ assert "tool-card-header" not in helper
+ assert "onclick" not in helper
+ assert "tabindex" not in helper
+ assert "tl-caret" not in helper
+ assert "auto-compression-divider" in helper
+ assert "auto-compression-divider-line" in helper
+ assert "_autoCompressionPreviewText(state)" in helper
+
+
+def test_auto_compression_live_card_appends_to_worklog_timeline():
+ src = _read("static/ui.js")
+ start = src.find("function appendLiveCompressionCard")
+ assert start != -1, "live compression card append helper not found"
+ end = src.find("function _isHandoffSummaryToolPayload", start)
+ assert end != -1, "handoff helper not found after live compression helper"
+ helper = src[start:end]
+
+ assert "ensureLiveWorklogContainer" in helper
+ assert "_toolWorklogListEl(group)" in helper
+ assert "_autoCompressionWorklogNode(state)" in helper
+ automatic_branch = helper.split("if(state.automatic){", 1)[1].split("const node=_compressionCardsNode(state);", 1)[0]
+ assert "inner.appendChild(node)" not in automatic_branch
+ assert "list.appendChild(node)" in automatic_branch
+
+
+def test_final_settle_removes_live_auto_compression_row():
+ src = _read("static/ui.js")
+ start = src.find("function clearLiveToolCards")
+ assert start != -1, "live tool cleanup helper not found"
+ end = src.find("function _removeEmptyLiveWorklogShells", start)
+ assert end != -1, "next live worklog helper not found after cleanup helper"
+ helper = src[start:end]
+
+ assert ".live-worklog[data-live-worklog-shell]" in helper
+ assert "data-live-compression-card" in src
+
+
+def test_final_settle_drops_transient_automatic_compression_state():
+ src = _read("static/ui.js")
+ start = src.find("function renderMessages")
+ assert start != -1, "renderMessages not found"
+ end = src.find("function _toolDisplayName", start)
+ assert end != -1, "renderMessages end marker not found"
+ helper = src[start:end]
+
+ assert "compressionState && compressionState.automatic" in helper
+ assert "window._compressionUi=null;" in helper
+ assert "compressionState=null;" in helper
+
+
def test_auto_compression_elapsed_cap_uses_non_frozen_label():
src = _read("static/ui.js")
start = src.find("function _compressionElapsedLabel")
@@ -158,11 +312,12 @@ def test_auto_compression_running_detail_avoids_duplicate_message_text():
assert end != -1, "auto compression card helper not found after detail helper"
helper = src[start:end]
- assert "return elapsedLabel?`Elapsed: ${elapsedLabel}`:base;" in helper
+ assert "if(running)return '';" in helper
+ assert "`Elapsed: ${elapsedLabel}`" not in helper
assert "${base}\\nElapsed:" not in helper
-def test_auto_compression_done_detail_surfaces_continuation_handoff():
+def test_auto_compression_done_detail_is_not_persisted_in_worklog():
src = _read("static/ui.js")
start = src.find("function _autoCompressionDetailText")
assert start != -1, "auto compression detail helper not found"
@@ -170,9 +325,9 @@ def test_auto_compression_done_detail_surfaces_continuation_handoff():
assert end != -1, "auto compression card helper not found after detail helper"
helper = src[start:end]
- assert "continuationSessionId" in helper
- assert "Continued in compressed session" in helper
- assert "return [base,handoff].filter(Boolean).join('\\n');" in helper
+ assert "continuationSessionId" not in helper
+ assert "Continued in compressed session" not in helper
+ assert "return '';" in helper
def test_auto_compression_live_card_keeps_elapsed_state_for_timer_refresh():
@@ -188,18 +343,71 @@ def test_auto_compression_live_card_keeps_elapsed_state_for_timer_refresh():
assert "_compressionLiveCardState" in src
-def test_auto_compression_does_not_rerender_over_live_answer_text():
+def test_auto_compression_does_not_rerender_over_live_worklog():
block = _compressing_listener_block()
src = _read("static/ui.js")
- assert "const liveAnswerStarted=" in block
+ assert "const liveAnswerStarted=" not in block
assert "appendLiveCompressionCard(state)" in block
- assert block.index("appendLiveCompressionCard(state)") < block.index("renderMessages({preserveScroll:true})")
- assert "window._compressionUi=null;" in block
+ assert "renderMessages({preserveScroll:true})" not in block
+ assert "restoreLiveTurnHtmlForSession(activeSid)" not in block
+ assert block.index("appendLiveCompressionCard(state)") < block.index("setCompressionUi(state)")
+ assert "clearCompressionUi()" in block
assert "function appendLiveCompressionCard(state)" in src
assert 'data-live-compression-card' in src
+def test_auto_compression_live_repeated_starts_keep_only_current_running_row():
+ src = _read("static/ui.js")
+ start = src.find("function appendLiveCompressionCard(state)")
+ assert start != -1, "live compression card append helper not found"
+ end = src.find("function _isHandoffSummaryToolPayload", start)
+ assert end != -1, "handoff helper not found after live compression helper"
+ helper = src[start:end]
+
+ assert "node.setAttribute('data-compression-phase',String(state.phase||''));" in helper
+ assert "const existingRunning=group.querySelector('[data-live-compression-card=\"1\"][data-compression-started-at]');" in helper
+ assert 'const existing=state.phase===\'running\'?existingRunning:(existingRunning||existingDone);' in helper
+ assert "if(existing) existing.replaceWith(node);" in helper
+ assert "else list.appendChild(node);" in helper
+
+
+def test_auto_compression_running_card_completes_on_followup_live_events():
+ src = _read("static/messages.js")
+
+ assert "function _completeAutomaticCompressionOnLiveProgress" in src
+ helper = src.split("function _completeAutomaticCompressionOnLiveProgress", 1)[1].split("source.addEventListener('token'", 1)[0]
+ assert "data-live-compression-card=\"1\"][data-compression-started-at]" in helper
+ assert "window._compressionUi&&window._compressionUi.automatic&&window._compressionUi.phase==='running'" in helper
+ assert "phase:'done'" in helper
+ assert "message:'Context auto-compressed'" in helper
+ assert "appendLiveCompressionCard({" in helper
+
+ for event_name in ("token", "interim_assistant", "reasoning", "tool", "tool_complete"):
+ start = src.find(f"source.addEventListener('{event_name}'")
+ assert start != -1, f"{event_name} listener not found"
+ end = src.find("source.addEventListener(", start + 1)
+ assert end != -1, f"{event_name} listener end not found"
+ block = src[start:end]
+ assert "_completeAutomaticCompressionOnLiveProgress(activeSid)" in block
+ assert "settleLiveCompressionCards" not in block
+ assert "clearCompressionUi()" not in block
+
+
+def test_auto_compression_elapsed_update_is_not_visible_detail_churn():
+ src = _read("static/ui.js")
+ start = src.find("function _updateCompressionElapsedCards")
+ assert start != -1, "elapsed update helper not found"
+ end = src.find("function _startCompressionElapsedTimer", start)
+ assert end != -1, "timer helper not found after elapsed updater"
+ helper = src[start:end]
+
+ assert "return false;" in helper
+ assert ".tool-card-compress-auto" not in helper
+ assert "tool-card-preview" not in helper
+ assert "tool-card-result" not in helper
+
+
def test_auto_compression_sse_uses_transient_card_not_fake_message():
"""Auto compression must not inject display-only text into S.messages."""
src = _read("static/messages.js")
@@ -207,13 +415,12 @@ def test_auto_compression_sse_uses_transient_card_not_fake_message():
assert "*[Context was auto-compressed to continue the conversation]*" not in src
assert "S.messages.push" not in block
- assert "setCompressionUi" in block
+ assert "setCompressionUi" not in block
assert "phase:'done'" in block
assert "automatic:true" in block
+ assert "appendLiveCompressionCard" in block
assert "_setCompressionSessionLock" in block
- assert "const appended=typeof appendLiveCompressionCard==='function'&&appendLiveCompressionCard(state);" in block
- assert "window._compressionUi=null;" in block
- assert block.index("appendLiveCompressionCard(state)") < block.index("window._compressionUi=null;")
+ assert "clearCompressionUi()" in block
def test_auto_compression_sse_keeps_inactive_and_malformed_paths_safe():
@@ -221,7 +428,7 @@ def test_auto_compression_sse_keeps_inactive_and_malformed_paths_safe():
guard = "if(!S.session) return;"
assert guard in block
- assert block.index(guard) < block.index("setCompressionUi")
+ assert block.index(guard) < block.index("appendLiveCompressionCard")
assert "try{ d=JSON.parse(e.data||'{}')||{}; }catch(_){ d={}; }" in block
assert "const eventSid=d.old_session_id||d.session_id||activeSid;" in block
assert "const eventMatchesCurrent=" in block
@@ -257,9 +464,8 @@ def test_auto_compression_done_accepts_event_after_current_session_rotates():
assert "const currentSid=S.session.session_id;" in block
assert "const eventMatchesCurrent=" in block
assert "const displaySid=currentSid;" in block
- assert "sessionId:displaySid" in block
assert block.index("const eventSid=") < block.index("const eventMatchesCurrent=")
- assert block.index("const displaySid=") < block.index("setCompressionUi(state)")
+ assert block.index("const displaySid=") < block.index("appendLiveCompressionCard")
def test_auto_compression_done_sse_refreshes_context_indicator_usage():
@@ -268,7 +474,7 @@ def test_auto_compression_done_sse_refreshes_context_indicator_usage():
assert "if(d.usage&&typeof _syncCtxIndicator==='function')" in block
assert "_mergeUsageForCtxIndicator(d.usage,S.lastUsage||{})" in block
assert "_syncCtxIndicator(S.lastUsage);" in block
- assert block.index("_syncCtxIndicator(S.lastUsage);") < block.index("setCompressionUi")
+ assert block.index("_syncCtxIndicator(S.lastUsage);") < block.index("appendLiveCompressionCard")
def test_auto_compression_done_payload_includes_live_usage_snapshot():
@@ -283,6 +489,7 @@ def test_auto_compression_done_payload_includes_live_usage_snapshot():
assert "'old_session_id': _compression_origin_session_id" in block
assert "'new_session_id': _compression_continuation_session_id" in block
assert "'continuation_session_id': _compression_continuation_session_id" in block
+ assert "'message': 'Compression finished'" in block
assert "'usage': _live_usage_snapshot()" in block
@@ -310,24 +517,17 @@ def test_auto_compression_card_reuses_compression_card_renderer():
helper = src[start:end]
assert "if(state.automatic) return _autoCompressionCardsHtml(state);" in src
- assert "tool-card-row compression-card-row" in helper
- assert "tool-card-compress-complete tool-card-compress-auto" in helper
- assert "auto_compress_label" in helper
+ assert "tool-card-row compression-card-row auto-compression-divider-row" in helper
+ assert "auto-compression-divider-line" in helper
+ assert "variantClass: 'tool-card-compress-auto'" not in helper
+ assert "statusLabel: preview" not in helper
-def test_auto_compression_compressed_sse_showtoast_has_explicit_longer_duration():
+def test_auto_compression_compressed_sse_does_not_show_persistent_completion_toast():
block = _compressed_listener_block()
- assert 'showToast' in block
- # Must call showToast with an explicit duration that is meaningfully longer
- # than the default (3000 ms) so the compressed event toast is harder to miss.
- import re
- m = re.search(r'showToast\(.*?,\s*(\d+)\s*\)', block)
- assert m is not None, 'showToast call in compressed SSE handler has no explicit duration'
- duration = int(m.group(1))
- assert duration >= 8000, (
- f'compressed SSE showToast duration ({duration} ms) must be >= 8000 ms'
- )
+ assert 'showToast' not in block
+ assert "Compression finished" not in block
def test_auto_compression_card_survives_compression_session_rotation():
diff --git a/tests/test_cancelled_turn_status.py b/tests/test_cancelled_turn_status.py
index 3e9b7c501c..6a56c35e9f 100644
--- a/tests/test_cancelled_turn_status.py
+++ b/tests/test_cancelled_turn_status.py
@@ -122,6 +122,8 @@ def test_cancel_marker_patterns_are_centralized_for_dedupe(self):
def test_silent_failure_path_checks_cancel_event_before_persisting_provider_error(self):
src = _read("api/streaming.py")
silent_idx = src.find("# ── Detect silent agent failure")
+ if silent_idx == -1:
+ silent_idx = src.find("# ── Detect missing final assistant reply")
assert silent_idx != -1, "silent-failure block not found"
apperror_idx = src.find("put('apperror', _error_payload)", silent_idx)
assert apperror_idx != -1, "silent-failure apperror emission not found"
@@ -135,6 +137,20 @@ def test_silent_failure_path_checks_cancel_event_before_persisting_provider_erro
"The cancellation guard should persist/report a cancelled turn, not silently drop state."
)
+ def test_streamed_progress_without_final_assistant_still_reports_error(self):
+ src = _read("api/streaming.py")
+ failure_idx = src.find("_terminal_failure = (")
+ assert failure_idx != -1, "terminal-failure guard not found"
+ apperror_idx = src.find("put('apperror', _error_payload)", failure_idx)
+ assert apperror_idx != -1, "terminal-failure guard must emit apperror"
+ block = src[failure_idx:apperror_idx]
+
+ assert "_agent_result_terminal_failure(result)" in block
+ assert "if _terminal_failure or (not _assistant_added and not _token_sent):" in block, (
+ "Explicit terminal failures, including compression/tool-tail failures, must report "
+ "an error even when interim progress already streamed."
+ )
+
def test_exception_path_classifies_after_cancel_event_before_generic_error(self):
src = _read("api/streaming.py")
except_idx = src.find("print('[webui] stream error:")
diff --git a/tests/test_inflight_stream_reuse.py b/tests/test_inflight_stream_reuse.py
index 69c756b174..fe9a72e33e 100644
--- a/tests/test_inflight_stream_reuse.py
+++ b/tests/test_inflight_stream_reuse.py
@@ -1,10 +1,14 @@
"""Regression tests for preserving live streams across session switches."""
import re
+import shutil
+import subprocess
from pathlib import Path
REPO_ROOT = Path(__file__).parent.parent
MESSAGES_JS = (REPO_ROOT / "static" / "messages.js").read_text(encoding="utf-8")
SESSIONS_JS = (REPO_ROOT / "static" / "sessions.js").read_text(encoding="utf-8")
+UI_JS = (REPO_ROOT / "static" / "ui.js").read_text(encoding="utf-8")
+NODE = shutil.which("node")
def _function_body(src: str, name: str) -> str:
@@ -26,6 +30,25 @@ def _function_body(src: str, name: str) -> str:
return src[brace + 1 : i - 1]
+def _function_decl(src: str, name: str) -> str:
+ marker = f"function {name}("
+ start = src.find(marker)
+ assert start != -1, f"{name}() not found"
+ brace = src.find("){", start)
+ assert brace != -1, f"{name}() body not found"
+ brace += 1
+ depth = 1
+ i = brace + 1
+ while i < len(src) and depth:
+ if src[i] == "{":
+ depth += 1
+ elif src[i] == "}":
+ depth -= 1
+ i += 1
+ assert depth == 0, f"{name}() body did not close"
+ return src[start:i]
+
+
def test_attach_live_stream_reuses_existing_same_stream_transport():
"""Returning to a running session must not tear down its same SSE stream.
@@ -42,10 +65,31 @@ def test_attach_live_stream_reuses_existing_same_stream_transport():
assert close_pos != -1, "attachLiveStream() should still close stale/different streams"
assert reuse_pos < close_pos, "same-stream reuse must run before closeLiveStream(activeSid)"
assert "existingLive.streamId===streamId" in body
- assert "existingLive.source.readyState!==EventSource.CLOSED" in body
+ assert "existingLive.source.readyState===EventSource.OPEN" in body
+ assert "(!reconnecting&&existingLive.source.readyState===EventSource.CONNECTING)" in body
assert "return" in body[reuse_pos:close_pos]
+def test_attach_live_stream_reconnect_does_not_reuse_connecting_transport():
+ """Explicit reattach must reopen a stale CONNECTING EventSource.
+
+ A page can keep a same-stream EventSource object in CONNECTING while the
+ server has no SSE subscriber. Reconnect paths from loadSession() should not
+ treat that object as healthy, or the live pane remains blank despite the
+ backend stream still emitting events.
+ """
+ body = _function_body(MESSAGES_JS, "attachLiveStream")
+ reuse_pos = body.find("const existingLive=LIVE_STREAMS[activeSid]")
+ close_pos = body.find("\n closeLiveStream(activeSid);\n")
+ assert reuse_pos != -1
+ assert close_pos != -1
+ reuse_block = body[reuse_pos:close_pos]
+ compact = re.sub(r"\s+", "", reuse_block)
+ assert "existingLive.source.readyState===EventSource.OPEN" in reuse_block
+ assert "(!reconnecting&&existingLive.source.readyState===EventSource.CONNECTING)" in compact
+ assert "existingLive.source.readyState!==EventSource.CLOSED" not in reuse_block
+
+
def test_attach_live_stream_closes_other_session_streams_before_opening_new_one():
"""Only the selected conversation pane should hold an open chat SSE transport."""
body = _function_body(MESSAGES_JS, "attachLiveStream")
@@ -102,6 +146,56 @@ def test_load_session_reattach_path_uses_attach_live_stream_for_running_sessions
assert "{reconnecting:true}" in body[reattach_pos : reattach_pos + 200]
+def test_load_session_same_sid_noop_does_not_mask_pending_switch_back():
+ """Clicking back to the prior session during a pending switch must reload it.
+
+ loadSession() clears S.messages before the metadata fetch for the target
+ session returns. During that small window S.session still points at the
+ previous session. A fast click back to that previous sid used to hit the
+ same-session no-op guard and leave the pane empty/Loading forever.
+ """
+ body = _function_body(SESSIONS_JS, "loadSession")
+ compact = re.sub(r"\s+", "", body)
+ guard = "if(currentSid===sid&&!forceReload&&!_loadingSessionId)return;"
+ assert guard in compact, (
+ "same-session no-op must be disabled while another loadSession() call "
+ "is in flight, otherwise switching away and immediately back can keep "
+ "the previous session's cleared transcript"
+ )
+ assert compact.find(guard) < compact.find("_loadingSessionId=sid;")
+
+
+def test_load_session_preserves_existing_worklog_content_without_destructive_fallback():
+ """Switching back to an active stream with live Worklog content should be treated as restored.
+
+ If loadSession() sees .wl-reason or .tool-card-row already in #liveAssistantTurn,
+ the destructive fallback must not call clearLiveToolCards() and rebuild a blank
+ Running shell over the preserved timeline.
+ """
+ body = _function_body(SESSIONS_JS, "loadSession")
+ content_pos = body.find("const hasCurrentWorklogContent=")
+ clear_pos = body.find("clearLiveToolCards();", content_pos)
+ assert content_pos != -1
+ assert clear_pos != -1
+ between = body[content_pos:clear_pos]
+ compact = re.sub(r"\s+", "", between)
+ assert "if(hasCurrentWorklogContent)restoredLiveTurn=true" in compact, (
+ "Existing live Worklog content must mark the turn restored before the "
+ "clearLiveToolCards() fallback runs."
+ )
+
+
+def test_tool_events_are_guarded_against_stale_session_and_stream():
+ """Delayed tool events from an old EventSource must not mutate the current session DOM."""
+ tool_handler = MESSAGES_JS.split("source.addEventListener('tool',e=>{", 1)[1].split("source.addEventListener('tool_complete'", 1)[0]
+ complete_handler = MESSAGES_JS.split("source.addEventListener('tool_complete',e=>{", 1)[1].split("source.addEventListener('approval'", 1)[0]
+ for handler in (tool_handler, complete_handler):
+ assert "_terminalStateReached||_streamFinalized" in handler
+ assert "S.session.session_id!==activeSid" in handler
+ assert "S.activeStreamId!==streamId" in handler
+ assert "appendLiveToolCard(tc,{sessionId:activeSid,streamId})" in handler
+
+
def test_close_live_stream_marks_inflight_for_reattach_on_return():
"""When closeLiveStream() tears down a still-active SSE transport (e.g. the
user switched to another session), the corresponding INFLIGHT entry must be
@@ -118,8 +212,13 @@ def test_close_live_stream_marks_inflight_for_reattach_on_return():
"closeLiveStream() must touch INFLIGHT so loadSession() reattaches the "
"SSE when the user switches back to a still-streaming session"
)
+ snapshot_pos = body.find("snapshotLiveTurnHtmlForSession(sessionId)")
+ hide_pos = body.find("hideLiveRunStatus")
+ assert snapshot_pos != -1, "closeLiveStream() must snapshot the visible Worklog before tearing down the pane"
+ assert hide_pos != -1 and snapshot_pos < hide_pos
assert re.search(r"INFLIGHT\[\w+\]\s*&&\s*\(?INFLIGHT\[\w+\]\.reattach\s*=\s*true", body) \
- or re.search(r"if\s*\(\s*INFLIGHT\[\w+\]\s*\)\s*INFLIGHT\[\w+\]\.reattach\s*=\s*true", body), (
+ or re.search(r"if\s*\(\s*INFLIGHT\[\w+\]\s*\)\s*INFLIGHT\[\w+\]\.reattach\s*=\s*true", body) \
+ or re.search(r"if\s*\(\s*INFLIGHT\[\w+\]\s*\)\s*\{[^}]*INFLIGHT\[\w+\]\.reattach\s*=\s*true", body, re.DOTALL), (
"closeLiveStream() must set INFLIGHT[sessionId].reattach = true "
"(guarded by an existence check) so loadSession()'s reattach branch fires"
)
@@ -157,7 +256,7 @@ def test_load_session_reattaches_when_inflight_is_in_memory_and_marked_for_reatt
body = _function_body(SESSIONS_JS, "loadSession")
inflight_idx = body.find("if(INFLIGHT[sid]){")
assert inflight_idx >= 0, "INFLIGHT branch not found in loadSession"
- inflight_block = body[inflight_idx : inflight_idx + 2400]
+ inflight_block = body[inflight_idx : inflight_idx + 4200]
assert "INFLIGHT[sid].reattach" in inflight_block, (
"loadSession()'s INFLIGHT branch must gate the SSE reattach on the "
"reattach flag so closeLiveStream()'s marking flows through"
@@ -171,3 +270,796 @@ def test_load_session_reattaches_when_inflight_is_in_memory_and_marked_for_reatt
"loadSession() must reattach via attachLiveStream() when "
"INFLIGHT[sid].reattach && activeStreamId"
)
+
+
+def test_load_session_attaches_sse_before_auxiliary_work():
+ """Live SSE reattach is the primary recovery path.
+
+ Rendering, workspace refresh, badges, and side-channel pollers must not run
+ before attachLiveStream(), because any synchronous failure in those paths
+ would otherwise leave the backend stream active with no browser subscriber.
+ """
+ body = _function_body(SESSIONS_JS, "loadSession")
+ active_branch = body[body.find("if(activeStreamId){") : body.find("}else{", body.find("if(activeStreamId){"))]
+ active_attach = active_branch.find("attachLiveStream(sid, activeStreamId")
+ assert active_attach != -1
+ for marker in (
+ "updateSendBtn();",
+ "syncTopbar();renderMessages();appendThinking();loadDir('.');",
+ "updateQueueBadge(sid);",
+ "startApprovalPolling(sid)",
+ ):
+ pos = active_branch.find(marker)
+ assert pos != -1, f"{marker} not found in active-stream branch"
+ assert active_attach < pos, f"attachLiveStream() must run before {marker}"
+
+
+def test_running_reattach_refreshes_single_live_assistant_from_server_progress():
+ """Switching back to a running session should keep one visible assistant
+ source for the active turn.
+
+ The server transcript can already contain interim assistant progress while
+ INFLIGHT also holds the live assistant tail. Reattach must refresh the live
+ tail from the server copy, drop the server's active-turn assistant rows, and
+ render one `_live` assistant instead of duplicating or deleting progress.
+ """
+ assert NODE, "node not on PATH"
+ start = SESSIONS_JS.find("function _messageComparableText")
+ end = SESSIONS_JS.find("// Load older messages", start)
+ assert start != -1 and end != -1
+ helper_src = SESSIONS_JS[start:end]
+ script = f"""
+const assert = require('assert');
+{helper_src}
+
+let base = [
+ {{role:'user', content:'go'}},
+ {{role:'assistant', content:'First progress.'}},
+ {{role:'tool', content:'{{}}'}},
+ {{role:'assistant', content:'Second progress.'}},
+];
+let inflight = [
+ {{role:'user', content:'go'}},
+ {{role:'assistant', _live:true, content:'First progress.\\n\\nSecond progress.\\n\\nSecond progress.'}},
+];
+assert.strictEqual(_prepareRunningLiveTail(base, inflight), true);
+assert.strictEqual(inflight[1].content, 'First progress.\\n\\nSecond progress.');
+base = _dropCurrentTurnAssistantMessages(base);
+let merged = _mergeInflightTailMessages(base, inflight);
+assert.strictEqual(merged.filter(m => m.role === 'assistant').length, 1);
+assert.strictEqual(merged[merged.length - 1]._live, true);
+assert.strictEqual(merged[merged.length - 1].content, 'First progress.\\n\\nSecond progress.');
+
+base = [
+ {{role:'user', content:'go'}},
+ {{role:'assistant', content:'First progress.'}},
+ {{role:'tool', content:'{{}}'}},
+ {{role:'assistant', content:'Second progress.'}},
+];
+inflight = [
+ {{role:'user', content:'go'}},
+ {{role:'assistant', _live:true, content:'First progress.'}},
+];
+assert.strictEqual(_prepareRunningLiveTail(base, inflight), true);
+assert.strictEqual(inflight[1].content, 'First progress.\\n\\nSecond progress.');
+base = _dropCurrentTurnAssistantMessages(base);
+merged = _mergeInflightTailMessages(base, inflight);
+assert.strictEqual(merged.filter(m => m.role === 'assistant').length, 1);
+assert.strictEqual(merged[merged.length - 1]._live, true);
+assert.strictEqual(merged[merged.length - 1].content, 'First progress.\\n\\nSecond progress.');
+"""
+ result = subprocess.run([NODE, "-e", script], capture_output=True, text=True, check=False)
+ assert result.returncode == 0, result.stderr
+
+
+def test_running_reattach_rebuilds_live_assistant_from_last_text_before_activity():
+ """A fast session switch can happen after INFLIGHT.lastAssistantText was
+ updated but before the live assistant message/DOM snapshot caught up.
+
+ Reattach must rebuild the structured `_live` assistant before restoring
+ Activity, otherwise the UI can show only the Activity group until another
+ switch or token causes the text segment to reappear.
+ """
+ assert NODE, "node not on PATH"
+ start = SESSIONS_JS.find("function _messageComparableText")
+ end = SESSIONS_JS.find("// Load older messages", start)
+ assert start != -1 and end != -1
+ helper_src = SESSIONS_JS[start:end]
+ script = f"""
+const assert = require('assert');
+{helper_src}
+
+let base = [{{role:'user', content:'go'}}];
+let inflightState = {{
+ lastAssistantText:'Recovered progress text.',
+ lastReasoningText:'',
+ messages:[{{role:'user', content:'go'}}],
+}};
+assert.strictEqual(_ensureInflightLiveAssistantMessage(inflightState), true);
+assert.strictEqual(inflightState.messages.length, 2);
+assert.strictEqual(inflightState.messages[1]._live, true);
+assert.strictEqual(inflightState.messages[1].content, 'Recovered progress text.');
+assert.strictEqual(_prepareRunningLiveTail(base, inflightState.messages), true);
+base = _dropCurrentTurnAssistantMessages(base);
+const merged = _mergeInflightTailMessages(base, inflightState.messages);
+assert.strictEqual(merged.filter(m => m.role === 'assistant').length, 1);
+assert.strictEqual(merged[merged.length - 1]._live, true);
+assert.strictEqual(merged[merged.length - 1].content, 'Recovered progress text.');
+"""
+ result = subprocess.run([NODE, "-e", script], capture_output=True, text=True, check=False)
+ assert result.returncode == 0, result.stderr
+
+
+def test_running_reattach_projects_live_text_into_activity_burst_segments():
+ """Fallback reattach should rebuild the same process-text/tool-burst
+ timeline even when the DOM snapshot is unavailable.
+ """
+ assert NODE, "node not on PATH"
+ start = SESSIONS_JS.find("function _messageComparableText")
+ end = SESSIONS_JS.find("// Load older messages", start)
+ assert start != -1 and end != -1
+ helper_src = SESSIONS_JS[start:end]
+ script = f"""
+const assert = require('assert');
+{helper_src}
+
+const inflight = {{
+ currentActivityBurstId: 2,
+ activityBurstAnchors: [
+ {{id: 1, textEnd: 'First progress.'.length}},
+ {{id: 2, textEnd: 'First progress.\\n\\nSecond progress.'.length}},
+ ],
+ messages: [
+ {{role:'user', content:'go'}},
+ {{role:'assistant', _live:true, content:'First progress.\\n\\nSecond progress.\\n\\nTail progress.'}},
+ ],
+}};
+const projected = _projectInflightMessagesForActivityBursts(inflight);
+assert.strictEqual(projected.length, 4);
+assert.strictEqual(projected[1].content, 'First progress.');
+assert.strictEqual(projected[1]._activityBurstId, 1);
+assert.strictEqual(projected[2].content, 'Second progress.');
+assert.strictEqual(projected[2]._activityBurstId, 2);
+assert.strictEqual(projected[3].content, 'Tail progress.');
+assert.strictEqual(projected[3]._activityBurstId, 2);
+"""
+ result = subprocess.run([NODE, "-e", script], capture_output=True, text=True, check=False)
+ assert result.returncode == 0, result.stderr
+
+
+def test_running_reattach_reprojects_segmented_live_tail_without_duplicate_prefix():
+ """A reconnect write can leave a segmented live tail plus a full accumulator.
+
+ syncInflightAssistantMessage() updates the last `_live` message from the
+ full assistant accumulator. On the next session switch,
+ _projectInflightMessagesForActivityBursts() must replace the whole live
+ tail with one projection from the accumulator; keeping the earlier
+ projected segments and also splitting the full accumulator repeats already
+ visible process text.
+ """
+ assert NODE, "node not on PATH"
+ start = SESSIONS_JS.find("function _messageComparableText")
+ end = SESSIONS_JS.find("// Load older messages", start)
+ assert start != -1 and end != -1
+ helper_src = SESSIONS_JS[start:end]
+ script = f"""
+const assert = require('assert');
+{helper_src}
+
+const fullText = 'First progress.\\n\\nSecond progress.\\n\\nTail progress.';
+const inflight = {{
+ currentActivityBurstId: 2,
+ currentLiveSegmentSeq: 2,
+ activityBurstAnchors: [
+ {{id: 1, textEnd: 'First progress.'.length}},
+ {{id: 2, textEnd: 'First progress.\\n\\nSecond progress.'.length}},
+ ],
+ messages: [
+ {{role:'user', content:'go'}},
+ {{role:'assistant', _live:true, content:'First progress.', _activityBurstId:1, _liveSegmentSeq:1}},
+ {{role:'assistant', _live:true, content:fullText, _activityBurstId:2, _liveSegmentSeq:2}},
+ ],
+}};
+const projected = _projectInflightMessagesForActivityBursts(inflight);
+assert.deepStrictEqual(
+ projected.filter(m => m.role === 'assistant').map(m => m.content),
+ ['First progress.', 'Second progress.', 'Tail progress.']
+);
+assert.deepStrictEqual(
+ projected.filter(m => m.role === 'assistant').map(m => m._liveSegmentSeq),
+ [1, 2, 3]
+);
+"""
+ result = subprocess.run([NODE, "-e", script], capture_output=True, text=True, check=False)
+ assert result.returncode == 0, result.stderr
+
+
+def test_running_reattach_keeps_segmented_tail_when_last_segment_is_not_accumulator():
+ """Normal segmented live tails must not be collapsed from the last segment.
+
+ The duplicate-prefix repair only applies when the last live message already
+ contains earlier live segment text. If the last segment is only its own tail,
+ the prior live segments are still the source of truth and must be preserved.
+ """
+ assert NODE, "node not on PATH"
+ start = SESSIONS_JS.find("function _messageComparableText")
+ end = SESSIONS_JS.find("// Load older messages", start)
+ assert start != -1 and end != -1
+ helper_src = SESSIONS_JS[start:end]
+ script = f"""
+const assert = require('assert');
+{helper_src}
+
+const inflight = {{
+ currentActivityBurstId: 2,
+ currentLiveSegmentSeq: 2,
+ activityBurstAnchors: [
+ {{id: 1, textEnd: 'First progress.'.length}},
+ {{id: 2, textEnd: 'First progress.\\n\\nSecond progress.'.length}},
+ ],
+ messages: [
+ {{role:'user', content:'go'}},
+ {{role:'assistant', _live:true, content:'First progress.', _activityBurstId:1, _liveSegmentSeq:1}},
+ {{role:'assistant', _live:true, content:'Second progress.', _activityBurstId:2, _liveSegmentSeq:2}},
+ ],
+}};
+const projected = _projectInflightMessagesForActivityBursts(inflight);
+assert.deepStrictEqual(
+ projected.filter(m => m.role === 'assistant').map(m => m.content),
+ ['First progress.', 'Second progress.']
+);
+"""
+ result = subprocess.run([NODE, "-e", script], capture_output=True, text=True, check=False)
+ assert result.returncode == 0, result.stderr
+
+
+def test_running_reattach_aliases_empty_activity_bursts_to_previous_text_segment():
+ """Duplicate boundaries with no new text should not leave tool activity
+ attached to a burst id that has no visible assistant segment.
+ """
+ assert NODE, "node not on PATH"
+ start = SESSIONS_JS.find("function _messageComparableText")
+ end = SESSIONS_JS.find("// Load older messages", start)
+ assert start != -1 and end != -1
+ helper_src = SESSIONS_JS[start:end]
+ script = f"""
+const assert = require('assert');
+{helper_src}
+
+const inflight = {{
+ currentActivityBurstId: 2,
+ activityBurstAnchors: [
+ {{id: 1, textEnd: 'First progress.'.length}},
+ {{id: 2, textEnd: 'First progress.'.length}},
+ ],
+ toolCalls: [
+ {{name:'read_file', activityBurstId: 2}},
+ ],
+ messages: [
+ {{role:'user', content:'go'}},
+ {{role:'assistant', _live:true, content:'First progress.'}},
+ ],
+}};
+ const projected = _projectInflightMessagesForActivityBursts(inflight);
+ assert.strictEqual(projected.length, 2);
+ assert.strictEqual(projected[1].content, 'First progress.');
+ assert.strictEqual(projected[1]._activityBurstId, 1);
+ assert.strictEqual(inflight.toolCalls[0].activityBurstId, 1);
+ assert.strictEqual(inflight.toolCalls[0].activitySegmentSeq, 1);
+"""
+ result = subprocess.run([NODE, "-e", script], capture_output=True, text=True, check=False)
+ assert result.returncode == 0, result.stderr
+
+
+def test_running_reattach_backfills_tool_segment_seq_for_burst_anchors():
+ """When reattaching a running stream, persisted tool calls without
+ activitySegmentSeq should be rebound to the projected live segment sequence
+ so tool cards land next to their triggering text, not at the tail.
+ """
+ assert NODE, "node not on PATH"
+ start = SESSIONS_JS.find("function _messageComparableText")
+ end = SESSIONS_JS.find("// Load older messages", start)
+ assert start != -1 and end != -1
+ helper_src = SESSIONS_JS[start:end]
+ script = f"""
+const assert = require('assert');
+{helper_src}
+
+const inflight = {{
+ currentActivityBurstId: 3,
+ activityBurstAnchors: [
+ {{id: 1, textEnd: 'First progress.'.length}},
+ {{id: 2, textEnd: 'First progress.\\n\\nSecond progress.'.length}},
+ ],
+ toolCalls: [
+ {{name:'read_file', activityBurstId: 1, activitySegmentSeq: undefined}},
+ {{name:'search', activityBurstId: 2, activitySegmentSeq: undefined}},
+ ],
+ messages: [
+ {{role:'user', content:'go'}},
+ {{role:'assistant', _live:true, content:'First progress.\\n\\nSecond progress.\\n\\nTail progress.'}},
+ ],
+}};
+const projected = _projectInflightMessagesForActivityBursts(inflight);
+assert.strictEqual(projected.length, 4);
+assert.strictEqual(projected[1]._liveSegmentSeq, 1);
+ assert.strictEqual(projected[2]._liveSegmentSeq, 2);
+ assert.strictEqual(projected[3]._liveSegmentSeq, 3);
+ assert.strictEqual(inflight.toolCalls[0].activitySegmentSeq, 1);
+ assert.strictEqual(inflight.toolCalls[1].activitySegmentSeq, 2);
+ """
+ result = subprocess.run([NODE, "-e", script], capture_output=True, text=True, check=False)
+ assert result.returncode == 0, result.stderr
+
+
+def test_upsert_live_tool_call_preserves_start_seq_for_complete():
+ """tool_complete should inherit the seq captured by the matching tool_start.
+
+ This guarantees a single in-flight tool row per call and keeps Activity
+ placement stable even when complete arrives on a different segment.
+ """
+ assert NODE, "node not on PATH"
+ helper_defs = "\n".join([
+ _function_decl(MESSAGES_JS, "_stableStringify"),
+ _function_decl(MESSAGES_JS, "_hashString"),
+ _function_decl(MESSAGES_JS, "_toolCallSignature"),
+ _function_decl(MESSAGES_JS, "_liveToolTid"),
+ _function_decl(MESSAGES_JS, "_coerceLiveToolCallSignature"),
+ _function_decl(MESSAGES_JS, "_coerceLiveToolCallSeq"),
+ _function_decl(MESSAGES_JS, "_currentLiveToolAnchor"),
+ _function_decl(MESSAGES_JS, "_findPendingLiveToolCallIndex"),
+ _function_decl(MESSAGES_JS, "upsertLiveToolCall"),
+ ])
+ script = (
+ "const assert = require('assert');\n"
+ f"{helper_defs}\n\n"
+ "const uploaded=[];\n"
+ "let activeSid='sid';\n"
+ "const INFLIGHT={};\n"
+ "const S={\"toolCalls\":[],\"messages\":[]};\n"
+ "let assistantRow={getAttribute:()=>\"7\"};\n"
+ "let _assistantSegmentSeq=7;\n"
+ "let _currentLiveSegmentSeq=7;\n"
+ "let _currentActivityBurstId=1;\n"
+ "const assistantBody=null;\n"
+ "global.persistInflightState=()=>{};\n"
+ "global.S=S;\n"
+ "global.INFLIGHT=INFLIGHT;\n"
+ "global.activeSid=activeSid;\n"
+ "global.uploaded=uploaded;\n"
+ "global.assistantRow=assistantRow;\n"
+ "global.assistantBody=assistantBody;\n"
+ "global._assistantSegmentSeq=_assistantSegmentSeq;\n"
+ "global._currentLiveSegmentSeq=_currentLiveSegmentSeq;\n"
+ "global._currentActivityBurstId=_currentActivityBurstId;\n\n"
+ "const start=upsertLiveToolCall({\"name\":\"read_file\",\"args\":{\"path\":\"/tmp/a\"},\"preview\":\"start\"}, 'start');\n"
+ "assert(start);\n"
+ "start.started_at=111;\n"
+ "assert.strictEqual(start.activitySegmentSeq, 7);\n"
+ "assert.strictEqual(start._toolCallStartSeq, 7);\n"
+ "_currentLiveSegmentSeq=11;\n"
+ "_assistantSegmentSeq=11;\n"
+ "const complete=upsertLiveToolCall({\"name\":\"read_file\",\"args\":{\"path\":\"/tmp/a\"},\"duration\":2}, 'complete');\n"
+ "assert(complete);\n"
+ "assert.strictEqual(complete.activitySegmentSeq, 7);\n"
+ "assert.strictEqual(complete._toolCallStartSeq, 7);\n"
+ "assert.strictEqual(complete===start, true);\n"
+ )
+ result = subprocess.run([NODE, '-e', script], capture_output=True, text=True, check=False)
+ assert result.returncode == 0, result.stderr
+
+
+def test_upsert_live_tool_call_complete_matches_by_name_burst_without_tid():
+ """A complete event without tid must still match the in-flight tool by name+burst.
+
+ This is needed when the provider's complete stream payload does not carry a
+ stable tool call id.
+ """
+ assert NODE, "node not on PATH"
+ helper_defs = "\n".join([
+ _function_decl(MESSAGES_JS, "_stableStringify"),
+ _function_decl(MESSAGES_JS, "_hashString"),
+ _function_decl(MESSAGES_JS, "_toolCallSignature"),
+ _function_decl(MESSAGES_JS, "_liveToolTid"),
+ _function_decl(MESSAGES_JS, "_coerceLiveToolCallSignature"),
+ _function_decl(MESSAGES_JS, "_coerceLiveToolCallSeq"),
+ _function_decl(MESSAGES_JS, "_currentLiveToolAnchor"),
+ _function_decl(MESSAGES_JS, "_findPendingLiveToolCallIndex"),
+ _function_decl(MESSAGES_JS, "upsertLiveToolCall"),
+ ])
+ script = (
+ "const assert = require('assert');\n"
+ f"{helper_defs}\n\n"
+ "const uploaded=[];\n"
+ "let activeSid='sid';\n"
+ "const INFLIGHT={\"sid\":{\"toolCalls\":[{\"name\":\"search\",\"activityBurstId\":3,\"activitySegmentSeq\":4,\"_toolCallStartSeq\":4,\"_liveToolCallSignature\":\"search|3|4|{\\\"query\\\":\\\"x\\\"}\",\"done\":false}],\"messages\":[],\"uploaded\":[]}};\n"
+ "const S={\"toolCalls\":[],\"messages\":[]};\n"
+ "let _assistantSegmentSeq=9;\n"
+ "let _currentLiveSegmentSeq=9;\n"
+ "let _currentActivityBurstId=3;\n"
+ "let assistantRow={getAttribute:()=>\"7\"};\n"
+ "let assistantBody=null;\n"
+ "global.persistInflightState=()=>{};\n"
+ "global.S=S;\n"
+ "global.INFLIGHT=INFLIGHT;\n"
+ "global.activeSid=activeSid;\n"
+ "global.uploaded=uploaded;\n"
+ "global.assistantRow=assistantRow;\n"
+ "global.assistantBody=assistantBody;\n"
+ "global._assistantSegmentSeq=_assistantSegmentSeq;\n"
+ "global._currentLiveSegmentSeq=_currentLiveSegmentSeq;\n"
+ "global._currentActivityBurstId=_currentActivityBurstId;\n\n"
+ "const complete=upsertLiveToolCall({\"name\":\"search\",\"args\":{\"query\":\"x\"}}, 'complete');\n"
+ "assert(complete);\n"
+ "assert.strictEqual(complete.activitySegmentSeq, 4);\n"
+ "assert.strictEqual(complete._toolCallStartSeq, 4);\n"
+ "assert.strictEqual(INFLIGHT[activeSid].toolCalls.length, 1);\n"
+ )
+ result = subprocess.run([NODE, '-e', script], capture_output=True, text=True, check=False)
+ assert result.returncode == 0, result.stderr
+
+
+def test_upsert_flags_orphan_complete_but_not_normal_start_complete():
+ """`_createdByComplete` must be set ONLY when a tool_complete creates a fresh
+ record with no matching tool_start (orphan completion). The SSE handler uses
+ this flag to decide whether to force a fresh segment: an orphan completion is
+ a real tail boundary, but a normal in-place start->complete update must leave
+ the active segment untouched (otherwise interleaved completions fragment the
+ streaming text into spurious empty segments)."""
+ assert NODE, "node not on PATH"
+ helper_defs = "\n".join([
+ _function_decl(MESSAGES_JS, "_stableStringify"),
+ _function_decl(MESSAGES_JS, "_hashString"),
+ _function_decl(MESSAGES_JS, "_toolCallSignature"),
+ _function_decl(MESSAGES_JS, "_liveToolTid"),
+ _function_decl(MESSAGES_JS, "_coerceLiveToolCallSignature"),
+ _function_decl(MESSAGES_JS, "_coerceLiveToolCallSeq"),
+ _function_decl(MESSAGES_JS, "_currentLiveToolAnchor"),
+ _function_decl(MESSAGES_JS, "_findPendingLiveToolCallIndex"),
+ _function_decl(MESSAGES_JS, "upsertLiveToolCall"),
+ ])
+ script = (
+ "const assert = require('assert');\n"
+ f"{helper_defs}\n\n"
+ "const uploaded=[];\n"
+ "let activeSid='sid';\n"
+ "const INFLIGHT={};\n"
+ "const S={\"toolCalls\":[],\"messages\":[]};\n"
+ "let assistantRow={getAttribute:()=>\"7\"};\n"
+ "let assistantBody=null;\n"
+ "let _assistantSegmentSeq=7;\n"
+ "let _currentLiveSegmentSeq=7;\n"
+ "let _currentActivityBurstId=1;\n"
+ "global.persistInflightState=()=>{};\n"
+ "global.S=S;\n"
+ "global.INFLIGHT=INFLIGHT;\n"
+ "global.activeSid=activeSid;\n"
+ "global.uploaded=uploaded;\n"
+ "global.assistantRow=assistantRow;\n"
+ "global.assistantBody=assistantBody;\n"
+ "global._assistantSegmentSeq=_assistantSegmentSeq;\n"
+ "global._currentLiveSegmentSeq=_currentLiveSegmentSeq;\n"
+ "global._currentActivityBurstId=_currentActivityBurstId;\n\n"
+ # Case A: normal start -> complete. The start record must NOT be flagged,
+ # and the matching complete must reuse it without setting the flag.
+ "const start=upsertLiveToolCall({\"name\":\"read_file\",\"args\":{\"path\":\"/tmp/a\"},\"tid\":\"T1\"}, 'start');\n"
+ "assert(start);\n"
+ "assert.strictEqual(!!start._createdByComplete, false, 'tool_start must not be flagged');\n"
+ "const completeMatched=upsertLiveToolCall({\"name\":\"read_file\",\"args\":{\"path\":\"/tmp/a\"},\"tid\":\"T1\"}, 'complete');\n"
+ "assert.strictEqual(completeMatched===start, true, 'complete must reuse the start record');\n"
+ "assert.strictEqual(!!completeMatched._createdByComplete, false, 'in-place complete must not be flagged');\n"
+ "assert.strictEqual(INFLIGHT[activeSid].toolCalls.length, 1, 'no duplicate record');\n\n"
+ # Case B: orphan complete (no prior start). The freshly created record
+ # MUST be flagged so the handler forces a fresh segment.
+ "const orphan=upsertLiveToolCall({\"name\":\"write_file\",\"args\":{\"path\":\"/tmp/b\"},\"tid\":\"T2\"}, 'complete');\n"
+ "assert(orphan);\n"
+ "assert.strictEqual(orphan===start, false);\n"
+ "assert.strictEqual(orphan._createdByComplete, true, 'orphan complete must be flagged');\n"
+ "assert.strictEqual(orphan.done, true);\n"
+ "assert.strictEqual(INFLIGHT[activeSid].toolCalls.length, 2);\n"
+ )
+ result = subprocess.run([NODE, '-e', script], capture_output=True, text=True, check=False)
+ assert result.returncode == 0, result.stderr
+
+
+def test_tool_complete_handler_gates_segment_reset_on_orphan_flag():
+ """The tool_complete SSE handler must only force a fresh segment for orphan
+ completions (`_createdByComplete`), updating the card in place otherwise."""
+ handler_start = MESSAGES_JS.find("source.addEventListener('tool_complete'")
+ assert handler_start != -1
+ handler_end = MESSAGES_JS.find("source.addEventListener('approval'", handler_start)
+ assert handler_end != -1
+ handler = MESSAGES_JS[handler_start:handler_end]
+ # The reset trio must live behind the orphan-flag branch.
+ guard_pos = handler.find("if(tc._createdByComplete)")
+ reset_pos = handler.find("_resetAssistantSegment()")
+ assert guard_pos != -1, "tool_complete must branch on tc._createdByComplete"
+ assert reset_pos != -1 and guard_pos < reset_pos, (
+ "segment reset must be gated behind the orphan-completion branch"
+ )
+ # The non-orphan branch must still place the card (in place).
+ assert handler.count("appendLiveToolCard(tc,{sessionId:activeSid,streamId})") >= 2, (
+ "both orphan and in-place branches must append/update the tool card"
+ )
+
+
+def test_project_inflight_with_no_visible_anchor_maps_tools_to_run_anchor_segment():
+ """Without a visible burst anchor, in-flight tools should still map to the first
+ segment instead of falling back to the last segment in render order."""
+ assert NODE, "node not on PATH"
+ start = SESSIONS_JS.find("function _messageComparableText")
+ end = SESSIONS_JS.find("// Load older messages", start)
+ assert start != -1 and end != -1
+ helper_src = SESSIONS_JS[start:end]
+ script = f"""
+const assert = require('assert');
+{helper_src}
+
+const inflight = {{
+ currentActivityBurstId: 2,
+ activityBurstAnchors: [
+ {{ id: 1, textEnd: 0 }},
+ ],
+ toolCalls: [
+ {{name:'read_file', activityBurstId:0}},
+ ],
+ messages: [
+ {{role:'user', content:'go'}},
+ {{role:'assistant', _live:true, _activityBurstId: 2, content:'First progress line'}},
+ ],
+}};
+const projected = _projectInflightMessagesForActivityBursts(inflight);
+ assert.strictEqual(projected.length, 2);
+assert.strictEqual(projected[1].content, 'First progress line');
+assert.strictEqual(projected[1]._liveSegmentSeq, 1);
+assert.strictEqual(inflight.toolCalls[0].activitySegmentSeq, 1);
+"""
+ result = subprocess.run([NODE, "-e", script], capture_output=True, text=True, check=False)
+ assert result.returncode == 0, result.stderr
+
+
+def test_load_session_rebuilds_live_tail_before_snapshot_fallback():
+ body = _function_body(SESSIONS_JS, "loadSession")
+ ensure_pos = body.find("_ensureInflightLiveAssistantMessage(INFLIGHT[sid]);")
+ inflight_pos = body.find("const inflightMessages=_projectInflightMessagesForActivityBursts(INFLIGHT[sid]);")
+ prepare_pos = body.find("const liveTailPrepared=_prepareRunningLiveTail(S.messages,inflightMessages);")
+ drop_assistant_pos = body.find("S.messages=_dropCurrentTurnAssistantMessages(S.messages);")
+ merge_pos = body.find("S.messages=_mergeInflightTailMessages(S.messages,inflightMessages);")
+ restore_pos = body.find("restoreLiveTurnHtmlForSession(sid)")
+ assert ensure_pos != -1 and inflight_pos != -1
+ assert prepare_pos != -1
+ assert drop_assistant_pos != -1 and merge_pos != -1 and restore_pos != -1
+ assert "delete INFLIGHT[sid].liveTurnHtml" not in body
+ assert ensure_pos < inflight_pos < prepare_pos < drop_assistant_pos < merge_pos < restore_pos
+
+
+def test_load_session_prefers_structured_inflight_state_over_live_turn_snapshot():
+ """Structured INFLIGHT state is authoritative during reattach.
+
+ The memory-only liveTurnHtml snapshot can be stale across session switches.
+ If loadSession restores that DOM after renderMessages() rebuilt the
+ per-burst live tail, old snapshots can alternately erase progress text and
+ leave Activity groups piled at the bottom of the turn.
+ """
+ body = _function_body(SESSIONS_JS, "loadSession")
+ structured_pos = body.find("const hasStructuredLiveState=!!(INFLIGHT[sid]&&(")
+ restore_pos = body.find("restoreLiveTurnHtmlForSession(sid)")
+ fallback_pos = body.find("if(!restoredLiveTurn){", restore_pos)
+ assert structured_pos != -1, "loadSession must compute structured live-state presence"
+ assert restore_pos != -1, "loadSession must still retain DOM snapshot fallback"
+ assert fallback_pos != -1
+ assert structured_pos < restore_pos < fallback_pos
+ guard_block = body[structured_pos:fallback_pos]
+ assert "lastAssistantText" in guard_block
+ assert "lastReasoningText" in guard_block
+ assert "activityBurstAnchors" in guard_block
+ assert "toolCalls" in guard_block
+ assert "if(!hasStructuredLiveState)" in guard_block
+ assert "hasCurrentWorklogContent" in guard_block
+ assert "if(hasCurrentWorklogContent) restoredLiveTurn=true;" in guard_block
+ assert "else restoredLiveTurn=restoreLiveTurnHtmlForSession(sid);" in guard_block
+
+
+def test_load_session_restores_worklog_shell_before_reattach_replay():
+ """Reattaching before replay/new SSE should not leave the active stream blank."""
+ body = _function_body(SESSIONS_JS, "loadSession")
+ fallback_pos = body.find("if(!restoredLiveTurn){")
+ assert fallback_pos != -1, "loadSession must have a live-turn fallback branch"
+ fallback_block = body[fallback_pos:body.find("loadDir('.')", fallback_pos)]
+ clear_pos = fallback_block.find("clearLiveToolCards();")
+ shell_pos = fallback_block.find("ensureLiveWorklogShell()")
+ legacy_pos = fallback_block.find("else appendThinking();")
+ replay_pos = fallback_block.find("appendLiveToolCard(tc);")
+ invariant_pos = fallback_block.find("!liveTurn||!liveTurn.querySelector")
+ assert clear_pos != -1, "fallback must clear stale live tool DOM first"
+ assert shell_pos != -1, "fallback must restore a quiet live Worklog shell"
+ assert legacy_pos != -1, "fallback should retain legacy thinking-card behavior"
+ assert replay_pos != -1, "fallback must still replay persisted live tools"
+ assert invariant_pos != -1, "reattach must enforce a Worklog shell even after an empty restored snapshot"
+ assert clear_pos < shell_pos < replay_pos
+ assert replay_pos < invariant_pos
+
+
+def test_merge_inflight_tail_preserves_all_segmented_live_progress():
+ """The reattach merge must keep every projected live progress segment.
+
+ _projectInflightMessagesForActivityBursts() can split one live assistant
+ accumulator into multiple _live messages. If the merge starts at the last
+ _live segment, the earlier process-text anchors disappear and Activity
+ groups whose burst ids point to those anchors pile up at the bottom.
+ """
+ assert NODE, "node not on PATH"
+ fn_start = SESSIONS_JS.index("function _mergeInflightTailMessages")
+ fn_end = SESSIONS_JS.index("// Load older messages", fn_start)
+ merge_fn = SESSIONS_JS[fn_start:fn_end]
+ script = f"""
+const assert = require('assert');
+function _messageComparableText(m) {{ return String((m&&m.content)||'').trim(); }}
+function _sameTranscriptMessage(a,b) {{
+ return !!(a&&b&&a.role===b.role&&_messageComparableText(a)===_messageComparableText(b));
+}}
+{merge_fn}
+const base = [{{role:'user', content:'go'}}];
+const inflight = [
+ {{role:'user', content:'go'}},
+ {{role:'assistant', _live:true, content:'first progress', _activityBurstId:1}},
+ {{role:'assistant', _live:true, content:'second progress', _activityBurstId:2}},
+ {{role:'assistant', _live:true, content:'third progress', _activityBurstId:3}},
+];
+const merged = _mergeInflightTailMessages(base, inflight);
+assert.deepStrictEqual(
+ merged.filter(m => m.role === 'assistant').map(m => m.content),
+ ['first progress', 'second progress', 'third progress']
+);
+"""
+ result = subprocess.run([NODE, "-e", script], capture_output=True, text=True, check=False)
+ assert result.returncode == 0, result.stderr
+
+
+def test_load_session_does_not_advance_replay_cursor_from_session_journal_summary():
+ body = _function_body(SESSIONS_JS, "loadSession")
+ assert "INFLIGHT[sid].lastRunJournalSeq=journalSeq;" not in body
+ assert "const journalSeq=_runJournalSeqFromSession(S.session);" not in body
+ assert "function _runJournalSeqFromSession" not in SESSIONS_JS
+
+
+def test_session_switch_reattach_discards_tail_cache_for_full_journal_replay():
+ close_body = _function_body(MESSAGES_JS, "closeLiveStream")
+ load_body = _function_body(SESSIONS_JS, "loadSession")
+ compact_body = _function_body(UI_JS, "_compactInflightState")
+
+ assert "INFLIGHT[sessionId].journalReplayFromStart=true" in close_body
+ assert "journalReplayFromStart:true" in close_body
+ assert "journalReplayFromStart:!!state.journalReplayFromStart" in compact_body
+ assert "journalReplayFromStart:!!stored.journalReplayFromStart" in load_body
+ assert "delete INFLIGHT[sid]" in load_body
+ assert "clearInflightState(sid)" in load_body
+
+
+def test_load_session_discards_cursor_only_inflight_before_reattach():
+ """A cursor-only INFLIGHT cache must not skip historical journal replay.
+
+ Real active sessions can have an empty sidecar transcript while the durable
+ run journal has the full prose/tool timeline. If the browser kept only a
+ lastRunJournalSeq cursor but lost visible INFLIGHT content, reattaching from
+ that cursor makes the session look blank after switching away and back.
+ """
+ load_body = _function_body(SESSIONS_JS, "loadSession")
+ helper_start = SESSIONS_JS.index("function _inflightHasVisibleLiveState")
+ helper_body = SESSIONS_JS[
+ helper_start : SESSIONS_JS.index("function _rememberRenderedSessionSnapshot", helper_start)
+ ]
+
+ assert "function _inflightHasVisibleLiveState" in SESSIONS_JS
+ assert "lastAssistantText" in helper_body
+ assert "lastReasoningText" in helper_body
+ assert "liveTurnHtml" in helper_body
+ assert "toolCalls" in helper_body
+ assert "activityBurstAnchors" in helper_body
+ assert "msg.role !== 'assistant'" in helper_body
+
+ compact_load = re.sub(r"\s+", "", load_body)
+ guard = "if(activeStreamId&&INFLIGHT[sid]&&!_inflightHasVisibleLiveState(INFLIGHT[sid]))"
+ assert guard in compact_load
+ guard_pos = compact_load.find(guard)
+ inflight_branch_pos = compact_load.find("if(INFLIGHT[sid]){")
+ assert 0 <= guard_pos < inflight_branch_pos
+
+
+def test_reconnect_prefers_trimmed_live_message_over_stale_full_assistant_cache():
+ body = _function_body(MESSAGES_JS, "attachLiveStream")
+ live_msg_pos = body.find("const _liveInflightAssistant")
+ last_text_pos = body.find("const _lastLiveAssistant")
+ assert live_msg_pos != -1 and last_text_pos != -1
+ assert live_msg_pos < last_text_pos
+ assistant_block = body[last_text_pos:body.find("const _lastLiveReasoning", last_text_pos)]
+ assert "_liveInflightAssistant.content" in assistant_block
+ assert "_fullInflightAssistant" in assistant_block
+ assert "lastAssistantText" in body[live_msg_pos:last_text_pos]
+
+
+def test_reconnect_uses_full_accumulator_when_live_tail_is_segmented():
+ """When reattach projection splits the live assistant into multiple
+ visible process-text segments, reconnect must resume from the full
+ accumulator instead of the last segment.
+
+ Otherwise the next syncInflightAssistantMessage() write truncates
+ lastAssistantText to only the latest visible segment, so earlier process
+ text anchors disappear on the next session switch and Activity groups fall
+ back to the end of the turn.
+ """
+ body = _function_body(MESSAGES_JS, "attachLiveStream")
+ helper_pos = body.find("const _liveInflightAssistantMessages")
+ last_text_pos = body.find("const _lastLiveAssistant")
+ assert helper_pos != -1, (
+ "attachLiveStream() should collect all live assistant segments before "
+ "choosing reconnect text"
+ )
+ assert helper_pos < last_text_pos
+ assistant_block = body[last_text_pos:body.find("const _lastLiveReasoning", last_text_pos)]
+ assert "_liveInflightAssistantMessages.length>1" in assistant_block.replace(" ", "")
+ assert "_fullInflightAssistant" in assistant_block
+ assert "lastAssistantText" in body[helper_pos:last_text_pos]
+
+
+def test_reconnect_seeds_segment_start_from_last_burst_anchor():
+ """On reattach, segmentStart must align with the last burst anchor's textEnd.
+
+ Without this, _doRender at segmentStart===0 uses the full visible text as
+ displayText, so the smd parser (after _smdReconnect clears assistantBody)
+ rewrites the entire accumulated text into the first live assistant segment.
+ The per-burst segments rendered by _projectInflightMessagesForActivityBursts
+ are left stale, Activity groups end up visually marooned among duplicate
+ text, and the user sees Activity cards pile up at the tail of the turn.
+ """
+ body = _function_body(MESSAGES_JS, "attachLiveStream")
+ seg_start_pos = body.find("let segmentStart=(()=>{")
+ assert seg_start_pos != -1, (
+ "segmentStart must be initialized via a reconnect-aware IIFE that reads "
+ "INFLIGHT.activityBurstAnchors so the smd parser rewrites only the "
+ "tail-burst segment, not the full text."
+ )
+ seg_end_pos = body.find("})();", seg_start_pos)
+ assert seg_end_pos != -1, "segmentStart IIFE must close with })();"
+ seg_block = body[seg_start_pos:seg_end_pos]
+ assert "activityBurstAnchors" in seg_block
+ assert "reconnecting" in seg_block, "segmentStart should only shift when reconnecting"
+ assert "textEnd" in seg_block
+
+
+def test_ensure_assistant_row_reattaches_to_last_live_segment():
+ """ensureAssistantRow must pick the LAST live segment, not the first.
+
+ After session-switch reattach, the projected DOM holds one
+ [data-live-assistant="1"] per recorded burst anchor plus a tail. New
+ tokens belong to the tail segment. querySelector returns the first
+ match, which would funnel all post-reattach tokens into segment 1,
+ leaving the per-burst segments stale and Activity anchors visually
+ detached.
+ """
+ body = _function_body(MESSAGES_JS, "ensureAssistantRow")
+ assert "querySelectorAll('[data-live-assistant=\"1\"]')" in body, (
+ "must enumerate every live segment so the tail can be selected"
+ )
+ # Sanity: still has the fresh-segment guard so post-tool turns don't
+ # reuse the previous text segment that sits above the new tool card.
+ assert "if(!_freshSegment)" in body
+ # The selected segment must be the last entry, not the first.
+ assert "liveSegments[liveSegments.length-1]" in body
+
+
+def test_reconnect_without_tail_forces_fresh_segment_after_activity():
+ """If reconnect resumes at the last recorded boundary, no tail segment exists.
+
+ The next token should create a new segment after the previous Activity group
+ instead of reusing the last burst's text segment above that Activity.
+ """
+ body = _function_body(MESSAGES_JS, "attachLiveStream")
+ fresh_pos = body.find("let _freshSegment=")
+ seg_pos = body.find("let segmentStart=(()=>{")
+ assert seg_pos != -1 and fresh_pos != -1
+ assert seg_pos < fresh_pos
+ fresh_line = body[fresh_pos:body.find(";", fresh_pos)]
+ assert "reconnecting" in fresh_line
+ assert "segmentStart>0" in fresh_line
+ assert "segmentStart>=String(assistantText||'').length" in fresh_line
diff --git a/tests/test_issue1240_generic_cli_catalog_sync.py b/tests/test_issue1240_generic_cli_catalog_sync.py
index 0f59de5229..e98b4174ed 100644
--- a/tests/test_issue1240_generic_cli_catalog_sync.py
+++ b/tests/test_issue1240_generic_cli_catalog_sync.py
@@ -11,6 +11,7 @@
import types
import api.config as config
+import api.profiles as profiles
_PROVIDER_ENV_VARS = (
@@ -80,6 +81,9 @@ def get_auth_status(pid):
def _configure(monkeypatch, tmp_path, *, provider: str, default: str = ""):
+ hermes_home = tmp_path / "hermes-home"
+ hermes_home.mkdir()
+ monkeypatch.setattr(profiles, "get_active_hermes_home", lambda: hermes_home)
monkeypatch.setattr(config, "_get_config_path", lambda: tmp_path / "missing-config.yaml")
monkeypatch.setattr(config, "_models_cache_path", tmp_path / "models_cache.json")
monkeypatch.setattr(
diff --git a/tests/test_issue1298_cancel_and_activity.py b/tests/test_issue1298_cancel_and_activity.py
index 90c00a493b..69569bb221 100644
--- a/tests/test_issue1298_cancel_and_activity.py
+++ b/tests/test_issue1298_cancel_and_activity.py
@@ -317,8 +317,7 @@ def test_ensure_activity_group_restores_expand_intent(self):
)
def test_finalize_thinking_card_respects_user_expand(self):
- """finalizeThinkingCard() must NOT force-collapse the live activity
- group when the user has explicitly expanded it (#1298)."""
+ """finalizeThinkingCard() must not force-collapse the live Worklog."""
src = (REPO_ROOT / "static" / "ui.js").read_text()
m = re.search(
r"function finalizeThinkingCard\(\)\{(.*?)\n\}",
@@ -326,16 +325,11 @@ def test_finalize_thinking_card_respects_user_expand(self):
)
assert m, "finalizeThinkingCard() must exist in ui.js"
body = m.group(1)
- assert "_liveActivityUserExpanded" in body, (
- "finalizeThinkingCard() must respect the user's expand intent — "
- "without this guard, the panel snaps shut on every tool boundary"
- )
- # Hard fail if force-collapse is unconditional
- assert "_liveActivityUserExpanded !== true" in body or \
- "_liveActivityUserExpanded!==true" in body.replace(" ", ""), (
- "finalizeThinkingCard() must skip the force-collapse path when "
- "_liveActivityUserExpanded === true"
+ assert "tool-call-group-collapsed" not in body, (
+ "Live Worklog must remain expanded until the settled render replaces "
+ "it with the final collapsed L1 Activity summary."
)
+ assert "aria-expanded','false'" not in body
def test_inline_onclick_records_user_intent(self):
"""The summary button's click path must call _onLiveActivityToggle
diff --git a/tests/test_issue1527_lmstudio_base_url_classification.py b/tests/test_issue1527_lmstudio_base_url_classification.py
index b42f704604..a6c796ee1c 100644
--- a/tests/test_issue1527_lmstudio_base_url_classification.py
+++ b/tests/test_issue1527_lmstudio_base_url_classification.py
@@ -137,7 +137,8 @@ def test_lmstudio_configured_base_url_keeps_discovered_models(
assert "custom" not in groups
assert "lmstudio" in groups
model_ids = {model["id"] for model in groups["lmstudio"]["models"]}
- assert {"qwen3.6-35b-a3b@q6_k", "second-lmstudio-model"} <= model_ids
+ bare_model_ids = {mid.removeprefix("@lmstudio:") for mid in model_ids}
+ assert {"qwen3.6-35b-a3b@q6_k", "second-lmstudio-model"} <= bare_model_ids
def test_custom_configured_base_url_is_not_reclassified_as_ollama(tmp_path, monkeypatch):
diff --git a/tests/test_issue1690_scroll_completion.py b/tests/test_issue1690_scroll_completion.py
index afd601a1ac..f96b410d9e 100644
--- a/tests/test_issue1690_scroll_completion.py
+++ b/tests/test_issue1690_scroll_completion.py
@@ -51,12 +51,16 @@ def test_terminal_done_render_preserves_manual_scroll_after_active_stream_is_cle
def test_render_messages_preserve_scroll_option_uses_user_pin_state_not_stream_liveness():
render_body = _function_body(UI_JS, "renderMessages")
scroll_helper = _function_body(UI_JS, "_scrollAfterMessageRender")
+ follow_helper = _function_body(UI_JS, "_followMessagesAfterDomReplace")
assert "function renderMessages(options)" in render_body
assert "const preserveScroll=!!(options&&options.preserveScroll);" in render_body
assert "_scrollAfterMessageRender(preserveScroll, scrollSnapshot);" in render_body
assert "const scrollSnapshot=preserveScroll?_captureMessageScrollSnapshot():null" in render_body
- assert "if(preserveScroll){\n if(_scrollPinned) scrollIfPinned();\n else _restoreMessageScrollSnapshot(scrollSnapshot);\n return;\n }" in scroll_helper
+ assert "if(_followMessagesAfterDomReplace()) return;" in scroll_helper
+ assert "_restoreMessageScrollSnapshot(scrollSnapshot);" in scroll_helper
+ assert "_shouldFollowMessagesOnDomReplace()" in follow_helper
+ assert "scrollToBottom();" in follow_helper
assert "if(S.activeStreamId){\n scrollIfPinned();\n return;\n }" in scroll_helper
diff --git a/tests/test_issue2454_active_session_spinner.py b/tests/test_issue2454_active_session_spinner.py
index dd6bb35785..b2c9ffa6c0 100644
--- a/tests/test_issue2454_active_session_spinner.py
+++ b/tests/test_issue2454_active_session_spinner.py
@@ -50,6 +50,22 @@ def test_active_session_idle_reconcile_clears_stale_busy_and_inflight_state():
assert "_sessionStreamingById.set(sid, false)" in body, "observed active streaming state must be reset"
assert "_forgetObservedStreamingSession(sid)" in body, "persisted observed streaming marker must be cleared"
assert "updateSendBtn()" in body, "composer controls must reflect the idle state after cleanup"
+ assert "hideApprovalCard(true)" in body, "stale approval UI must be cleared when server says the run is idle"
+ assert "hideLiveRunStatus(sid)" in body, "stale live footer must be cleared when server says the run is idle"
+ assert "clearLiveToolCards()" in body, "stale live-only tool cards must not survive idle reconciliation"
+ assert "_scheduleActiveSessionIdleReload(sid)" in body, (
+ "idle reconciliation must reload the current transcript from server truth "
+ "so missed stream_end events do not leave the active pane stale"
+ )
+
+
+def test_active_session_idle_reconcile_schedules_forced_transcript_reload():
+ helper_body = _function_body(SESSIONS_SRC, "function _scheduleActiveSessionIdleReload(")
+
+ assert "setTimeout(async () =>" in helper_body
+ assert "S.session.session_id !== sid" in helper_body
+ assert "S.busy || S.activeStreamId" in helper_body
+ assert "loadSession(sid, {force:true, externalRefreshReason:'idle-reconcile'})" in helper_body
def test_session_list_payload_reconciles_active_idle_state_before_optimistic_merge_and_render():
diff --git a/tests/test_issue2565_reasoning_accumulation.py b/tests/test_issue2565_reasoning_accumulation.py
index c3475b681b..168bafcbc4 100644
--- a/tests/test_issue2565_reasoning_accumulation.py
+++ b/tests/test_issue2565_reasoning_accumulation.py
@@ -1,15 +1,17 @@
"""Regression tests for issue #2565: reasoning display bugs.
-Issue 1: reasoningText accumulates across turns within a single SSE stream.
- - reasoningText must be reset at each turn boundary (tool and interim_assistant
- events) so the done event only persists the current turn's reasoning.
-
-Issue 2: ui.js display prefers m.reasoning over m.reasoning_content.
- - The rendering path must prefer m.reasoning_content (the clean per-turn value
- from the backend) over m.reasoning (which can be corrupted by Issue 1).
-
-Both fixes are needed: Issue 2 alone cannot cover providers that stream reasoning
-events without populating reasoning_content on the final API message.
+Issue 1: liveReasoningText is segment-local, while reasoningText is durable for
+the whole assistant turn.
+ - liveReasoningText must reset at tool and interim_assistant boundaries so
+ later reasoning renders in a fresh Thinking Card.
+ - reasoningText must not be reset at those boundaries; it is the fallback
+ durable payload for providers that stream reasoning without final metadata.
+
+Issue 2: provider reasoning metadata should become a Worklog Thinking Card, not
+visible Worklog process prose or final-answer text.
+
+Both fixes are needed: Issue 1 keeps live cards scoped to a segment without data
+loss, while Issue 2 preserves reasoning as low-priority Worklog detail.
"""
import pathlib
@@ -22,13 +24,12 @@ def read(rel):
return (REPO / rel).read_text(encoding='utf-8')
-# ── Issue 1: reasoningText reset at turn boundaries ──────────────────────────
+# ── Issue 1: live reasoning segment reset at turn boundaries ─────────────────
-class TestReasoningTextResetOnTool:
- """reasoningText must be reset alongside liveReasoningText in the tool
- listener so multi-tool-turn sessions don't accumulate reasoning across
- turns."""
+class TestLiveReasoningTextResetOnTool:
+ """liveReasoningText must reset in the tool listener so later provider
+ reasoning renders in a fresh Worklog Thinking Card."""
def _tool_listener_body(self):
"""Extract the full tool listener body between the tool and
@@ -42,12 +43,11 @@ def _tool_listener_body(self):
assert tool_complete_start >= 0, "tool_complete listener not found"
return src[tool_start:tool_complete_start]
- def test_reasoning_text_reset_in_tool_listener(self):
+ def test_durable_reasoning_text_not_reset_in_tool_listener(self):
body = self._tool_listener_body()
- assert "reasoningText=''" in body, (
- "reasoningText must be reset to '' inside the tool listener "
- "(Issue 1: accumulated reasoning from prior turns was assigned "
- "to the last assistant message on the done event)"
+ assert "reasoningText=''" not in body and 'reasoningText = ""' not in body, (
+ "reasoningText must stay durable across tool boundaries so streamed "
+ "provider reasoning is not silently dropped"
)
def test_live_reasoning_text_also_reset_in_tool_listener(self):
@@ -57,13 +57,11 @@ def test_live_reasoning_text_also_reset_in_tool_listener(self):
)
-class TestReasoningTextResetOnInterimAssistant:
- """reasoningText must be reset at the interim_assistant boundary — the
- other turn boundary where the previous turn's reasoning closes out.
- Without this, providers that emit reasoning before an interim_assistant
- event will still co-mingle reasoning across turns."""
+class TestLiveReasoningTextResetOnInterimAssistant:
+ """liveReasoningText must reset at the interim_assistant boundary — the
+ other segment boundary where the previous Thinking Card closes out."""
- def test_reasoning_text_reset_in_interim_assistant_listener(self):
+ def test_durable_reasoning_text_not_reset_in_interim_assistant_listener(self):
src = read('static/messages.js')
m = re.search(
r"source\.addEventListener\('interim_assistant'\s*,\s*(?:e|ev)\s*=>\s*\{(.*?)\n\s*\}\);",
@@ -71,9 +69,9 @@ def test_reasoning_text_reset_in_interim_assistant_listener(self):
)
assert m, "interim_assistant listener not found in messages.js"
body = m.group(1)
- assert "reasoningText=''" in body, (
- "reasoningText must be reset to '' inside the interim_assistant "
- "listener (Issue 1: turn boundary where prior reasoning closes)"
+ assert "reasoningText=''" not in body and 'reasoningText = ""' not in body, (
+ "reasoningText must stay durable across interim assistant boundaries "
+ "so streamed provider reasoning is not silently dropped"
)
def test_live_reasoning_text_reset_in_interim_assistant_listener(self):
@@ -89,44 +87,43 @@ def test_live_reasoning_text_reset_in_interim_assistant_listener(self):
)
-# ── Issue 2: reasoning_content preference on read ────────────────────────────
+# ── Issue 2: reasoning metadata renders as Worklog Thinking Card ─────────────
class TestReasoningContentPreference:
- """The rendering path in ui.js must prefer m.reasoning_content (the clean
- per-turn value from the backend) over m.reasoning (which can be corrupted
- by Issue 1's accumulation bug)."""
+ """Provider reasoning metadata is retained and rendered as Thinking Card
+ detail, but must not become process prose or final-answer text."""
- def test_reasoning_content_checked_before_reasoning(self):
+ def test_reasoning_payload_still_in_message_signature(self):
src = read('static/ui.js')
- assert 'm.reasoning_content' in src, (
- "ui.js must reference m.reasoning_content so the clean per-turn "
- "value from the backend is used for thinking card display"
+ sig_fn = src.split("function _messageHasReasoningPayload(m)", 1)[1].split("function", 1)[0]
+ assert 'm.reasoning' in sig_fn, (
+ "ui.js should still treat persisted reasoning as message metadata "
+ "for cache/signature invalidation"
)
- def test_reasoning_content_preferred_in_thinking_text_fallback(self):
+ def test_reasoning_metadata_not_used_as_inline_content_extraction(self):
src = read('static/ui.js')
- lines = src.splitlines()
- for line in lines:
- if 'thinkingText' in line and 'm.reasoning' in line:
- if 'm.reasoning_content' not in line and 'reasoning_content' not in line:
- if 'Array.isArray' not in line:
- raise AssertionError(
- f"Line references m.reasoning without checking "
- f"m.reasoning_content first: {line.strip()}"
- )
-
- def test_reasoning_content_has_priority_over_reasoning(self):
- """The fallback expression must evaluate reasoning_content first."""
+ extraction = src.split("let thinkingText='';", 1)[1].split("const isUser=m.role==='user';", 1)[0]
+ assert 'm.reasoning_content' not in extraction
+ assert 'm.reasoning' not in extraction
+
+ def test_reasoning_payload_feeds_worklog_thinking_card_helper(self):
+ src = read('static/ui.js')
+ helper = src.split("function _worklogReasoningTextFromMessage", 1)[1].split("function _thinkingCardHtml", 1)[0]
+ assert "_assistantReasoningPayloadText(m)" in helper
+ assert "_stripVisibleAssistantEchoFromThinking" in helper
+
+ def test_no_direct_reasoning_content_to_inline_thinking_assignment(self):
+ """Provider reasoning should not be promoted into inline assistant prose."""
src = read('static/ui.js')
m = re.search(
r"thinkingText\s*=\s*(m\.reasoning_content\s*\|\|\s*m\.reasoning)",
src,
)
- assert m, (
- "thinkingText assignment must use m.reasoning_content || m.reasoning "
- "so the clean backend value takes priority over the potentially "
- "corrupted frontend-accumulated value"
+ assert not m, (
+ "thinkingText must not be assigned from reasoning_content/reasoning; "
+ "those fields are Worklog Thinking Card detail, not final-answer text"
)
diff --git a/tests/test_issue2713_streaming_segment_flush.py b/tests/test_issue2713_streaming_segment_flush.py
index e93643e120..47f0b17f76 100644
--- a/tests/test_issue2713_streaming_segment_flush.py
+++ b/tests/test_issue2713_streaming_segment_flush.py
@@ -177,6 +177,37 @@ def test_interim_handler_flush_before_last_reset(self):
"_resetAssistantSegment in the interim_assistant handler"
)
+ def test_already_streamed_interim_handler_flushes_before_reset(self):
+ """already_streamed interim events are still visible-progress boundaries.
+
+ The visible text already arrived through token events, so the client
+ must not append it again. It must still flush any pending token render
+ before resetting the segment; otherwise a fast tool boundary can orphan
+ the text until a later render or session switch.
+ """
+ src = read("static/messages.js")
+ fn = _extract_handler(src, "interim_assistant")
+ branch_start = fn.index("if(alreadyStreamed)")
+ branch = fn[branch_start : fn.index("assistantText +=", branch_start)]
+ assert "ensureAssistantRow(true)" in branch, (
+ "already_streamed interim boundaries must materialize the current "
+ "token segment before reset"
+ )
+ assert "_flushPendingSegmentRender({force:true})" in branch, (
+ "already_streamed interim boundaries must flush pending token DOM "
+ "before reset"
+ )
+ inactive_guard = "if(!S.session||S.session.session_id!==activeSid){"
+ if inactive_guard in branch:
+ active_branch_start = branch.index(inactive_guard) + branch[branch.index(inactive_guard):].index("}") + 1
+ else:
+ active_branch_start = 0
+ flush_pos = branch.index("_flushPendingSegmentRender({force:true})", active_branch_start)
+ reset_pos = branch.index("_resetAssistantSegment()", active_branch_start)
+ assert flush_pos < reset_pos, (
+ "already_streamed interim flush must happen before segment reset"
+ )
+
def test_interim_handler_creates_visible_segment_before_forced_flush(self):
src = read("static/messages.js")
fn = _extract_handler(src, "interim_assistant")
diff --git a/tests/test_issue3592_thinking_settlement.py b/tests/test_issue3592_thinking_settlement.py
index 673ea04438..628c7c9f0a 100644
--- a/tests/test_issue3592_thinking_settlement.py
+++ b/tests/test_issue3592_thinking_settlement.py
@@ -1,10 +1,8 @@
-"""#3592 -- Thinking-only messages must render inline, not hidden in a collapsed activity group.
+"""#3592 / #3401 -- Thinking-only messages settle into folded Worklog detail.
-Under Simplified Tool Calling mode, the settlement loop wraps ALL post-settlement
-assistant content via ensureActivityGroup({collapsed:true}). When an assistant
-message has thinking but no tool calls, the thinking trace vanished behind a
-collapsed dropdown. Fix: early-continue guard so thinking-only messages render
-inline via _thinkingCardHtml instead of being wrapped.
+Under the #3401 Worklog model, provider reasoning is not inline assistant prose
+and not a Tool Card. It is preserved as an independent Thinking Card inside the
+folded Worklog above the final answer.
"""
from __future__ import annotations
@@ -15,86 +13,60 @@
def test_thinking_card_html_function_exists():
- """_thinkingCardHtml must be defined so the inline path can call it."""
+ """_thinkingCardHtml must be defined so Worklog Thinking Cards can render."""
assert "function _thinkingCardHtml(" in UI_JS, (
"_thinkingCardHtml function must exist in ui.js"
)
-def test_settlement_loop_has_empty_cards_guard():
- """The simplified-tool-calling settlement loop must check cards.length before
- calling ensureActivityGroup, so thinking-only messages skip the collapsed group."""
- assert "!cards.length&&assistantThinking.has(aIdx)" in UI_JS, (
- "Settlement loop must guard on empty cards + thinking presence before "
- "wrapping in a collapsed activity group"
+def test_settlement_loop_does_not_inline_thinking_only_messages():
+ """Thinking-only messages should flow through the Worklog group path."""
+ assert "!cards.length&&assistantThinking.has(aIdx)" not in UI_JS, (
+ "Thinking-only messages must not use the old inline early-continue path"
+ )
+ assert "_thinkingActivityNode(thinkingText, false)" in UI_JS, (
+ "settled reasoning should render as a collapsed Worklog Thinking Card"
)
-def test_early_continue_present_in_settlement_loop():
- """The guard path must contain a continue statement so the activity group
- path is skipped for thinking-only messages."""
- guard_pattern = re.compile(
- r"!cards\.length&&assistantThinking\.has\(aIdx\).*?continue",
- re.DOTALL,
- )
- assert guard_pattern.search(UI_JS), (
- "The early-continue guard for thinking-only messages must be present "
- "in the settlement loop"
- )
+def test_worklog_thinking_card_is_not_a_tool_card():
+ """Thinking Cards should be sibling Worklog items, not Tool Card rows."""
+ thinking_fn = UI_JS.split("function _thinkingActivityNode", 1)[1].split("function", 1)[0]
+ assert "data-worklog-thinking-card" in thinking_fn
+ assert "tool-card-row" not in thinking_fn
+ assert "buildToolCard" not in thinking_fn
-def test_alternative_path_calls_thinking_card_html_inline():
- """The guard branch must call _thinkingCardHtml directly so thinking renders
- inline rather than inside a collapsed activity group."""
- guard_block = re.search(
- r"!cards\.length&&assistantThinking\.has\(aIdx\)(.*?)continue",
- UI_JS,
- re.DOTALL,
- )
- assert guard_block, "Guard block not found"
- block_text = guard_block.group(1)
- assert "_thinkingCardHtml(" in block_text, (
- "The early-continue branch must call _thinkingCardHtml to render "
- "thinking inline"
- )
+def test_final_answer_reasoning_worklog_is_placed_before_anchor():
+ """If the visible final answer carries reasoning metadata, its folded
+ Worklog must be inserted before that final-answer segment.
+ """
+ assert "beforeAnchor:!!thinkingText&&!anchorIsWorklogSource" in UI_JS
+ ensure_fn = UI_JS.split("function ensureActivityGroup", 1)[1].split("function normalizeLiveActivityGroupPlacement", 1)[0]
+ assert "if(opts.beforeAnchor) inner.insertBefore(group, anchor);" in ensure_fn
+ assert "opts.syncAnchorReason!==false" in ensure_fn
def test_show_thinking_preference_respected():
- """The inline thinking path must check _showThinking so the preference is
- honoured the same way as the non-simplified path."""
- guard_block = re.search(
- r"!cards\.length&&assistantThinking\.has\(aIdx\)(.*?)continue",
- UI_JS,
- re.DOTALL,
- )
- assert guard_block, "Guard block not found"
- block_text = guard_block.group(1)
- assert "_showThinking" in block_text, (
- "The early-continue branch must respect window._showThinking"
- )
+ """The simplified render path must respect _showThinking for visible cards."""
+ render_match = re.search(r"if\(thinkingText&&window\._showThinking!==false\)\{(.*?)\n\s*\}", UI_JS, re.DOTALL)
+ assert render_match, "thinking render branch not found"
+ assert "assistantThinking.set(rawIdx, thinkingText)" in render_match.group(1)
-def test_messages_with_tool_calls_still_use_activity_group():
- """Messages that have tool calls must still flow through ensureActivityGroup
- so the existing collapsed-group behaviour is preserved."""
+def test_messages_with_tool_calls_still_use_worklog_group():
+ """Messages that have tool calls must still flow through the Worklog group."""
assert "ensureActivityGroup(" in UI_JS, (
"ensureActivityGroup must still be called for messages with tool calls"
)
-def test_thinking_only_turns_keep_footer_duration():
- """#3592 review regression: a thinking-only turn now renders inline with NO
- activity group, so the footer "Done in …" duration must NOT be suppressed for
- it — suppression belongs only to turns that actually build an activity group
- (tool-call turns). The old condition suppressed on assistantThinking.has(mi)
- too, which silently dropped the duration for thinking-only turns once the
- inline-render `continue` skipped group creation."""
- m = re.search(r"const compactActivityForMessage=isSimplifiedToolCalling\(\)&&([^;]+);", UI_JS)
- assert m, "compactActivityForMessage suppression condition not found"
+def test_thinking_only_turns_use_worklog_duration():
+ """Thinking-only turns now create a folded Worklog group, so that group owns
+ the "Done in ..." duration instead of the final answer footer.
+ """
+ m = re.search(r"const compactWorklogForMessage=isSimplifiedToolCalling\(\)&&([^;]+);", UI_JS)
+ assert m, "compactWorklogForMessage suppression condition not found"
cond = m.group(1)
- assert "toolCallAssistantIdxs.has(mi)" in cond, (
- "duration suppression must key on toolCallAssistantIdxs (group actually created)"
- )
- assert "assistantThinking.has(mi)" not in cond, (
- "thinking-only turns must NOT suppress the footer duration (no group carries it)"
- )
+ assert "toolCallAssistantIdxs.has(mi)" in cond
+ assert "assistantThinking.has(mi)" in cond
diff --git a/tests/test_issue3709_thinking_double_render.py b/tests/test_issue3709_thinking_double_render.py
index d65045c0a0..1b6244958d 100644
--- a/tests/test_issue3709_thinking_double_render.py
+++ b/tests/test_issue3709_thinking_double_render.py
@@ -1,34 +1,16 @@
-"""#3709 -- Thinking card must not render twice (inside Activity AND below the answer).
-
-Regression coverage for the double-render introduced by #3592's inline branch
-(v0.51.258). In a turn that has BOTH a tool-bearing message and a trailing
-thinking-only message, two code paths emitted a thinking card from the same
-``assistantThinking`` map:
-
- 1. the Activity-group path (tool-bearing message) put the thinking at the top
- of the collapsed Activity group, and
- 2. the inline path (thinking-only message) appended a SECOND card via
- ``insertAdjacentHTML('beforeend')`` -- which, because the segment already
- carried the answer body + ``msg-foot`` footer, stranded the card *below*
- the "Done in ..." line.
-
-The fix keeps the #3592 inline behaviour for genuinely thinking-only turns (so
-their thinking is not buried in a collapsed group) but:
-
- A1. only renders inline when the turn has NO Activity group at all
- (``turnsWithActivityGroup`` gate), so a tool-bearing turn's thinking-only
- sibling does not emit a duplicate card;
- A2. inserts the inline card BEFORE the answer body / footer
- (``insertAdjacentHTML('beforebegin')`` on ``.msg-body,.msg-foot``) so it
- reads above the answer instead of orphaned below "Done in ...";
- B. strips the thinking against the TURN's combined visible answer
- (``_turnVisibleTextByRawIdx``) so a trailing thinking-only message whose
- answer prose lives on a sibling message still gets its answer-echo removed.
-
-These are static source-structure assertions (the render path is DOM-driven and
-exercised live); they lock the invariants so the double-render cannot silently
-return, and so a future blunt "just delete the inline branch" change (which would
-re-break #3592) fails fast here instead.
+"""#3709 -- #3401 Worklog Thinking must not render exact duplicates.
+
+Master fixed a double-render bug in the older Activity rendering path: a turn
+with tools plus a sibling thinking-only message could show the same Thinking
+card twice. #3401 replaces that old structure with a folded Worklog made of
+sibling items: process prose, Thinking Card, and Tool Card/Group.
+
+These static assertions keep the #3709 invariant in the #3401 model:
+
+* settled Thinking is rendered through the Worklog item path, not the old inline
+ sibling path below the answer;
+* exact duplicate Thinking cards are keyed by normalized content and suppressed;
+* different sibling reasoning can still become distinct Worklog items.
"""
from __future__ import annotations
@@ -39,127 +21,123 @@
def _render_messages_body() -> str:
- """Return the body of renderMessages() (best-effort slice) for scoped asserts."""
start = UI_JS.find("function renderMessages(")
assert start != -1, "renderMessages() not found"
- # Slice a generous window; the activityIdxs loop + footer logic live within.
- return UI_JS[start:start + 60000]
-
-
-def test_inline_thinking_branch_still_exists_for_thinking_only_turns():
- """#3592 must NOT be reverted: a thinking-only turn still renders its thinking
- inline (not buried in a collapsed Activity group)."""
- assert "!cards.length&&assistantThinking.has(aIdx)" in UI_JS, (
- "the thinking-only inline branch (#3592) must remain — deleting it "
- "re-buries thinking-only turns in a collapsed Activity group"
- )
- assert "_thinkingCardHtml(" in UI_JS
-
-
-def test_inline_branch_gated_on_turn_having_no_activity_group():
- """A1: the inline card must only render when the turn has no Activity group,
- so a tool-bearing turn's thinking-only sibling does not duplicate the card."""
+ return UI_JS[start:start + 80000]
+
+
+def _function_body(name: str) -> str:
+ match = re.search(rf"function\s+{re.escape(name)}\s*\(", UI_JS)
+ assert match, f"{name}() not found"
+ brace = UI_JS.find("{", match.end())
+ assert brace != -1, f"{name}() has no body"
+ depth = 1
+ i = brace + 1
+ in_string = None
+ escaped = False
+ in_line_comment = False
+ in_block_comment = False
+ while i < len(UI_JS) and depth:
+ ch = UI_JS[i]
+ nxt = UI_JS[i + 1] if i + 1 < len(UI_JS) else ""
+ if in_line_comment:
+ if ch == "\n":
+ in_line_comment = False
+ i += 1
+ continue
+ if in_block_comment:
+ if ch == "*" and nxt == "/":
+ in_block_comment = False
+ i += 2
+ continue
+ i += 1
+ continue
+ if in_string:
+ if escaped:
+ escaped = False
+ elif ch == "\\":
+ escaped = True
+ elif ch == in_string:
+ in_string = None
+ i += 1
+ continue
+ if ch == "/" and nxt == "/":
+ in_line_comment = True
+ i += 2
+ continue
+ if ch == "/" and nxt == "*":
+ in_block_comment = True
+ i += 2
+ continue
+ if ch in ("'", '"', "`"):
+ in_string = ch
+ i += 1
+ continue
+ if ch == "{":
+ depth += 1
+ elif ch == "}":
+ depth -= 1
+ i += 1
+ return UI_JS[brace + 1:i - 1]
+
+
+def test_settled_thinking_renders_through_worklog_item_path():
body = _render_messages_body()
- assert "turnsWithActivityGroup" in body, (
- "must precompute the set of turns that already own an Activity group (#3709 A1)"
+ assert "_appendWorklogStep(state.group, anchorRow, cards, thinkingText" in body, (
+ "Settled Thinking should render through the #3401 Worklog item path."
)
- # The inline render must be guarded by a membership check on that set.
- assert re.search(
- r"turnsWithActivityGroup\.has\(\s*anchorTurn\s*\)",
- body,
- ), "the inline thinking render must be gated on turnsWithActivityGroup.has(anchorTurn)"
-
-
-def test_turns_with_activity_group_built_from_tool_bearing_segments():
- """The turnsWithActivityGroup set must be populated from tool-bearing message
- segments' enclosing .assistant-turn nodes."""
- body = _render_messages_body()
- block = re.search(
- r"const turnsWithActivityGroup=new Set\(\);(.*?)const activityIdxs=",
- body,
- re.DOTALL,
+ assert "_thinkingActivityNode(thinkingText, false)" in UI_JS, (
+ "Thinking should remain a dedicated Worklog Thinking Card node."
)
- assert block, "turnsWithActivityGroup population block not found"
- text = block.group(1)
- assert "closest('.assistant-turn')" in text, (
- "must map tool-bearing segments to their enclosing .assistant-turn"
+ assert "data-worklog-thinking-card" in UI_JS, (
+ "Thinking Cards need a stable Worklog-specific hook."
)
- assert "turnsWithActivityGroup.add(" in text
-def test_inline_card_inserted_before_body_and_footer():
- """A2: when the inline render is correct, the card must land BEFORE the answer
- body / msg-foot (beforebegin), not appended after the 'Done in ...' footer."""
- body = _render_messages_body()
- # The inline branch selects the body/foot element and inserts before it.
- assert re.search(r"querySelector\(\s*'\.msg-body,\.msg-foot'\s*\)", body), (
- "inline branch must locate the .msg-body/.msg-foot element to anchor before it"
- )
- assert "insertAdjacentHTML('beforebegin'" in body, (
- "the inline thinking card must be inserted 'beforebegin' the answer body/footer "
- "(not 'beforeend', which strands it below 'Done in ...') (#3709 A2)"
+def test_settled_worklog_thinking_uses_content_key_for_exact_duplicate_suppression():
+ body_min = re.sub(r"\s+", "", _render_messages_body())
+ assert "thinkingKey:thinkingText?`thinking:${_normalizeThinkingEchoCompare(thinkingText)}`:''" in body_min, (
+ "Settled Worklog should suppress duplicate Thinking by normalized content, "
+ "not by assistant message index."
)
+ append_body = _function_body("_appendWorklogStep")
+ assert "seenReasons.has(thinkingKey)" in append_body
+ assert "seenReasons.add(thinkingKey)" in append_body
-def test_no_unconditional_beforeend_thinking_in_inline_branch():
- """The old orphaning insert ('beforeend' of the raw thinking card on the anchor
- row) must be gone from the inline branch."""
+def test_exact_echo_suppression_compares_turn_visible_texts():
body = _render_messages_body()
- # The specific regression pattern: appending the thinking card to the end of
- # the anchor row unconditionally. It must no longer be the inline path.
- assert "anchorRow.insertAdjacentHTML('beforeend',_thinkingCardHtml(assistantThinking.get(aIdx)))" not in body, (
- "the inline branch must not append the thinking card to the end of the "
- "anchor row (that stranded it below the footer — the #3709 bug)"
+ helper = _function_body("_worklogReasoningTextFromMessage")
+ assert "assistantTurnVisibleContentByRawIdx" in body
+ assert "_worklogReasoningTextFromMessage(m, rawIdx, toolCallAssistantIdxs, displayContent, turnFinalVisibleContent, turnVisibleContents)" in body
+ assert "_stripVisibleAssistantEchoFromThinking(thinkingText, visibleContent, turnFinalVisibleContent, ...visibleTexts)" in helper, (
+ "A thinking-only sibling that exactly echoes the visible process/final text "
+ "should be suppressed after settlement."
)
-def test_turn_level_echo_strip_exists():
- """B: thinking is stripped against the TURN's combined visible answer, not only
- the same message's body — so a trailing thinking-only message that echoes the
- answer gets de-duped too."""
+def test_distinct_sibling_reasoning_is_still_available_to_worklog():
body = _render_messages_body()
- assert "_turnVisibleTextByRawIdx" in body, (
- "must build a per-turn combined visible-answer map (#3709 defect B)"
+ assert "for(const aIdx of assistantThinking.keys())" in body, (
+ "Each assistant reasoning entry should still be eligible for a Worklog item."
)
- # The strip site must consult the turn-level text in addition to displayContent.
- assert re.search(
- r"_turnVisibleTextByRawIdx\.get\(\s*rawIdx\s*\)",
- body,
- ), "the echo-strip must look up the turn's combined visible text"
- # And it must feed that into the echo-strip helper.
- strip_block = re.search(
- r"_turnVisibleTextByRawIdx\.get\(\s*rawIdx\s*\)(.*?)_stripVisibleAssistantEchoFromThinking\(\s*thinkingText\s*,\s*turnVisible\s*\)",
- body,
- re.DOTALL,
- )
- assert strip_block, (
- "the turn-level visible text must be passed to "
- "_stripVisibleAssistantEchoFromThinking"
+ assert "const thinkingText=thinkingIdx!==null?assistantThinking.get(thinkingIdx):''" in body
+ assert "seenReasons:state.seenReasons" in body, (
+ "Duplicate suppression should be scoped to rendered Worklog keys, not by "
+ "dropping reasoning metadata up front."
)
-def test_suppressed_sibling_thinking_merged_into_group_not_dropped():
- """When the A1 gate suppresses a thinking-only sibling's inline card (because
- its turn has an Activity group), that sibling's thinking must NOT be lost — the
- group must render the TURN's merged thinking, not only the tool message's own
- entry. (Codex re-gate finding: rendering only assistantThinking.get(aIdx) for
- the tool index dropped a distinct sibling's reasoning.)"""
+def test_old_inline_activity_double_render_path_is_not_restored():
body = _render_messages_body()
- # A per-turn thinking aggregation must exist...
- assert "turnThinkingParts" in body, (
- "must aggregate thinking per turn so a suppressed sibling's reasoning is "
- "carried into the Activity group, not dropped (#3709 / Codex re-gate)"
+ assert "!cards.length&&assistantThinking.has(aIdx)" not in body, (
+ "The old thinking-only inline Activity branch should not return in the "
+ "#3401 Worklog model."
)
- # ...and the Activity group must render the MERGED text, de-duped, once per turn.
- assert "mergedThinking" in body, (
- "the Activity group must render the turn's merged thinking"
+ assert "anchorRow.insertAdjacentHTML('beforeend',_thinkingCardHtml(assistantThinking.get(aIdx)))" not in body, (
+ "Thinking must not be appended below the final answer/footer."
)
- assert "_renderedTurnThinking" in body, (
- "merged thinking must render once per turn (guard against double-emit when "
- "a turn has multiple tool messages)"
+ assert "mergedThinking" not in body, (
+ "The old Activity mergedThinking implementation should not be required "
+ "after #3401 moves Thinking into Worklog sibling items."
)
- # The group node must be built from the merged text, not the single-index entry.
- assert re.search(
- r"_thinkingActivityNode\(\s*mergedThinking\s*,",
- body,
- ), "the Activity group thinking node must be built from mergedThinking"
diff --git a/tests/test_live_activity_timeline.py b/tests/test_live_activity_timeline.py
index 26d337c813..5235542557 100644
--- a/tests/test_live_activity_timeline.py
+++ b/tests/test_live_activity_timeline.py
@@ -6,69 +6,386 @@
"""
import pathlib
+import shutil
+import subprocess
REPO = pathlib.Path(__file__).parent.parent
UI_JS = (REPO / "static" / "ui.js").read_text(encoding="utf-8")
MESSAGES_JS = (REPO / "static" / "messages.js").read_text(encoding="utf-8")
STYLE_CSS = (REPO / "static" / "style.css").read_text(encoding="utf-8")
+NODE = shutil.which("node")
-def test_live_activity_group_has_observable_baseline_events():
+def _function_source(src, name):
+ marker = f"function {name}("
+ start = src.find(marker)
+ assert start != -1
+ brace = src.find("{", start)
+ depth = 0
+ for idx in range(brace, len(src)):
+ ch = src[idx]
+ if ch == "{":
+ depth += 1
+ elif ch == "}":
+ depth -= 1
+ if depth == 0:
+ return src[start:idx + 1]
+ raise AssertionError(f"function {name} did not close")
+
+
+def test_run_activity_group_has_observable_baseline_events():
assert "function _ensureLiveActivityBaseline(group)" in UI_JS
+ assert "function ensureRunActivityGroup(inner, opts)" in UI_JS
+ assert "data-run-activity-group" in UI_JS
assert "Run started" in UI_JS
assert "Observable activity will appear here as the agent works." in UI_JS
assert "Model: ${modelLabel}" in UI_JS
assert "_ensureLiveActivityBaseline(group);" in UI_JS
+ assert "ensureActivityGroup(inner, opts)" in UI_JS
+
+
+def test_per_segment_tool_activity_does_not_include_run_metadata_rows():
+ activity_fn = UI_JS.split("function ensureActivityGroup(inner, opts)", 1)[1].split("function ensureRunActivityGroup", 1)[0]
+ tool_fn = UI_JS.split("function appendLiveToolCard(tc)", 1)[1].split("function clearLiveToolCards", 1)[0]
+ assert "_ensureLiveActivityBaseline" not in activity_fn
+ assert "_appendActivityEvent(group" not in tool_fn
+ assert "Tool finished: ${toolName}" not in UI_JS
+ assert "Running tool: ${toolName}" not in UI_JS
+ assert "_worklogReasonNodeFromText(thinkingText" not in UI_JS
+ assert "_thinkingActivityNode(clean, false)" in UI_JS
+ assert "data-live-thinking-key" in UI_JS
-def test_empty_thinking_placeholder_becomes_status_row_not_raw_thinking_card():
- assert "data-activity-event-id=\"thinking-placeholder\"" in UI_JS
- assert "Starting agent" in UI_JS
- assert "Creating the stream and sending your message…" in UI_JS
- assert "Waiting for first model token" in UI_JS
- assert "Stream connected; no model output has arrived yet." in UI_JS
- assert "Waiting on model" in UI_JS
- assert "Reviewing the prompt and context, then choosing the next action or composing the response." in UI_JS
- assert "Reviewing prompt and context" in UI_JS
- assert "Waiting on tool result" in UI_JS
- assert "Last step: ${action} (${toolName}); now choosing the next action or composing a response." in UI_JS
- assert "_thinkingActivityNode(thinkingText, false)" in UI_JS
-
-
-def test_stream_start_refreshes_waiting_status_after_stream_id_arrives():
- active_idx = MESSAGES_JS.find("S.activeStreamId = streamId;")
- assert active_idx != -1
- refresh_idx = MESSAGES_JS.find("appendThinking('',{pending:true})", active_idx)
- attach_idx = MESSAGES_JS.find("attachLiveStream(activeSid, streamId, uploadedNames);", active_idx)
- assert refresh_idx != -1
- assert attach_idx != -1
- assert refresh_idx < attach_idx
-
-
-def test_activity_feed_default_expand_setting_is_wired():
- index_html = (REPO / "static" / "index.html").read_text(encoding="utf-8")
- panels_js = (REPO / "static" / "panels.js").read_text(encoding="utf-8")
- boot_js = (REPO / "static" / "boot.js").read_text(encoding="utf-8")
- config_py = (REPO / "api" / "config.py").read_text(encoding="utf-8")
-
- assert 'id="settingsActivityFeedExpandedDefault"' in index_html
- assert "settings_label_activity_feed_expanded_default" in index_html
- assert '"activity_feed_expanded_default": False' in config_py
- assert "activity_feed_expanded_default" in panels_js
- assert "window._activityFeedExpandedDefault=!!s.activity_feed_expanded_default;" in boot_js
- assert "if(window._activityFeedExpandedDefault===true) collapsed=false;" in UI_JS
- finalize_fn = UI_JS.split("function finalizeThinkingCard")[1].split("\nfunction ")[0]
- assert "_activityFeedExpandedDefault" in finalize_fn
- assert "_liveActivityUserExpanded !== false" in finalize_fn
-
-
-def test_tool_events_update_activity_timeline_and_summary():
- assert "Tool finished: ${toolName}" in UI_JS
- assert "Running tool: ${toolName}" in UI_JS
- assert "No recent activity for ${_formatActiveElapsedTimer(idleAge)}" in UI_JS
- assert "Activity · Running" in UI_JS
+def test_tool_activity_uses_tool_cards_and_run_activity_owns_timer():
+ assert "buildToolCard(tc)" in UI_JS
+ build_fn = UI_JS.split("function buildToolCard(tc)", 1)[1].split("function _syncToolCallGroupSummary", 1)[0]
+ assert "tool-card-duration" not in build_fn
+ assert "tool-worklog-list" in UI_JS
+ assert "tool-worklog-summary" in UI_JS
+ assert "tool-call-group-duration" in UI_JS
+ assert "Activity · Running" not in UI_JS
assert "Working for ${label}" in UI_JS
+ assert "_isActivityTimerGroup(group)" in UI_JS
+ assert "opts.turnDuration" in UI_JS
+ assert "data-turn-duration" in UI_JS
+ assert "durationText?` Done in ${durationText}`" in UI_JS
+ assert "return !!(group&&group.getAttribute('data-run-activity-group')==='1');" in UI_JS
+ live_summary_fn = UI_JS.split("function _syncToolCallGroupSummary(group)", 1)[1].split("function _activityProgressLabelForToolName", 1)[0]
+ assert "_activityLiveProgressLabel(group)" in live_summary_fn
+ assert "[progressText, activeText].filter(Boolean).join(' · ')" in live_summary_fn
+
+
+def test_settled_activity_render_keeps_tools_bound_to_progress_bursts():
+ render_fn = UI_JS.split("if(!S.busy){", 1)[1].split("// Render per-turn duration", 1)[0]
+ assert "_assistantAnchorForActivity" in render_fn
+ assert "const byActivity = new Map()" in render_fn
+ assert "tc.activityBurstId" in render_fn
+ assert "activityByTurn" in render_fn
+ assert "_appendWorklogStep(state.group" in render_fn
+ assert "ensureActivityGroup(anchorParent,{" in render_fn
+ assert "_toolWorklogListEl(group)" in render_fn
+
+
+def test_settled_final_answer_segment_is_not_folded_into_worklog():
+ helper = _function_source(UI_JS, "_assistantMessageBelongsInWorklog")
+ render_fn = UI_JS.split("const messageBelongsInWorklog=", 1)[1].split("if(_ERR_MSG_RE", 1)[0]
+
+ assert "hasVisibleText&&isTurnFinalAssistant" in helper
+ assert "return false;" in helper
+ assert "{isTurnFinalAssistant}" in render_fn
+ assert "assistant-segment-worklog-source" in render_fn
+
+
+def test_settled_worklog_only_reads_anchor_reasons_from_hidden_progress_segments():
+ render_fn = UI_JS.split("for(const [aIdx,seg] of assistantSegments)", 1)[1].split("activityOrder.sort", 1)[0]
+
+ assert "contains('assistant-segment-worklog-source')" in render_fn
+ assert "_assistantMessageBelongsInWorklog(msg,aIdx,toolCallAssistantIdxs)" not in render_fn
+
+
+def test_settled_worklog_does_not_reuse_run_activity_group():
+ activity_fn = UI_JS.split("function ensureActivityGroup(inner, opts)", 1)[1].split("function normalizeLiveActivityGroupPlacement", 1)[0]
+ assert ':not([data-run-activity-group="1"])' in activity_fn
+ assert "if(!group&&!activityKey)" in activity_fn
+ assert "data-tool-worklog-group" in activity_fn
+
+
+def test_settled_worklog_can_move_anchor_text_into_reason():
+ reason_fn = UI_JS.split("function _worklogReasonHtmlFromAnchor(anchor, textOverride)", 1)[1].split("function _syncWorklogReasonFromAnchor", 1)[0]
+ assert "matches('.assistant-segment')" in reason_fn
+ assert "matches('[data-live-assistant=\"1\"]')" not in reason_fn
+
+
+def test_settled_render_skips_empty_activity_buckets():
+ render_fn = UI_JS.split("for(const entry of activityOrder){", 1)[1].split("// Render per-turn duration", 1)[0]
+ assert "const anchorReasonHtml=_worklogReasonHtmlFromAnchor(anchorRow);" in render_fn
+ assert "if(!cards.length&&!anchorReasonHtml&&!thinkingText) continue;" in render_fn
+
+
+def test_reattach_normalizes_live_activity_group_placement_by_burst_anchor():
+ assert "function normalizeLiveActivityGroupPlacement(turn)" in UI_JS
+ assert "normalizeLiveActivityGroupPlacement(restored)" in UI_JS
+ activity_fn = UI_JS.split("function ensureActivityGroup(inner, opts)", 1)[1].split("function normalizeLiveActivityGroupPlacement", 1)[0]
+ assert "anchor.insertAdjacentElement('afterend',group);" in activity_fn
+ normalize_fn = UI_JS.split("function normalizeLiveActivityGroupPlacement(turn)", 1)[1].split("function ensureRunActivityGroup", 1)[0]
+ assert '.tool-call-group[data-live-tool-worklog-group="1"],.tool-call-group[data-live-tool-call-group="1"]' in normalize_fn
+ assert "_findLiveAssistantAnchorForSegment(blocks, segmentSeq)" in normalize_fn
+ assert "_findLatestVisibleLiveAssistantByBurst(blocks, burstId)" in normalize_fn
+ assert "_findLatestVisibleLiveAssistant(blocks)" in normalize_fn
+
+
+def test_done_handler_preserves_live_tool_burst_metadata_for_settled_render():
+ assert "function _mergeSettledToolCallsWithLiveMetadata(rawCalls)" in MESSAGES_JS
+ assert "activityBurstId" in MESSAGES_JS
+ assert "S.toolCalls=_mergeSettledToolCallsWithLiveMetadata(d.session.tool_calls);" in MESSAGES_JS
+ assert "S.toolCalls=_mergeSettledToolCallsWithLiveMetadata(session.tool_calls||[]);" in MESSAGES_JS
+
+
+def test_message_tool_metadata_path_keeps_live_burst_metadata_available():
+ assert "S._settledLiveToolMetadata=S.toolCalls.map" in MESSAGES_JS
+ assert "S.toolCalls=hasMessageToolMetadata?[]:S.toolCalls.map" in MESSAGES_JS
+ render_fn = UI_JS.split("const derived=[];", 1)[1].split("if(derived.length) S.toolCalls=derived;", 1)[0]
+ assert "S._settledLiveToolMetadata" in render_fn
+ assert "liveToolMetadata" in render_fn
+ assert "copyLiveToolMetadata" in render_fn
+ assert "activityBurstId" in render_fn
+
+
+def test_message_tool_metadata_empty_assistant_tools_reuse_previous_visible_anchor():
+ assert "function _assistantToolAnchorIdxForMessage(messages, rawIdx)" in UI_JS
+ render_fn = UI_JS.split("const derived=[];", 1)[1].split("if(derived.length) S.toolCalls=derived;", 1)[0]
+ assert "const assistantToolAnchorIdx=_assistantToolAnchorIdxForMessage(S.messages,rawIdx);" in render_fn
+ assert "assistant_msg_idx:assistantToolAnchorIdx" in render_fn
+
+ assert NODE, "node not on PATH"
+ has_visible_fn = _function_source(UI_JS, "_assistantMessageHasVisibleContent")
+ empty_placeholder_fn = _function_source(UI_JS, "_isAssistantEmptyPlaceholderContent")
+ has_reasoning_fn = _function_source(UI_JS, "_messageHasReasoningPayload")
+ reasoning_fn = _function_source(UI_JS, "_assistantReasoningPayloadText")
+ anchor_fn = _function_source(UI_JS, "_assistantToolAnchorIdxForMessage")
+ script = f"""
+const assert = require('assert');
+function _isRecoveryControlMessage(){{ return false; }}
+function msgContent(m){{
+ if(!m) return '';
+ if(typeof m.content === 'string') return m.content;
+ if(Array.isArray(m.content)) return m.content.map(part => part && typeof part.text === 'string' ? part.text : '').join('');
+ return '';
+}}
+{has_reasoning_fn}
+{empty_placeholder_fn}
+{has_visible_fn}
+{reasoning_fn}
+{anchor_fn}
+const messages = [
+ {{role:'assistant', content:'visible progress'}},
+ {{role:'assistant', content:'', tool_calls:[{{id:'call-1'}}]}},
+ {{role:'assistant', content:'', tool_calls:[{{id:'call-2'}}]}},
+ {{role:'assistant', content:'next progress', tool_calls:[{{id:'call-3'}}]}},
+ {{role:'assistant', content:[{{type:'tool_use', id:'call-4', name:'read_file'}}]}},
+ {{role:'assistant', content:'', reasoning_content:'process text', tool_calls:[{{id:'call-5'}}]}},
+];
+assert.strictEqual(_assistantToolAnchorIdxForMessage(messages, 1), 0);
+assert.strictEqual(_assistantToolAnchorIdxForMessage(messages, 2), 0);
+assert.strictEqual(_assistantToolAnchorIdxForMessage(messages, 3), 3);
+assert.strictEqual(_assistantToolAnchorIdxForMessage(messages, 4), 3);
+assert.strictEqual(_assistantToolAnchorIdxForMessage(messages, 5), 5);
+"""
+ result = subprocess.run([NODE, "-e", script], capture_output=True, text=True, check=False)
+ assert result.returncode == 0, result.stderr
+
+
+def test_settled_tool_metadata_merge_replaces_null_activity_metadata():
+ assert NODE, "node not on PATH"
+ fn = _function_source(MESSAGES_JS, "_mergeSettledToolCallsWithLiveMetadata")
+ script = f"""
+const assert = require('assert');
+const S = {{
+ toolCalls: [{{tid:'tool-1', name:'read_file', activityBurstId:2, duration:1.25, started_at:123}}]
+}};
+{fn}
+const merged = _mergeSettledToolCallsWithLiveMetadata([
+ {{tid:'tool-1', name:'read_file', activityBurstId:null, duration:null, started_at:null}}
+]);
+assert.strictEqual(merged[0].activityBurstId, 2);
+assert.strictEqual(merged[0].duration, 1.25);
+assert.strictEqual(merged[0].started_at, 123);
+"""
+ result = subprocess.run([NODE, "-e", script], capture_output=True, text=True, check=False)
+ assert result.returncode == 0, result.stderr
+
+
+def test_settled_activity_render_treats_burst_zero_as_unanchored_activity():
+ render_fn = UI_JS.split("if(!S.busy){", 1)[1].split("// Render per-turn duration", 1)[0]
+ assert "String(burstId)!=='0'" in render_fn
+ assert "if(aIdx= 2
+ for idx in inactive_returns[:2]:
+ branch = wire_fn[idx:wire_fn.find("}", idx) + 1]
+ assert "recordActivityBoundary();" in branch
+ assert "_resetAssistantSegment();" in branch
+
+
+def test_tool_event_flushes_pending_text_before_inserting_activity():
+ """A tool card must not appear before the text segment it is anchored to.
+
+ Token rendering is throttled through rAF. On mobile/slow clients a `tool`
+ event can arrive while the current assistantRow exists but its pending text
+ has not been written into `.msg-body` yet. If appendLiveToolCard() runs
+ first, the Activity group appears, then the delayed flush fills the empty
+ segment above it a frame later, which looks like process text was inserted
+ before an already-visible Activity row.
+ """
+ tool_handler = MESSAGES_JS.split("source.addEventListener('tool',e=>{", 1)[1].split("source.addEventListener('tool_complete'", 1)[0]
+ flush_pos = tool_handler.find("_flushPendingSegmentRender({force:true});")
+ append_pos = tool_handler.find("appendLiveToolCard(tc")
+ assert flush_pos != -1 and append_pos != -1
+ assert flush_pos < append_pos
+
+
+def test_pending_text_flush_syncs_existing_worklog_reason():
+ """If Activity was created before text was flushed, the next text render must
+ move that text into the existing Worklog instead of leaving it as a separate
+ assistant segment above the tool rows.
+ """
+ assert "function _syncLiveWorklogReasonsForAnchor(anchor, displayTextOverride)" in UI_JS
+ flush_fn = MESSAGES_JS.split("function _flushPendingSegmentRender(options={})", 1)[1].split("function _resetAssistantSegment", 1)[0]
+ assert "_syncLiveWorklogReasonsForAnchor(assistantRow, displayText)" in flush_fn
+ render_fn = MESSAGES_JS.split("const _doRender=()=>{", 1)[1].split("scrollIfPinned();", 1)[0]
+ assert "_syncLiveWorklogReasonsForAnchor(assistantRow, displayText)" in render_fn
+
+
+def test_pending_text_flush_passes_display_text_to_worklog_reason_sync():
+ """Forced flush owns the authoritative displayText for this frame.
+
+ Do not make Worklog reason synchronization depend only on reading the
+ already-rendered DOM: streaming-markdown can lag a frame behind the known
+ displayText during bursty token/tool boundaries.
+ """
+ sync_fn = _function_source(UI_JS, "_syncLiveWorklogReasonsForAnchor")
+ assert "displayTextOverride" in sync_fn
+ assert "_syncWorklogReasonFromAnchor(group, anchor, displayTextOverride)" in sync_fn
+ flush_fn = MESSAGES_JS.split("function _flushPendingSegmentRender(options={})", 1)[1].split("function _resetAssistantSegment", 1)[0]
+ assert "_syncLiveWorklogReasonsForAnchor(assistantRow, displayText)" in flush_fn
+ render_fn = MESSAGES_JS.split("const _doRender=()=>{", 1)[1].split("scrollIfPinned();", 1)[0]
+ assert "_syncLiveWorklogReasonsForAnchor(assistantRow, displayText)" in render_fn
+
+
+def test_tool_event_does_not_create_blank_text_segment_without_pending_text():
+ """Tool-only bursts should not create empty assistant text segments.
+
+ A tool event can arrive before any visible answer text exists. Forcing
+ ensureAssistantRow(true) in that path creates a blank `.assistant-segment`
+ above every Activity group, making Live Stream look unstable during long
+ polling turns.
+ """
+ tool_handler = MESSAGES_JS.split("source.addEventListener('tool',e=>{", 1)[1].split("source.addEventListener('tool_complete'", 1)[0]
+ upsert_pos = tool_handler.find("const tc=upsertLiveToolCall(d,'start');")
+ guard_pos = tool_handler.find("String(pendingDisplayText||'').trim()")
+ force_pos = tool_handler.find("ensureAssistantRow(true);")
+ append_pos = tool_handler.find("appendLiveToolCard(tc")
+ assert upsert_pos != -1 and guard_pos != -1 and force_pos != -1 and append_pos != -1
+ assert upsert_pos < guard_pos < force_pos < append_pos
+ assert "if(!assistantRow||!assistantBody) ensureAssistantRow(true);" not in tool_handler
+
+
+def test_orphan_tool_complete_does_not_create_blank_text_segment_without_pending_text():
+ """An orphan tool_complete should not manufacture an empty assistant segment."""
+ complete_handler = MESSAGES_JS.split("source.addEventListener('tool_complete',e=>{", 1)[1].split("source.addEventListener('approval'", 1)[0]
+ orphan_branch = complete_handler.split("if(tc._createdByComplete){", 1)[1].split("} else {", 1)[0]
+ guard_pos = orphan_branch.find("String(pendingDisplayText||'').trim()")
+ force_pos = orphan_branch.find("ensureAssistantRow(true);")
+ flush_pos = orphan_branch.find("_flushPendingSegmentRender({force:true});")
+ append_pos = orphan_branch.find("appendLiveToolCard(tc")
+ assert guard_pos != -1 and force_pos != -1 and flush_pos != -1 and append_pos != -1
+ assert guard_pos < force_pos < flush_pos < append_pos
+ assert "if(!assistantRow||!assistantBody) ensureAssistantRow(true);" not in orphan_branch
+
+
+def test_reattach_segment_start_aligns_with_last_burst_anchor():
+ """Simulate the reattach segmentStart initializer with multiple anchors.
+
+ The initializer must clamp to the actual assistantText length and ignore
+ stale anchors past the end of the text, otherwise displayText slicing in
+ _doRender would produce empty output for the tail segment.
+ """
+ assert NODE, "node not on PATH"
+ body = MESSAGES_JS.split("function attachLiveStream(", 1)[1]
+ seg_block_start = body.find("let segmentStart=(()=>{")
+ assert seg_block_start != -1, "expected reconnect-aware segmentStart IIFE"
+ seg_block_end = body.find("})();", seg_block_start) + len("})();")
+ initializer = body[seg_block_start:seg_block_end] + ";"
+ # Wrap as a callable with explicit reconnecting + INFLIGHT/activeSid stand-ins.
+ script = f"""
+const assert = require('assert');
+function computeStart(reconnecting, inflight, assistantText) {{
+ const INFLIGHT = {{ 'sid': inflight }};
+ const activeSid = 'sid';
+ {initializer}
+ return segmentStart;
+}}
+// No anchors -> 0
+assert.strictEqual(computeStart(true, {{activityBurstAnchors:[]}}, 'hello world'), 0);
+// Single anchor inside text length -> anchor textEnd
+assert.strictEqual(computeStart(true, {{activityBurstAnchors:[{{id:1,textEnd:5}}]}}, 'hello world'), 5);
+// Multiple anchors -> picks max textEnd within text length
+assert.strictEqual(computeStart(true, {{activityBurstAnchors:[
+ {{id:1,textEnd:5}}, {{id:2,textEnd:11}}, {{id:3,textEnd:7}}
+]}}, 'hello world'), 11);
+// Anchor textEnd past assistantText length -> ignored
+assert.strictEqual(computeStart(true, {{activityBurstAnchors:[
+ {{id:1,textEnd:5}}, {{id:2,textEnd:99}}
+]}}, 'hello world'), 5);
+// Not reconnecting -> always 0
+assert.strictEqual(computeStart(false, {{activityBurstAnchors:[
+ {{id:1,textEnd:5}}, {{id:2,textEnd:11}}
+]}}, 'hello world'), 0);
+// Missing inflight entry -> 0
+assert.strictEqual(computeStart(true, undefined, 'hello'), 0);
+"""
+ result = subprocess.run([NODE, "-e", script], capture_output=True, text=True, check=False)
+ assert result.returncode == 0, result.stderr
def test_activity_status_rows_have_quiet_metadata_styling():
diff --git a/tests/test_live_stream_ux.py b/tests/test_live_stream_ux.py
index cc7ca0140d..2f03f34bf8 100644
--- a/tests/test_live_stream_ux.py
+++ b/tests/test_live_stream_ux.py
@@ -36,8 +36,6 @@ def test_recovery_control_detection_is_not_broad_phrase_matching():
assert "|| /continue exactly where you left off/i.test(normalized)" not in MESSAGES_JS
assert "const systemRecovery=/^\\[System:/i.test(normalized)" in UI_JS
assert "const backendRecovery=/^the live worker stopped before this run finished\\.?$/i.test(normalized)" in UI_JS
-
-
def test_recovery_control_does_not_filter_genuine_interruption_card():
"""A real 'Response interrupted' card carries provider_details_label
'Interruption details' but is NOT a recovery-control row — it must stay
diff --git a/tests/test_live_tool_callback_events.py b/tests/test_live_tool_callback_events.py
index 74c8b3430f..2997b45413 100644
--- a/tests/test_live_tool_callback_events.py
+++ b/tests/test_live_tool_callback_events.py
@@ -68,6 +68,10 @@ def test_tool_callback_events_keep_existing_frontend_event_contract():
assert "source.addEventListener('tool',e=>{" in messages
assert "source.addEventListener('tool_complete',e=>{" in messages
- assert "tid:d.tid" in messages
+ assert "String(d&&d.tid" in messages or "explicitTid=String(d&&d.tid" in messages, (
+ "frontend tool handlers must still consume explicit server tid when present"
+ )
+ assert "upsertLiveToolCall(d,'start')" in messages
+ assert "upsertLiveToolCall(d,'complete')" in messages
assert "data-live-tid" in ui
assert "existing.replaceWith(replacement)" in ui
diff --git a/tests/test_regressions.py b/tests/test_regressions.py
index a887e8f76d..890413ba5f 100644
--- a/tests/test_regressions.py
+++ b/tests/test_regressions.py
@@ -428,7 +428,7 @@ def test_loadSession_inflight_restores_live_tool_cards(cleanup_test_sessions):
# INFLIGHT branch must call appendLiveToolCard
inflight_idx = src.find("if(INFLIGHT[sid]){")
assert inflight_idx >= 0, "INFLIGHT branch not found in loadSession"
- inflight_block = src[inflight_idx:inflight_idx+1600]
+ inflight_block = src[inflight_idx:inflight_idx+4200]
assert "appendLiveToolCard" in inflight_block, "loadSession INFLIGHT branch must restore live tool cards via appendLiveToolCard"
assert "clearLiveToolCards" in inflight_block, "loadSession INFLIGHT branch must clear old live cards before restoring"
@@ -543,6 +543,18 @@ def test_chat_start_persists_pending_turn_metadata_for_reload_recovery(cleanup_t
assert '"pending_user_message": getattr(s, "pending_user_message", None)' in routes_src
+def test_session_detail_uses_runtime_streaming_state(cleanup_test_sessions):
+ """GET /api/session must agree with /api/sessions on live stream ownership."""
+ routes_src = (REPO_ROOT / "api/routes.py").read_text()
+ session_route = routes_src.split('if parsed.path == "/api/session":', 1)[1].split(
+ 'if parsed.path == "/api/session/lineage/report":', 1
+ )[0]
+ assert "active_stream_ids = _active_stream_ids()" in session_route
+ assert "s.compact(" in session_route
+ assert "include_runtime=True" in session_route
+ assert "active_stream_ids=active_stream_ids" in session_route
+
+
def test_reload_path_restores_pending_message_and_reattaches_live_stream(cleanup_test_sessions):
"""R15d: the frontend reload path must show the pending user turn and
reattach to the live SSE stream after loadSession().
@@ -558,6 +570,19 @@ def test_reload_path_restores_pending_message_and_reattaches_live_stream(cleanup
'const activeStreamId=S.session.active_stream_id||null;' in sessions_src)
assert 'attachLiveStream(sid, activeStreamId' in sessions_src
assert 'if (S.activeStreamId && S.activeStreamId === streamId) return;' in ui_src
+ active_branch_start = sessions_src.index("if(activeStreamId){\n S.busy=true;")
+ active_branch_end = sessions_src.index("}else{\n S.busy=false;", active_branch_start)
+ active_branch = sessions_src[active_branch_start:active_branch_end]
+ render_pos = active_branch.index("renderMessages(")
+ shell_pos = active_branch.index("ensureLiveWorklogShell")
+ assert render_pos < shell_pos, (
+ "Reloading an active stream must recreate the live worklog shell after "
+ "renderMessages() rebuilds msgInner; otherwise the stream stays invisible "
+ "until a session switch triggers another restore path."
+ )
+ assert "else appendThinking();" in active_branch, (
+ "Non-simplified tool-calling must keep the legacy fallback."
+ )
# ── R16: Switching away/back must preserve live partial assistant output ─────
@@ -609,7 +634,7 @@ def test_inflight_session_state_tracks_live_tool_cards_per_session(cleanup_test_
messages_src = (REPO_ROOT / "static/messages.js").read_text()
sessions_src = (REPO_ROOT / "static/sessions.js").read_text()
- assert "INFLIGHT[activeSid].toolCalls.push(tc);" in messages_src, \
+ assert "inflight.toolCalls.push(tc)" in messages_src, \
"tool SSE handler must persist live tool calls onto the in-flight session"
assert "S.toolCalls=(INFLIGHT[sid].toolCalls||[]);" in sessions_src, \
"loadSession() must restore live tool calls from the in-flight session state"
@@ -624,7 +649,7 @@ def test_loadSession_inflight_sets_busy_before_renderMessages(cleanup_test_sessi
src = (REPO_ROOT / "static/sessions.js").read_text()
inflight_idx = src.find("if(INFLIGHT[sid]){")
assert inflight_idx >= 0, "INFLIGHT branch not found in loadSession"
- inflight_block = src[inflight_idx:inflight_idx+1600]
+ inflight_block = src[inflight_idx:inflight_idx+4200]
busy_pos = inflight_block.find("S.busy=true;")
# #3326 added an optional {preserveScroll} arg to the INFLIGHT-branch render
# call, so match the call form rather than the bare `renderMessages();`.
@@ -652,6 +677,34 @@ def test_loadSession_inflight_merges_tail_with_persisted_transcript(cleanup_test
)
+def test_renderMessages_preserves_loading_placeholder_for_session_switch(cleanup_test_sessions):
+ """R16d: renderMessages should not repaint transcript during session-load window.
+
+ During loadSession(sid) after the loading metadata call, S.messages is
+ intentionally empty until _ensureMessagesLoaded(sid) settles. A concurrent
+ renderMessages() must keep the existing 'Loading conversation...' placeholder
+ instead of clearing #msgInner to an empty transcript.
+ """
+ ui_src = (REPO_ROOT / "static/ui.js").read_text()
+ fn_start = ui_src.find("function renderMessages")
+ assert fn_start >= 0, "renderMessages() not found in ui.js"
+ fn_body = ui_src[fn_start:fn_start + 1400]
+
+ compact = re.sub(r"\s+", "", fn_body)
+ assert (
+ "if(_loadingSessionId===sid&&msgCount===0&&inner)return;" in compact
+ ), (
+ "renderMessages() must return early when loadSession is active for"
+ " the current sid and S.messages is still empty."
+ )
+
+ # Guard must live before render-window reset and message-filter pass.
+ reset_pos = compact.find("if(sid!==_messageRenderWindowSid)_resetMessageRenderWindow(sid);")
+ guard_pos = compact.find("if(_loadingSessionId===sid&&msgCount===0&&inner)return;")
+ assert (
+ 0 <= guard_pos < reset_pos
+ ), "Session-load empty-state guard must run before render-window/state resets."
+
def test_browser_session_url_accepts_api_session_id_param(cleanup_test_sessions):
"""External links using ?session_id=... should open that session in the browser.
@@ -705,7 +758,7 @@ def test_loadSession_inflight_sets_active_stream_before_replaying_live_tool_card
src = (REPO_ROOT / "static/sessions.js").read_text()
inflight_idx = src.find("if(INFLIGHT[sid]){")
assert inflight_idx >= 0, "INFLIGHT branch not found in loadSession"
- inflight_block = src[inflight_idx:inflight_idx+1600]
+ inflight_block = src[inflight_idx:inflight_idx+4200]
active_pos = inflight_block.find("S.activeStreamId=activeStreamId;")
replay_pos = inflight_block.find("appendLiveToolCard(tc);")
attach_pos = inflight_block.find("attachLiveStream(sid, activeStreamId")
@@ -773,12 +826,19 @@ def test_messages_js_supports_live_reasoning_and_tool_completion(cleanup_test_se
# On initial connect it defaults to ''; on reconnect it restores from
# INFLIGHT so the already-rendered content survives the session switch.
assert ("let reasoningText=''" in src
- or "let reasoningText = _lastLiveAssistant" in src), \
+ or "let reasoningText = _lastLiveAssistant" in src
+ or "let reasoningText=_lastLiveReasoning" in src), \
"messages.js must track streamed reasoning text separately from assistant text"
- assert ("let liveReasoningText=''" in src or "let liveReasoningText = reasoningText" in src), \
+ assert ("let liveReasoningText=''" in src
+ or "let liveReasoningText = reasoningText" in src
+ or "let liveReasoningText=_lastLiveReasoning" in src), \
"messages.js must track the currently active reasoning segment separately from cumulative reasoning"
assert "source.addEventListener('reasoning'" in src or 'source.addEventListener("reasoning"' in src, \
"messages.js must listen for live reasoning SSE events"
+ assert "liveReasoningText += text" in src, \
+ "live reasoning SSE events must update the active Worklog Thinking Card text"
+ assert "_updateLiveThinkingCard(_liveThinkingText())" in src, \
+ "live reasoning SSE events must refresh the current segment's Worklog Thinking Card"
assert "source.addEventListener('tool_complete'" in src or 'source.addEventListener("tool_complete"' in src, \
"messages.js must listen for live tool completion SSE events"
assert "function _parseStreamState()" in src, \
@@ -803,14 +863,15 @@ def test_messages_js_supports_interim_assistant_events(cleanup_test_sessions):
def test_ui_js_can_upgrade_thinking_spinner_into_live_reasoning_card(cleanup_test_sessions):
- """R19: ui.js must be able to replace the placeholder thinking spinner with
- streamed reasoning text while a turn is in progress.
+ """R19: ui.js keeps the thinking helpers available while simplified mode
+ renders provider reasoning as a Worklog Thinking Card.
"""
src = (REPO_ROOT / "static/ui.js").read_text()
assert "function _thinkingMarkup(text='')" in src or 'function _thinkingMarkup(text="")' in src, \
"ui.js must centralize thinking row markup so it can switch between spinner and live text"
- assert "function updateThinking(text=''){appendThinking(text);}" in src or 'function updateThinking(text=""){appendThinking(text);}' in src, \
- "ui.js must expose an updateThinking helper for live reasoning rendering"
+ assert ("function updateThinking(text='', options){appendThinking(text, options);}" in src
+ or 'function updateThinking(text="", options){appendThinking(text, options);}' in src), \
+ "ui.js must expose an updateThinking helper that preserves live Thinking placement metadata"
assert "function finalizeThinkingCard()" in src, \
"ui.js must expose a helper to finalize one live thinking card before starting another"
@@ -841,15 +902,23 @@ def test_ui_js_keeps_reasoning_only_assistant_messages_visible(cleanup_test_sess
def test_ui_js_does_not_hide_anchor_segments_that_contain_thinking(cleanup_test_sessions):
- """R19c2/R19c3: reasoning-only messages must remain visible through the
- shared compact timeline activity UI, even when the anchor segment has no prose.
+ """R19c2/R19c3: reasoning-only metadata must remain preserved as a
+ collapsed Worklog Thinking Card.
"""
src = (REPO_ROOT / "static" / "ui.js").read_text()
compact = src.replace(' ', '').replace('\n', '')
assert "assistantThinking.set(rawIdx,thinkingText)" in compact, \
"renderMessages must preserve reasoning text before hiding empty anchor segments"
+ helper_start = src.find("function _worklogReasoningTextFromMessage")
+ helper_end = src.find("function _thinkingCardHtml", helper_start)
+ assert helper_start != -1 and helper_end != -1
+ helper = src[helper_start:helper_end]
+ assert "_assistantReasoningPayloadText(m)" in helper and "_stripVisibleAssistantEchoFromThinking" in helper, \
+ "provider reasoning metadata should feed the Worklog Thinking Card after exact duplicate suppression"
+ assert "data-worklog-thinking-card" in src, \
+ "Thinking Cards should be explicit Worklog items, not tool cards"
assert "_thinkingActivityNode(thinkingText, false)" in src, \
- "thinking-only assistant content should render as a collapsed timeline Thinking card"
+ "settled reasoning should render as a collapsed Worklog Thinking Card"
def test_messages_js_live_assistant_segment_reuses_live_turn_wrapper(cleanup_test_sessions):
@@ -872,12 +941,20 @@ def test_messages_js_live_assistant_segment_reuses_live_turn_wrapper(cleanup_tes
def test_messages_js_finalizes_thinking_card_before_tool_card(cleanup_test_sessions):
- """R19e: later reasoning after a tool call must render in a fresh card."""
+ """R19e: later reasoning after a tool call must render in a fresh Worklog
+ Thinking Card without discarding durable reasoning.
+ """
src = (REPO_ROOT / "static/messages.js").read_text()
- assert "finalizeThinkingCard" in src, \
+ tool_start = src.find("source.addEventListener('tool'")
+ tool_complete_start = src.find("source.addEventListener('tool_complete'", tool_start + 1)
+ assert tool_start >= 0 and tool_complete_start > tool_start
+ body = src[tool_start:tool_complete_start]
+ assert "finalizeThinkingCard()" in body, \
"tool handler must finalize the current live thinking card before appending a tool card"
- assert "liveReasoningText='';" in src or 'liveReasoningText = "";' in src, \
+ assert "liveReasoningText='';" in body or 'liveReasoningText = "";' in body, \
"tool handler must reset the active reasoning segment before post-tool reasoning arrives"
+ assert "reasoningText=''" not in body and 'reasoningText = ""' not in body, \
+ "tool handler must not discard durable reasoning already assigned to the Worklog"
# ── R17: Stack traces must not leak to clients in 500 responses ────────────
diff --git a/tests/test_run_journal.py b/tests/test_run_journal.py
index 22a7b24eb4..84f30e0924 100644
--- a/tests/test_run_journal.py
+++ b/tests/test_run_journal.py
@@ -27,6 +27,26 @@ def test_run_journal_appends_monotonic_seq_and_reads_after_cursor(tmp_path):
assert [event["event"] for event in journal["events"]] == ["done"]
+def test_run_journal_reads_bounded_replay_window(tmp_path):
+    """Reading with ``after_seq`` and ``max_seq`` yields only the events between them.
+
+    Four token events are journaled, then the journal is read with
+    ``after_seq=1`` and ``max_seq=3``; only the second and third events are
+    expected back.
+    """
+    journal_writer = RunJournalWriter("session_1", "run_1", session_dir=tmp_path)
+    for word in ("one", "two", "three", "four"):
+        journal_writer.append_sse_event("token", {"text": word})
+
+    replayed = read_run_events(
+        "session_1",
+        "run_1",
+        after_seq=1,
+        max_seq=3,
+        session_dir=tmp_path,
+    )["events"]
+
+    assert [entry["seq"] for entry in replayed] == [2, 3]
+    assert [entry["payload"]["text"] for entry in replayed] == ["two", "three"]
+
+
def test_run_journal_default_fsyncs_terminal_events_only(tmp_path, monkeypatch):
path = tmp_path / "_run_journal" / "session_1" / "run_1.jsonl"
path.parent.mkdir(parents=True)
diff --git a/tests/test_run_journal_frontend_static.py b/tests/test_run_journal_frontend_static.py
index 5b8ea81a1b..9a10058442 100644
--- a/tests/test_run_journal_frontend_static.py
+++ b/tests/test_run_journal_frontend_static.py
@@ -11,7 +11,7 @@ def test_reattach_path_uses_replay_when_status_reports_journal():
assert "st.replay_available" in block
assert "replayOnly=true" in block
- assert "replayOnly?_runJournalReplayParams():''" in block
+ assert "(reconnecting||replayOnly)?_runJournalReplayParams():''" in block
assert "_clearOwnerInflightState()" in block
@@ -64,8 +64,15 @@ def test_replayed_long_task_events_enter_the_same_live_timeline_handlers():
f"{event_name} must be handled by the shared live/replay SSE pipeline"
)
- assert "updateThinking(" in wire_block, "reasoning replay should use the live Thinking card path"
- assert "appendLiveToolCard(tc)" in wire_block, "tool replay should use live tool-card rendering"
+ thinking_helper = MESSAGES_SRC[
+ MESSAGES_SRC.index("function _updateLiveThinkingCard") :
+ MESSAGES_SRC.index("// Split a content string", MESSAGES_SRC.index("function _updateLiveThinkingCard"))
+ ]
+ assert "_updateLiveThinkingCard(" in wire_block, "reasoning replay should use the live Thinking card path"
+ assert "updateThinking(text, opts)" in thinking_helper and "appendThinking(text, opts)" in thinking_helper, (
+ "the shared Thinking helper should still route replay/live reasoning into the Worklog Thinking card path"
+ )
+ assert "appendLiveToolCard(tc" in wire_block, "tool replay should use live tool-card rendering"
# Compression replay must dispatch through setCompressionUi(...). The handler
# body may build the state object inline (`setCompressionUi({...})`) or hoist
# it into a `state` variable first (`setCompressionUi(state)`) — both forms
diff --git a/tests/test_run_journal_routes.py b/tests/test_run_journal_routes.py
index 862a18a391..bae7aacdc1 100644
--- a/tests/test_run_journal_routes.py
+++ b/tests/test_run_journal_routes.py
@@ -1,6 +1,8 @@
from pathlib import Path
from types import SimpleNamespace
+from urllib.parse import urlparse
import io
+import queue
ROOT = Path(__file__).resolve().parents[1]
@@ -24,10 +26,232 @@ def test_dead_stream_sse_replays_journal_before_404_fallback():
assert "find_run_summary(stream_id)" in block
assert "stream not found" in block
assert "_replay_run_journal" in block
- assert "_parse_run_journal_after_seq(qs, stream_id)" in block
+ assert "_parse_run_journal_after_seq" in block
assert 'Content-Type", "text/event-stream; charset=utf-8"' in block
+def test_active_stream_replay_uses_snapshot_cutoff_and_skips_duplicate_queue_items(monkeypatch):
+    """A replaying subscriber gets the journaled token once, then the queued tail.
+
+    The fake journal returns one token event with id ``<run_id>:1`` and the
+    subscriber queue also holds a token tagged ``run_1:1``; the snapshot reports
+    ``run_1:1`` as its last event id. The response must contain exactly one
+    token frame plus the queued ``run_1:2`` frame, and the handler must
+    unsubscribe from the stream's queue.
+    """
+    import api.routes as routes
+
+    class FakeStream:
+        def __init__(self):
+            self.q = queue.Queue()
+            for queued in (
+                ("token", {"text": "replayed"}, "run_1:1"),
+                ("stream_end", {}, "run_1:2"),
+            ):
+                self.q.put_nowait(queued)
+            self.unsubscribed = False
+
+        def subscribe_with_snapshot(self):
+            snapshot = {"last_event_id": "run_1:1", "offline_buffered_events": 1}
+            return self.q, snapshot
+
+        def unsubscribe(self, q):
+            self.unsubscribed = q is self.q
+
+    class Handler:
+        # Handler double: keeps written bytes, ignores status line and headers.
+        def __init__(self):
+            self.wfile = io.BytesIO()
+
+        def send_response(self, _code):
+            return None
+
+        def send_header(self, _name, _value):
+            return None
+
+        def end_headers(self):
+            return None
+
+    def fake_find_run_summary(stream_id):
+        return {
+            "session_id": "session_1",
+            "run_id": stream_id,
+            "terminal": False,
+        }
+
+    def fake_read_run_events(session_id, run_id, after_seq=None, max_seq=None):
+        journaled_token = {
+            "event": "token",
+            "payload": {"text": "replayed"},
+            "event_id": f"{run_id}:1",
+        }
+        return {"events": [journaled_token]}
+
+    def fake_stale_interrupted_event(*_args, **_kwargs):
+        return None
+
+    handler = Handler()
+    stream = FakeStream()
+    monkeypatch.setattr(routes, "find_run_summary", fake_find_run_summary)
+    monkeypatch.setattr(routes, "read_run_events", fake_read_run_events)
+    monkeypatch.setattr(routes, "stale_interrupted_event", fake_stale_interrupted_event)
+
+    # Swap the stream registry for the duration of the call, then put it back.
+    saved_streams = dict(routes.STREAMS)
+    routes.STREAMS.clear()
+    routes.STREAMS["run_1"] = stream
+    request_url = urlparse("/api/chat/stream?stream_id=run_1&replay=1&after_seq=0")
+    try:
+        routes._handle_sse_stream(handler, request_url)
+    finally:
+        routes.STREAMS.clear()
+        routes.STREAMS.update(saved_streams)
+
+    body = handler.wfile.getvalue().decode("utf-8")
+    assert body.count("event: token\n") == 1
+    assert "id: run_1:1\n" in body
+    assert "id: run_1:2\n" in body
+    assert stream.unsubscribed is True
+
+
+def test_active_stream_snapshot_keeps_items_for_new_run_with_same_seq_range(monkeypatch):
+    """A snapshot cursor naming another run must not suppress this run's frames.
+
+    The snapshot reports ``run_old:3`` while the queue holds ``run_new:1`` and
+    ``run_new:2`` and the fake journal is empty. Both queued frames must be
+    written, the first exactly once, and the handler must unsubscribe.
+    """
+    import api.routes as routes
+
+    class FakeStream:
+        def __init__(self):
+            self.q = queue.Queue()
+            for queued in (
+                ("token", {"text": "fresh"}, "run_new:1"),
+                ("stream_end", {}, "run_new:2"),
+            ):
+                self.q.put_nowait(queued)
+            self.unsubscribed = False
+
+        def subscribe_with_snapshot(self):
+            snapshot = {
+                "last_event_id": "run_old:3",
+                "offline_buffered_events": 2,
+            }
+            return self.q, snapshot
+
+        def unsubscribe(self, q):
+            self.unsubscribed = q is self.q
+
+    class Handler:
+        # Handler double: keeps written bytes, ignores status line and headers.
+        def __init__(self):
+            self.wfile = io.BytesIO()
+
+        def send_response(self, _code):
+            return None
+
+        def send_header(self, _name, _value):
+            return None
+
+        def end_headers(self):
+            return None
+
+    def fake_find_run_summary(stream_id):
+        return {
+            "session_id": "session_2",
+            "run_id": stream_id,
+            "terminal": False,
+        }
+
+    def fake_read_run_events(session_id, run_id, after_seq=None, max_seq=None):
+        return {"events": []}
+
+    def fake_stale_interrupted_event(*_args, **_kwargs):
+        return None
+
+    handler = Handler()
+    stream = FakeStream()
+    monkeypatch.setattr(routes, "find_run_summary", fake_find_run_summary)
+    monkeypatch.setattr(routes, "read_run_events", fake_read_run_events)
+    monkeypatch.setattr(routes, "stale_interrupted_event", fake_stale_interrupted_event)
+
+    # Swap the stream registry for the duration of the call, then put it back.
+    saved_streams = dict(routes.STREAMS)
+    routes.STREAMS.clear()
+    routes.STREAMS["run_new"] = stream
+    request_url = urlparse("/api/chat/stream?stream_id=run_new&replay=1&after_seq=0")
+    try:
+        routes._handle_sse_stream(handler, request_url)
+    finally:
+        routes.STREAMS.clear()
+        routes.STREAMS.update(saved_streams)
+
+    body = handler.wfile.getvalue().decode("utf-8")
+    assert "id: run_new:1\n" in body
+    assert "id: run_new:2\n" in body
+    assert body.count("id: run_new:1\n") == 1
+    assert stream.unsubscribed is True
+
+
+def test_active_stream_replay_without_journal_keeps_buffered_queue_items(monkeypatch):
+    """Queued frames are still delivered when no run summary exists for the stream.
+
+    ``find_run_summary`` is patched to return ``None``; the token already
+    sitting in the subscriber queue must nevertheless reach the response with
+    its own event id.
+    """
+    import api.routes as routes
+
+    class FakeStream:
+        def __init__(self):
+            self.q = queue.Queue()
+            for queued in (
+                ("token", {"text": "buffered"}, "missing_journal_run:1"),
+                ("stream_end", {}, "missing_journal_run:2"),
+            ):
+                self.q.put_nowait(queued)
+
+        def subscribe_with_snapshot(self):
+            snapshot = {"last_event_id": "missing_journal_run:1", "offline_buffered_events": 1}
+            return self.q, snapshot
+
+        def unsubscribe(self, _q):
+            return None
+
+    class Handler:
+        # Handler double: keeps written bytes, ignores status line and headers.
+        def __init__(self):
+            self.wfile = io.BytesIO()
+
+        def send_response(self, _code):
+            return None
+
+        def send_header(self, _name, _value):
+            return None
+
+        def end_headers(self):
+            return None
+
+    def fake_find_run_summary(_stream_id):
+        return None
+
+    monkeypatch.setattr(routes, "find_run_summary", fake_find_run_summary)
+    handler = Handler()
+
+    # Swap the stream registry for the duration of the call, then put it back.
+    saved_streams = dict(routes.STREAMS)
+    routes.STREAMS.clear()
+    routes.STREAMS["missing_journal_run"] = FakeStream()
+    request_url = urlparse("/api/chat/stream?stream_id=missing_journal_run&replay=1&after_seq=0")
+    try:
+        routes._handle_sse_stream(handler, request_url)
+    finally:
+        routes.STREAMS.clear()
+        routes.STREAMS.update(saved_streams)
+
+    body = handler.wfile.getvalue().decode("utf-8")
+    assert "id: missing_journal_run:1\n" in body
+    assert "event: token\n" in body
+    assert "buffered" in body
+
+
+def test_live_sse_uses_each_queue_items_own_event_id():
+    """Each live frame is written with the event id carried by its own queue item.
+
+    Two 3-tuples are queued on a real stream channel; the response must pair
+    ``run_own_id:1`` with the token frame and ``run_own_id:2`` with the
+    stream_end frame, and the second id must appear only once.
+    """
+    import api.routes as routes
+    from api.config import create_stream_channel
+
+    class Handler:
+        # Handler double: keeps written bytes, ignores status line and headers.
+        def __init__(self):
+            self.wfile = io.BytesIO()
+
+        def send_response(self, _code):
+            return None
+
+        def send_header(self, _name, _value):
+            return None
+
+        def end_headers(self):
+            return None
+
+    stream = create_stream_channel()
+    for queued in (
+        ("token", {"text": "A"}, "run_own_id:1"),
+        ("stream_end", {"ok": True}, "run_own_id:2"),
+    ):
+        stream.put_nowait(queued)
+    handler = Handler()
+
+    # Swap the stream registry for the duration of the call, then put it back.
+    saved_streams = dict(routes.STREAMS)
+    routes.STREAMS.clear()
+    routes.STREAMS["run_own_id"] = stream
+    request_url = urlparse("/api/chat/stream?stream_id=run_own_id")
+    try:
+        routes._handle_sse_stream(handler, request_url)
+    finally:
+        routes.STREAMS.clear()
+        routes.STREAMS.update(saved_streams)
+
+    body = handler.wfile.getvalue().decode("utf-8")
+    assert "id: run_own_id:1\nevent: token\n" in body
+    assert "id: run_own_id:2\nevent: stream_end\n" in body
+    assert body.count("id: run_own_id:2\n") == 1
+
+
def test_replay_emits_event_ids_and_stale_restart_diagnostic():
replay_pos = ROUTES_SRC.index("def _replay_run_journal")
block = ROUTES_SRC[replay_pos : replay_pos + 1200]
@@ -100,7 +324,7 @@ def test_replay_run_journal_writes_replayed_events_and_synthetic_terminal(monkey
monkeypatch.setattr(
routes,
"read_run_events",
- lambda session_id, run_id, after_seq=None: {
+ lambda session_id, run_id, after_seq=None, max_seq=None: {
"events": [
{
"event": "token",
@@ -113,7 +337,7 @@ def test_replay_run_journal_writes_replayed_events_and_synthetic_terminal(monkey
monkeypatch.setattr(
routes,
"stale_interrupted_event",
- lambda session_id, run_id, after_seq=None: {
+ lambda session_id, run_id, after_seq=None, max_seq=None: {
"event": "apperror",
"payload": {"type": "interrupted"},
"event_id": f"{run_id}:2",
@@ -143,8 +367,9 @@ def test_replay_run_journal_honors_after_seq_cursor(monkeypatch):
},
)
- def fake_read_run_events(session_id, run_id, after_seq=None):
+ def fake_read_run_events(session_id, run_id, after_seq=None, max_seq=None):
captured["after_seq"] = after_seq
+ captured["max_seq"] = max_seq
return {
"events": [
{
@@ -159,27 +384,75 @@ def fake_read_run_events(session_id, run_id, after_seq=None):
assert routes._replay_run_journal(handler, "run_1", 3) is True
assert captured["after_seq"] == 3
+ assert captured["max_seq"] is None
body = handler.wfile.getvalue().decode("utf-8")
assert "id: run_1:4\n" in body
assert "event: done\n" in body
-def test_parse_run_journal_after_seq_is_run_aware():
+def test_active_stream_replay_keeps_items_for_new_run_with_same_seq_range(monkeypatch):
import api.routes as routes
- assert routes._parse_run_journal_after_seq(
- {"after_event_id": ["new-run:7"], "after_seq": ["7"]},
- "new-run",
- ) == 7
- assert routes._parse_run_journal_after_seq(
- {"after_event_id": ["old-run:7"], "after_seq": ["7"]},
- "new-run",
- ) is None
- assert routes._parse_run_journal_after_seq({"after_seq": ["3"]}, "new-run") == 3
-
-
-def test_frontend_sends_run_aware_replay_cursor():
- messages_src = (ROOT / "static" / "messages.js").read_text(encoding="utf-8")
- assert "let _lastRunJournalEventId=''" in messages_src
- assert "_lastRunJournalEventId=raw" in messages_src
- assert "after_event_id=${encodeURIComponent(_lastRunJournalEventId||'')}" in messages_src
+ class FakeStream:
+ def __init__(self):
+ self.q = queue.Queue()
+ self.q.put_nowait(("token", {"text": "fresh"}, "run_new:1"))
+ self.q.put_nowait(("stream_end", {}, "run_new:2"))
+ self.unsubscribed = False
+
+ def subscribe_with_snapshot(self):
+ return self.q, {
+ "last_event_id": "run_old:3",
+ "offline_buffered_events": 2,
+ }
+
+ def unsubscribe(self, q):
+ self.unsubscribed = q is self.q
+
+ class Handler:
+ def __init__(self):
+ self.wfile = io.BytesIO()
+
+ def send_response(self, _code):
+ pass
+
+ def send_header(self, _name, _value):
+ pass
+
+ def end_headers(self):
+ pass
+
+ handler = Handler()
+ stream = FakeStream()
+ monkeypatch.setattr(
+ routes,
+ "find_run_summary",
+ lambda stream_id: {
+ "session_id": "session_2",
+ "run_id": stream_id,
+ "terminal": False,
+ },
+ )
+ monkeypatch.setattr(
+ routes,
+ "read_run_events",
+ lambda session_id, run_id, after_seq=None, max_seq=None: {"events": []},
+ )
+ monkeypatch.setattr(routes, "stale_interrupted_event", lambda *_args, **_kwargs: None)
+ previous_streams = dict(routes.STREAMS)
+ routes.STREAMS.clear()
+ routes.STREAMS["run_new"] = stream
+ try:
+ routes._handle_sse_stream(
+ handler,
+ urlparse("/api/chat/stream?stream_id=run_new&replay=1&after_seq=0"),
+ )
+ finally:
+ routes.STREAMS.clear()
+ routes.STREAMS.update(previous_streams)
+
+ body = handler.wfile.getvalue().decode("utf-8")
+ assert "id: run_new:1\n" in body
+ assert "id: run_new:2\n" in body
+ assert body.count("id: run_new:1\n") == 1
+ assert stream.unsubscribed is True
diff --git a/tests/test_run_journal_streaming_static.py b/tests/test_run_journal_streaming_static.py
index 90b64cca0c..ea455a13c8 100644
--- a/tests/test_run_journal_streaming_static.py
+++ b/tests/test_run_journal_streaming_static.py
@@ -15,14 +15,9 @@ def test_streaming_journals_sse_events_before_queue_delivery():
src = Path("api/streaming.py").read_text(encoding="utf-8")
put_idx = src.index("def put(event, data):")
journal_idx = src.index("run_journal.append_sse_event(event, data)", put_idx)
- # Stage-364 maintainer fix: put() now pushes 3-tuples (event, data, event_id)
- # so the SSE consumer can emit `id:` on live frames. Accept either shape
- # so this test survives both the v0.51.71 in-flight fix and a future revert.
- try:
- queue_idx = src.index("q.put_nowait((event, data, event_id))", put_idx)
- except ValueError:
- queue_idx = src.index("q.put_nowait((event, data))", put_idx)
+ queue_idx = src.index("q.put_nowait(queue_item)", put_idx)
block = src[put_idx:queue_idx]
assert put_idx < journal_idx < queue_idx
assert "Failed to append run journal event" in block
+ assert "queue_item = (event, data, event_id) if event_id and hasattr(q, \"subscribe_with_snapshot\") else (event, data)" in block
diff --git a/tests/test_session_attention_badges.py b/tests/test_session_attention_badges.py
index a2c75272cc..3772de013d 100644
--- a/tests/test_session_attention_badges.py
+++ b/tests/test_session_attention_badges.py
@@ -148,6 +148,13 @@ def test_session_sidebar_renders_attention_badge_and_semantic_classes():
assert ".session-item.attention-clarify" in style_css
# The text-badge styles were removed; the dot now carries the color.
assert ".session-attention-badge" not in style_css
+ assert "is-attention-clarify" in sessions_js, (
+ "renderSessionList must tag the state indicator with is-attention-clarify."
+ )
assert ".session-state-indicator.is-attention-approval" in style_css
assert ".session-state-indicator.is-attention-clarify" in style_css
+ assert ".session-state-indicator.is-attention-generic{visibility:visible;}" in style_css
+ assert ".session-state-indicator.is-attention-approval{color:var(--error);}" in style_css
+ assert ".session-state-indicator.is-attention-clarify{color:var(--warning);}" in style_css
+ assert ".session-state-indicator.is-attention-generic{color:var(--warning);}" in style_css
assert "prefers-reduced-motion" in style_css
diff --git a/tests/test_session_rotate_url_sync.py b/tests/test_session_rotate_url_sync.py
index a275f97814..9c0736ec0b 100644
--- a/tests/test_session_rotate_url_sync.py
+++ b/tests/test_session_rotate_url_sync.py
@@ -1,5 +1,6 @@
"""Regression tests for session id rotation URL sync."""
from pathlib import Path
+import re
REPO_ROOT = Path(__file__).parent.parent.resolve()
MESSAGES_JS = (REPO_ROOT / "static" / "messages.js").read_text(encoding="utf-8")
@@ -9,16 +10,20 @@ def test_stream_completion_syncs_rotated_session_id_to_tab_state():
"""When compact/restore returns a new session id, the tab anchor follows it."""
# #3018 inserted a carry-forward of ephemeral per-turn fields into both the
# completion (_finishDone) and settled-restore assignments; match the new shapes.
- completion_marker = "S.session=d.session;S.messages=_carryForwardEphemeralTurnFields(S.messages||[], d.session.messages||[]);"
+ completion_marker = re.compile(
+ r"S\.session=d\.session;\s*"
+ r"S\.messages=_carryForwardEphemeralTurnFields\(S\.messages\|\|\[\], d\.session\.messages\|\|\[\]\);"
+ )
settled_marker = "S.session=session;\n const _nextMsgs3018=(session.messages||[]).filter(m=>m&&m.role);"
- completion_pos = MESSAGES_JS.find(completion_marker)
+ completion_match = completion_marker.search(MESSAGES_JS)
+ completion_pos = completion_match.start() if completion_match else -1
settled_pos = MESSAGES_JS.find(settled_marker)
assert completion_pos != -1
assert settled_pos != -1
- completion_block = MESSAGES_JS[completion_pos : completion_pos + 900]
- settled_block = MESSAGES_JS[settled_pos : settled_pos + 900]
+ completion_block = MESSAGES_JS[completion_pos : completion_pos + 800]
+ settled_block = MESSAGES_JS[settled_pos : settled_pos + 800]
for block in (completion_block, settled_block):
assert "localStorage.setItem('hermes-webui-session',S.session.session_id);" in block
diff --git a/tests/test_sprint42.py b/tests/test_sprint42.py
index 8e37aded77..b2d6381e0d 100644
--- a/tests/test_sprint42.py
+++ b/tests/test_sprint42.py
@@ -403,12 +403,39 @@ def path(self):
self.assertTrue(callable(init_kwargs["interim_assistant_callback"]))
self.assertIn("WebUI progress guidance", captured["agent"].ephemeral_system_prompt)
self.assertIn("Match the normal Hermes messaging style", captured["agent"].ephemeral_system_prompt)
- self.assertIn("user-visible progress updates", captured["agent"].ephemeral_system_prompt)
+ self.assertIn(
+ "do not let long tool-running WebUI turns appear silent",
+ captured["agent"].ephemeral_system_prompt,
+ )
+ self.assertIn(
+ "emit brief user-visible progress updates as normal assistant content",
+ captured["agent"].ephemeral_system_prompt,
+ )
+ self.assertIn(
+ "Before the first tool batch in a long task",
+ captured["agent"].ephemeral_system_prompt,
+ )
+ self.assertIn(
+ "Do not run many independent tool batches back-to-back without visible assistant text between them",
+ captured["agent"].ephemeral_system_prompt,
+ )
+ self.assertIn(
+ "Do not keep progress only in reasoning, thinking, or tool-result channels",
+ captured["agent"].ephemeral_system_prompt,
+ )
+ self.assertNotIn(
+ "you may provide brief user-visible progress updates",
+ captured["agent"].ephemeral_system_prompt,
+ )
interim_events = []
while not fake_queue.empty():
try:
- interim_events.append(fake_queue.get_nowait())
+ item = fake_queue.get_nowait()
+ if isinstance(item, tuple) and len(item) >= 2:
+ interim_events.append((item[0], item[1]))
+ else:
+ interim_events.append(item)
except queue.Empty:
break
self.assertTrue(
@@ -724,11 +751,11 @@ def test_done_handler_patches_reasoning_field():
src = (REPO / 'static' / 'messages.js').read_text()
# The persistence comment must be present inside the done handler
- assert "Persist reasoning trace so thinking card survives page reload" in src, \
+ assert "Persist reasoning trace for Worklog Thinking Cards" in src, \
"Reasoning persistence comment not found in messages.js done handler"
# The guard and assignment must be present
- assert "if(reasoningText){" in src, \
+ assert "if(reasoningText&&lastAsst&&!lastAsst.reasoning)" in src, \
"reasoningText guard not found in messages.js"
assert "lastAsst.reasoning=reasoningText" in src, \
@@ -736,7 +763,7 @@ def test_done_handler_patches_reasoning_field():
# Verify the patch is inside the done handler (after 'source.addEventListener' for done)
done_handler_idx = src.index("source.addEventListener('done'")
- persist_idx = src.index("Persist reasoning trace so thinking card survives page reload")
+ persist_idx = src.index("Persist reasoning trace for Worklog Thinking Cards")
assert done_handler_idx < persist_idx, \
"Reasoning persistence patch must be inside the done SSE handler"
@@ -745,21 +772,21 @@ def test_done_handler_patches_reasoning_field():
"Guard '!lastAsst.reasoning' missing — would overwrite server-persisted reasoning"
-def test_rendermessages_reads_reasoning_from_messages():
- """ui.js renderMessages must read m.reasoning to display the thinking card."""
+def test_rendermessages_keeps_reasoning_metadata_out_of_worklog_display():
+ """ui.js renderMessages must not promote provider reasoning metadata into Worklog prose."""
src = (REPO / 'static' / 'ui.js').read_text()
- # m.reasoning must be read in the render path
- assert 'm.reasoning' in src, \
- "m.reasoning not referenced in ui.js — thinking card won't render on reload"
+ sig_fn = src.split("function _messageHasReasoningPayload(m)", 1)[1].split("function", 1)[0]
+ assert 'm.reasoning' in sig_fn, \
+ "m.reasoning should remain part of metadata/cache signature handling"
- # The thinking card rendering block must also be present
+ # Legacy thinking-card helpers may still exist for explicit debug surfaces.
assert 'thinking-card' in src, \
"thinking-card CSS class not found in ui.js"
- # Specifically, the fallback that reads from top-level m.reasoning field
- assert 'thinkingText=m.reasoning' in src.replace(' ', ''), \
- "thinkingText=m.reasoning assignment not found in ui.js renderMessages"
+ extraction = src.split("let thinkingText='';", 1)[1].split("const isUser=m.role==='user';", 1)[0]
+ assert 'm.reasoning' not in extraction
+ assert 'm.reasoning_content' not in extraction
def test_streaming_restores_prior_reasoning_metadata_after_followup():
diff --git a/tests/test_stage364_opus_live_sse_event_id.py b/tests/test_stage364_opus_live_sse_event_id.py
index 855212124d..eb36504c3e 100644
--- a/tests/test_stage364_opus_live_sse_event_id.py
+++ b/tests/test_stage364_opus_live_sse_event_id.py
@@ -1,4 +1,4 @@
-"""Regression test for stage-364 Opus-caught SHOULD-FIX (side-channel approach):
+"""Regression test for stage-364 Opus-caught SHOULD-FIX (per-frame cursor):
When the live SSE stream errors mid-stream and the frontend falls back to
journal replay, live frames must carry an `id:` field so the frontend's
@@ -7,19 +7,17 @@
seq 1, double-rendering tokens against the live-phase `assistantText`
accumulator.
-Implementation (stage-364 — side-channel approach to avoid breaking the
-queue tuple contract used by 4 existing tests):
+Implementation:
- api/config.py adds `STREAM_LAST_EVENT_ID: dict = {}` module-level dict.
- api/streaming.py `put()` captures `journaled["event_id"]` from
`RunJournalWriter.append_sse_event()` return and writes it to
`STREAM_LAST_EVENT_ID[stream_id]`.
- - api/routes.py `_handle_sse_stream` reads `STREAM_LAST_EVENT_ID[stream_id]`
- at SSE emit time and uses `_sse_with_id` when set.
+ - StreamChannel queue items carry `(event, data, event_id)` so active
+ subscribers emit each frame with its own id instead of the latest global id.
+ - Legacy plain queues keep `(event, data)` and use `STREAM_LAST_EVENT_ID` as a
+ compatibility fallback.
- api/streaming.py finally-block cleanup pops STREAM_LAST_EVENT_ID.
-
-The queue tuple shape is preserved as (event, data), so existing tests like
-test_cancel_puts_sentinel_in_queue still work.
"""
from pathlib import Path
@@ -28,6 +26,7 @@
STREAMING_PY = (REPO_ROOT / "api" / "streaming.py").read_text(encoding="utf-8")
ROUTES_PY = (REPO_ROOT / "api" / "routes.py").read_text(encoding="utf-8")
CONFIG_PY = (REPO_ROOT / "api" / "config.py").read_text(encoding="utf-8")
+GATEWAY_CHAT_PY = (REPO_ROOT / "api" / "gateway_chat.py").read_text(encoding="utf-8")
def test_stream_last_event_id_dict_exists_in_config():
@@ -54,16 +53,27 @@ def test_put_writes_event_id_to_side_channel_dict():
)
-def test_queue_tuple_shape_preserved_as_two_tuple():
- """The queue still uses 2-tuples (event, data) so existing consumers
- that unpack `event, data = q.get()` are not broken."""
+def test_stream_channel_queue_item_carries_per_event_id_with_legacy_fallback():
+ """StreamChannel queue items need per-frame ids; legacy queues stay 2-tuples."""
put_def_idx = STREAMING_PY.find("def put(event, data):")
put_body = STREAMING_PY[put_def_idx:put_def_idx + 2500]
- assert "q.put_nowait((event, data))" in put_body, (
- "Queue tuple shape must remain (event, data) — changing to 3-tuple "
- "breaks 4 existing tests in test_cancel_interrupt, test_sprint42, "
- "test_sprint51, test_issue1857_usage_overwrite"
+ assert 'queue_item = (event, data, event_id) if event_id and hasattr(q, "subscribe_with_snapshot") else (event, data)' in put_body, (
+ "StreamChannel events must carry their own event_id while legacy queue "
+ "consumers retain the 2-tuple shape"
+ )
+ assert "q.put_nowait(queue_item)" in put_body
+
+
+def test_gateway_queue_item_carries_per_event_id_with_legacy_fallback():
+ """Gateway-backed WebUI chat must preserve the same live cursor invariant."""
+ put_def_idx = GATEWAY_CHAT_PY.find("def put_gateway_event(event, data):")
+ assert put_def_idx != -1, "put_gateway_event(event, data) not found"
+ put_body = GATEWAY_CHAT_PY[put_def_idx:put_def_idx + 1800]
+ assert 'queue_item = (event, data, event_id) if event_id and hasattr(q, "subscribe_with_snapshot") else (event, data)' in put_body, (
+ "Gateway live events must carry their own event_id for StreamChannel "
+ "subscribers while preserving legacy queue compatibility"
)
+ assert "q.put_nowait(queue_item)" in put_body
def test_sse_handler_reads_event_id_from_side_channel():
diff --git a/tests/test_stale_stream_cleanup.py b/tests/test_stale_stream_cleanup.py
index c66098e72e..de0f37e404 100644
--- a/tests/test_stale_stream_cleanup.py
+++ b/tests/test_stale_stream_cleanup.py
@@ -81,6 +81,80 @@ def test_chat_start_clears_stale_pending_state_not_only_active_id():
assert stale_comment_pos < cleanup_pos < stream_id_pos
+def test_chat_start_rechecks_active_stream_under_session_lock(monkeypatch, tmp_path):
+    """A concurrent chat_start must not overwrite stream ownership.
+
+    The first request can pass the pre-lock active_stream_id check while another
+    request is waiting/running. Once this request enters the session lock, it
+    must re-read active_stream_id and reject instead of creating a ghost stream.
+    """
+    config.STREAMS.clear()
+    config.SESSION_AGENT_LOCKS.clear()
+    existing_stream_id = "already-running-stream"
+    # Id the patched uuid4 below hands out; it must never end up registered.
+    ghost_stream_id = "new-stream"
+
+    class ChatStartSession:
+        session_id = "duplicate-start-session"
+
+        def __init__(self):
+            self.active_stream_id = None
+            self.pending_user_message = None
+            self.pending_attachments = []
+            self.pending_started_at = None
+            self.messages = []
+            self.title = "Untitled"
+            self.worktree_path = None
+            self.workspace = None
+            self.model = None
+            self.model_provider = None
+
+        def save(self, *args, **kwargs):
+            return None
+
+    session = ChatStartSession()
+
+    class MutatingSessionLock:
+        # Entering the lock makes the session look as if another start request
+        # had claimed it in the meantime.
+        def __enter__(self):
+            session.active_stream_id = existing_stream_id
+            session.pending_user_message = "prompt already claimed by another start"
+            session.pending_started_at = 123.0
+            routes.STREAMS[existing_stream_id] = queue.Queue()
+            return self
+
+        def __exit__(self, exc_type, exc, tb):
+            return False
+
+    class NoopThread:
+        # Stand-in for threading.Thread: records its arguments, never runs.
+        def __init__(self, *args, **kwargs):
+            self.args = args
+            self.kwargs = kwargs
+
+        def start(self):
+            return None
+
+    monkeypatch.setattr(routes, "_get_session_agent_lock", lambda sid: MutatingSessionLock())
+    monkeypatch.setattr(routes.uuid, "uuid4", lambda: type("FakeUuid", (), {"hex": ghost_stream_id})())
+    monkeypatch.setattr(routes, "set_last_workspace", lambda workspace: None)
+    monkeypatch.setattr(routes, "create_stream_channel", lambda: queue.Queue())
+    monkeypatch.setattr(routes.threading, "Thread", NoopThread)
+
+    try:
+        response = routes._start_chat_stream_for_session(
+            session,
+            msg="please start once",
+            attachments=[],
+            workspace=str(tmp_path),
+            model="test-model",
+            model_provider=None,
+        )
+
+        assert response["_status"] == 409
+        assert response["active_stream_id"] == existing_stream_id
+        assert session.active_stream_id == existing_stream_id
+        assert ghost_stream_id not in routes.STREAMS
+    finally:
+        # Remove the ghost id too: if the re-check regresses, the failing
+        # assertion above would otherwise leave it in the module-level
+        # routes.STREAMS for whichever test runs next.
+        for stream_id in (existing_stream_id, ghost_stream_id):
+            routes.STREAMS.pop(stream_id, None)
+
+
def test_stale_stream_cleanup_does_not_clobber_concurrent_chat_start(monkeypatch):
"""Regression for #1533: stale cleanup must not erase a new stream id.
diff --git a/tests/test_streaming_race_fix.py b/tests/test_streaming_race_fix.py
index 36aed1f807..cfaea91c26 100644
--- a/tests/test_streaming_race_fix.py
+++ b/tests/test_streaming_race_fix.py
@@ -51,7 +51,9 @@ def test_schedule_render_guards_on_stream_finalized(self):
def test_raf_handle_stored_in_schedule_render(self):
src = read('static/messages.js')
- assert '_pendingRafHandle=requestAnimationFrame' in src or \
+ assert '_pendingRafHandle=_pendingRafFrameHandle' in src or \
+ '_pendingRafHandle = _pendingRafFrameHandle' in src or \
+ '_pendingRafHandle=requestAnimationFrame' in src or \
'_pendingRafHandle = requestAnimationFrame' in src, (
"rAF handle must be stored in _pendingRafHandle for cancellation"
)
diff --git a/tests/test_tars_scroll_reset_regressions.py b/tests/test_tars_scroll_reset_regressions.py
index 9d5fa79592..ba4543b2c7 100644
--- a/tests/test_tars_scroll_reset_regressions.py
+++ b/tests/test_tars_scroll_reset_regressions.py
@@ -78,17 +78,21 @@ def test_message_scroll_listener_does_not_downgrade_explicit_bottom_pin_on_first
def test_user_scroll_cancels_delayed_bottom_settling():
listener_block = _scroll_listener_block()
record = _function_body(UI_JS, "function _recordNonMessageScrollIntent")
+ pinned = _function_body(UI_JS, "function scrollIfPinned")
assert "function _cancelBottomSettle" in UI_JS
assert "_cancelBottomSettle();" in listener_block
assert "e.deltaY<0" in record
assert "_cancelBottomSettle();" in record
assert "_scrollPinned=false" in record
+ assert "if(_messageUserUnpinned) return;" in pinned
+ assert "_recentMessageUpwardIntent()" not in pinned
def test_preserve_scroll_restores_unpinned_viewport_after_dom_rebuild():
render = _function_body(UI_JS, "function renderMessages")
after_render = _function_body(UI_JS, "function _scrollAfterMessageRender")
+ follow = _function_body(UI_JS, "function _followMessagesAfterDomReplace")
restore = _function_body(UI_JS, "function _restoreMessageScrollSnapshot")
snapshot_idx = render.index("const scrollSnapshot=preserveScroll?_captureMessageScrollSnapshot():null")
@@ -99,7 +103,9 @@ def test_preserve_scroll_restores_unpinned_viewport_after_dom_rebuild():
"renderMessages({preserveScroll:true}) must capture #messages.scrollTop before "
"replacing transcript DOM, then pass that snapshot to the post-render scroll helper"
)
- assert "if(_scrollPinned) scrollIfPinned()" in after_render
- assert "else _restoreMessageScrollSnapshot(scrollSnapshot)" in after_render
+ assert "if(_followMessagesAfterDomReplace()) return;" in after_render
+ assert "_restoreMessageScrollSnapshot(scrollSnapshot)" in after_render
+ assert "_shouldFollowMessagesOnDomReplace()" in follow
+ assert "scrollToBottom();" in follow
assert "el.scrollTop=Math.max(0,Math.min(Number(snapshot.top)||0,maxTop))" in restore
assert "_programmaticScroll=true" in restore
diff --git a/tests/test_turn_duration_display.py b/tests/test_turn_duration_display.py
index 2bd1aef528..ae1ea6c342 100644
--- a/tests/test_turn_duration_display.py
+++ b/tests/test_turn_duration_display.py
@@ -53,10 +53,14 @@ def test_ui_formats_and_renders_turn_duration_in_footer_and_activity_summary():
"Compact tool activity summary should have a dedicated duration span at the end of the line."
)
assert "data-turn-duration" in UI_JS, (
- "Activity groups need a stable data-turn-duration hook so settled duration can update the summary."
+ "The spec Activity summary needs a stable data-turn-duration hook so settled duration can update its summary."
)
- assert "compactActivityForMessage" in UI_JS, (
- "When compact activity is present, duration should live on the Activity row "
+ assert "turnDuration:includeTurnDuration?_turnDurationForAnchor(anchorRow):undefined" in UI_JS, (
+ "Settled compact activity should put turn duration on the first spec Activity row, "
+ "not resurrect the legacy top Run Activity."
+ )
+ assert "compactWorklogForMessage" in UI_JS, (
+ "When folded Worklog detail is present, duration should live on the Worklog row "
"instead of being duplicated in the assistant footer."
)
assert ".msg-duration-inline" in CSS and ".tool-call-group-duration" in CSS, (
@@ -73,6 +77,11 @@ def test_active_compact_activity_elapsed_timer_uses_persisted_start_time():
"send() should copy chat-start pending_started_at into S.session before "
"attaching the live stream."
)
+ assert "showLiveRunStatus(activeSid,{startedAt:_startedAt});" in MESSAGES_JS, (
+ "The first chat-start path should show the bottom live footer timer as soon "
+ "as stream_id and pending_started_at are known; reconnect should not be the "
+ "only path that restores it."
+ )
assert "function _formatActiveElapsedTimer" in UI_JS and "padStart(2,'0')" in UI_JS, (
"ui.js should format the running timer in MM:SS form."
)
@@ -88,3 +97,18 @@ def test_active_compact_activity_elapsed_timer_uses_persisted_start_time():
"The active elapsed label should tick while running and clear its interval "
"on terminal/error/session-switch cleanup paths."
)
+
+
+def test_live_footer_timer_is_re_synced_after_message_rerender():
+ assert "function _syncLiveRunStatusAfterRender()" in UI_JS, (
+ "renderMessages() needs a dedicated helper so the live footer timer "
+ "can be restored after DOM rebuilds."
+ )
+ assert "_syncLiveRunStatusAfterRender();" in UI_JS, (
+ "renderMessages() should call the live-status sync helper after it "
+ "rebuilds msgInner."
+ )
+ assert "showLiveRunStatus(sid,{startedAt,tokens:_liveRunStatusTokens});" in UI_JS, (
+ "If the timer node was torn down during a rerender, the helper should "
+ "recreate it for the active session."
+ )
diff --git a/tests/test_ui_card_animation.py b/tests/test_ui_card_animation.py
index 3a84f4aca0..1cbbd9e77b 100644
--- a/tests/test_ui_card_animation.py
+++ b/tests/test_ui_card_animation.py
@@ -8,20 +8,20 @@
def test_tool_card_toggle_uses_transformable_layout_and_transition():
- assert ".tool-card-toggle{" in COMPACT_CSS
+ assert ".tool-card-toggle,.tl-caret{" in COMPACT_CSS
assert "display:inline-flex" in COMPACT_CSS
assert "transition:transform.18sease" in COMPACT_CSS
def test_tool_card_detail_uses_transitionable_collapsed_state():
- assert ".tool-card-detail{display:block;max-height:0;opacity:0;overflow:hidden;" in COMPACT_CSS
+ assert ".tool-card-detail,.tl-detail{display:block;max-height:0;opacity:0;overflow:hidden;" in COMPACT_CSS
assert re.search(
- r"\.tool-card\.open\s+\.tool-card-detail\s*\{[^}]*max-height:\s*600px;[^}]*opacity:\s*1;",
+ r"\.tool-card\.open\s+\.tool-card-detail,\s*\.tl\.open\s+\.tl-detail\s*\{[^}]*max-height:\s*320px;[^}]*opacity:\s*1;",
STYLE_CSS,
)
# Open state must set overflow to auto so the inner
# scroll is not clipped (#1170).
assert re.search(
- r"\.tool-card\.open\s+\.tool-card-detail\s*\{[^}]*overflow:\s*auto;",
+ r"\.tool-card\.open\s+\.tool-card-detail,\s*\.tl\.open\s+\.tl-detail\s*\{[^}]*overflow:\s*auto;",
STYLE_CSS,
)
diff --git a/tests/test_ui_tool_call_cleanup.py b/tests/test_ui_tool_call_cleanup.py
index 4d6c8d7364..20806879e6 100644
--- a/tests/test_ui_tool_call_cleanup.py
+++ b/tests/test_ui_tool_call_cleanup.py
@@ -4,8 +4,10 @@
source files, isolate the relevant function/rule, and assert implementation
invariants before changing the UI.
"""
+import json
import pathlib
import re
+import subprocess
REPO = pathlib.Path(__file__).parent.parent
UI_JS = (REPO / "static" / "ui.js").read_text(encoding="utf-8")
@@ -70,6 +72,86 @@ def _function_body(src: str, name: str) -> str:
return src[brace + 1:i - 1]
+def _function_src(src: str, name: str) -> str:
+ match = re.search(rf"function\s+{re.escape(name)}\s*\(", src)
+ assert match, f"{name}() not found"
+ brace = src.find("{", match.end())
+ assert brace != -1, f"{name}() has no body"
+ depth = 1
+ i = brace + 1
+ in_string = None
+ escaped = False
+ in_line_comment = False
+ in_block_comment = False
+ while i < len(src) and depth:
+ ch = src[i]
+ nxt = src[i + 1] if i + 1 < len(src) else ""
+ if in_line_comment:
+ if ch == "\n":
+ in_line_comment = False
+ i += 1
+ continue
+ if in_block_comment:
+ if ch == "*" and nxt == "/":
+ in_block_comment = False
+ i += 2
+ continue
+ i += 1
+ continue
+ if in_string:
+ if escaped:
+ escaped = False
+ elif ch == "\\":
+ escaped = True
+ elif ch == in_string:
+ in_string = None
+ i += 1
+ continue
+ if ch == "/" and nxt == "/":
+ in_line_comment = True
+ i += 2
+ continue
+ if ch == "/" and nxt == "*":
+ in_block_comment = True
+ i += 2
+ continue
+ if ch in "'\"`":
+ in_string = ch
+ i += 1
+ continue
+ if ch == "{":
+ depth += 1
+ elif ch == "}":
+ depth -= 1
+ i += 1
+ assert depth == 0, f"{name}() body did not close"
+ return src[match.start():i]
+
+
+def _run_thinking_echo_helper(*args: str) -> str:
+ helpers = "\n".join(
+ _function_src(UI_JS, name)
+ for name in (
+ "_stripXmlToolCallsDisplay",
+ "_sanitizeThinkingDisplayText",
+ "_normalizeThinkingEchoCompare",
+ "_stripVisibleAssistantEchoFromThinking",
+ )
+ )
+ script = (
+ helpers
+ + "\nconst args=JSON.parse(process.argv[1]);"
+ + "\nprocess.stdout.write(JSON.stringify(_stripVisibleAssistantEchoFromThinking(...args)));"
+ )
+ out = subprocess.run(
+ ["node", "-e", script, json.dumps(list(args))],
+ check=True,
+ capture_output=True,
+ text=True,
+ ).stdout
+ return json.loads(out)
+
+
class TestToolCallGroupingStatic:
def test_simplified_tool_calling_setting_is_wired_through_frontend(self):
assert "settingsSimplifiedToolCalling" in (REPO / "static" / "index.html").read_text(encoding="utf-8"), (
@@ -99,11 +181,14 @@ def test_simplified_tool_calling_autosave_hot_applies_renderer_mode(self):
def test_render_messages_gates_settled_activity_grouping(self):
fn = _function_body(UI_JS, "renderMessages")
helper = _function_body(UI_JS, "ensureActivityGroup")
- assert "isSimplifiedToolCalling()" in fn, (
- "Settled compact inline activity rendering should be gated by the Compact tool activity toggle."
+ assert "byActivity = new Map()" in fn, (
+ "Settled tool rendering should bucket by worklog segments/bursts."
+ )
+ assert "_toolWorklogListEl(group)" in fn, (
+ "Settled tools should render through the worklog list container."
)
- assert "tool-cards-toggle" in fn, (
- "The non-simplified path should preserve the upstream loose tool-card controls."
+ assert "_syncToolCallGroupSummary(state.group)" in fn, (
+ "Settled worklog groups should refresh summary state."
)
assert "data-tool-call-group" in helper, (
"Tool-call groups need a stable data-tool-call-group attribute for CSS and tests."
@@ -156,16 +241,20 @@ def test_activity_summary_keeps_header_compact_without_tool_names_or_thinking_pr
def test_live_tool_cards_use_grouping_only_when_simplified(self):
live_fn = _function_body(UI_JS, "appendLiveToolCard")
settled_fn = _function_body(UI_JS, "renderMessages")
- assert "isSimplifiedToolCalling()" in live_fn, (
- "Live streaming tool cards should branch on the Compact tool activity timeline mode."
+ assert "isSimplifiedToolCalling()" not in live_fn, (
+ "Live streaming tool cards should no longer branch on compact/timeline mode."
)
- assert "ensureActivityGroup" in live_fn, (
- "Compact live tool rendering should use the grouped activity container."
+ assert "ensureLiveWorklogContainer" in live_fn, (
+ "Live tool rendering should use the direct Worklog container."
)
- assert "toolRunningRow" in live_fn, (
- "The non-simplified live tool path should preserve the upstream running-dots row."
+ assert "ensureActivityGroup" not in live_fn, (
+ "Live tool rendering must not show the settled L1 Activity summary while streaming."
)
- assert "buildToolCard" in live_fn and "buildToolCard" in settled_fn, (
+ assert "_toolWorklogListEl(group)" in live_fn, (
+ "Live tool cards should insert into the worklog list container."
+ )
+ step_fn = _function_body(UI_JS, "_appendWorklogStep")
+ assert "buildToolCard" in live_fn and "buildToolCard" in step_fn and "_appendWorklogStep" in settled_fn, (
"Live and settled tool rendering should share buildToolCard() for consistent markup."
)
assert "data-live-tid" in live_fn, (
@@ -201,18 +290,31 @@ def test_activity_disclosure_state_is_session_and_turn_scoped(self):
assert "live:" in live_fn + thinking_fn, (
"Live Activity groups should be keyed by active stream id."
)
- assert "_copyActivityDisclosureState('live:'+streamId, 'assistant:'" in done_fn, (
- "When a live turn settles, its saved disclosure state should transfer to the persisted assistant turn."
+ assert "_copyActivityDisclosureState('live:'+streamId, 'assistant:'" not in done_fn, (
+ "Live disclosure state must not transfer to the final assistant turn; final L1 starts collapsed."
)
- def test_live_tool_activity_defaults_collapsed_unless_saved_open(self):
+ def test_live_tool_worklog_is_direct_until_settled(self):
live_fn = _function_body(UI_JS, "appendLiveToolCard")
+ live_container = _function_body(UI_JS, "ensureLiveWorklogContainer")
helper = _function_body(UI_JS, "ensureActivityGroup")
- assert "collapsed:false" not in re.sub(r"\s+", "", live_fn), (
- "Compact live tool activity should not force-open every time a chat is revisited."
+ assert "ensureLiveWorklogContainer" in live_fn, (
+ "Live tool events should append into the direct Worklog timeline."
+ )
+ assert "tool-worklog-list" in live_container and "data-live-worklog-shell" in live_container, (
+ "The direct live Worklog shell should own the L2 list without an L1 summary row."
+ )
+ assert "activity-summary" not in live_container and "tool-call-group-summary" not in live_container, (
+ "The settled Activity summary should not be present while the stream is running."
)
assert "savedState==='open'" in helper or 'savedState==="open"' in helper, (
- "A previously-open Activity group should still restore open from persisted state."
+ "Live Activity groups can still restore explicit live open state."
+ )
+ assert "if(live && savedState==='open')" in helper or 'if(live && savedState==="open")' in helper, (
+ "Saved open state must be scoped to live groups so final L1 defaults collapsed."
+ )
+ assert "savedState==='closed'" in helper or 'savedState==="closed"' in helper, (
+ "A saved closed Activity group should still override the live expanded default."
)
def test_live_activity_summary_shows_readable_progress_without_persisted_content(self):
@@ -234,30 +336,103 @@ def test_live_activity_summary_shows_readable_progress_without_persisted_content
"Readable progress must not reintroduce the noisy secondary tool-name list."
)
- def test_live_thinking_suppresses_visible_interim_echoes(self):
+ def test_terminal_worklog_titles_summarize_common_diagnostic_commands(self):
+ start = UI_JS.find("function _toolCommandTitle")
+ end = UI_JS.find("function _toolQueryTitle", start)
+ assert start != -1 and end != -1, "_toolCommandTitle() source window not found"
+ command_fn = UI_JS[start:end]
+ assert "git fetch" in command_fn and "git ahead/behind" in command_fn, (
+ "Terminal Worklog rows should distinguish common git audit commands "
+ "instead of falling back to the generic 'command' title."
+ )
+ assert "git log" in command_fn, (
+ "Commit/PR audit commands should show a git log title instead of "
+ "the generic command fallback."
+ )
+ assert "health check" in command_fn, (
+ "curl localhost /health checks should get a readable L2 title."
+ )
+ assert "process check" in command_fn and "port ${m[1]} check" in command_fn, (
+ "ps/grep and lsof diagnostics should be scannable in L2 while full "
+ "commands remain in L3 detail."
+ )
+ assert "launchctl" in command_fn, (
+ "launchd service checks should keep their service intent visible in "
+ "the Worklog row title."
+ )
+ assert "return _shortToolLabel(normalized,72);" in command_fn, (
+ "Long shell diagnostics should still expose a short L2 command "
+ "summary instead of falling back to the bare 'command' title."
+ )
+
+ def test_live_thinking_does_not_rewrite_visible_interim_echoes(self):
interim_match = re.search(r"source\.addEventListener\('interim_assistant',e=>\{(.*?)\n\s*\}\);", MESSAGES_JS, re.S)
assert interim_match, "interim_assistant listener not found"
interim_fn = interim_match.group(1)
live_thinking_fn = _function_body(MESSAGES_JS, "_liveThinkingText")
assert "visibleInterimSnippets.push(visible)" in interim_fn, (
- "Visible interim commentary should be remembered so the live Thinking card does not echo it."
+ "Visible interim commentary should remain available for process-prose boundaries."
+ )
+ assert "_stripLiveVisibleAssistantEchoFromThinking" not in live_thinking_fn, (
+ "Live Thinking should not run content-level echo suppression; the card is already low-priority Worklog detail."
+ )
+ assert "String(liveReasoningText||'').trim()" in live_thinking_fn, (
+ "Live Thinking should render the provider reasoning text as-is after normal trimming."
)
- assert "_stripLiveVisibleAssistantEchoFromThinking" in live_thinking_fn, (
- "Live Thinking text should suppress exact visible interim commentary echoes."
+
+ def test_settled_exact_duplicate_thinking_suppressed(self):
+ assert _run_thinking_echo_helper(
+ " I will check the PR status.\nThen inspect the diff. ",
+ "I will check the PR status. Then inspect the diff.",
+ "The final answer is different.",
+ ) == "", (
+ "Settled Thinking should be suppressed when normalized text exactly "
+ "matches visible process prose."
+ )
+
+ def test_genuine_reasoning_preserved_when_not_exact(self):
+ reasoning = "I need to inspect the stream state before deciding."
+ assert _run_thinking_echo_helper(
+ reasoning,
+ "I need to inspect the stream state.",
+ "The stream was running.",
+ ) == reasoning, (
+ "Non-exact reasoning should stay available as a Worklog Thinking Card."
)
+ helper = _function_body(UI_JS, "_stripVisibleAssistantEchoFromThinking")
+ assert ".split(snippet).join('')" not in helper
+ assert ".includes(" not in helper
+
+ def test_reasoning_first_interim_later_does_not_duplicate_settled_worklog(self):
+ render_fn = _function_body(UI_JS, "renderMessages")
+ helper = _function_body(UI_JS, "_worklogReasoningTextFromMessage")
+ assert "assistantTurnFinalVisibleContentByRawIdx" in render_fn, (
+ "renderMessages must compute current assistant-turn final text so "
+ "reasoning-first/interim-later turns can be compared at settlement."
+ )
+ assert "assistantTurnVisibleContentByRawIdx" in render_fn, (
+ "If done-time reasoning is attached to the final assistant message, "
+ "settlement must still compare against earlier visible process prose "
+ "from the same assistant turn."
+ )
+ assert "_worklogReasoningTextFromMessage(m, rawIdx, toolCallAssistantIdxs, displayContent, turnFinalVisibleContent, turnVisibleContents)" in render_fn
+ assert "_stripVisibleAssistantEchoFromThinking(thinkingText, visibleContent, turnFinalVisibleContent, ...visibleTexts)" in helper
+ assert _run_thinking_echo_helper(
+ "I am checking the 3401 review blocker.",
+ "I am checking the 3401 review blocker.",
+ "Conclusion: Thinking dedupe needs a small fix.",
+ ) == ""
- def test_settled_thinking_suppresses_visible_assistant_echoes(self):
+ def test_settled_thinking_uses_exact_dedupe_not_live_rewrite(self):
render_fn = _function_body(UI_JS, "renderMessages")
helper = _function_body(UI_JS, "_stripVisibleAssistantEchoFromThinking")
- assert "_stripVisibleAssistantEchoFromThinking(thinkingText, displayContent)" in render_fn, (
- "Settled Thinking cards should not repeat text already rendered as visible assistant content."
+ assert "_stripVisibleAssistantEchoFromThinking(thinkingText, displayContent)" not in render_fn, (
+ "Settled Thinking dedupe needs process prose plus turn-final answer, "
+ "not the old single visible-text input."
)
- assert "s.length>=20" in helper, (
- "Thinking echo suppression should ignore tiny snippets to avoid over-stripping reasoning."
- )
- assert "out.split(snippet).join('')" in helper, (
- "Thinking echo suppression should remove exact visible assistant snippets from reasoning display."
+ assert "_normalizeThinkingEchoCompare" in helper and "visibleNorm===thinkingNorm" in helper, (
+ "Settled Thinking dedupe must be exact / normalized-exact only."
)
def test_compact_activity_keeps_thinking_cards_after_session_switch(self):
@@ -270,20 +445,26 @@ def test_compact_activity_keeps_thinking_cards_after_session_switch(self):
)
render_fn = _function_body(UI_JS, "renderMessages")
assert "isSimplifiedToolCalling()" in render_fn and "assistantThinking.set(rawIdx, thinkingText)" in render_fn, (
- "Compact settled transcript rendering should preserve Thinking cards after switching sessions."
+ "Compact settled transcript rendering should keep reasoning metadata available without promoting it to visible prose."
+ )
+ helper = _function_body(UI_JS, "_worklogReasoningTextFromMessage")
+ assert "_assistantReasoningPayloadText(m)" in helper and "_stripVisibleAssistantEchoFromThinking" in helper, (
+ "Provider reasoning metadata should feed a sanitized Worklog Thinking Card "
+ "after settled exact-duplicate suppression."
+ )
+ assert "data-worklog-thinking-card" in UI_JS, (
+ "Thinking should be an explicit Worklog item, independent from Tool Cards."
)
- # #3709: the Activity disclosure now renders the TURN's merged thinking
- # (mergedThinking — all of a turn's thinking de-duped, incl. a suppressed
- # thinking-only sibling) rather than a single message's entry. Same node,
- # same Activity body — only the source variable changed.
- assert "_thinkingActivityNode(mergedThinking, false)" in render_fn, (
- "Settled Thinking cards should render inside the compact Activity disclosure."
+ render_min = re.sub(r"\s+", "", render_fn)
+ assert "thinkingKey:thinkingText?`thinking:${_normalizeThinkingEchoCompare(thinkingText)}`:''" in render_min, (
+ "Settled Worklog should key Thinking Cards by normalized content so exact duplicate "
+ "Thinking from sibling messages does not render twice."
)
- assert "body.appendChild(_thinkingActivityNode(mergedThinking, false))" in render_fn, (
- "Settled Thinking cards should stay inside the same Activity body as the related tools."
+ assert "_appendWorklogStep" in render_fn, (
+ "Visible assistant anchors, Thinking Cards, and tools should still build the compact Worklog disclosure."
)
- assert ".agent-activity-thinking:not([data-live-thinking=\"1\"])" in render_fn, (
- "Settled rerenders must remove previously inserted Thinking activity rows before rebuilding."
+ assert ".wl-reason[data-worklog-reason-source=\"reasoning\"]" in render_fn, (
+ "Settled rerenders must remove previously inserted reasoning Worklog rows before rebuilding."
)
assert "seg.insertAdjacentHTML('beforeend', _thinkingCardHtml(thinkingText))" in render_fn, (
"The non-simplified path should preserve standalone settled thinking cards."
@@ -293,17 +474,17 @@ def test_live_visible_interim_text_preserves_timeline_boundary(self):
live_thinking_fn = _function_body(UI_JS, "appendThinking")
live_tool_fn = _function_body(UI_JS, "appendLiveToolCard")
helper = _function_body(UI_JS, "ensureActivityGroup")
- assert "isSimplifiedToolCalling()" in live_thinking_fn, (
- "Live thinking should branch on the Compact tool activity toggle."
+ assert "_worklogReasonNodeFromText(thinkingText" not in live_thinking_fn, (
+ "Provider reasoning should not render as live Worklog process prose."
)
- assert "_thinkingActivityNode(thinkingText, false)" in live_thinking_fn, (
- "Compact live thinking should render inside the Activity disclosure."
+ assert "_thinkingActivityNode(clean, false)" in live_thinking_fn and "data-live-thinking" in live_thinking_fn, (
+ "Live provider thinking should render as a collapsed Worklog Thinking Card."
)
- assert "ensureActivityGroup(blocks,{live:true" in live_thinking_fn and "body.appendChild(row)" in live_thinking_fn, (
- "Compact live thinking should share the same Activity body as live tool cards."
+ assert "ensureLiveWorklogContainer" in live_thinking_fn, (
+ "Live Thinking Cards should use the shared Worklog container, not a Tool Card group."
)
assert "removeAttribute('data-live-activity-current')" not in live_thinking_fn, (
- "Reasoning/Thinking updates alone should not split consecutive tools into one-tool Activity rows."
+ "Reasoning/Thinking updates alone should not split consecutive tools into one-tool Worklog rows."
)
assert '.tool-call-group[data-live-tool-call-group="1"][data-live-activity-current="1"]' in helper, (
"Live tool cards should only reuse the current Activity burst, not the first group in the turn."
@@ -311,15 +492,15 @@ def test_live_visible_interim_text_preserves_timeline_boundary(self):
assert "group.setAttribute('data-live-activity-current','1')" in helper, (
"New live Activity bursts must be marked current so later tools append to the right group."
)
- assert "body.querySelector" in live_tool_fn and "data-live-tid" in live_tool_fn, (
+ assert "querySelector" in live_tool_fn and "data-live-tid" in live_tool_fn, (
"tool_complete must still update its current live Activity burst by tool id."
)
finalize_fn = _function_body(UI_JS, "finalizeThinkingCard")
- assert "turn.querySelector('.agent-activity-thinking[data-thinking-active=\"1\"]')" in finalize_fn, (
- "Compact Thinking cards live inside the assistant turn, so finalization must clear the active marker from the whole turn."
+ assert "turn.querySelector('.wl-reason[data-worklog-reason-active=\"1\"]')" in finalize_fn, (
+ "Finalization should still clean up any legacy active reasoning marker."
)
- assert "body.querySelector('.agent-activity-thinking[data-thinking-active=\"1\"]')" in live_thinking_fn and "setAttribute('data-thinking-active','1')" in live_thinking_fn, (
- "Compact live thinking should reactivate the latest existing Thinking card instead of stacking a new card after every tool boundary."
+ assert "data-worklog-reason-active" not in live_thinking_fn, (
+ "New live reasoning text should not create active Worklog prose rows."
)
reset_fn = _function_body(MESSAGES_JS, "_resetAssistantSegment")
assert "function closeCurrentLiveActivityGroup()" in UI_JS, (
@@ -347,6 +528,41 @@ def test_live_visible_interim_text_preserves_timeline_boundary(self):
"Tool starts must not split consecutive tools into one-tool Activity rows."
)
+ def test_live_thinking_card_is_segment_scoped_not_global_singleton(self):
+ live_thinking_fn = _function_body(UI_JS, "appendThinking")
+ placement_fn = _function_body(MESSAGES_JS, "_liveThinkingPlacement")
+ update_fn = _function_body(MESSAGES_JS, "_updateLiveThinkingCard")
+ interim_match = re.search(r"source\.addEventListener\('interim_assistant',e=>\{(.*?)\n\s*\}\);", MESSAGES_JS, re.S)
+ assert interim_match, "interim_assistant listener not found"
+ interim_fn = interim_match.group(1)
+
+ assert "data-live-thinking-key" in live_thinking_fn, (
+ "Live Thinking rows need a segment/burst key so later reasoning does not update "
+ "the first Thinking Card in the turn."
+ )
+ assert 'data-live-thinking="1"][data-live-thinking-key="' in live_thinking_fn, (
+ "appendThinking() must query the current segment's live Thinking Card, not a "
+ "turn-global singleton."
+ )
+ assert "segmentSeq" in placement_fn and "_currentLiveSegmentSeq" in placement_fn, (
+ "Thinking placement should reuse the live segment sequence instead of inventing "
+ "a second placement model."
+ )
+ assert "burstId:_currentActivityBurstId" in placement_fn, (
+ "Thinking placement should carry the current activity burst for Worklog ordering."
+ )
+ assert "updateThinking(text, opts)" in update_fn, (
+ "messages.js should pass segment placement into the UI Thinking helper."
+ )
+ assert "updateThinking('')" not in interim_fn, (
+ "Live interim boundaries should finalize the current Thinking Card instead of "
+ "clearing it mid-stream."
+ )
+ assert "finalizeThinkingCard()" in interim_fn, (
+ "Visible interim assistant progress must close the current Thinking segment "
+ "before the next segment starts."
+ )
+
def test_live_compression_card_splits_current_tool_activity_burst(self):
compression_fn = _function_body(UI_JS, "appendLiveCompressionCard")
close_fn = _function_body(UI_JS, "closeCurrentLiveActivityGroup")
@@ -415,28 +631,67 @@ def test_default_skin_preview_stays_upstream(self):
def test_tool_card_css_uses_design_tokens_for_chrome(self):
css_min = re.sub(r"\s+", "", CSS)
assert ".tool-card{" in css_min, ".tool-card rule missing"
- assert "border-radius:var(--radius-card)" in css_min, (
- ".tool-card border radius should use --radius-card, not hardcoded px."
- )
- assert "background:var(--surface-subtle)" in css_min, (
- ".tool-card background should use --surface-subtle."
- )
- assert "border:1pxsolidvar(--border-subtle)" in css_min, (
- ".tool-card border should use --border-subtle."
+ tool_card_rule = css_min.rsplit(".tool-card{", 1)[1].split("}", 1)[0]
+ rows_rule = css_min.split(".tg-rows{", 1)[1].split("}", 1)[0]
+ assert "background:transparent" in tool_card_rule
+ assert "border:0" in tool_card_rule
+ assert "border-left:0" in tool_card_rule
+ assert "border-left:1pxsolidvar(--border-subtle)" in rows_rule, (
+ "Nested tool groups should be expressed with only a subtle left guide line."
)
def test_tool_card_header_and_text_use_spacing_and_font_tokens(self):
css_min = re.sub(r"\s+", "", CSS)
assert ".tool-card-header{" in css_min, ".tool-card-header rule missing"
- assert "gap:var(--space-2)" in css_min, (
- ".tool-card-header gap should use --space-2."
- )
- assert "padding:var(--space-1)var(--space-3)" in css_min, (
- ".tool-card-header padding should use spacing tokens."
- )
- assert ".tool-card-name{" in css_min and "font-size:var(--font-size-xs)" in css_min, (
- ".tool-card-name should use --font-size-xs."
- )
- assert ".tool-card-preview{" in css_min and "font-size:var(--font-size-xs)" in css_min, (
- ".tool-card-preview should use --font-size-xs."
+ header_rule = css_min.rsplit(".tool-card-header{", 1)[1].split("}", 1)[0]
+ title_rule = css_min.split(".tl-title{", 1)[1].split("}", 1)[0]
+ assert "gap:7px" in header_rule
+ assert "padding:3px8px" in header_rule
+ assert "border-radius:7px" in header_rule
+ assert ".tool-card-name{" in css_min and "font-size:var(--message-body-font-size)" in css_min
+ assert "font-size:var(--message-body-font-size)" in title_rule
+ assert "font-family:var(--font-mono)" in title_rule
+
+ def test_worklog_thinking_card_uses_quiet_tool_row_hierarchy(self):
+ selector = ".tool-worklog-list > .agent-activity-thinking .thinking-card,"
+ assert selector in CSS, "Worklog Thinking Card quiet override missing"
+ card_rule = re.sub(r"\s+", "", CSS.split(selector, 1)[1].split("}", 1)[0])
+ header_rule = re.sub(
+ r"\s+",
+ "",
+ CSS.split(".tool-worklog-list > .agent-activity-thinking .thinking-card-header{", 1)[1].split("}", 1)[0],
+ )
+ label_rule = re.sub(
+ r"\s+",
+ "",
+ CSS.split(".tool-worklog-list > .agent-activity-thinking .thinking-card-label{", 1)[1].split("}", 1)[0],
+ )
+ icon_rule = re.sub(
+ r"\s+",
+ "",
+ CSS.split(".tool-worklog-list > .agent-activity-thinking .thinking-card-icon,", 1)[1].split("}", 1)[0],
+ )
+ body_rule = re.sub(
+ r"\s+",
+ "",
+ CSS.split(".tool-worklog-list > .agent-activity-thinking .thinking-card.open .thinking-card-body{", 1)[1].split("}", 1)[0],
+ )
+ pre_rule = re.sub(
+ r"\s+",
+ "",
+ CSS.split(".tool-worklog-list > .agent-activity-thinking .thinking-card-body pre{", 1)[1].split("}", 1)[0],
)
+
+ assert "background:transparent" in card_rule
+ assert "border:0" in card_rule
+ assert "border-radius:0" in card_rule
+ assert "display:flex" in header_rule and "align-items:center" in header_rule
+ assert "color:var(--muted)" in header_rule
+ assert "font-size:var(--message-body-font-size)" in header_rule
+ assert "font-weight:400" in header_rule
+ assert "font-weight:400" in label_rule
+ assert "letter-spacing:0" in label_rule
+ assert "color:var(--muted)" in icon_rule
+ assert "padding:6px8px7px8px" in body_rule
+ assert "font-size:var(--message-body-font-size)" in pre_rule
+ assert "line-height:var(--message-body-line-height)" in pre_rule
diff --git a/tests/test_webui_gateway_chat_backend.py b/tests/test_webui_gateway_chat_backend.py
index cd78789a36..364472da4b 100644
--- a/tests/test_webui_gateway_chat_backend.py
+++ b/tests/test_webui_gateway_chat_backend.py
@@ -315,6 +315,7 @@ def fake_urlopen(req, timeout=0):
events = []
while not subscriber.empty():
events.append(subscriber.get_nowait())
+ event_pairs = [(item[0], item[1]) for item in events]
assert ("tool", {
"event_type": "tool.started",
"name": "terminal",
@@ -322,7 +323,7 @@ def fake_urlopen(req, timeout=0):
"args": {},
"is_error": False,
"tid": "call-1",
- }) in events
+ }) in event_pairs
assert ("tool_complete", {
"event_type": "tool.completed",
"name": "terminal",
@@ -330,7 +331,8 @@ def fake_urlopen(req, timeout=0):
"args": {},
"is_error": False,
"tid": "call-1",
- }) in events
+ }) in event_pairs
+ assert all(len(item) == 3 and item[2] for item in events)
def test_gateway_chat_worker_normalizes_prefill_slice_before_system_prefix(tmp_path, monkeypatch):