Skip to content
Merged
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ coverage.xml
*.py,cover
.hypothesis/
.pytest_cache/
.playwright_cache/
cover/

# Translations
Expand Down
6 changes: 3 additions & 3 deletions claude_code_log/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -421,9 +421,9 @@ def _is_cache_version_compatible(self, cache_version: str) -> bool:
# Format: "cache_version": "minimum_library_version_required"
# If cache version is older than the minimum required, it needs invalidation
breaking_changes: dict[str, str] = {
# Example breaking changes (adjust as needed):
# "0.3.3": "0.3.4", # 0.3.4 introduced breaking changes to cache format
# "0.2.x": "0.3.0", # 0.3.0 introduced major cache format changes
# 0.9.0 introduced _compact_ide_tags_for_preview() which transforms
# first_user_message to use emoji indicators instead of raw IDE tags
"0.8.0": "0.9.0",
}

cache_ver = version.parse(cache_version)
Expand Down
37 changes: 25 additions & 12 deletions claude_code_log/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -318,15 +318,18 @@ def deduplicate_messages(messages: list[TranscriptEntry]) -> list[TranscriptEntr
-> Same timestamp, same message.id or tool_use_id -> SHOULD deduplicate
2. Concurrent tool results: Multiple tool results with same timestamp
-> Same timestamp, different tool_use_ids -> should NOT deduplicate
3. User text messages with same timestamp but different UUIDs (branch switch artifacts)
-> Same timestamp, no tool_use_id -> SHOULD deduplicate, keep the one with most content

Args:
messages: List of transcript entries to deduplicate

Returns:
List of deduplicated messages, preserving order (first occurrence kept)
List of deduplicated messages, preserving order (first occurrence kept,
but replaced in-place if a better version is found later)
"""
# Track seen (message_type, timestamp, is_meta, session_id, content_key) tuples
seen: set[tuple[str, str, bool, str, str]] = set()
# Track seen dedup_key -> index in deduplicated list (for in-place replacement)
seen: dict[tuple[str, str, bool, str, str], int] = {}
deduplicated: list[TranscriptEntry] = []

for message in messages:
Expand All @@ -350,9 +353,10 @@ def deduplicate_messages(messages: list[TranscriptEntry]) -> list[TranscriptEntr
# Get content key for differentiating concurrent messages
# - For assistant messages: use message.id (same for stutters, different for different msgs)
# - For user messages with tool results: use first tool_use_id
# - For user text messages: use empty string (deduplicate by timestamp alone)
# - For summary messages: use leafUuid (summaries have no timestamp/uuid)
# - For other messages: use uuid as fallback
content_key = ""
is_user_text = False
if isinstance(message, AssistantTranscriptEntry):
# For assistant messages, use the message id
content_key = message.message.id
Expand All @@ -362,20 +366,29 @@ def deduplicate_messages(messages: list[TranscriptEntry]) -> list[TranscriptEntr
if isinstance(item, ToolResultContent):
content_key = item.tool_use_id
break
else:
# No tool result found - this is a user text message
is_user_text = True
# content_key stays empty (dedupe by timestamp alone)
elif isinstance(message, SummaryTranscriptEntry):
# Summaries have no timestamp or uuid - use leafUuid to keep them distinct
content_key = message.leafUuid
# Fallback to uuid if no content key found
if not content_key:
content_key = getattr(message, "uuid", "")

# Create deduplication key - include content_key for proper handling
# of both version stutters and concurrent tool results
# Create deduplication key
dedup_key = (message_type, timestamp, is_meta, session_id, content_key)

# Keep only first occurrence
if dedup_key not in seen:
seen.add(dedup_key)
if dedup_key in seen:
# For user text messages, replace if new one has more content items
if is_user_text and isinstance(message, UserTranscriptEntry):
idx = seen[dedup_key]
existing = deduplicated[idx]
if isinstance(existing, UserTranscriptEntry) and len(
message.message.content
) > len(existing.message.content):
deduplicated[idx] = message # Replace with better version
# Otherwise skip duplicate
else:
seen[dedup_key] = len(deduplicated)
deduplicated.append(message)

return deduplicated
Expand Down
14 changes: 11 additions & 3 deletions claude_code_log/html/system_formatters.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,13 +96,21 @@ def format_dedup_notice_content(content: DedupNoticeContent) -> str:
"""Format a deduplication notice as HTML.

Args:
content: DedupNoticeContent with notice text
content: DedupNoticeContent with notice text and optional target link

Returns:
HTML for the dedup notice display
HTML for the dedup notice display with optional anchor link
"""
escaped_notice = html.escape(content.notice_text)
return f"<p><em>{escaped_notice}</em></p>"

if content.target_message_id:
# Create clickable link to the target message
return (
f'<p><em><a href="#msg-{content.target_message_id}">'
f"{escaped_notice}</a></em></p>"
)
else:
return f"<p><em>{escaped_notice}</em></p>"


__all__ = [
Expand Down
2 changes: 1 addition & 1 deletion claude_code_log/html/templates/transcript.html
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ <h3>🔍 Search & Filter</h3>
{% else %}
{%- set msg_css_class = css_class_from_message(message) %}
{% set markdown = message.type in ['assistant', 'thinking'] or message.modifiers.is_compacted %}
<div class='message {{ msg_css_class }}{% if message.is_paired %} {{ message.pair_role }}{% endif %}{% for ancestor_id in message.ancestry %} {{ ancestor_id }}{% endfor %}' data-message-id='{{ message.message_id }}' id='msg-{{ message.message_id }}{% if message.is_paired and message.pair_role %}-{{ message.pair_role.replace("pair_", "") }}{% endif %}'>
<div class='message {{ msg_css_class }}{% if message.is_paired %} {{ message.pair_role }}{% endif %}{% for ancestor_id in message.ancestry %} {{ ancestor_id }}{% endfor %}' data-message-id='{{ message.message_id }}' id='msg-{{ message.message_id }}'>
<div class='header'>
{% set msg_emoji = get_message_emoji(message) -%}
<span{% if message.title_hint %} title="{{ message.title_hint }}"{% endif %}>{% if message.message_title %}{%
Expand Down
6 changes: 3 additions & 3 deletions claude_code_log/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,9 +267,6 @@ class UserTextContent(MessageContent):

Wraps user text that may have been preprocessed to extract
IDE notifications, compacted summaries, or memory input markers.

TODO: Not currently instantiated - formatter exists but pipeline uses
separate IdeNotificationContent and plain text instead.
"""

text: str
Expand Down Expand Up @@ -472,6 +469,9 @@ class DedupNoticeContent(MessageContent):
"""

notice_text: str
target_uuid: Optional[str] = None # UUID of target message (for resolving link)
target_message_id: Optional[str] = None # Resolved message ID for anchor link
original_text: Optional[str] = None # Original duplicated content (for debugging)


# =============================================================================
Expand Down
18 changes: 11 additions & 7 deletions claude_code_log/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

import json
import re
from typing import Any, Callable, Optional, Union, cast, TypeGuard
from typing import Any, Callable, Optional, Union, cast
from datetime import datetime

from anthropic.types import Message as AnthropicMessage
Expand Down Expand Up @@ -477,14 +477,18 @@ def is_warmup_only_session(messages: list[TranscriptEntry], session_id: str) ->
# =============================================================================


def is_user_entry(entry: TranscriptEntry) -> TypeGuard[UserTranscriptEntry]:
    """Tell whether ``entry`` carries the user message type.

    Compares ``entry.type`` with ``MessageType.USER``. The ``TypeGuard``
    return annotation lets static type checkers treat ``entry`` as a
    ``UserTranscriptEntry`` in the branch where this returns True.
    """
    entry_kind = entry.type
    return entry_kind == MessageType.USER
def as_user_entry(entry: TranscriptEntry) -> UserTranscriptEntry | None:
    """Narrow ``entry`` to ``UserTranscriptEntry``, or give back None.

    The decision rests solely on ``entry.type`` equalling
    ``MessageType.USER``. ``cast`` only informs the type checker; the very
    same object is returned, so callers can use the result both as a
    truthiness test and as the narrowed value.
    """
    matches_user = entry.type == MessageType.USER
    return cast(UserTranscriptEntry, entry) if matches_user else None


def is_assistant_entry(entry: TranscriptEntry) -> TypeGuard[AssistantTranscriptEntry]:
    """Tell whether ``entry`` carries the assistant message type.

    Compares ``entry.type`` with ``MessageType.ASSISTANT``. The ``TypeGuard``
    return annotation lets static type checkers treat ``entry`` as an
    ``AssistantTranscriptEntry`` in the branch where this returns True.
    """
    entry_kind = entry.type
    return entry_kind == MessageType.ASSISTANT
def as_assistant_entry(entry: TranscriptEntry) -> AssistantTranscriptEntry | None:
    """Narrow ``entry`` to ``AssistantTranscriptEntry``, or give back None.

    The decision rests solely on ``entry.type`` equalling
    ``MessageType.ASSISTANT``. ``cast`` only informs the type checker; the
    very same object is returned, which makes the helper usable with the
    walrus operator (``if a := as_assistant_entry(m): ...``).
    """
    matches_assistant = entry.type == MessageType.ASSISTANT
    return cast(AssistantTranscriptEntry, entry) if matches_assistant else None


# =============================================================================
Expand Down
55 changes: 38 additions & 17 deletions claude_code_log/renderer.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,13 +40,13 @@
UserTextContent,
)
from .parser import (
as_assistant_entry,
as_user_entry,
extract_text_content,
is_assistant_entry,
is_bash_input,
is_bash_output,
is_command_message,
is_local_command_output,
is_user_entry,
)
from .utils import (
format_timestamp,
Expand Down Expand Up @@ -499,6 +499,10 @@ def generate_template_messages(
with log_timing("Build message hierarchy", t_start):
_build_message_hierarchy(template_messages)

# Resolve dedup notice targets (needs message_id from hierarchy)
with log_timing("Resolve dedup targets", t_start):
_resolve_dedup_targets(template_messages)

# Mark messages that have children for fold/unfold controls
with log_timing("Mark messages with children", t_start):
_mark_messages_with_children(template_messages)
Expand Down Expand Up @@ -1643,7 +1647,9 @@ def _reorder_sidechain_template_messages(
):
# Replace with note pointing to the Task result
sidechain_msg.content = DedupNoticeContent(
notice_text="(Task summary — already displayed in Task tool result above)"
notice_text="Task summary — see result above",
target_uuid=message.uuid,
original_text=sidechain_text,
)
# Mark as deduplicated for potential debugging
sidechain_msg.raw_text_content = None
Expand All @@ -1662,6 +1668,23 @@ def _reorder_sidechain_template_messages(
return result


def _resolve_dedup_targets(messages: list[TemplateMessage]) -> None:
    """Fill in ``target_message_id`` on every dedup notice, in place.

    Each ``DedupNoticeContent`` that carries a ``target_uuid`` gets its
    ``target_message_id`` set to the ``message_id`` of the message with that
    uuid, or to None when no such message is found.

    Must be called after _build_message_hierarchy assigns message_id values.

    Args:
        messages: Template messages to scan; notice contents are mutated.
    """
    # uuid -> message_id lookup; a later message with the same uuid
    # overrides an earlier one, and messages missing either value are skipped.
    id_by_uuid: dict[str, str] = {
        entry.uuid: entry.message_id
        for entry in messages
        if entry.uuid and entry.message_id
    }

    for entry in messages:
        notice = entry.content
        if not isinstance(notice, DedupNoticeContent):
            continue
        if not notice.target_uuid:
            continue
        # .get() leaves the target as None when the uuid is unknown
        notice.target_message_id = id_by_uuid.get(notice.target_uuid)


def _filter_messages(messages: list[TranscriptEntry]) -> list[TranscriptEntry]:
"""Filter messages to those that should be rendered.

Expand Down Expand Up @@ -1797,9 +1820,8 @@ def _collect_session_info(

# Get first user message content for preview
first_user_message = ""
if is_user_entry(message) and should_use_as_session_starter(text_content):
content = extract_text_content(message.message.content)
first_user_message = create_session_preview(content)
if as_user_entry(message) and should_use_as_session_starter(text_content):
first_user_message = create_session_preview(text_content)

sessions[session_id] = {
"id": session_id,
Expand All @@ -1816,11 +1838,10 @@ def _collect_session_info(
session_order.append(session_id)

# Update first user message if this is a user message and we don't have one yet
elif is_user_entry(message) and not sessions[session_id]["first_user_message"]:
first_user_content = extract_text_content(message.message.content)
if should_use_as_session_starter(first_user_content):
elif as_user_entry(message) and not sessions[session_id]["first_user_message"]:
if should_use_as_session_starter(text_content):
sessions[session_id]["first_user_message"] = create_session_preview(
first_user_content
text_content
)

sessions[session_id]["message_count"] += 1
Expand All @@ -1832,10 +1853,10 @@ def _collect_session_info(

# Extract and accumulate token usage for assistant messages
# Only count tokens for the first message with each requestId to avoid duplicates
if is_assistant_entry(message):
assistant_message = message.message
request_id = message.requestId
message_uuid = message.uuid
if assistant_entry := as_assistant_entry(message):
assistant_message = assistant_entry.message
request_id = assistant_entry.requestId
message_uuid = assistant_entry.uuid

if (
assistant_message.usage
Expand Down Expand Up @@ -2013,9 +2034,9 @@ def _render_messages(
# Extract token usage for assistant messages
# Only show token usage for the first message with each requestId to avoid duplicates
token_usage_str: Optional[str] = None
if is_assistant_entry(message):
assistant_message = message.message
message_uuid = message.uuid
if assistant_entry := as_assistant_entry(message):
assistant_message = assistant_entry.message
message_uuid = assistant_entry.uuid

if assistant_message.usage and message_uuid in show_tokens_for_message:
# Only show token usage for messages marked as first occurrence of requestId
Expand Down
Loading
Loading