daaain · cboos · Dec 18, 2025 · Dec 17, 2025 · Dec 15, 2025 · Dec 15, 2025
diff --git a/.gitignore b/.gitignore
@@ -50,6 +50,7 @@ coverage.xml
 *.py,cover
 .hypothesis/
 .pytest_cache/
+.playwright_cache/
 cover/
 
 # Translations

diff --git a/claude_code_log/cache.py b/claude_code_log/cache.py
@@ -421,9 +421,9 @@ def _is_cache_version_compatible(self, cache_version: str) -> bool:
         # Format: "cache_version": "minimum_library_version_required"
         # If cache version is older than the minimum required, it needs invalidation
         breaking_changes: dict[str, str] = {
-            # Example breaking changes (adjust as needed):
-            # "0.3.3": "0.3.4",  # 0.3.4 introduced breaking changes to cache format
-            # "0.2.x": "0.3.0",  # 0.3.0 introduced major cache format changes
+            # 0.9.0 introduced _compact_ide_tags_for_preview() which transforms
+            # first_user_message to use emoji indicators instead of raw IDE tags
+            "0.8.0": "0.9.0",
         }
 
         cache_ver = version.parse(cache_version)

diff --git a/claude_code_log/converter.py b/claude_code_log/converter.py
@@ -318,15 +318,18 @@ def deduplicate_messages(messages: list[TranscriptEntry]) -> list[TranscriptEntr
        -> Same timestamp, same message.id or tool_use_id -> SHOULD deduplicate
     2. Concurrent tool results: Multiple tool results with same timestamp
        -> Same timestamp, different tool_use_ids -> should NOT deduplicate
+    3. User text messages with same timestamp but different UUIDs (branch switch artifacts)
+       -> Same timestamp, no tool_use_id -> SHOULD deduplicate, keep the one with most content
 
     Args:
         messages: List of transcript entries to deduplicate
 
     Returns:
-        List of deduplicated messages, preserving order (first occurrence kept)
+        List of deduplicated messages, preserving order (first occurrence kept,
+        but replaced in-place if a better version is found later)
     """
-    # Track seen (message_type, timestamp, is_meta, session_id, content_key) tuples
-    seen: set[tuple[str, str, bool, str, str]] = set()
+    # Track seen dedup_key -> index in deduplicated list (for in-place replacement)
+    seen: dict[tuple[str, str, bool, str, str], int] = {}
     deduplicated: list[TranscriptEntry] = []
 
     for message in messages:
@@ -350,9 +353,10 @@ def deduplicate_messages(messages: list[TranscriptEntry]) -> list[TranscriptEntr
         # Get content key for differentiating concurrent messages
         # - For assistant messages: use message.id (same for stutters, different for different msgs)
         # - For user messages with tool results: use first tool_use_id
+        # - For user text messages: use empty string (deduplicate by timestamp alone)
         # - For summary messages: use leafUuid (summaries have no timestamp/uuid)
-        # - For other messages: use uuid as fallback
         content_key = ""
+        is_user_text = False
         if isinstance(message, AssistantTranscriptEntry):
             # For assistant messages, use the message id
             content_key = message.message.id
@@ -362,20 +366,29 @@ def deduplicate_messages(messages: list[TranscriptEntry]) -> list[TranscriptEntr
                 if isinstance(item, ToolResultContent):
                     content_key = item.tool_use_id
                     break
+            else:
+                # No tool result found - this is a user text message
+                is_user_text = True
+                # content_key stays empty (dedupe by timestamp alone)
         elif isinstance(message, SummaryTranscriptEntry):
             # Summaries have no timestamp or uuid - use leafUuid to keep them distinct
             content_key = message.leafUuid
-        # Fallback to uuid if no content key found
-        if not content_key:
-            content_key = getattr(message, "uuid", "")
 
-        # Create deduplication key - include content_key for proper handling
-        # of both version stutters and concurrent tool results
+        # Create deduplication key
         dedup_key = (message_type, timestamp, is_meta, session_id, content_key)
 
-        # Keep only first occurrence
-        if dedup_key not in seen:
-            seen.add(dedup_key)
+        if dedup_key in seen:
+            # For user text messages, replace if new one has more content items
+            if is_user_text and isinstance(message, UserTranscriptEntry):
+                idx = seen[dedup_key]
+                existing = deduplicated[idx]
+                if isinstance(existing, UserTranscriptEntry) and len(
+                    message.message.content
+                ) > len(existing.message.content):
+                    deduplicated[idx] = message  # Replace with better version
+            # Otherwise skip duplicate
+        else:
+            seen[dedup_key] = len(deduplicated)
             deduplicated.append(message)
 
     return deduplicated

diff --git a/claude_code_log/html/system_formatters.py b/claude_code_log/html/system_formatters.py
@@ -96,13 +96,21 @@ def format_dedup_notice_content(content: DedupNoticeContent) -> str:
     """Format a deduplication notice as HTML.
 
     Args:
-        content: DedupNoticeContent with notice text
+        content: DedupNoticeContent with notice text and optional target link
 
     Returns:
-        HTML for the dedup notice display
+        HTML for the dedup notice display with optional anchor link
     """
     escaped_notice = html.escape(content.notice_text)
-    return f"<p><em>{escaped_notice}</em></p>"
+
+    if content.target_message_id:
+        # Escape the ID as well: it is interpolated into an HTML attribute value
+        return (
+            f'<p><em><a href="#msg-{html.escape(content.target_message_id)}">'
+            f"{escaped_notice}</a></em></p>"
+        )
+    else:
+        return f"<p><em>{escaped_notice}</em></p>"
 
 
 __all__ = [

diff --git a/claude_code_log/html/templates/transcript.html b/claude_code_log/html/templates/transcript.html
@@ -103,7 +103,7 @@ <h3>🔍 Search & Filter</h3>
     {% else %}
     {%- set msg_css_class = css_class_from_message(message) %}
     {% set markdown = message.type in ['assistant', 'thinking'] or message.modifiers.is_compacted %}
-    <div class='message {{ msg_css_class }}{% if message.is_paired %} {{ message.pair_role }}{% endif %}{% for ancestor_id in message.ancestry %} {{ ancestor_id }}{% endfor %}' data-message-id='{{ message.message_id }}' id='msg-{{ message.message_id }}{% if message.is_paired and message.pair_role %}-{{ message.pair_role.replace("pair_", "") }}{% endif %}'>
+    <div class='message {{ msg_css_class }}{% if message.is_paired %} {{ message.pair_role }}{% endif %}{% for ancestor_id in message.ancestry %} {{ ancestor_id }}{% endfor %}' data-message-id='{{ message.message_id }}' id='msg-{{ message.message_id }}'>
         <div class='header'>
             {% set msg_emoji = get_message_emoji(message) -%}
             <span{% if message.title_hint %} title="{{ message.title_hint }}"{% endif %}>{% if message.message_title %}{%

diff --git a/claude_code_log/models.py b/claude_code_log/models.py
@@ -267,9 +267,6 @@ class UserTextContent(MessageContent):
 
     Wraps user text that may have been preprocessed to extract
     IDE notifications, compacted summaries, or memory input markers.
-
-    TODO: Not currently instantiated - formatter exists but pipeline uses
-    separate IdeNotificationContent and plain text instead.
     """
 
     text: str
@@ -472,6 +469,9 @@ class DedupNoticeContent(MessageContent):
     """
 
     notice_text: str
+    target_uuid: Optional[str] = None  # UUID of target message (for resolving link)
+    target_message_id: Optional[str] = None  # Resolved message ID for anchor link
+    original_text: Optional[str] = None  # Original duplicated content (for debugging)
 
 
 # =============================================================================

diff --git a/claude_code_log/parser.py b/claude_code_log/parser.py
@@ -3,7 +3,7 @@
 
 import json
 import re
-from typing import Any, Callable, Optional, Union, cast, TypeGuard
+from typing import Any, Callable, Optional, Union, cast
 from datetime import datetime
 
 from anthropic.types import Message as AnthropicMessage
@@ -477,14 +477,18 @@ def is_warmup_only_session(messages: list[TranscriptEntry], session_id: str) ->
 # =============================================================================
 
 
-def is_user_entry(entry: TranscriptEntry) -> TypeGuard[UserTranscriptEntry]:
-    """Check if entry is a user transcript entry."""
-    return entry.type == MessageType.USER
+def as_user_entry(entry: TranscriptEntry) -> UserTranscriptEntry | None:
+    """Narrow entry to UserTranscriptEntry, or None for any other entry type."""
+    if entry.type != MessageType.USER:
+        return None
+    return cast(UserTranscriptEntry, entry)
 
 
-def is_assistant_entry(entry: TranscriptEntry) -> TypeGuard[AssistantTranscriptEntry]:
-    """Check if entry is an assistant transcript entry."""
-    return entry.type == MessageType.ASSISTANT
+def as_assistant_entry(entry: TranscriptEntry) -> AssistantTranscriptEntry | None:
+    """Narrow entry to AssistantTranscriptEntry, or None for any other type."""
+    if entry.type != MessageType.ASSISTANT:
+        return None
+    return cast(AssistantTranscriptEntry, entry)
 
 
 # =============================================================================

diff --git a/claude_code_log/renderer.py b/claude_code_log/renderer.py
@@ -40,13 +40,13 @@
     UserTextContent,
 )
 from .parser import (
+    as_assistant_entry,
+    as_user_entry,
     extract_text_content,
-    is_assistant_entry,
     is_bash_input,
     is_bash_output,
     is_command_message,
     is_local_command_output,
-    is_user_entry,
 )
 from .utils import (
     format_timestamp,
@@ -499,6 +499,10 @@ def generate_template_messages(
     with log_timing("Build message hierarchy", t_start):
         _build_message_hierarchy(template_messages)
 
+    # Resolve dedup notice targets (needs message_id from hierarchy)
+    with log_timing("Resolve dedup targets", t_start):
+        _resolve_dedup_targets(template_messages)
+
     # Mark messages that have children for fold/unfold controls
     with log_timing("Mark messages with children", t_start):
         _mark_messages_with_children(template_messages)
@@ -1643,7 +1647,9 @@ def _reorder_sidechain_template_messages(
                     ):
                         # Replace with note pointing to the Task result
                         sidechain_msg.content = DedupNoticeContent(
-                            notice_text="(Task summary — already displayed in Task tool result above)"
+                            notice_text="Task summary — see result above",
+                            target_uuid=message.uuid,
+                            original_text=sidechain_text,
                         )
                         # Mark as deduplicated for potential debugging
                         sidechain_msg.raw_text_content = None
@@ -1662,6 +1668,23 @@ def _reorder_sidechain_template_messages(
     return result
 
 
+def _resolve_dedup_targets(messages: list[TemplateMessage]) -> None:
+    """Fill in anchor-link message IDs on dedup notices from their target UUIDs.
+
+    Run this only once _build_message_hierarchy has populated message_id.
+    """
+    # Index every message that carries both a uuid and a message_id
+    id_by_uuid: dict[str, str] = {
+        m.uuid: m.message_id for m in messages if m.uuid and m.message_id
+    }
+
+    # Look up each notice's target; an unknown UUID resolves to None
+    for m in messages:
+        notice = m.content
+        if isinstance(notice, DedupNoticeContent) and notice.target_uuid:
+            notice.target_message_id = id_by_uuid.get(notice.target_uuid)
+
+
 def _filter_messages(messages: list[TranscriptEntry]) -> list[TranscriptEntry]:
     """Filter messages to those that should be rendered.
 
@@ -1797,9 +1820,8 @@ def _collect_session_info(
 
             # Get first user message content for preview
             first_user_message = ""
-            if is_user_entry(message) and should_use_as_session_starter(text_content):
-                content = extract_text_content(message.message.content)
-                first_user_message = create_session_preview(content)
+            if as_user_entry(message) and should_use_as_session_starter(text_content):
+                first_user_message = create_session_preview(text_content)
 
             sessions[session_id] = {
                 "id": session_id,
@@ -1816,11 +1838,10 @@ def _collect_session_info(
             session_order.append(session_id)
 
         # Update first user message if this is a user message and we don't have one yet
-        elif is_user_entry(message) and not sessions[session_id]["first_user_message"]:
-            first_user_content = extract_text_content(message.message.content)
-            if should_use_as_session_starter(first_user_content):
+        elif as_user_entry(message) and not sessions[session_id]["first_user_message"]:
+            if should_use_as_session_starter(text_content):
                 sessions[session_id]["first_user_message"] = create_session_preview(
-                    first_user_content
+                    text_content
                 )
 
         sessions[session_id]["message_count"] += 1
@@ -1832,10 +1853,10 @@ def _collect_session_info(
 
         # Extract and accumulate token usage for assistant messages
         # Only count tokens for the first message with each requestId to avoid duplicates
-        if is_assistant_entry(message):
-            assistant_message = message.message
-            request_id = message.requestId
-            message_uuid = message.uuid
+        if assistant_entry := as_assistant_entry(message):
+            assistant_message = assistant_entry.message
+            request_id = assistant_entry.requestId
+            message_uuid = assistant_entry.uuid
 
             if (
                 assistant_message.usage
@@ -2013,9 +2034,9 @@ def _render_messages(
         # Extract token usage for assistant messages
         # Only show token usage for the first message with each requestId to avoid duplicates
         token_usage_str: Optional[str] = None
-        if is_assistant_entry(message):
-            assistant_message = message.message
-            message_uuid = message.uuid
+        if assistant_entry := as_assistant_entry(message):
+            assistant_message = assistant_entry.message
+            message_uuid = assistant_entry.uuid
 
             if assistant_message.usage and message_uuid in show_tokens_for_message:
                 # Only show token usage for messages marked as first occurrence of requestId