diff --git a/.gitignore b/.gitignore index 331e713c..51464bae 100644 --- a/.gitignore +++ b/.gitignore @@ -50,6 +50,7 @@ coverage.xml *.py,cover .hypothesis/ .pytest_cache/ +.playwright_cache/ cover/ # Translations diff --git a/claude_code_log/cache.py b/claude_code_log/cache.py index d0a3ea00..ad443726 100644 --- a/claude_code_log/cache.py +++ b/claude_code_log/cache.py @@ -421,9 +421,9 @@ def _is_cache_version_compatible(self, cache_version: str) -> bool: # Format: "cache_version": "minimum_library_version_required" # If cache version is older than the minimum required, it needs invalidation breaking_changes: dict[str, str] = { - # Example breaking changes (adjust as needed): - # "0.3.3": "0.3.4", # 0.3.4 introduced breaking changes to cache format - # "0.2.x": "0.3.0", # 0.3.0 introduced major cache format changes + # 0.9.0 introduced _compact_ide_tags_for_preview() which transforms + # first_user_message to use emoji indicators instead of raw IDE tags + "0.8.0": "0.9.0", } cache_ver = version.parse(cache_version) diff --git a/claude_code_log/converter.py b/claude_code_log/converter.py index 960e8b4e..12a2fa83 100644 --- a/claude_code_log/converter.py +++ b/claude_code_log/converter.py @@ -318,15 +318,18 @@ def deduplicate_messages(messages: list[TranscriptEntry]) -> list[TranscriptEntr -> Same timestamp, same message.id or tool_use_id -> SHOULD deduplicate 2. Concurrent tool results: Multiple tool results with same timestamp -> Same timestamp, different tool_use_ids -> should NOT deduplicate + 3. User text messages with same timestamp but different UUIDs (branch switch artifacts) + -> Same timestamp, no tool_use_id -> SHOULD deduplicate, keep the one with most content Args: messages: List of transcript entries to deduplicate Returns: - List of deduplicated messages, preserving order (first occurrence kept) + List of deduplicated messages, preserving order (first occurrence kept, + but replaced in-place if a better version is found later) """ - # Track seen (message_type, timestamp, is_meta, session_id, content_key) tuples - seen: set[tuple[str, str, bool, str, str]] = set() + # Track seen dedup_key -> index in deduplicated list (for in-place replacement) + seen: dict[tuple[str, str, bool, str, str], int] = {} deduplicated: list[TranscriptEntry] = [] for message in messages: @@ -350,9 +353,10 @@ def deduplicate_messages(messages: list[TranscriptEntry]) -> list[TranscriptEntr # Get content key for differentiating concurrent messages # - For assistant messages: use message.id (same for stutters, different for different msgs) # - For user messages with tool results: use first tool_use_id + # - For user text messages: use empty string (deduplicate by timestamp alone) # - For summary messages: use leafUuid (summaries have no timestamp/uuid) - # - For other messages: use uuid as fallback content_key = "" + is_user_text = False if isinstance(message, AssistantTranscriptEntry): # For assistant messages, use the message id content_key = message.message.id @@ -362,20 +366,29 @@ def deduplicate_messages(messages: list[TranscriptEntry]) -> list[TranscriptEntr if isinstance(item, ToolResultContent): content_key = item.tool_use_id break + else: + # No tool result found - this is a user text message + is_user_text = True + # content_key stays empty (dedupe by timestamp alone) elif isinstance(message, SummaryTranscriptEntry): # Summaries have no timestamp or uuid - use leafUuid to keep them distinct content_key = message.leafUuid - # Fallback to uuid if no content key found - if not content_key: - content_key = getattr(message, "uuid", "") - # Create deduplication key - include content_key for proper handling - # of both version stutters and concurrent tool results + # Create deduplication key dedup_key = (message_type, timestamp, is_meta, session_id, content_key) - # Keep only first occurrence - if dedup_key not in seen: - seen.add(dedup_key) + if dedup_key in seen: + # For user text messages, replace if new one has more content items + if is_user_text and isinstance(message, UserTranscriptEntry): + idx = seen[dedup_key] + existing = deduplicated[idx] + if isinstance(existing, UserTranscriptEntry) and len( + message.message.content + ) > len(existing.message.content): + deduplicated[idx] = message # Replace with better version + # Otherwise skip duplicate + else: + seen[dedup_key] = len(deduplicated) deduplicated.append(message) return deduplicated diff --git a/claude_code_log/html/system_formatters.py b/claude_code_log/html/system_formatters.py index 14182a25..72eff8b8 100644 --- a/claude_code_log/html/system_formatters.py +++ b/claude_code_log/html/system_formatters.py @@ -96,13 +96,21 @@ def format_dedup_notice_content(content: DedupNoticeContent) -> str: """Format a deduplication notice as HTML. Args: - content: DedupNoticeContent with notice text + content: DedupNoticeContent with notice text and optional target link Returns: - HTML for the dedup notice display + HTML for the dedup notice display with optional anchor link """ escaped_notice = html.escape(content.notice_text) - return f"

{escaped_notice}

" + + if content.target_message_id: + # Create clickable link to the target message + return ( + f'

' + f"{escaped_notice}

" + ) + else: + return f"

{escaped_notice}

" __all__ = [ diff --git a/claude_code_log/html/templates/transcript.html b/claude_code_log/html/templates/transcript.html index 2f812bdf..b531c688 100644 --- a/claude_code_log/html/templates/transcript.html +++ b/claude_code_log/html/templates/transcript.html @@ -103,7 +103,7 @@

🔍 Search & Filter

{% else %} {%- set msg_css_class = css_class_from_message(message) %} {% set markdown = message.type in ['assistant', 'thinking'] or message.modifiers.is_compacted %} -
+
{% set msg_emoji = get_message_emoji(message) -%} {% if message.message_title %}{% diff --git a/claude_code_log/models.py b/claude_code_log/models.py index eaf3315f..adb5267d 100644 --- a/claude_code_log/models.py +++ b/claude_code_log/models.py @@ -267,9 +267,6 @@ class UserTextContent(MessageContent): Wraps user text that may have been preprocessed to extract IDE notifications, compacted summaries, or memory input markers. - - TODO: Not currently instantiated - formatter exists but pipeline uses - separate IdeNotificationContent and plain text instead. """ text: str @@ -472,6 +469,9 @@ class DedupNoticeContent(MessageContent): """ notice_text: str + target_uuid: Optional[str] = None # UUID of target message (for resolving link) + target_message_id: Optional[str] = None # Resolved message ID for anchor link + original_text: Optional[str] = None # Original duplicated content (for debugging) # ============================================================================= diff --git a/claude_code_log/parser.py b/claude_code_log/parser.py index 89ead392..1ef56657 100644 --- a/claude_code_log/parser.py +++ b/claude_code_log/parser.py @@ -3,7 +3,7 @@ import json import re -from typing import Any, Callable, Optional, Union, cast, TypeGuard +from typing import Any, Callable, Optional, Union, cast from datetime import datetime from anthropic.types import Message as AnthropicMessage @@ -477,14 +477,18 @@ def is_warmup_only_session(messages: list[TranscriptEntry], session_id: str) -> # ============================================================================= -def is_user_entry(entry: TranscriptEntry) -> TypeGuard[UserTranscriptEntry]: - """Check if entry is a user transcript entry.""" - return entry.type == MessageType.USER +def as_user_entry(entry: TranscriptEntry) -> UserTranscriptEntry | None: + """Return entry as UserTranscriptEntry if it is one, else None.""" + if entry.type == MessageType.USER: + return cast(UserTranscriptEntry, entry) + return None -def is_assistant_entry(entry: TranscriptEntry) -> TypeGuard[AssistantTranscriptEntry]: - """Check if entry is an assistant transcript entry.""" - return entry.type == MessageType.ASSISTANT +def as_assistant_entry(entry: TranscriptEntry) -> AssistantTranscriptEntry | None: + """Return entry as AssistantTranscriptEntry if it is one, else None.""" + if entry.type == MessageType.ASSISTANT: + return cast(AssistantTranscriptEntry, entry) + return None # ============================================================================= diff --git a/claude_code_log/renderer.py b/claude_code_log/renderer.py index 680d558e..8e49471b 100644 --- a/claude_code_log/renderer.py +++ b/claude_code_log/renderer.py @@ -40,13 +40,13 @@ UserTextContent, ) from .parser import ( + as_assistant_entry, + as_user_entry, extract_text_content, - is_assistant_entry, is_bash_input, is_bash_output, is_command_message, is_local_command_output, - is_user_entry, ) from .utils import ( format_timestamp, @@ -499,6 +499,10 @@ def generate_template_messages( with log_timing("Build message hierarchy", t_start): _build_message_hierarchy(template_messages) + # Resolve dedup notice targets (needs message_id from hierarchy) + with log_timing("Resolve dedup targets", t_start): + _resolve_dedup_targets(template_messages) + # Mark messages that have children for fold/unfold controls with log_timing("Mark messages with children", t_start): _mark_messages_with_children(template_messages) @@ -1643,7 +1647,9 @@ def _reorder_sidechain_template_messages( ): # Replace with note pointing to the Task result sidechain_msg.content = DedupNoticeContent( - notice_text="(Task summary — already displayed in Task tool result above)" + notice_text="Task summary — see result above", + target_uuid=message.uuid, + original_text=sidechain_text, ) # Mark as deduplicated for potential debugging sidechain_msg.raw_text_content = None @@ -1662,6 +1668,23 @@ def _reorder_sidechain_template_messages( return result +def _resolve_dedup_targets(messages: list[TemplateMessage]) -> None: + """Resolve dedup notice target UUIDs to message IDs for anchor links. + + Must be called after _build_message_hierarchy assigns message_id values. + """ + # Build uuid -> message_id mapping + uuid_to_id: dict[str, str] = {} + for msg in messages: + if msg.uuid and msg.message_id: + uuid_to_id[msg.uuid] = msg.message_id + + # Resolve dedup notice targets + for msg in messages: + if isinstance(msg.content, DedupNoticeContent) and msg.content.target_uuid: + msg.content.target_message_id = uuid_to_id.get(msg.content.target_uuid) + + def _filter_messages(messages: list[TranscriptEntry]) -> list[TranscriptEntry]: """Filter messages to those that should be rendered. @@ -1797,9 +1820,8 @@ def _collect_session_info( # Get first user message content for preview first_user_message = "" - if is_user_entry(message) and should_use_as_session_starter(text_content): - content = extract_text_content(message.message.content) - first_user_message = create_session_preview(content) + if as_user_entry(message) and should_use_as_session_starter(text_content): + first_user_message = create_session_preview(text_content) sessions[session_id] = { "id": session_id, @@ -1816,11 +1838,10 @@ def _collect_session_info( session_order.append(session_id) # Update first user message if this is a user message and we don't have one yet - elif is_user_entry(message) and not sessions[session_id]["first_user_message"]: - first_user_content = extract_text_content(message.message.content) - if should_use_as_session_starter(first_user_content): + elif as_user_entry(message) and not sessions[session_id]["first_user_message"]: + if should_use_as_session_starter(text_content): sessions[session_id]["first_user_message"] = create_session_preview( - first_user_content + text_content ) sessions[session_id]["message_count"] += 1 @@ -1832,10 +1853,10 @@ def _collect_session_info( # Extract and accumulate token usage for assistant messages # Only count tokens for the first message with each requestId to avoid duplicates - if is_assistant_entry(message): - assistant_message = message.message - request_id = message.requestId - message_uuid = message.uuid + if assistant_entry := as_assistant_entry(message): + assistant_message = assistant_entry.message + request_id = assistant_entry.requestId + message_uuid = assistant_entry.uuid if ( assistant_message.usage @@ -2013,9 +2034,9 @@ def _render_messages( # Extract token usage for assistant messages # Only show token usage for the first message with each requestId to avoid duplicates token_usage_str: Optional[str] = None - if is_assistant_entry(message): - assistant_message = message.message - message_uuid = message.uuid + if assistant_entry := as_assistant_entry(message): + assistant_message = assistant_entry.message + message_uuid = assistant_entry.uuid if assistant_message.usage and message_uuid in show_tokens_for_message: # Only show token usage for messages marked as first occurrence of requestId diff --git a/test/__snapshots__/test_snapshot_html.ambr b/test/__snapshots__/test_snapshot_html.ambr index 018f4fa1..a86d0a8c 100644 --- a/test/__snapshots__/test_snapshot_html.ambr +++ b/test/__snapshots__/test_snapshot_html.ambr @@ -4961,7 +4961,7 @@ -
+
📝 Edit /tmp/decorator_example.py
@@ -4978,7 +4978,7 @@ -
+
@@ -5054,7 +5054,7 @@ -
+
🛠️ Bash Run the decorator example to show output
@@ -5071,7 +5071,7 @@ -
+
@@ -9774,7 +9774,7 @@ -
+
🛠️ FailingTool
@@ -9796,7 +9796,7 @@ -
+
🚨 Error
@@ -9813,7 +9813,7 @@ -
+
🤷 Slash Command
@@ -9832,7 +9832,7 @@ -
+
Command output
@@ -14777,7 +14777,7 @@ -
+
📝 Edit /tmp/decorator_example.py
@@ -14794,7 +14794,7 @@ -
+
@@ -14870,7 +14870,7 @@ -
+
🛠️ Bash Run the decorator example to show output
@@ -14887,7 +14887,7 @@ -
+
@@ -19571,7 +19571,7 @@ -
+
📝 Edit /tmp/decorator_example.py
@@ -19588,7 +19588,7 @@ -
+
@@ -19664,7 +19664,7 @@ -
+
🛠️ Bash Run the decorator example to show output
@@ -19681,7 +19681,7 @@ -
+
diff --git a/test/conftest.py b/test/conftest.py index 9f4de049..15abe64b 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -37,3 +37,57 @@ def browser_type_launch_args(browser_type_launch_args): "headless": True, # Set to False for debugging "slow_mo": 0, # Add delay for debugging } + + +@pytest.fixture(scope="session") +def _browser_user_data_dir(worker_id): + """Create a per-worker directory for browser user data (enables HTTP caching). + + Uses a fixed directory in the project that persists across test runs, + allowing vis-timeline CDN resources to remain cached between runs. + Each xdist worker gets its own subdirectory to avoid Chromium lock conflicts. + """ + # Use a fixed cache directory that persists across runs + cache_base = Path(__file__).parent.parent / ".playwright_cache" + # Each worker needs its own user data dir to avoid Chromium lock conflicts + # worker_id is "master" for non-xdist runs, or "gw0", "gw1", etc. for xdist + worker_dir = cache_base / worker_id + worker_dir.mkdir(parents=True, exist_ok=True) + return worker_dir + + +@pytest.fixture(scope="session") +def _persistent_context(playwright, browser_type_launch_args, _browser_user_data_dir): + """Create a persistent browser context that shares HTTP cache across tests. + + This solves flaky CDN loading issues by caching resources like vis-timeline + after the first load. + """ + browser_type = playwright.chromium + context = browser_type.launch_persistent_context( + _browser_user_data_dir, + **{ + **browser_type_launch_args, + "viewport": {"width": 1280, "height": 720}, + "ignore_https_errors": True, + }, + ) + yield context + context.close() + + +@pytest.fixture +def context(_persistent_context): + """Override pytest-playwright's context fixture to use persistent context. + + This ensures all browser tests share the same HTTP cache. + """ + return _persistent_context + + +@pytest.fixture +def page(context): + """Create a new page for each test using the shared persistent context.""" + page = context.new_page() + yield page + page.close() diff --git a/test/test_cache.py b/test/test_cache.py index 9a48d0fa..e7b481b8 100644 --- a/test/test_cache.py +++ b/test/test_cache.py @@ -569,14 +569,18 @@ def test_version_parsing_edge_cases(self, temp_project_dir): assert cache_manager._is_cache_version_compatible("1.0.0+build.1") is True assert cache_manager._is_cache_version_compatible("1.0.0+20230101") is True - def test_empty_breaking_changes_dict(self, temp_project_dir): - """Test that empty breaking changes dict allows all versions.""" - cache_manager = CacheManager(temp_project_dir, "2.0.0") + def test_breaking_changes_0_8_0(self, temp_project_dir): + """Test that 0.8.0 breaking change correctly invalidates old caches.""" + cache_manager = CacheManager(temp_project_dir, "0.9.0") - # With no breaking changes defined, all versions should be compatible + # Caches from 0.9.0+ should be compatible + assert cache_manager._is_cache_version_compatible("0.9.0") is True assert cache_manager._is_cache_version_compatible("1.0.0") is True - assert cache_manager._is_cache_version_compatible("0.5.0") is True - assert cache_manager._is_cache_version_compatible("3.0.0") is True + + # Caches from 0.8.0 and earlier should be invalidated + assert cache_manager._is_cache_version_compatible("0.8.0") is False + assert cache_manager._is_cache_version_compatible("0.7.0") is False + assert cache_manager._is_cache_version_compatible("0.5.0") is False class TestCacheErrorHandling: diff --git a/test/test_ide_tags.py b/test/test_ide_tags.py index a6734a5a..d992c1dc 100644 --- a/test/test_ide_tags.py +++ b/test/test_ide_tags.py @@ -15,9 +15,10 @@ from claude_code_log.html.assistant_formatters import format_assistant_text_content from claude_code_log.models import ( AssistantTextContent, - TextContent, ImageContent, ImageSource, + TextContent, + UserTextContent, ) @@ -310,7 +311,7 @@ def test_parse_user_message_with_multi_item_content(self): content_model = parse_user_message_content(content_list) # Should return UserTextContent with IDE notifications - assert content_model is not None + assert isinstance(content_model, UserTextContent) assert content_model.ide_notifications is not None assert len(content_model.ide_notifications.opened_files) == 1 assert ( diff --git a/test/test_sidechain_agents.py b/test/test_sidechain_agents.py index 3608bd70..880cf028 100644 --- a/test/test_sidechain_agents.py +++ b/test/test_sidechain_agents.py @@ -72,9 +72,11 @@ def test_deduplication_task_result_vs_sidechain(): # Verify deduplication occurred: # The sidechain assistant's final message should be replaced with a forward link - assert "(Task summary" in html - assert "already displayed in" in html - assert "Task tool result above" in html + assert "Task summary" in html + assert "see result above" in html + + # Verify the dedup notice has an anchor link to the Task result + assert 'href="#msg-' in html # The actual content "I created the test file successfully" should only appear once # in the Task result, not in the sidechain assistant @@ -110,7 +112,7 @@ def test_no_deduplication_when_content_different(): # No deduplication should occur - both "Done A" and "Done B" should appear assert "Done A" in html assert "Done B" in html - assert "(Task summary" not in html + assert "Task summary" not in html def test_agent_messages_marked_as_sidechain(): diff --git a/test/test_timeline_browser.py b/test/test_timeline_browser.py index 761d64b6..8c2260a8 100644 --- a/test/test_timeline_browser.py +++ b/test/test_timeline_browser.py @@ -51,7 +51,8 @@ def _wait_for_timeline_loaded(self, page: Page, expect_items: bool = True): page.wait_for_selector("#timeline-container", state="attached") # Wait for vis-timeline to create its DOM elements - page.wait_for_selector(".vis-timeline", timeout=10000) + # 30s timeout handles CDN cold loads (first load per xdist worker) + page.wait_for_selector(".vis-timeline", timeout=30000) # Wait for timeline items to be rendered (if expected) if expect_items: diff --git a/test/test_version_deduplication.py b/test/test_version_deduplication.py index 1c2ff2dc..0675fa85 100644 --- a/test/test_version_deduplication.py +++ b/test/test_version_deduplication.py @@ -270,3 +270,101 @@ def test_full_stutter_pair(self): content_count = html.count("Data content") assert content_count == 1, f"Expected 1 data content, got {content_count}" + + def test_user_text_message_deduplication(self): + """Test deduplication of user text messages with same timestamp but different UUIDs. + + This can happen during git branch switches where Claude Code logs the same + user input multiple times with content split across entries. + """ + from claude_code_log.models import TextContent + + timestamp = "2025-11-13T11:44:08.771Z" + + # Message 1: Has both IDE tag and actual text (2 content items) - this is the "best" + msg1 = UserTranscriptEntry( + type="user", + uuid="uuid-msg1", + parentUuid="parent-001", + timestamp=timestamp, + version="2.0.37", + isSidechain=False, + userType="external", + cwd="/test", + sessionId="session-test", + message=UserMessage( + role="user", + content=[ + TextContent( + type="text", + text="User opened test.md", + ), + TextContent( + type="text", + text="This is the actual user message content.", + ), + ], + ), + ) + + # Message 2: Only has the actual text (1 content item) + msg2 = UserTranscriptEntry( + type="user", + uuid="uuid-msg2", + parentUuid="parent-002", + timestamp=timestamp, # Same timestamp + version="2.0.37", + isSidechain=False, + userType="external", + cwd="/test", + sessionId="session-test", + message=UserMessage( + role="user", + content=[ + TextContent( + type="text", + text="This is the actual user message content.", + ), + ], + ), + ) + + # Message 3: Only has IDE tag (1 content item) + msg3 = UserTranscriptEntry( + type="user", + uuid="uuid-msg3", + parentUuid="parent-003", + timestamp=timestamp, # Same timestamp + version="2.0.37", + isSidechain=False, + userType="external", + cwd="/test", + sessionId="session-test", + message=UserMessage( + role="user", + content=[ + TextContent( + type="text", + text="User opened test.md", + ), + ], + ), + ) + + # Test all orderings - should always keep msg1 (most content items) + for messages in [ + [msg1, msg2, msg3], + [msg2, msg1, msg3], + [msg3, msg2, msg1], + ]: + deduped = deduplicate_messages(messages) + html = generate_html(deduped, "User Text Dedup Test") + + # The actual message should appear only once + content_count = html.count("This is the actual user message content.") + assert content_count == 1, ( + f"Expected 1 message content, got {content_count}" + ) + + # Should have kept msg1 which has the IDE notification + assert "test.md" in html, "Expected IDE notification to be present" diff --git a/ty.toml b/ty.toml deleted file mode 100644 index cc973da9..00000000 --- a/ty.toml +++ /dev/null @@ -1,8 +0,0 @@ -# ty type checker configuration -# https://github.com/astral-sh/ty - -[rules] -# Ignore false positives from TypeGuard not being recognized by ty -# The is_user_entry() and is_assistant_entry() functions use TypeGuard -# which pyright handles correctly, but ty doesn't support yet -"possibly-missing-attribute" = "ignore"