diff --git a/openhands-agent-server/openhands/agent_server/event_service.py b/openhands-agent-server/openhands/agent_server/event_service.py
index f77e9e07c8..faa49161bb 100644
--- a/openhands-agent-server/openhands/agent_server/event_service.py
+++ b/openhands-agent-server/openhands/agent_server/event_service.py
@@ -251,7 +251,8 @@ async def send_message(self, message: Message, run: bool = False):
         with self._conversation.state as state:
             run = state.execution_status != ConversationExecutionStatus.RUNNING
         if run:
-            loop.run_in_executor(None, self._conversation.run)
+            await loop.run_in_executor(None, self._conversation.run)
+            await self._publish_state_update()
 
     async def subscribe_to_events(self, subscriber: Subscriber[Event]) -> UUID:
         subscriber_id = self._pub_sub.subscribe(subscriber)
@@ -411,6 +412,8 @@ async def _publish_state_update(self):
             state
         )
 
+        state.events.append(state_update_event)
+
         # Publish the state update event
         await self._pub_sub(state_update_event)
 
diff --git a/openhands-sdk/openhands/sdk/conversation/state.py b/openhands-sdk/openhands/sdk/conversation/state.py
index 437a420c04..4e0a521480 100644
--- a/openhands-sdk/openhands/sdk/conversation/state.py
+++ b/openhands-sdk/openhands/sdk/conversation/state.py
@@ -186,7 +186,9 @@ def create(
             state._autosave_enabled = True
             state.agent = resolved
-            state.stats = ConversationStats()
+            # Note: stats should NOT be reset here - they are loaded from
+            # persisted state. Resetting would lose all accumulated metrics
+            # including context_window
 
             logger.info(
                 f"Resumed conversation {state.id} from persistent storage.\n"
diff --git a/tests/sdk/conversation/local/test_stats_persist_on_resume.py b/tests/sdk/conversation/local/test_stats_persist_on_resume.py
new file mode 100644
index 0000000000..220bae3f40
--- /dev/null
+++ b/tests/sdk/conversation/local/test_stats_persist_on_resume.py
@@ -0,0 +1,180 @@
+"""Test that conversation stats are properly persisted and restored on resume."""
+
+import tempfile
+import uuid
+from pathlib import Path
+
+from pydantic import SecretStr
+
+from openhands.sdk import Agent, Conversation
+from openhands.sdk.conversation.impl.local_conversation import LocalConversation
+from openhands.sdk.conversation.state import ConversationState
+from openhands.sdk.event.conversation_state import ConversationStateUpdateEvent
+from openhands.sdk.llm import LLM
+from openhands.sdk.llm.llm_registry import RegistryEvent
+from openhands.sdk.workspace import LocalWorkspace
+
+
+def test_stats_preserved_on_resume():
+    """Test that conversation stats including context_window are preserved on resume."""
+    with tempfile.TemporaryDirectory() as temp_dir:
+        llm = LLM(
+            model="gpt-4o-mini", api_key=SecretStr("test-key"), usage_id="test-llm"
+        )
+        agent = Agent(llm=llm, tools=[])
+
+        conv_id = uuid.UUID("12345678-1234-5678-9abc-123456789010")
+        persist_path_for_state = LocalConversation.get_persistence_dir(
+            temp_dir, conv_id
+        )
+
+        # Create initial state
+        state = ConversationState.create(
+            workspace=LocalWorkspace(working_dir="/tmp"),
+            persistence_dir=persist_path_for_state,
+            agent=agent,
+            id=conv_id,
+        )
+
+        # Register LLM and add metrics
+        state.stats.register_llm(RegistryEvent(llm=llm))
+
+        # Simulate LLM usage by adding token usage to metrics
+        metrics = state.stats.get_metrics_for_usage("test-llm")
+        metrics.add_token_usage(
+            prompt_tokens=100,
+            completion_tokens=50,
+            cache_read_tokens=20,
+            cache_write_tokens=10,
+            context_window=8192,
+            response_id="test-response-1",
+        )
+        metrics.add_cost(0.05)
+
+        # Manually save since mutating stats doesn't trigger autosave
+        state._save_base_state(state._fs)
+
+        # Verify stats were recorded
+        assert len(state.stats.usage_to_metrics) == 1
+        assert "test-llm" in state.stats.usage_to_metrics
+        initial_metrics = state.stats.usage_to_metrics["test-llm"]
+        assert initial_metrics.accumulated_cost == 0.05
+        assert initial_metrics.accumulated_token_usage is not None
+        assert initial_metrics.accumulated_token_usage.context_window == 8192
+        assert initial_metrics.accumulated_token_usage.prompt_tokens == 100
+
+        # Verify base_state.json was saved
+        assert Path(persist_path_for_state, "base_state.json").exists()
+
+        # Now reload the state (simulating conversation resume)
+        # This should preserve the stats
+        resumed_state = ConversationState.create(
+            workspace=LocalWorkspace(working_dir="/tmp"),
+            persistence_dir=persist_path_for_state,
+            agent=agent,
+            id=conv_id,
+        )
+
+        # BUG: Stats should be preserved but they are reset to empty
+        # After the fix, these assertions should pass
+        assert len(resumed_state.stats.usage_to_metrics) == 1
+        assert "test-llm" in resumed_state.stats.usage_to_metrics
+        resumed_metrics = resumed_state.stats.usage_to_metrics["test-llm"]
+        assert resumed_metrics.accumulated_cost == 0.05
+        assert resumed_metrics.accumulated_token_usage is not None
+        assert resumed_metrics.accumulated_token_usage.context_window == 8192
+        assert resumed_metrics.accumulated_token_usage.prompt_tokens == 100
+
+
+def test_full_state_event_includes_stats():
+    """Test that full_state event includes stats with context_window."""
+    with tempfile.TemporaryDirectory() as temp_dir:
+        llm = LLM(
+            model="gpt-4o-mini", api_key=SecretStr("test-key"), usage_id="test-llm"
+        )
+        agent = Agent(llm=llm, tools=[])
+
+        conv_id = uuid.UUID("12345678-1234-5678-9abc-123456789011")
+        persist_path_for_state = LocalConversation.get_persistence_dir(
+            temp_dir, conv_id
+        )
+
+        # Create state
+        state = ConversationState.create(
+            workspace=LocalWorkspace(working_dir="/tmp"),
+            persistence_dir=persist_path_for_state,
+            agent=agent,
+            id=conv_id,
+        )
+
+        # Register LLM and add metrics
+        state.stats.register_llm(RegistryEvent(llm=llm))
+        metrics = state.stats.get_metrics_for_usage("test-llm")
+        metrics.add_token_usage(
+            prompt_tokens=200,
+            completion_tokens=100,
+            cache_read_tokens=30,
+            cache_write_tokens=15,
+            context_window=16384,
+            response_id="test-response-2",
+        )
+        metrics.add_cost(0.10)
+
+        # Create a full_state event
+        event = ConversationStateUpdateEvent.from_conversation_state(state)
+
+        # Verify event contains stats
+        assert event.key == "full_state"
+        assert "stats" in event.value
+        assert "usage_to_metrics" in event.value["stats"]
+        assert "test-llm" in event.value["stats"]["usage_to_metrics"]
+
+        # Verify context_window is included and not 0
+        llm_metrics = event.value["stats"]["usage_to_metrics"]["test-llm"]
+        assert "accumulated_token_usage" in llm_metrics
+        assert llm_metrics["accumulated_token_usage"]["context_window"] == 16384
+        assert llm_metrics["accumulated_token_usage"]["prompt_tokens"] == 200
+        assert llm_metrics["accumulated_token_usage"]["completion_tokens"] == 100
+        assert llm_metrics["accumulated_cost"] == 0.10
+
+
+def test_stats_in_conversation_via_full_state():
+    """Test that stats are properly sent via full_state in a Conversation."""
+    with tempfile.TemporaryDirectory() as temp_dir:
+        llm = LLM(
+            model="gpt-4o-mini", api_key=SecretStr("test-key"), usage_id="test-llm"
+        )
+        agent = Agent(llm=llm, tools=[])
+
+        conv_id = uuid.UUID("12345678-1234-5678-9abc-123456789012")
+
+        # Create conversation
+        conversation = Conversation(
+            agent=agent,
+            persistence_dir=temp_dir,
+            workspace=LocalWorkspace(working_dir="/tmp"),
+            conversation_id=conv_id,
+        )
+
+        # Register LLM and add metrics
+        conversation._state.stats.register_llm(RegistryEvent(llm=llm))
+        metrics = conversation._state.stats.get_metrics_for_usage("test-llm")
+        metrics.add_token_usage(
+            prompt_tokens=300,
+            completion_tokens=150,
+            cache_read_tokens=40,
+            cache_write_tokens=20,
+            context_window=32768,
+            response_id="test-response-3",
+        )
+
+        # Create full_state event
+        event = ConversationStateUpdateEvent.from_conversation_state(
+            conversation._state
+        )
+
+        # Verify stats are in the event
+        assert event.key == "full_state"
+        assert "stats" in event.value
+        llm_metrics = event.value["stats"]["usage_to_metrics"]["test-llm"]
+        assert llm_metrics["accumulated_token_usage"]["context_window"] == 32768