@@ -426,3 +426,146 @@ def test_file_upload_endpoint_with_live_server(server_env, tmp_path: Path):
     assert check_result.stdout == test_content, (
         f"File content mismatch. Expected:\n{test_content}\nGot:\n{check_result.stdout}"
     )
+
+
+def test_conversation_stats_with_live_server(
+    server_env, monkeypatch: pytest.MonkeyPatch
+):
+    """Integration test verifying conversation stats are correctly propagated.
+
+    This test validates the fix for issue #1041 where accumulated cost was
+    always 0. It checks:
+    1. RemoteConversation reads stats from the correct 'stats' field (not
+       'conversation_stats')
+    2. Stats updates are propagated after run() completes
+    3. Accumulated cost and token usage are correctly tracked
+
+    This is a regression test for the field mismatch and state update issues.
+    """
+
+    def fake_completion_with_cost(
+        self,
+        messages,
+        tools,
+        return_metrics=False,
+        add_security_risk_prediction=False,
+        **kwargs,
+    ):  # type: ignore[no-untyped-def]
+        from openhands.sdk.llm.llm_response import LLMResponse
+        from openhands.sdk.llm.message import Message
+        from openhands.sdk.llm.utils.metrics import TokenUsage
+
+        # Create a minimal ModelResponse with a single assistant message
+        litellm_msg = LiteLLMMessage.model_validate(
+            {"role": "assistant", "content": "Test response"}
+        )
+        raw_response = ModelResponse(
+            id="test-resp-with-cost",
+            created=int(time.time()),
+            model="test-model",
+            choices=[Choices(index=0, finish_reason="stop", message=litellm_msg)],
+        )
+
+        # Convert to OpenHands Message
+        message = Message.from_llm_chat_message(litellm_msg)
+
+        # Simulate cost accumulation in the LLM's metrics
+        # The LLM should have metrics that track cost
+        from openhands.sdk.llm.utils.metrics import MetricsSnapshot
+
+        if self.metrics:
+            self.metrics.add_cost(0.0025)
+            self.metrics.add_token_usage(
+                prompt_tokens=100,
+                completion_tokens=50,
+                cache_read_tokens=0,
+                cache_write_tokens=0,
+                context_window=8192,
+                response_id="test-resp-with-cost",
+                reasoning_tokens=0,
+            )
+            metrics_snapshot = self.metrics.get_snapshot()
+        else:
+            # Create a default metrics snapshot if no metrics exist
+            metrics_snapshot = MetricsSnapshot(
+                model_name=self.model,
+                accumulated_cost=0.0025,
+                accumulated_token_usage=TokenUsage(
+                    model=self.model,
+                    prompt_tokens=100,
+                    completion_tokens=50,
+                    response_id="test-resp-with-cost",
+                ),
+            )
+
+        return LLMResponse(
+            message=message, metrics=metrics_snapshot, raw_response=raw_response
+        )
+
+    # Patch LLM.completion with our cost-tracking version
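+    # Because the unbound LLM.completion is patched, the fake receives the
+    # LLM instance as self and can accumulate cost on self.metrics directly.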
+    monkeypatch.setattr(LLM, "completion", fake_completion_with_cost, raising=True)
+
+    # Create an Agent with a real LLM object
+    llm = LLM(model="gpt-4", api_key=SecretStr("test"))
+    agent = Agent(llm=llm, tools=[])
+
+    # Create conversation via factory pointing at the live server
+    workspace = RemoteWorkspace(
+        host=server_env["host"], working_dir="/tmp/workspace/project"
+    )
+    conv: RemoteConversation = Conversation(agent=agent, workspace=workspace)
+
+    # Verify initial stats are empty/zero
+    initial_stats = conv.conversation_stats
+    assert initial_stats is not None
+    initial_cost = initial_stats.get_combined_metrics().accumulated_cost
+    assert initial_cost == 0.0, f"Expected initial cost to be 0.0, got {initial_cost}"
+
+    # Send a message and run the conversation
+    conv.send_message("Test message")
+    conv.run()
+
+    # Wait for the conversation to finish and stats to update.
+    # The fix ensures stats are published after run() completes.
+    max_attempts = 50
+    for attempt in range(max_attempts):
+        try:
+            stats = conv.conversation_stats
+            combined_metrics = stats.get_combined_metrics()
+            accumulated_cost = combined_metrics.accumulated_cost
+
+            # Stats are considered propagated once a non-zero cost is reported
+            assert accumulated_cost > 0, (
+                f"Expected accumulated_cost > 0 after run(), got {accumulated_cost}"
+            )
+
+            # Verify token usage is tracked
+            if combined_metrics.accumulated_token_usage:
+                assert combined_metrics.accumulated_token_usage.prompt_tokens > 0, (
+                    "Expected prompt_tokens > 0 after run()"
+                )
+                assert (
+                    combined_metrics.accumulated_token_usage.completion_tokens > 0
+                ), "Expected completion_tokens > 0 after run()"
+
+            # Success - we got updated stats
+            break
+        except (KeyError, AttributeError, AssertionError) as e:
+            if attempt == max_attempts - 1:
+                raise AssertionError(
+                    f"Stats not properly updated after {max_attempts} attempts. "
+                    f"Last error: {e}"
+                ) from e
+            time.sleep(0.1)
+
+    # Final verification: stats are read from 'stats' field, not 'conversation_stats'
+    info = conv.state._get_conversation_info()
+    assert "stats" in info, "Expected 'stats' field in conversation info"
+
+    # Verify the RemoteConversation is correctly reading from 'stats'
+    stats_from_field = info.get("stats", {})
+    assert stats_from_field, "Expected non-empty stats in the 'stats' field after run()"
+
+    conv.close()