1414
1515from __future__ import annotations
1616
17- import dataclasses
1817import os
19- from typing import Any
20- from typing import Tuple
2118
2219from pydantic import alias_generators
2320from pydantic import BaseModel
2421from pydantic import ConfigDict
25- from typing_extensions import deprecated
2622
2723from ...evaluation .eval_case import Invocation
2824from ...evaluation .evaluation_generator import EvaluationGenerator
@@ -43,83 +39,6 @@ class GcsEvalManagers(BaseModel):
4339 eval_set_results_manager : GcsEvalSetResultsManager
4440
4541
@deprecated('Use convert_session_to_eval_invocations instead.')
def convert_session_to_eval_format(session: Session) -> list[dict[str, Any]]:
  """Converts a session's events into the legacy dict-based eval format.

  The event list is split into turns. A turn starts at an event whose author
  is 'user' and that has at least one content part; it ends right before the
  next event authored by 'user' (or at the end of the event list). User events
  without content parts do not start a turn, but they still end the previous
  one.

  Args:
    session: The session that should be converted. A falsy session, or one
      with no events, yields an empty list.

  Returns:
    One dict per turn, in event order, with the keys:
      'query': text of the first part of the user event ('' if it has none).
      'expected_tool_use': a {'tool_name', 'tool_input'} dict for every
        function call part seen during the turn.
      'expected_intermediate_agent_responses': {'author', 'text'} dicts for
        every text part of the turn except the last one.
      'reference': text of the last text part of the turn, or '' when the
        turn produced no text.
  """
  eval_case = []
  events = session.events if session and session.events else []

  # Track the position with enumerate() rather than events.index(event):
  # index() matches by equality, so a user event that compares equal to an
  # earlier one would resolve to the earlier position and this turn would pick
  # up the wrong responses. It also made the conversion quadratic.
  for position, event in enumerate(events):
    if event.author != 'user':
      continue
    if not event.content or not event.content.parts:
      continue

    # Extract user query. Only the first part of the user content is used.
    query = event.content.parts[0].text or ''

    # Find the corresponding tool usage or response for the query
    expected_tool_use = []
    intermediate_agent_responses = []

    # Check subsequent events to extract tool uses or responses for this turn.
    for subsequent_event in events[position + 1 :]:
      event_author = subsequent_event.author or 'agent'
      if event_author == 'user':
        # We found an event where the author was the user. This means that a
        # new turn has started. So close this turn here.
        break

      if not subsequent_event.content or not subsequent_event.content.parts:
        continue

      for subsequent_part in subsequent_event.content.parts:
        # A part is recorded either as a tool use or as a text response; when
        # a part carries both, the function call takes precedence.
        if subsequent_part.function_call:
          expected_tool_use.append({
              'tool_name': subsequent_part.function_call.name or '',
              'tool_input': subsequent_part.function_call.args or {},
          })
        elif subsequent_part.text:
          # Also keep track of all the natural language responses that
          # agent (or sub agents) generated.
          intermediate_agent_responses.append(
              {'author': event_author, 'text': subsequent_part.text}
          )

    # If we are here then either we are done reading all the events or we
    # encountered an event that had content authored by the end-user.
    # This, basically means an end of turn.
    # We assume that the last natural language intermediate response is the
    # final response from the agent/model. We treat that as a reference.
    if intermediate_agent_responses:
      reference = intermediate_agent_responses[-1]['text']
    else:
      reference = ''

    eval_case.append({
        'query': query,
        'expected_tool_use': expected_tool_use,
        'expected_intermediate_agent_responses': (
            intermediate_agent_responses[:-1]
        ),
        'reference': reference,
    })

  return eval_case
121-
122-
12342def convert_session_to_eval_invocations (session : Session ) -> list [Invocation ]:
12443 """Converts a session data into a list of Invocation.
12544
0 commit comments