File tree 2 files changed +5
-3
lines changed
2 files changed +5
-3
lines changed Original file line number Diff line number Diff line change @@ -109,7 +109,7 @@ def main(opt):
109
109
110
110
for model_name in models :
111
111
logging .info (f"Testing model: { model_name } " )
112
- input_text = "This is a test input for the LLM ."
112
+ input_text = "Tell me an adventure story ."
113
113
try :
114
114
result = llm_client .test (model_name , input_text )
115
115
logging .info (f"Test result for model '{ model_name } ': { result } " )
Original file line number Diff line number Diff line change 3
3
import time
4
4
import logging
5
5
import uuid
6
- from .token_calc import calculate_token_length
6
+ # from .token_calc import calculate_token_length
7
7
8
8
logging .basicConfig (level = logging .INFO , format = '%(asctime)s - %(levelname)s - %(message)s' )
9
9
@@ -24,6 +24,7 @@ def send_request_stream(self, request):
24
24
response = b""
25
25
parsed_responses = []
26
26
output_text = ""
27
+ token_count = 0
27
28
28
29
start_time = time .time ()
29
30
first_packet_time = None
@@ -42,13 +43,14 @@ def send_request_stream(self, request):
42
43
if first_packet_time is None :
43
44
first_packet_time = time .time ()
44
45
output_text += parsed_response ["data" ]["delta" ]
46
+ token_count += 3
45
47
46
48
if "data" in parsed_response and parsed_response ["data" ].get ("finish" , False ):
47
49
end_time = time .time ()
48
50
total_time = end_time - start_time
49
51
first_packet_latency = first_packet_time - start_time if first_packet_time else None
50
52
51
- token_count = calculate_token_length (output_text )
53
+ # token_count = calculate_token_length(output_text)
52
54
token_speed = token_count / total_time if total_time > 0 else 0
53
55
54
56
logging .info ("Stream reception completed." )
You can’t perform that action at this time.
0 commit comments