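[update] benchmark: use a story prompt as test input and estimate token count as 3 per streamed delta instead of calling calculate_token_length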

LittleMouse · LittleMouse · commit 6b285ed98dbe · 2025-05-08T17:15:50.000+08:00
diff --git a/benchmark/benchmodulellm.py b/benchmark/benchmodulellm.py
@@ -109,7 +109,7 @@ def main(opt):
             
             for model_name in models:
                 logging.info(f"Testing model: {model_name}")
-                input_text = "This is a test input for the LLM."
+                input_text = "Tell me an adventure story."
                 try:
                     result = llm_client.test(model_name, input_text)
                     logging.info(f"Test result for model '{model_name}': {result}")
diff --git a/benchmark/utils/llm.py b/benchmark/utils/llm.py
@@ -3,7 +3,7 @@
 import time
 import logging
 import uuid
-from .token_calc import calculate_token_length
+# from .token_calc import calculate_token_length
 
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 
@@ -24,6 +24,7 @@ def send_request_stream(self, request):
         response = b""
         parsed_responses = []
         output_text = ""
+        token_count = 0
 
         start_time = time.time()
         first_packet_time = None
@@ -42,13 +43,14 @@ def send_request_stream(self, request):
                         if first_packet_time is None:
                             first_packet_time = time.time()
                         output_text += parsed_response["data"]["delta"]
+                        token_count += 3
 
                     if "data" in parsed_response and parsed_response["data"].get("finish", False):
                         end_time = time.time()
                         total_time = end_time - start_time
                         first_packet_latency = first_packet_time - start_time if first_packet_time else None
 
-                        token_count = calculate_token_length(output_text)
+                        # token_count = calculate_token_length(output_text)
                         token_speed = token_count / total_time if total_time > 0 else 0
 
                         logging.info("Stream reception completed.")