diff --git a/WebAgent/WebSailor/src/react_agent.py b/WebAgent/WebSailor/src/react_agent.py
index 9e5e1db2..7a95051c 100644
--- a/WebAgent/WebSailor/src/react_agent.py
+++ b/WebAgent/WebSailor/src/react_agent.py
@@ -69,7 +69,7 @@ def call_server(self, msgs, max_tries=10):
     def count_tokens(self, messages, model="gpt-4o"):
         try:
             tokenizer = AutoTokenizer.from_pretrained(self.llm_local_path)
-        except Exception as e:
+        except:
             tokenizer = tiktoken.encoding_for_model(model)
 
         full_message = [Message(**x) for x in messages]
@@ -159,4 +159,4 @@ def _run(self, data: str, model: str, user_prompt: str, **kwargs) -> List[List[M
             "prediction": prediction,
             "termination": termination
         }
-        return result
+        return result
\ No newline at end of file
diff --git a/WebAgent/WebWalker/src/agent.py b/WebAgent/WebWalker/src/agent.py
index c8e0421e..fc035ee3 100644
--- a/WebAgent/WebWalker/src/agent.py
+++ b/WebAgent/WebWalker/src/agent.py
@@ -140,9 +140,9 @@ def _run(self, messages: List[Message], lang: Literal['en', 'zh'] = 'en', **kwar
         if stage1:
             self.momery.append(stage1+"\n")
             if len(self.momery) > 1:
-                yield [Message(role=ASSISTANT, content= "Memory:\n" + "-".join(self.momery)+"\"}")]
+                yield [Message(role=ASSISTANT, content= "Memory:\n" + "-".join(self.momery)+"\"")]
             else:
-                yield [Message(role=ASSISTANT, content= "Memory:\n" + "-" + self.momery[0]+"\"}")]
+                yield [Message(role=ASSISTANT, content= "Memory:\n" + "-" + self.momery[0]+"\"")]
         stage2 = self.critic_information(query, self.momery)
         if stage2:
             response = f'Final Answer: {stage2}'
@@ -205,4 +205,4 @@ def _detect_tool(self, text: str) -> Tuple[bool, str, str, str]:
             func_name = text[i + len(special_func_token):j].strip()
             func_args = text[j + len(special_args_token):k].strip()
             text = text[:i]  # Return the response before tool call, i.e., `Thought`
-        return (func_name is not None), func_name, func_args, text
+        return (func_name is not None), func_name, func_args, text
\ No newline at end of file
diff --git a/evaluation/evaluate_deepsearch_official.py b/evaluation/evaluate_deepsearch_official.py
index 036973b1..d5aed588 100644
--- a/evaluation/evaluate_deepsearch_official.py
+++ b/evaluation/evaluate_deepsearch_official.py
@@ -1,7 +1,5 @@
-from pydantic import BaseModel
 from openai import OpenAI
 import concurrent.futures
-from typing import Literal
 import litellm
 import os
 import argparse
@@ -189,7 +187,7 @@ def aggregate_statistics(round1_file, round2_file, round3_file):
     round3_stats = single_round_statistics(round3_file)
 
     keys = round1_stats.keys()
-    avg_stats = {}
+    avg_stats = {}
 
     for key in keys:
         if isinstance(round1_stats[key], dict):
@@ -224,7 +222,7 @@ def single_round_statistics(input_file):
 
     try:
         tokenizer = AutoTokenizer.from_pretrained(os.getenv("Qwen2_5_7B_PATH", ""))
-    except Exception as e:
+    except Exception:
         tokenizer = tiktoken.encoding_for_model("gpt-4o")
 
     for item in contents:
@@ -329,7 +327,7 @@ def calculate_enhanced_statistics(round_results, round_items):
 
     try:
         tokenizer = AutoTokenizer.from_pretrained(os.getenv("Qwen2_5_7B_PATH", ""))
-    except Exception as e:
+    except Exception:
         tokenizer = tiktoken.encoding_for_model("gpt-4o")
 
     enhanced_stats = {}
@@ -419,7 +417,7 @@ def calculate_best_pass_at_1(query_results):
     round_correct = {round_name: 0 for round_name in ["round1", "round2", "round3"]}
 
     for query, results in query_results.items():
-        for round_name in ["round1", "round2", "round3"]:
+        for round_name in ["round1", "round2", "round3"]:
             if results[round_name] == "Correct":
                 round_correct[round_name] += 1
 
@@ -459,10 +457,10 @@ def main():
     args = parser.parse_args()
     dataset = args.dataset
 
-    if dataset in ["gaia", "webwalker"]:
+    if dataset in ["gaia", "webwalker"]:
         judge_model = "openai/qwen2.5-72b-instruct"
         judge_prompt = JUDGE_PROMPT_GAIA
-    elif dataset in ["xbench-deepsearch"]:
+    elif dataset in ["xbench-deepsearch"]:
         judge_prompt = JUDGE_PROMPT_XBENCH
         judge_model = "google/gemini-2.0-flash-001"
     elif dataset.startswith("browsecomp_zh"):
diff --git a/inference/react_agent.py b/inference/react_agent.py
index 1824666c..2e1dee85 100644
--- a/inference/react_agent.py
+++ b/inference/react_agent.py
@@ -25,7 +25,8 @@ from tool_visit import *
 
 OBS_START = '<tool_response>'
-OBS_END = '\n</tool_response>'
+OBS_END = '
+</tool_response>'
 
 MAX_LLM_CALL_PER_RUN = int(os.getenv('MAX_LLM_CALL_PER_RUN', 100))