Skip to content

Commit b7ee26a

Browse files
committed
fix content len
1 parent d690237 commit b7ee26a

File tree

3 files changed

+53
-15
lines changed

src/controller/custom_controller.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,6 @@ async def extract_content(params: ExtractPageContentAction, browser: BrowserCont
6666
)
6767
# go back to org url
6868
await page.go_back()
69-
msg = f'📄 Extracted page content as {output_format}\n: {content}\n'
69+
msg = f'Extracted page content:\n {content}\n'
7070
logger.info(msg)
7171
return ActionResult(extracted_content=msg)

src/utils/deep_research.py

+22-14
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
logger = logging.getLogger(__name__)
3030

3131

32-
async def deep_research(task, llm, agent_state, **kwargs):
32+
async def deep_research(task, llm, agent_state=None, **kwargs):
3333
task_id = str(uuid4())
3434
save_dir = kwargs.get("save_dir", os.path.join(f"./tmp/deep_research/{task_id}"))
3535
logger.info(f"Save Deep Research at: {save_dir}")
@@ -237,19 +237,27 @@ async def deep_research(task, llm, agent_state, **kwargs):
237237
with open(querr_save_path, "w", encoding="utf-8") as fw:
238238
fw.write(f"Query: {query_tasks[i]}\n")
239239
fw.write(query_result)
240-
history_infos_ = json.dumps(history_infos, indent=4)
241-
record_prompt = f"User Instruction:{task}. \nPrevious Recorded Information:\n {json.dumps(history_infos_)}\n Current Search Iteration: {search_iteration}\n Current Search Plan:\n{query_plan}\n Current Search Query:\n {query_tasks[i]}\n Current Search Results: {query_result}\n "
242-
record_messages.append(HumanMessage(content=record_prompt))
243-
ai_record_msg = llm.invoke(record_messages[:1] + record_messages[-1:])
244-
record_messages.append(ai_record_msg)
245-
if hasattr(ai_record_msg, "reasoning_content"):
246-
logger.info("🤯 Start Record Deep Thinking: ")
247-
logger.info(ai_record_msg.reasoning_content)
248-
logger.info("🤯 End Record Deep Thinking")
249-
record_content = ai_record_msg.content
250-
record_content = repair_json(record_content)
251-
new_record_infos = json.loads(record_content)
252-
history_infos.extend(new_record_infos)
240+
# split query result in case the content is too long
241+
query_results_split = query_result.split("Extracted page content:")
242+
for qi, query_result_ in enumerate(query_results_split):
243+
if not query_result_:
244+
continue
245+
else:
246+
# TODO: limit content lenght: 128k tokens, ~3 chars per token
247+
query_result_ = query_result_[:128000*3]
248+
history_infos_ = json.dumps(history_infos, indent=4)
249+
record_prompt = f"User Instruction:{task}. \nPrevious Recorded Information:\n {history_infos_}\n Current Search Iteration: {search_iteration}\n Current Search Plan:\n{query_plan}\n Current Search Query:\n {query_tasks[i]}\n Current Search Results: {query_result_}\n "
250+
record_messages.append(HumanMessage(content=record_prompt))
251+
ai_record_msg = llm.invoke(record_messages[:1] + record_messages[-1:])
252+
record_messages.append(ai_record_msg)
253+
if hasattr(ai_record_msg, "reasoning_content"):
254+
logger.info("🤯 Start Record Deep Thinking: ")
255+
logger.info(ai_record_msg.reasoning_content)
256+
logger.info("🤯 End Record Deep Thinking")
257+
record_content = ai_record_msg.content
258+
record_content = repair_json(record_content)
259+
new_record_infos = json.loads(record_content)
260+
history_infos.extend(new_record_infos)
253261

254262
logger.info("\nFinish Searching, Start Generating Report...")
255263

tests/test_deep_research.py

+30
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
import asyncio
2+
import os
3+
from dotenv import load_dotenv
4+
5+
load_dotenv()
6+
import sys
7+
8+
sys.path.append(".")
9+
10+
async def test_deep_research():
11+
from src.utils.deep_research import deep_research
12+
from src.utils import utils
13+
14+
task = "write a report about DeepSeek-R1, get its pdf"
15+
llm = utils.get_llm_model(
16+
provider="gemini",
17+
model_name="gemini-2.0-flash-thinking-exp-01-21",
18+
temperature=1.0,
19+
api_key=os.getenv("GOOGLE_API_KEY", "")
20+
)
21+
22+
report_content, report_file_path = await deep_research(task=task, llm=llm, agent_state=None,
23+
max_search_iterations=1,
24+
max_query_num=3,
25+
use_own_browser=False)
26+
27+
28+
29+
if __name__ == "__main__":
30+
asyncio.run(test_deep_research())

0 commit comments

Comments (0)