
Commit 45168a3

update to bu=0.1.40
1 parent dd69063 commit 45168a3

11 files changed (+464, -557 lines)

src/agent/custom_agent.py (+184, -295)
Large diffs are not rendered by default.

src/agent/custom_message_manager.py (+48, -60)

@@ -8,14 +8,17 @@
 from browser_use.agent.prompts import SystemPrompt, AgentMessagePrompt
 from browser_use.agent.views import ActionResult, AgentStepInfo, ActionModel
 from browser_use.browser.views import BrowserState
+from browser_use.agent.message_manager.service import MessageManagerSettings
+from browser_use.agent.views import ActionResult, AgentOutput, AgentStepInfo, MessageManagerState
 from langchain_core.language_models import BaseChatModel
 from langchain_anthropic import ChatAnthropic
 from langchain_core.language_models import BaseChatModel
 from langchain_core.messages import (
-    AIMessage,
-    BaseMessage,
-    HumanMessage,
-    ToolMessage
+    AIMessage,
+    BaseMessage,
+    HumanMessage,
+    ToolMessage,
+    SystemMessage
 )
 from langchain_openai import ChatOpenAI
 from ..utils.llm import DeepSeekR1ChatOpenAI
@@ -24,55 +27,55 @@
 logger = logging.getLogger(__name__)
 
 
+class CustomMessageManagerSettings(MessageManagerSettings):
+    agent_prompt_class: Type[AgentMessagePrompt] = AgentMessagePrompt
+
+
 class CustomMessageManager(MessageManager):
     def __init__(
             self,
-            llm: BaseChatModel,
             task: str,
-            action_descriptions: str,
-            system_prompt_class: Type[SystemPrompt],
-            agent_prompt_class: Type[AgentMessagePrompt],
-            max_input_tokens: int = 128000,
-            estimated_characters_per_token: int = 3,
-            image_tokens: int = 800,
-            include_attributes: list[str] = [],
-            max_error_length: int = 400,
-            max_actions_per_step: int = 10,
-            message_context: Optional[str] = None,
-            sensitive_data: Optional[Dict[str, str]] = None,
+            system_message: SystemMessage,
+            settings: MessageManagerSettings = MessageManagerSettings(),
+            state: MessageManagerState = MessageManagerState(),
     ):
         super().__init__(
-            llm=llm,
             task=task,
-            action_descriptions=action_descriptions,
-            system_prompt_class=system_prompt_class,
-            max_input_tokens=max_input_tokens,
-            estimated_characters_per_token=estimated_characters_per_token,
-            image_tokens=image_tokens,
-            include_attributes=include_attributes,
-            max_error_length=max_error_length,
-            max_actions_per_step=max_actions_per_step,
-            message_context=message_context,
-            sensitive_data=sensitive_data
+            system_message=system_message,
+            settings=settings,
+            state=state
         )
-        self.agent_prompt_class = agent_prompt_class
-        # Custom: Move Task info to state_message
-        self.history = MessageHistory()
+
+    def _init_messages(self) -> None:
+        """Initialize the message history with system message, context, task, and other initial messages"""
         self._add_message_with_tokens(self.system_prompt)
-
-        if self.message_context:
-            context_message = HumanMessage(content=self.message_context)
+        self.context_content = ""
+
+        if self.settings.message_context:
+            self.context_content += 'Context for the task' + self.settings.message_context
+
+        if self.settings.sensitive_data:
+            info = f'Here are placeholders for sensitive data: {list(self.settings.sensitive_data.keys())}'
+            info += 'To use them, write <secret>the placeholder name</secret>'
+            self.context_content += info
+
+        if self.settings.available_file_paths:
+            filepaths_msg = f'Here are file paths you can use: {self.settings.available_file_paths}'
+            self.context_content += filepaths_msg
+
+        if self.context_content:
+            context_message = HumanMessage(content=self.context_content)
             self._add_message_with_tokens(context_message)
 
     def cut_messages(self):
         """Get current message list, potentially trimmed to max tokens"""
-        diff = self.history.total_tokens - self.max_input_tokens
-        min_message_len = 2 if self.message_context is not None else 1
-
-        while diff > 0 and len(self.history.messages) > min_message_len:
-            self.history.remove_message(min_message_len)  # always remove the oldest message
-            diff = self.history.total_tokens - self.max_input_tokens
-
+        diff = self.state.history.current_tokens - self.settings.max_input_tokens
+        min_message_len = 2 if self.context_content is not None else 1
+
+        while diff > 0 and len(self.state.history.messages) > min_message_len:
+            self.state.history.remove_message(min_message_len)  # always remove the oldest message
+            diff = self.state.history.current_tokens - self.settings.max_input_tokens
+
     def add_state_message(
             self,
             state: BrowserState,
@@ -83,38 +86,23 @@ def add_state_message(
     ) -> None:
         """Add browser state as human message"""
         # otherwise add state message and result to next message (which will not stay in memory)
-        state_message = self.agent_prompt_class(
+        state_message = self.settings.agent_prompt_class(
             state,
             actions,
             result,
-            include_attributes=self.include_attributes,
-            max_error_length=self.max_error_length,
+            include_attributes=self.settings.include_attributes,
             step_info=step_info,
         ).get_user_message(use_vision)
         self._add_message_with_tokens(state_message)
-
-    def _count_text_tokens(self, text: str) -> int:
-        if isinstance(self.llm, (ChatOpenAI, ChatAnthropic, DeepSeekR1ChatOpenAI)):
-            try:
-                tokens = self.llm.get_num_tokens(text)
-            except Exception:
-                tokens = (
-                    len(text) // self.estimated_characters_per_token
-                )  # Rough estimate if no tokenizer available
-        else:
-            tokens = (
-                len(text) // self.estimated_characters_per_token
-            )  # Rough estimate if no tokenizer available
-        return tokens
 
     def _remove_state_message_by_index(self, remove_ind=-1) -> None:
         """Remove last state message from history"""
-        i = len(self.history.messages) - 1
+        i = len(self.state.history.messages) - 1
         remove_cnt = 0
         while i >= 0:
-            if isinstance(self.history.messages[i].message, HumanMessage):
+            if isinstance(self.state.history.messages[i].message, HumanMessage):
                 remove_cnt += 1
                 if remove_cnt == abs(remove_ind):
-                    self.history.remove_message(i)
+                    self.state.history.messages.pop(i)
                     break
             i -= 1
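
A minimal usage sketch of the reworked constructor, assuming the browser_use 0.1.40 API shown in this diff; the task text, prompt content, and import path are illustrative placeholders rather than part of the commit:

# Hypothetical wiring of the reworked CustomMessageManager (illustrative only;
# the field names come from the diff above, everything else is made up).
from langchain_core.messages import SystemMessage

from src.agent.custom_message_manager import CustomMessageManager, CustomMessageManagerSettings

message_manager = CustomMessageManager(
    task="Find the pricing page and summarize the plans",  # placeholder task
    system_message=SystemMessage(content="You are a browser automation agent."),  # placeholder prompt
    settings=CustomMessageManagerSettings(
        max_input_tokens=128000,   # budget enforced by cut_messages()
        include_attributes=[],     # forwarded to agent_prompt_class in add_state_message()
        message_context=None,      # folded into context_content by _init_messages()
    ),
)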

src/agent/custom_prompts.py (+2, -4)

@@ -20,9 +20,7 @@ def important_rules(self) -> str:
     {
         "current_state": {
             "evaluation_previous_goal": "Success|Failed|Unknown - Analyze the current elements and the image to check if the previous goals/actions are successful like intended by the task. Mention if something unexpected happened. Shortly state why/why not.",
-            "important_contents": "Output important contents closely related to user\'s instruction on the current page. If there is, please output the contents. If not, please output ''.",
-            "task_progress": "Task Progress is a general summary of the current contents that have been completed. Just summarize the contents that have been actually completed based on the content at current step and the history operations. Please list each completed item individually, such as: 1. Input username. 2. Input Password. 3. Click confirm button. Please return string type not a list.",
-            "future_plans": "Based on the user's request and the current state, outline the remaining steps needed to complete the task. This should be a concise list of sub-goals yet to be performed, such as: 1. Select a date. 2. Choose a specific time slot. 3. Confirm booking. Please return string type not a list.",
+            "important_contents": "Output important contents closely related to user's instruction on the current page. If there is, please output the contents. If not, please output ''.",
             "thought": "Think about the requirements that have been completed in previous operations and the requirements that need to be completed in the next one operation. If your output of evaluation_previous_goal is 'Failed', please reflect and output your reflection here.",
             "next_goal": "Please generate a brief natural language description for the goal of your next actions based on your thought."
         },
@@ -167,7 +165,7 @@ def get_user_message(self, use_vision: bool = True) -> HumanMessage:
 
         if self.actions and self.result:
             state_description += "\n **Previous Actions** \n"
-            state_description += f'Previous step: {self.step_info.step_number-1}/{self.step_info.max_steps} \n'
+            state_description += f'Previous step: {self.step_info.step_number - 1}/{self.step_info.max_steps} \n'
             for i, result in enumerate(self.result):
                 action = self.actions[i]
                 state_description += f"Previous action {i + 1}/{len(self.result)}: {action.model_dump_json(exclude_unset=True)}\n"

src/agent/custom_views.py (+20, -7)

@@ -1,7 +1,8 @@
 from dataclasses import dataclass
-from typing import Type
+from typing import Any, Dict, List, Literal, Optional, Type
+import uuid
 
-from browser_use.agent.views import AgentOutput
+from browser_use.agent.views import AgentOutput, AgentState, ActionResult, AgentHistoryList, MessageManagerState
 from browser_use.controller.registry.views import ActionModel
 from pydantic import BaseModel, ConfigDict, Field, create_model
 
@@ -13,17 +14,13 @@ class CustomAgentStepInfo:
     task: str
     add_infos: str
     memory: str
-    task_progress: str
-    future_plans: str
 
 
 class CustomAgentBrain(BaseModel):
     """Current state of the agent"""
 
     evaluation_previous_goal: str
     important_contents: str
-    task_progress: str
-    future_plans: str
     thought: str
     next_goal: str
 
@@ -38,7 +35,7 @@ class CustomAgentOutput(AgentOutput):
 
     @staticmethod
     def type_with_custom_actions(
-        custom_actions: Type[ActionModel],
+            custom_actions: Type[ActionModel],
     ) -> Type["CustomAgentOutput"]:
         """Extend actions with custom actions"""
         model_ = create_model(
@@ -52,3 +49,19 @@
         )
         model_.__doc__ = 'AgentOutput model with custom actions'
         return model_
+
+
+class CustomAgentState(BaseModel):
+    agent_id: str = Field(default_factory=lambda: str(uuid.uuid4()))
+    n_steps: int = 1
+    consecutive_failures: int = 0
+    last_result: Optional[List['ActionResult']] = None
+    history: AgentHistoryList = Field(default_factory=lambda: AgentHistoryList(history=[]))
+    last_plan: Optional[str] = None
+    paused: bool = False
+    stopped: bool = False
+
+    message_manager_state: MessageManagerState = Field(default_factory=MessageManagerState)
+
+    last_action: Optional[List['ActionModel']] = None
+    extracted_content: str = ''
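
A short sketch of the new per-run state container with the defaults shown above; purely illustrative:

# Illustrative only: the new per-run state object and its defaults.
from src.agent.custom_views import CustomAgentState

state = CustomAgentState()            # agent_id is auto-generated via uuid4
print(state.n_steps)                  # 1
print(state.consecutive_failures)     # 0
print(len(state.history.history))     # 0 - empty AgentHistoryList
# state.message_manager_state can be handed to CustomMessageManager via its `state=` argument.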

src/browser/custom_browser.py (+3, -2)

@@ -18,10 +18,11 @@
 
 logger = logging.getLogger(__name__)
 
+
 class CustomBrowser(Browser):
 
     async def new_context(
-        self,
-        config: BrowserContextConfig = BrowserContextConfig()
+            self,
+            config: BrowserContextConfig = BrowserContextConfig()
     ) -> CustomBrowserContext:
         return CustomBrowserContext(config=config, browser=self)

src/browser/custom_context.py (+4, -4)

@@ -12,8 +12,8 @@
 
 class CustomBrowserContext(BrowserContext):
     def __init__(
-        self,
-        browser: "Browser",
-        config: BrowserContextConfig = BrowserContextConfig()
+            self,
+            browser: "Browser",
+            config: BrowserContextConfig = BrowserContextConfig()
     ):
-        super(CustomBrowserContext, self).__init__(browser=browser, config=config)
+        super(CustomBrowserContext, self).__init__(browser=browser, config=config)
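
For completeness, a hypothetical sketch of opening a context with these classes; BrowserConfig, its headless flag, and the import paths are assumptions about the browser_use 0.1.40 API rather than something shown in this diff:

# Hypothetical usage (not part of this commit): open a custom browser context.
import asyncio

from browser_use.browser.browser import BrowserConfig  # assumed import path
from browser_use.browser.context import BrowserContextConfig  # assumed import path

from src.browser.custom_browser import CustomBrowser


async def main():
    browser = CustomBrowser(config=BrowserConfig(headless=False))  # assumed constructor kwargs
    context = await browser.new_context(config=BrowserContextConfig())
    # ... drive the agent with `context` here ...
    await context.close()
    await browser.close()


asyncio.run(main())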

src/utils/deep_research.py (+5, -4)

@@ -310,11 +310,12 @@ async def extract_content(browser: BrowserContext):
         await browser_context.close()
         logger.info("Browser closed.")
 
+
 async def generate_final_report(task, history_infos, save_dir, llm, error_msg=None):
     """Generate report from collected information with error handling"""
     try:
         logger.info("\nAttempting to generate final report from collected data...")
-
+
         writer_system_prompt = """
 You are a **Deep Researcher** and a professional report writer tasked with creating polished, high-quality reports that fully meet the user's needs, based on the user's instructions and the relevant information provided. You will write the report using Markdown format, ensuring it is both informative and visually appealing.
@@ -366,9 +367,9 @@ async def generate_final_report(task, history_infos, save_dir, llm, error_msg=None):
         # Add error notification to the report
         if error_msg:
             report_content = f"## ⚠️ Research Incomplete - Partial Results\n" \
-                f"**The research process was interrupted by an error:** {error_msg}\n\n" \
-                f"{report_content}"
-
+                             f"**The research process was interrupted by an error:** {error_msg}\n\n" \
+                             f"{report_content}"
+
         report_file_path = os.path.join(save_dir, "final_report.md")
         with open(report_file_path, "w", encoding="utf-8") as f:
             f.write(report_content)
