Skip to content

Commit 2538a75

Browse files
committed
update to browser-use==0.1.37
1 parent 70ac1df commit 2538a75

9 files changed

+327
-369
lines changed

requirements.txt

+1-1
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
browser-use==0.1.29
1+
browser-use==0.1.37
22
pyperclip==1.9.0
33
gradio==5.10.0
44
json-repair

src/agent/custom_agent.py

+143-122
Large diffs are not rendered by default.

src/agent/custom_message_manager.py

+9-6
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
from __future__ import annotations
22

33
import logging
4-
from typing import List, Optional, Type
4+
from typing import List, Optional, Type, Dict
55

66
from browser_use.agent.message_manager.service import MessageManager
77
from browser_use.agent.message_manager.views import MessageHistory
@@ -38,7 +38,8 @@ def __init__(
3838
include_attributes: list[str] = [],
3939
max_error_length: int = 400,
4040
max_actions_per_step: int = 10,
41-
message_context: Optional[str] = None
41+
message_context: Optional[str] = None,
42+
sensitive_data: Optional[Dict[str, str]] = None,
4243
):
4344
super().__init__(
4445
llm=llm,
@@ -51,7 +52,8 @@ def __init__(
5152
include_attributes=include_attributes,
5253
max_error_length=max_error_length,
5354
max_actions_per_step=max_actions_per_step,
54-
message_context=message_context
55+
message_context=message_context,
56+
sensitive_data=sensitive_data
5557
)
5658
self.agent_prompt_class = agent_prompt_class
5759
# Custom: Move Task info to state_message
@@ -68,7 +70,7 @@ def cut_messages(self):
6870
min_message_len = 2 if self.message_context is not None else 1
6971

7072
while diff > 0 and len(self.history.messages) > min_message_len:
71-
self.history.remove_message(min_message_len) # alway remove the oldest message
73+
self.history.remove_message(min_message_len) # always remove the oldest message
7274
diff = self.history.total_tokens - self.max_input_tokens
7375

7476
def add_state_message(
@@ -77,6 +79,7 @@ def add_state_message(
7779
actions: Optional[List[ActionModel]] = None,
7880
result: Optional[List[ActionResult]] = None,
7981
step_info: Optional[AgentStepInfo] = None,
82+
use_vision=True,
8083
) -> None:
8184
"""Add browser state as human message"""
8285
# otherwise add state message and result to next message (which will not stay in memory)
@@ -87,7 +90,7 @@ def add_state_message(
8790
include_attributes=self.include_attributes,
8891
max_error_length=self.max_error_length,
8992
step_info=step_info,
90-
).get_user_message()
93+
).get_user_message(use_vision)
9194
self._add_message_with_tokens(state_message)
9295

9396
def _count_text_tokens(self, text: str) -> int:
@@ -114,4 +117,4 @@ def _remove_state_message_by_index(self, remove_ind=-1) -> None:
114117
if remove_cnt == abs(remove_ind):
115118
self.history.remove_message(i)
116119
break
117-
i -= 1
120+
i -= 1

src/agent/custom_prompts.py

+103-123
Large diffs are not rendered by default.

src/browser/custom_browser.py

-54
Original file line number | Diff line number | Diff line change
@@ -25,57 +25,3 @@ async def new_context(
2525
config: BrowserContextConfig = BrowserContextConfig()
2626
) -> CustomBrowserContext:
2727
return CustomBrowserContext(config=config, browser=self)
28-
29-
async def _setup_browser_with_instance(self, playwright: Playwright) -> PlaywrightBrowser:
30-
"""Sets up and returns a Playwright Browser instance with anti-detection measures."""
31-
if not self.config.chrome_instance_path:
32-
raise ValueError('Chrome instance path is required')
33-
import subprocess
34-
35-
import requests
36-
37-
try:
38-
# Check if browser is already running
39-
response = requests.get('http://localhost:9222/json/version', timeout=2)
40-
if response.status_code == 200:
41-
logger.info('Reusing existing Chrome instance')
42-
browser = await playwright.chromium.connect_over_cdp(
43-
endpoint_url='http://localhost:9222',
44-
timeout=20000, # 20 second timeout for connection
45-
)
46-
return browser
47-
except requests.ConnectionError:
48-
logger.debug('No existing Chrome instance found, starting a new one')
49-
50-
# Start a new Chrome instance
51-
subprocess.Popen(
52-
[
53-
self.config.chrome_instance_path,
54-
'--remote-debugging-port=9222',
55-
] + self.config.extra_chromium_args,
56-
stdout=subprocess.DEVNULL,
57-
stderr=subprocess.DEVNULL,
58-
)
59-
60-
# try to connect first in case the browser have not started
61-
for _ in range(10):
62-
try:
63-
response = requests.get('http://localhost:9222/json/version', timeout=2)
64-
if response.status_code == 200:
65-
break
66-
except requests.ConnectionError:
67-
pass
68-
await asyncio.sleep(1)
69-
70-
# Attempt to connect again after starting a new instance
71-
try:
72-
browser = await playwright.chromium.connect_over_cdp(
73-
endpoint_url='http://localhost:9222',
74-
timeout=20000, # 20 second timeout for connection
75-
)
76-
return browser
77-
except Exception as e:
78-
logger.error(f'Failed to start a new Chrome instance.: {str(e)}')
79-
raise RuntimeError(
80-
' To start chrome in Debug mode, you need to close all existing Chrome instances and try again otherwise we can not connect to the instance.'
81-
)

src/controller/custom_controller.py

+1-23
Original file line number | Diff line number | Diff line change
@@ -39,33 +39,11 @@ def copy_to_clipboard(text: str):
3939
pyperclip.copy(text)
4040
return ActionResult(extracted_content=text)
4141

42-
@self.registry.action("Paste text from clipboard", requires_browser=True)
42+
@self.registry.action("Paste text from clipboard")
4343
async def paste_from_clipboard(browser: BrowserContext):
4444
text = pyperclip.paste()
4545
# send text to browser
4646
page = await browser.get_current_page()
4747
await page.keyboard.type(text)
4848

4949
return ActionResult(extracted_content=text)
50-
51-
@self.registry.action(
52-
'Extract page content to get the pure text or markdown with links if include_links is set to true',
53-
param_model=ExtractPageContentAction,
54-
requires_browser=True,
55-
)
56-
async def extract_content(params: ExtractPageContentAction, browser: BrowserContext):
57-
page = await browser.get_current_page()
58-
# use jina reader
59-
url = page.url
60-
jina_url = f"https://r.jina.ai/{url}"
61-
await page.goto(jina_url)
62-
output_format = 'markdown' if params.include_links else 'text'
63-
content = MainContentExtractor.extract( # type: ignore
64-
html=await page.content(),
65-
output_format=output_format,
66-
)
67-
# go back to org url
68-
await page.go_back()
69-
msg = f'Extracted page content:\n {content}\n'
70-
logger.info(msg)
71-
return ActionResult(extracted_content=msg)

src/utils/deep_research.py

+30-4
Original file line number | Diff line number | Diff line change
@@ -15,12 +15,16 @@
1515
import re
1616
from browser_use.agent.service import Agent
1717
from browser_use.browser.browser import BrowserConfig, Browser
18+
from browser_use.agent.views import ActionResult
19+
from browser_use.browser.context import BrowserContext
20+
from browser_use.controller.service import Controller, DoneAction
21+
from main_content_extractor import MainContentExtractor
1822
from langchain.schema import SystemMessage, HumanMessage
1923
from json_repair import repair_json
2024
from src.agent.custom_prompts import CustomSystemPrompt, CustomAgentMessagePrompt
2125
from src.controller.custom_controller import CustomController
2226
from src.browser.custom_browser import CustomBrowser
23-
from src.browser.custom_context import BrowserContextConfig
27+
from src.browser.custom_context import BrowserContextConfig, BrowserContext
2428
from browser_use.browser.context import (
2529
BrowserContextConfig,
2630
BrowserContextWindowSize,
@@ -65,6 +69,27 @@ async def deep_research(task, llm, agent_state=None, **kwargs):
6569

6670
controller = CustomController()
6771

72+
@controller.registry.action(
73+
'Extract page content to get the pure markdown.',
74+
)
75+
async def extract_content(browser: BrowserContext):
76+
page = await browser.get_current_page()
77+
# use jina reader
78+
url = page.url
79+
80+
jina_url = f"https://r.jina.ai/{url}"
81+
await page.goto(jina_url)
82+
output_format = 'markdown'
83+
content = MainContentExtractor.extract( # type: ignore
84+
html=await page.content(),
85+
output_format=output_format,
86+
)
87+
# go back to org url
88+
await page.go_back()
89+
msg = f'Extracted page content:\n {content}\n'
90+
logger.info(msg)
91+
return ActionResult(extracted_content=msg)
92+
6893
search_system_prompt = f"""
6994
You are a **Deep Researcher**, an AI agent specializing in in-depth information gathering and research using a web browser with **automated execution capabilities**. Your expertise lies in formulating comprehensive research plans and executing them meticulously to fulfill complex user requests. You will analyze user instructions, devise a detailed research plan, and determine the necessary search queries to gather the required information.
7095
@@ -200,8 +225,7 @@ async def deep_research(task, llm, agent_state=None, **kwargs):
200225
system_prompt_class=CustomSystemPrompt,
201226
agent_prompt_class=CustomAgentMessagePrompt,
202227
max_actions_per_step=5,
203-
controller=controller,
204-
agent_state=agent_state
228+
controller=controller
205229
)
206230
agent_result = await agent.run(max_steps=kwargs.get("max_steps", 10))
207231
query_results = [agent_result]
@@ -224,7 +248,6 @@ async def deep_research(task, llm, agent_state=None, **kwargs):
224248
agent_prompt_class=CustomAgentMessagePrompt,
225249
max_actions_per_step=5,
226250
controller=controller,
227-
agent_state=agent_state
228251
) for task in query_tasks]
229252
query_results = await asyncio.gather(
230253
*[agent.run(max_steps=kwargs.get("max_steps", 10)) for agent in agents])
@@ -265,6 +288,9 @@ async def deep_research(task, llm, agent_state=None, **kwargs):
265288
record_content = repair_json(record_content)
266289
new_record_infos = json.loads(record_content)
267290
history_infos.extend(new_record_infos)
291+
if agent_state and agent_state.is_stop_requested():
292+
# Stop
293+
break
268294

269295
logger.info("\nFinish Searching, Start Generating Report...")
270296

tests/test_browser_use.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -128,7 +128,7 @@ async def test_browser_use_custom():
128128

129129
# llm = utils.get_llm_model(
130130
# provider="google",
131-
# model_name="gemini-2.0-flash-exp",
131+
# model_name="gemini-2.0-flash",
132132
# temperature=1.0,
133133
# api_key=os.getenv("GOOGLE_API_KEY", "")
134134
# )
@@ -193,7 +193,7 @@ async def test_browser_use_custom():
193193
)
194194
)
195195
agent = CustomAgent(
196-
task="Search 'Nvidia' and give me the first url",
196+
task="Give me stock price of Tesla",
197197
add_infos="", # some hints for llm to complete the task
198198
llm=llm,
199199
browser=browser,

webui.py

+38-34
Original file line number | Diff line number | Diff line change
@@ -39,17 +39,18 @@
3939
# Global variables for persistence
4040
_global_browser = None
4141
_global_browser_context = None
42+
_global_agent = None
4243

4344
# Create the global agent state instance
4445
_global_agent_state = AgentState()
4546

4647
async def stop_agent():
4748
"""Request the agent to stop and update UI with enhanced feedback"""
48-
global _global_agent_state, _global_browser_context, _global_browser
49+
global _global_agent_state, _global_browser_context, _global_browser, _global_agent
4950

5051
try:
5152
# Request stop
52-
_global_agent_state.request_stop()
53+
_global_agent.stop()
5354

5455
# Update UI immediately
5556
message = "Stop requested - the agent will halt at the next safe point"
@@ -247,7 +248,7 @@ async def run_org_agent(
247248
tool_calling_method
248249
):
249250
try:
250-
global _global_browser, _global_browser_context, _global_agent_state
251+
global _global_browser, _global_browser_context, _global_agent_state, _global_agent
251252

252253
# Clear any previous stop request
253254
_global_agent_state.clear_stop()
@@ -284,20 +285,21 @@ async def run_org_agent(
284285
),
285286
)
286287
)
287-
288-
agent = Agent(
289-
task=task,
290-
llm=llm,
291-
use_vision=use_vision,
292-
browser=_global_browser,
293-
browser_context=_global_browser_context,
294-
max_actions_per_step=max_actions_per_step,
295-
tool_calling_method=tool_calling_method
296-
)
297-
history = await agent.run(max_steps=max_steps)
298288

299-
history_file = os.path.join(save_agent_history_path, f"{agent.agent_id}.json")
300-
agent.save_history(history_file)
289+
if _global_agent is None:
290+
_global_agent = Agent(
291+
task=task,
292+
llm=llm,
293+
use_vision=use_vision,
294+
browser=_global_browser,
295+
browser_context=_global_browser_context,
296+
max_actions_per_step=max_actions_per_step,
297+
tool_calling_method=tool_calling_method
298+
)
299+
history = await _global_agent.run(max_steps=max_steps)
300+
301+
history_file = os.path.join(save_agent_history_path, f"{_global_agent.agent_id}.json")
302+
_global_agent.save_history(history_file)
301303

302304
final_result = history.final_result()
303305
errors = history.errors()
@@ -313,6 +315,7 @@ async def run_org_agent(
313315
errors = str(e) + "\n" + traceback.format_exc()
314316
return '', errors, '', '', None, None
315317
finally:
318+
_global_agent = None
316319
# Handle cleanup based on persistence configuration
317320
if not keep_browser_open:
318321
if _global_browser_context:
@@ -342,7 +345,7 @@ async def run_custom_agent(
342345
tool_calling_method
343346
):
344347
try:
345-
global _global_browser, _global_browser_context, _global_agent_state
348+
global _global_browser, _global_browser_context, _global_agent_state, _global_agent
346349

347350
# Clear any previous stop request
348351
_global_agent_state.clear_stop()
@@ -384,24 +387,24 @@ async def run_custom_agent(
384387
)
385388

386389
# Create and run agent
387-
agent = CustomAgent(
388-
task=task,
389-
add_infos=add_infos,
390-
use_vision=use_vision,
391-
llm=llm,
392-
browser=_global_browser,
393-
browser_context=_global_browser_context,
394-
controller=controller,
395-
system_prompt_class=CustomSystemPrompt,
396-
agent_prompt_class=CustomAgentMessagePrompt,
397-
max_actions_per_step=max_actions_per_step,
398-
agent_state=_global_agent_state,
399-
tool_calling_method=tool_calling_method
400-
)
401-
history = await agent.run(max_steps=max_steps)
390+
if _global_agent is None:
391+
_global_agent = CustomAgent(
392+
task=task,
393+
add_infos=add_infos,
394+
use_vision=use_vision,
395+
llm=llm,
396+
browser=_global_browser,
397+
browser_context=_global_browser_context,
398+
controller=controller,
399+
system_prompt_class=CustomSystemPrompt,
400+
agent_prompt_class=CustomAgentMessagePrompt,
401+
max_actions_per_step=max_actions_per_step,
402+
tool_calling_method=tool_calling_method
403+
)
404+
history = await _global_agent.run(max_steps=max_steps)
402405

403-
history_file = os.path.join(save_agent_history_path, f"{agent.agent_id}.json")
404-
agent.save_history(history_file)
406+
history_file = os.path.join(save_agent_history_path, f"{_global_agent.agent_id}.json")
407+
_global_agent.save_history(history_file)
405408

406409
final_result = history.final_result()
407410
errors = history.errors()
@@ -417,6 +420,7 @@ async def run_custom_agent(
417420
errors = str(e) + "\n" + traceback.format_exc()
418421
return '', errors, '', '', None, None
419422
finally:
423+
_global_agent = None
420424
# Handle cleanup based on persistence configuration
421425
if not keep_browser_open:
422426
if _global_browser_context:

0 commit comments

Comments
 (0)