update to browser-use==0.1.37

vvincent1234 · vvincent1234 · commit 2538a75e982e · 2025-02-16T13:20:04.000+08:00
diff --git a/requirements.txt b/requirements.txt
@@ -1,4 +1,4 @@
-browser-use==0.1.29
+browser-use==0.1.37
 pyperclip==1.9.0
 gradio==5.10.0
 json-repair
diff --git a/src/agent/custom_agent.py b/src/agent/custom_agent.py
diff --git a/src/agent/custom_message_manager.py b/src/agent/custom_message_manager.py
@@ -1,7 +1,7 @@
 from __future__ import annotations
 
 import logging
-from typing import List, Optional, Type
+from typing import List, Optional, Type, Dict
 
 from browser_use.agent.message_manager.service import MessageManager
 from browser_use.agent.message_manager.views import MessageHistory
@@ -38,7 +38,8 @@ def __init__(
             include_attributes: list[str] = [],
             max_error_length: int = 400,
             max_actions_per_step: int = 10,
-            message_context: Optional[str] = None
+            message_context: Optional[str] = None,
+            sensitive_data: Optional[Dict[str, str]] = None,
     ):
         super().__init__(
             llm=llm,
@@ -51,7 +52,8 @@ def __init__(
             include_attributes=include_attributes,
             max_error_length=max_error_length,
             max_actions_per_step=max_actions_per_step,
-            message_context=message_context
+            message_context=message_context,
+            sensitive_data=sensitive_data
         )
         self.agent_prompt_class = agent_prompt_class
         # Custom: Move Task info to state_message
@@ -68,7 +70,7 @@ def cut_messages(self):
         min_message_len = 2 if self.message_context is not None else 1
         
         while diff > 0 and len(self.history.messages) > min_message_len:
-            self.history.remove_message(min_message_len) # alway remove the oldest message
+            self.history.remove_message(min_message_len)  # always remove the oldest message
             diff = self.history.total_tokens - self.max_input_tokens
         
     def add_state_message(
@@ -77,6 +79,7 @@ def add_state_message(
             actions: Optional[List[ActionModel]] = None,
             result: Optional[List[ActionResult]] = None,
             step_info: Optional[AgentStepInfo] = None,
+            use_vision=True,
     ) -> None:
         """Add browser state as human message"""
         # otherwise add state message and result to next message (which will not stay in memory)
@@ -87,7 +90,7 @@ def add_state_message(
             include_attributes=self.include_attributes,
             max_error_length=self.max_error_length,
             step_info=step_info,
-        ).get_user_message()
+        ).get_user_message(use_vision)
         self._add_message_with_tokens(state_message)
     
     def _count_text_tokens(self, text: str) -> int:
@@ -114,4 +117,4 @@ def _remove_state_message_by_index(self, remove_ind=-1) -> None:
             if remove_cnt == abs(remove_ind):
                 self.history.remove_message(i)
                 break
-            i -= 1
+            i -= 1
diff --git a/src/agent/custom_prompts.py b/src/agent/custom_prompts.py
diff --git a/src/browser/custom_browser.py b/src/browser/custom_browser.py
@@ -25,57 +25,3 @@ async def new_context(
         config: BrowserContextConfig = BrowserContextConfig()
     ) -> CustomBrowserContext:
         return CustomBrowserContext(config=config, browser=self)
-    
-    async def _setup_browser_with_instance(self, playwright: Playwright) -> PlaywrightBrowser:
-        """Sets up and returns a Playwright Browser instance with anti-detection measures."""
-        if not self.config.chrome_instance_path:
-            raise ValueError('Chrome instance path is required')
-        import subprocess
-
-        import requests
-
-        try:
-            # Check if browser is already running
-            response = requests.get('http://localhost:9222/json/version', timeout=2)
-            if response.status_code == 200:
-                logger.info('Reusing existing Chrome instance')
-                browser = await playwright.chromium.connect_over_cdp(
-                    endpoint_url='http://localhost:9222',
-                    timeout=20000,  # 20 second timeout for connection
-                )
-                return browser
-        except requests.ConnectionError:
-            logger.debug('No existing Chrome instance found, starting a new one')
-
-        # Start a new Chrome instance
-        subprocess.Popen(
-            [
-                self.config.chrome_instance_path,
-                '--remote-debugging-port=9222',
-            ] + self.config.extra_chromium_args,
-            stdout=subprocess.DEVNULL,
-            stderr=subprocess.DEVNULL,
-        )
-  
-        # try to connect first in case the browser have not started
-        for _ in range(10):
-            try:
-                response = requests.get('http://localhost:9222/json/version', timeout=2)
-                if response.status_code == 200:
-                    break
-            except requests.ConnectionError:
-                pass
-            await asyncio.sleep(1)
-
-        # Attempt to connect again after starting a new instance
-        try:
-            browser = await playwright.chromium.connect_over_cdp(
-                endpoint_url='http://localhost:9222',
-                timeout=20000,  # 20 second timeout for connection
-            )
-            return browser
-        except Exception as e:
-            logger.error(f'Failed to start a new Chrome instance.: {str(e)}')
-            raise RuntimeError(
-                ' To start chrome in Debug mode, you need to close all existing Chrome instances and try again otherwise we can not connect to the instance.'
-            )
diff --git a/src/controller/custom_controller.py b/src/controller/custom_controller.py
@@ -39,33 +39,11 @@ def copy_to_clipboard(text: str):
             pyperclip.copy(text)
             return ActionResult(extracted_content=text)
 
-        @self.registry.action("Paste text from clipboard", requires_browser=True)
+        @self.registry.action("Paste text from clipboard")
         async def paste_from_clipboard(browser: BrowserContext):
             text = pyperclip.paste()
             # send text to browser
             page = await browser.get_current_page()
             await page.keyboard.type(text)
 
             return ActionResult(extracted_content=text)
-
-        @self.registry.action(
-            'Extract page content to get the pure text or markdown with links if include_links is set to true',
-            param_model=ExtractPageContentAction,
-            requires_browser=True,
-        )
-        async def extract_content(params: ExtractPageContentAction, browser: BrowserContext):
-            page = await browser.get_current_page()
-            # use jina reader
-            url = page.url
-            jina_url = f"https://r.jina.ai/{url}"
-            await page.goto(jina_url)
-            output_format = 'markdown' if params.include_links else 'text'
-            content = MainContentExtractor.extract(  # type: ignore
-                html=await page.content(),
-                output_format=output_format,
-            )
-            # go back to org url
-            await page.go_back()
-            msg = f'Extracted page content:\n {content}\n'
-            logger.info(msg)
-            return ActionResult(extracted_content=msg)
diff --git a/src/utils/deep_research.py b/src/utils/deep_research.py
@@ -15,12 +15,16 @@
 import re
 from browser_use.agent.service import Agent
 from browser_use.browser.browser import BrowserConfig, Browser
+from browser_use.agent.views import ActionResult
+from browser_use.browser.context import BrowserContext
+from browser_use.controller.service import Controller, DoneAction
+from main_content_extractor import MainContentExtractor
 from langchain.schema import SystemMessage, HumanMessage
 from json_repair import repair_json
 from src.agent.custom_prompts import CustomSystemPrompt, CustomAgentMessagePrompt
 from src.controller.custom_controller import CustomController
 from src.browser.custom_browser import CustomBrowser
-from src.browser.custom_context import BrowserContextConfig
+from src.browser.custom_context import BrowserContextConfig, BrowserContext
 from browser_use.browser.context import (
     BrowserContextConfig,
     BrowserContextWindowSize,
@@ -65,6 +69,27 @@ async def deep_research(task, llm, agent_state=None, **kwargs):
 
     controller = CustomController()
 
+    @controller.registry.action(
+        'Extract page content to get the pure markdown.',
+    )
+    async def extract_content(browser: BrowserContext):
+        page = await browser.get_current_page()
+        # load the current URL through the Jina Reader proxy (r.jina.ai)
+        url = page.url
+
+        jina_url = f"https://r.jina.ai/{url}"
+        await page.goto(jina_url)
+        output_format = 'markdown'
+        content = MainContentExtractor.extract(  # type: ignore
+            html=await page.content(),
+            output_format=output_format,
+        )
+        # navigate back to the original URL
+        await page.go_back()
+        msg = f'Extracted page content:\n {content}\n'
+        logger.info(msg)
+        return ActionResult(extracted_content=msg)
+
     search_system_prompt = f"""
     You are a **Deep Researcher**, an AI agent specializing in in-depth information gathering and research using a web browser with **automated execution capabilities**. Your expertise lies in formulating comprehensive research plans and executing them meticulously to fulfill complex user requests. You will analyze user instructions, devise a detailed research plan, and determine the necessary search queries to gather the required information.
 
@@ -200,8 +225,7 @@ async def deep_research(task, llm, agent_state=None, **kwargs):
                     system_prompt_class=CustomSystemPrompt,
                     agent_prompt_class=CustomAgentMessagePrompt,
                     max_actions_per_step=5,
-                    controller=controller,
-                    agent_state=agent_state
+                    controller=controller
                 )
                 agent_result = await agent.run(max_steps=kwargs.get("max_steps", 10))
                 query_results = [agent_result]
@@ -224,7 +248,6 @@ async def deep_research(task, llm, agent_state=None, **kwargs):
                     agent_prompt_class=CustomAgentMessagePrompt,
                     max_actions_per_step=5,
                     controller=controller,
-                    agent_state=agent_state
                 ) for task in query_tasks]
                 query_results = await asyncio.gather(
                     *[agent.run(max_steps=kwargs.get("max_steps", 10)) for agent in agents])
@@ -265,6 +288,9 @@ async def deep_research(task, llm, agent_state=None, **kwargs):
                     record_content = repair_json(record_content)
                     new_record_infos = json.loads(record_content)
                     history_infos.extend(new_record_infos)
+            if agent_state and agent_state.is_stop_requested():
+                # Stop
+                break
 
         logger.info("\nFinish Searching, Start Generating Report...")
 
diff --git a/tests/test_browser_use.py b/tests/test_browser_use.py
@@ -128,7 +128,7 @@ async def test_browser_use_custom():
 
     # llm = utils.get_llm_model(
     #     provider="google",
-    #     model_name="gemini-2.0-flash-exp",
+    #     model_name="gemini-2.0-flash",
     #     temperature=1.0,
     #     api_key=os.getenv("GOOGLE_API_KEY", "")
     # )
@@ -193,7 +193,7 @@ async def test_browser_use_custom():
             )
         )
         agent = CustomAgent(
-            task="Search 'Nvidia' and give me the first url",
+            task="Give me stock price of Tesla",
             add_infos="",  # some hints for llm to complete the task
             llm=llm,
             browser=browser,
diff --git a/webui.py b/webui.py
@@ -39,17 +39,18 @@
 # Global variables for persistence
 _global_browser = None
 _global_browser_context = None
+_global_agent = None
 
 # Create the global agent state instance
 _global_agent_state = AgentState()
 
 async def stop_agent():
     """Request the agent to stop and update UI with enhanced feedback"""
-    global _global_agent_state, _global_browser_context, _global_browser
+    global _global_agent_state, _global_browser_context, _global_browser, _global_agent
 
     try:
         # Request stop
-        _global_agent_state.request_stop()
+        _global_agent.stop()
 
         # Update UI immediately
         message = "Stop requested - the agent will halt at the next safe point"
@@ -247,7 +248,7 @@ async def run_org_agent(
         tool_calling_method
 ):
     try:
-        global _global_browser, _global_browser_context, _global_agent_state
+        global _global_browser, _global_browser_context, _global_agent_state, _global_agent
         
         # Clear any previous stop request
         _global_agent_state.clear_stop()
@@ -284,20 +285,21 @@ async def run_org_agent(
                     ),
                 )
             )
-            
-        agent = Agent(
-            task=task,
-            llm=llm,
-            use_vision=use_vision,
-            browser=_global_browser,
-            browser_context=_global_browser_context,
-            max_actions_per_step=max_actions_per_step,
-            tool_calling_method=tool_calling_method
-        )
-        history = await agent.run(max_steps=max_steps)
 
-        history_file = os.path.join(save_agent_history_path, f"{agent.agent_id}.json")
-        agent.save_history(history_file)
+        if _global_agent is None:
+            _global_agent = Agent(
+                task=task,
+                llm=llm,
+                use_vision=use_vision,
+                browser=_global_browser,
+                browser_context=_global_browser_context,
+                max_actions_per_step=max_actions_per_step,
+                tool_calling_method=tool_calling_method
+            )
+        history = await _global_agent.run(max_steps=max_steps)
+
+        history_file = os.path.join(save_agent_history_path, f"{_global_agent.agent_id}.json")
+        _global_agent.save_history(history_file)
 
         final_result = history.final_result()
         errors = history.errors()
@@ -313,6 +315,7 @@ async def run_org_agent(
         errors = str(e) + "\n" + traceback.format_exc()
         return '', errors, '', '', None, None
     finally:
+        _global_agent = None
         # Handle cleanup based on persistence configuration
         if not keep_browser_open:
             if _global_browser_context:
@@ -342,7 +345,7 @@ async def run_custom_agent(
         tool_calling_method
 ):
     try:
-        global _global_browser, _global_browser_context, _global_agent_state
+        global _global_browser, _global_browser_context, _global_agent_state, _global_agent
 
         # Clear any previous stop request
         _global_agent_state.clear_stop()
@@ -384,24 +387,24 @@ async def run_custom_agent(
             )
             
         # Create and run agent
-        agent = CustomAgent(
-            task=task,
-            add_infos=add_infos,
-            use_vision=use_vision,
-            llm=llm,
-            browser=_global_browser,
-            browser_context=_global_browser_context,
-            controller=controller,
-            system_prompt_class=CustomSystemPrompt,
-            agent_prompt_class=CustomAgentMessagePrompt,
-            max_actions_per_step=max_actions_per_step,
-            agent_state=_global_agent_state,
-            tool_calling_method=tool_calling_method
-        )
-        history = await agent.run(max_steps=max_steps)
+        if _global_agent is None:
+            _global_agent = CustomAgent(
+                task=task,
+                add_infos=add_infos,
+                use_vision=use_vision,
+                llm=llm,
+                browser=_global_browser,
+                browser_context=_global_browser_context,
+                controller=controller,
+                system_prompt_class=CustomSystemPrompt,
+                agent_prompt_class=CustomAgentMessagePrompt,
+                max_actions_per_step=max_actions_per_step,
+                tool_calling_method=tool_calling_method
+            )
+        history = await _global_agent.run(max_steps=max_steps)
 
-        history_file = os.path.join(save_agent_history_path, f"{agent.agent_id}.json")
-        agent.save_history(history_file)
+        history_file = os.path.join(save_agent_history_path, f"{_global_agent.agent_id}.json")
+        _global_agent.save_history(history_file)
 
         final_result = history.final_result()
         errors = history.errors()
@@ -417,6 +420,7 @@ async def run_custom_agent(
         errors = str(e) + "\n" + traceback.format_exc()
         return '', errors, '', '', None, None
     finally:
+        _global_agent = None
         # Handle cleanup based on persistence configuration
         if not keep_browser_open:
             if _global_browser_context:

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
|  |  | `@@ -1,4 +1,4 @@` |
| `1` |  | `-browser-use==0.1.29` |
|  | `1` | `+browser-use==0.1.37` |
| `2` | `2` | `pyperclip==1.9.0` |
| `3` | `3` | `gradio==5.10.0` |
| `4` | `4` | `json-repair` |