
Commit 45168a3

update to bu=0.1.40
1 parent dd69063 commit 45168a3

11 files changed (+464, -557 lines)

src/agent/custom_agent.py (+184, -295)
Large diffs are not rendered by default.

src/agent/custom_message_manager.py (+48, -60)

@@ -8,14 +8,17 @@
 from browser_use.agent.prompts import SystemPrompt, AgentMessagePrompt
 from browser_use.agent.views import ActionResult, AgentStepInfo, ActionModel
 from browser_use.browser.views import BrowserState
+from browser_use.agent.message_manager.service import MessageManagerSettings
+from browser_use.agent.views import ActionResult, AgentOutput, AgentStepInfo, MessageManagerState
 from langchain_core.language_models import BaseChatModel
 from langchain_anthropic import ChatAnthropic
 from langchain_core.language_models import BaseChatModel
 from langchain_core.messages import (
-    AIMessage,
-    BaseMessage,
-    HumanMessage,
-    ToolMessage
+    AIMessage,
+    BaseMessage,
+    HumanMessage,
+    ToolMessage,
+    SystemMessage
 )
 from langchain_openai import ChatOpenAI
 from ..utils.llm import DeepSeekR1ChatOpenAI
@@ -24,55 +27,55 @@
 logger = logging.getLogger(__name__)
 
 
+class CustomMessageManagerSettings(MessageManagerSettings):
+    agent_prompt_class: Type[AgentMessagePrompt] = AgentMessagePrompt
+
+
 class CustomMessageManager(MessageManager):
     def __init__(
             self,
-            llm: BaseChatModel,
             task: str,
-            action_descriptions: str,
-            system_prompt_class: Type[SystemPrompt],
-            agent_prompt_class: Type[AgentMessagePrompt],
-            max_input_tokens: int = 128000,
-            estimated_characters_per_token: int = 3,
-            image_tokens: int = 800,
-            include_attributes: list[str] = [],
-            max_error_length: int = 400,
-            max_actions_per_step: int = 10,
-            message_context: Optional[str] = None,
-            sensitive_data: Optional[Dict[str, str]] = None,
+            system_message: SystemMessage,
+            settings: MessageManagerSettings = MessageManagerSettings(),
+            state: MessageManagerState = MessageManagerState(),
     ):
         super().__init__(
-            llm=llm,
             task=task,
-            action_descriptions=action_descriptions,
-            system_prompt_class=system_prompt_class,
-            max_input_tokens=max_input_tokens,
-            estimated_characters_per_token=estimated_characters_per_token,
-            image_tokens=image_tokens,
-            include_attributes=include_attributes,
-            max_error_length=max_error_length,
-            max_actions_per_step=max_actions_per_step,
-            message_context=message_context,
-            sensitive_data=sensitive_data
+            system_message=system_message,
+            settings=settings,
+            state=state
         )
-        self.agent_prompt_class = agent_prompt_class
-        # Custom: Move Task info to state_message
-        self.history = MessageHistory()
+
+    def _init_messages(self) -> None:
+        """Initialize the message history with system message, context, task, and other initial messages"""
         self._add_message_with_tokens(self.system_prompt)
-
-        if self.message_context:
-            context_message = HumanMessage(content=self.message_context)
+        self.context_content = ""
+
+        if self.settings.message_context:
+            self.context_content += 'Context for the task' + self.settings.message_context
+
+        if self.settings.sensitive_data:
+            info = f'Here are placeholders for sensitive data: {list(self.settings.sensitive_data.keys())}'
+            info += 'To use them, write <secret>the placeholder name</secret>'
+            self.context_content += info
+
+        if self.settings.available_file_paths:
+            filepaths_msg = f'Here are file paths you can use: {self.settings.available_file_paths}'
+            self.context_content += filepaths_msg
+
+        if self.context_content:
+            context_message = HumanMessage(content=self.context_content)
             self._add_message_with_tokens(context_message)
 
     def cut_messages(self):
         """Get current message list, potentially trimmed to max tokens"""
-        diff = self.history.total_tokens - self.max_input_tokens
-        min_message_len = 2 if self.message_context is not None else 1
-
-        while diff > 0 and len(self.history.messages) > min_message_len:
-            self.history.remove_message(min_message_len)  # always remove the oldest message
-            diff = self.history.total_tokens - self.max_input_tokens
-
+        diff = self.state.history.current_tokens - self.settings.max_input_tokens
+        min_message_len = 2 if self.context_content is not None else 1
+
+        while diff > 0 and len(self.state.history.messages) > min_message_len:
+            self.state.history.remove_message(min_message_len)  # always remove the oldest message
+            diff = self.state.history.current_tokens - self.settings.max_input_tokens
+
     def add_state_message(
             self,
             state: BrowserState,
@@ -83,38 +86,23 @@ def add_state_message(
     ) -> None:
         """Add browser state as human message"""
         # otherwise add state message and result to next message (which will not stay in memory)
-        state_message = self.agent_prompt_class(
+        state_message = self.settings.agent_prompt_class(
             state,
             actions,
             result,
-            include_attributes=self.include_attributes,
-            max_error_length=self.max_error_length,
+            include_attributes=self.settings.include_attributes,
             step_info=step_info,
         ).get_user_message(use_vision)
         self._add_message_with_tokens(state_message)
-
-    def _count_text_tokens(self, text: str) -> int:
-        if isinstance(self.llm, (ChatOpenAI, ChatAnthropic, DeepSeekR1ChatOpenAI)):
-            try:
-                tokens = self.llm.get_num_tokens(text)
-            except Exception:
-                tokens = (
-                    len(text) // self.estimated_characters_per_token
-                )  # Rough estimate if no tokenizer available
-        else:
-            tokens = (
-                len(text) // self.estimated_characters_per_token
-            )  # Rough estimate if no tokenizer available
-        return tokens
 
     def _remove_state_message_by_index(self, remove_ind=-1) -> None:
         """Remove last state message from history"""
-        i = len(self.history.messages) - 1
+        i = len(self.state.history.messages) - 1
         remove_cnt = 0
         while i >= 0:
-            if isinstance(self.history.messages[i].message, HumanMessage):
+            if isinstance(self.state.history.messages[i].message, HumanMessage):
                 remove_cnt += 1
                 if remove_cnt == abs(remove_ind):
-                    self.history.remove_message(i)
+                    self.state.history.messages.pop(i)
                     break
             i -= 1
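
A minimal usage sketch of the reworked constructor, assuming the browser_use 0.1.40 API shown in this diff; the task text, prompt content, and import path are illustrative placeholders rather than part of the commit:

# Hypothetical wiring of the reworked CustomMessageManager (illustrative only;
# the field names come from the diff above, everything else is made up).
from langchain_core.messages import SystemMessage

from src.agent.custom_message_manager import CustomMessageManager, CustomMessageManagerSettings

message_manager = CustomMessageManager(
    task="Find the pricing page and summarize the plans",  # placeholder task
    system_message=SystemMessage(content="You are a browser automation agent."),  # placeholder prompt
    settings=CustomMessageManagerSettings(
        max_input_tokens=128000,   # budget enforced by cut_messages()
        include_attributes=[],     # forwarded to agent_prompt_class in add_state_message()
        message_context=None,      # folded into context_content by _init_messages()
    ),
)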

src/agent/custom_prompts.py (+2, -4)

@@ -20,9 +20,7 @@ def important_rules(self) -> str:
     {
         "current_state": {
             "evaluation_previous_goal": "Success|Failed|Unknown - Analyze the current elements and the image to check if the previous goals/actions are successful like intended by the task. Mention if something unexpected happened. Shortly state why/why not.",
-            "important_contents": "Output important contents closely related to user\'s instruction on the current page. If there is, please output the contents. If not, please output ''.",
-            "task_progress": "Task Progress is a general summary of the current contents that have been completed. Just summarize the contents that have been actually completed based on the content at current step and the history operations. Please list each completed item individually, such as: 1. Input username. 2. Input Password. 3. Click confirm button. Please return string type not a list.",
-            "future_plans": "Based on the user's request and the current state, outline the remaining steps needed to complete the task. This should be a concise list of sub-goals yet to be performed, such as: 1. Select a date. 2. Choose a specific time slot. 3. Confirm booking. Please return string type not a list.",
+            "important_contents": "Output important contents closely related to user's instruction on the current page. If there is, please output the contents. If not, please output ''.",
             "thought": "Think about the requirements that have been completed in previous operations and the requirements that need to be completed in the next one operation. If your output of evaluation_previous_goal is 'Failed', please reflect and output your reflection here.",
             "next_goal": "Please generate a brief natural language description for the goal of your next actions based on your thought."
         },
@@ -167,7 +165,7 @@ def get_user_message(self, use_vision: bool = True) -> HumanMessage:
 
         if self.actions and self.result:
             state_description += "\n **Previous Actions** \n"
-            state_description += f'Previous step: {self.step_info.step_number-1}/{self.step_info.max_steps} \n'
+            state_description += f'Previous step: {self.step_info.step_number - 1}/{self.step_info.max_steps} \n'
             for i, result in enumerate(self.result):
                 action = self.actions[i]
                 state_description += f"Previous action {i + 1}/{len(self.result)}: {action.model_dump_json(exclude_unset=True)}\n"

src/agent/custom_views.py (+20, -7)

@@ -1,7 +1,8 @@
 from dataclasses import dataclass
-from typing import Type
+from typing import Any, Dict, List, Literal, Optional, Type
+import uuid
 
-from browser_use.agent.views import AgentOutput
+from browser_use.agent.views import AgentOutput, AgentState, ActionResult, AgentHistoryList, MessageManagerState
 from browser_use.controller.registry.views import ActionModel
 from pydantic import BaseModel, ConfigDict, Field, create_model
 
@@ -13,17 +14,13 @@ class CustomAgentStepInfo:
     task: str
     add_infos: str
     memory: str
-    task_progress: str
-    future_plans: str
 
 
 class CustomAgentBrain(BaseModel):
     """Current state of the agent"""
 
     evaluation_previous_goal: str
     important_contents: str
-    task_progress: str
-    future_plans: str
     thought: str
     next_goal: str
 
@@ -38,7 +35,7 @@ class CustomAgentOutput(AgentOutput):
 
     @staticmethod
     def type_with_custom_actions(
-        custom_actions: Type[ActionModel],
+            custom_actions: Type[ActionModel],
     ) -> Type["CustomAgentOutput"]:
         """Extend actions with custom actions"""
         model_ = create_model(
@@ -52,3 +49,19 @@
         )
         model_.__doc__ = 'AgentOutput model with custom actions'
         return model_
+
+
+class CustomAgentState(BaseModel):
+    agent_id: str = Field(default_factory=lambda: str(uuid.uuid4()))
+    n_steps: int = 1
+    consecutive_failures: int = 0
+    last_result: Optional[List['ActionResult']] = None
+    history: AgentHistoryList = Field(default_factory=lambda: AgentHistoryList(history=[]))
+    last_plan: Optional[str] = None
+    paused: bool = False
+    stopped: bool = False
+
+    message_manager_state: MessageManagerState = Field(default_factory=MessageManagerState)
+
+    last_action: Optional[List['ActionModel']] = None
+    extracted_content: str = ''
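
A short sketch of the new per-run state container with the defaults shown above; purely illustrative:

# Illustrative only: the new per-run state object and its defaults.
from src.agent.custom_views import CustomAgentState

state = CustomAgentState()            # agent_id is auto-generated via uuid4
print(state.n_steps)                  # 1
print(state.consecutive_failures)     # 0
print(len(state.history.history))     # 0 - empty AgentHistoryList
# state.message_manager_state can be handed to CustomMessageManager via its `state=` argument.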

src/browser/custom_browser.py (+3, -2)

@@ -18,10 +18,11 @@
 
 logger = logging.getLogger(__name__)
 
+
 class CustomBrowser(Browser):
 
     async def new_context(
-        self,
-        config: BrowserContextConfig = BrowserContextConfig()
+            self,
+            config: BrowserContextConfig = BrowserContextConfig()
     ) -> CustomBrowserContext:
         return CustomBrowserContext(config=config, browser=self)

src/browser/custom_context.py (+4, -4)

@@ -12,8 +12,8 @@
 
 class CustomBrowserContext(BrowserContext):
     def __init__(
-        self,
-        browser: "Browser",
-        config: BrowserContextConfig = BrowserContextConfig()
+            self,
+            browser: "Browser",
+            config: BrowserContextConfig = BrowserContextConfig()
     ):
-        super(CustomBrowserContext, self).__init__(browser=browser, config=config)
+        super(CustomBrowserContext, self).__init__(browser=browser, config=config)
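
For completeness, a hypothetical sketch of opening a context with these classes; BrowserConfig, its headless flag, and the import paths are assumptions about the browser_use 0.1.40 API rather than something shown in this diff:

# Hypothetical usage (not part of this commit): open a custom browser context.
import asyncio

from browser_use.browser.browser import BrowserConfig  # assumed import path
from browser_use.browser.context import BrowserContextConfig  # assumed import path

from src.browser.custom_browser import CustomBrowser


async def main():
    browser = CustomBrowser(config=BrowserConfig(headless=False))  # assumed constructor kwargs
    context = await browser.new_context(config=BrowserContextConfig())
    # ... drive the agent with `context` here ...
    await context.close()
    await browser.close()


asyncio.run(main())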

src/utils/deep_research.py (+5, -4)

@@ -310,11 +310,12 @@ async def extract_content(browser: BrowserContext):
         await browser_context.close()
         logger.info("Browser closed.")
 
+
 async def generate_final_report(task, history_infos, save_dir, llm, error_msg=None):
     """Generate report from collected information with error handling"""
     try:
         logger.info("\nAttempting to generate final report from collected data...")
-
+
         writer_system_prompt = """
 You are a **Deep Researcher** and a professional report writer tasked with creating polished, high-quality reports that fully meet the user's needs, based on the user's instructions and the relevant information provided. You will write the report using Markdown format, ensuring it is both informative and visually appealing.
@@ -366,9 +367,9 @@ async def generate_final_report(task, history_infos, save_dir, llm, error_msg=None):
         # Add error notification to the report
         if error_msg:
             report_content = f"## ⚠️ Research Incomplete - Partial Results\n" \
-                f"**The research process was interrupted by an error:** {error_msg}\n\n" \
-                f"{report_content}"
-
+                             f"**The research process was interrupted by an error:** {error_msg}\n\n" \
+                             f"{report_content}"
+
         report_file_path = os.path.join(save_dir, "final_report.md")
         with open(report_file_path, "w", encoding="utf-8") as f:
             f.write(report_content)
