Commit 40f5a8f

Reflection step for agent + claude 3.7 + extender recursion (#689)
# Motivation

# Content

# Testing

# Please check the following before marking your PR as ready for review
- [x] I have added tests for my changes
- [x] I have updated the documentation or added new documentation as needed
1 parent 8ad361c commit 40f5a8f

File tree

6 files changed: +269 −3 lines changed

src/codegen/agents/code_agent.py

Lines changed: 2 additions & 2 deletions
@@ -13,7 +13,7 @@
 class CodeAgent:
     """Agent for interacting with a codebase."""

-    def __init__(self, codebase: "Codebase", model_provider: str = "anthropic", model_name: str = "claude-3-5-sonnet-latest", memory: bool = True, tools: Optional[list[BaseTool]] = None, **kwargs):
+    def __init__(self, codebase: "Codebase", model_provider: str = "anthropic", model_name: str = "claude-3-7-sonnet-latest", memory: bool = True, tools: Optional[list[BaseTool]] = None, **kwargs):
         """Initialize a CodeAgent.

         Args:
@@ -49,7 +49,7 @@ def run(self, prompt: str, thread_id: Optional[str] = None) -> str:
         input = {"messages": [("user", prompt)]}

         # we stream the steps instead of invoke because it allows us to access intermediate nodes
-        stream = self.agent.stream(input, config={"configurable": {"thread_id": thread_id}}, stream_mode="values")
+        stream = self.agent.stream(input, config={"configurable": {"thread_id": thread_id}, "recursion_limit": 100}, stream_mode="values")

         for s in stream:
             message = s["messages"][-1]
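
Together, these two hunks switch the default model to claude-3-7-sonnet-latest and pass recursion_limit=100 to LangGraph so longer tool-call loops are not cut off by the framework's lower default. A minimal usage sketch, assuming a Codebase is already available; the repository path and prompt below are illustrative, not taken from this commit:

    from codegen.agents.code_agent import CodeAgent
    from codegen.sdk.core.codebase import Codebase

    # Hypothetical setup: how the Codebase is constructed is outside this diff.
    codebase = Codebase("path/to/repo")

    # Defaults now resolve to claude-3-7-sonnet-latest, and each run may take
    # up to 100 graph steps before LangGraph stops the stream.
    agent = CodeAgent(codebase)
    result = agent.run("Find where the recursion limit is configured and summarize it.")
    print(result)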

src/codegen/extensions/langchain/agent.py

Lines changed: 3 additions & 1 deletion
@@ -15,6 +15,7 @@
     DeleteFileTool,
     ListDirectoryTool,
     MoveSymbolTool,
+    ReflectionTool,
     RelaceEditTool,
     RenameFileTool,
     ReplacementEditTool,
@@ -31,7 +32,7 @@
 def create_codebase_agent(
     codebase: "Codebase",
     model_provider: str = "anthropic",
-    model_name: str = "claude-3-5-sonnet-latest",
+    model_name: str = "claude-3-7-sonnet-latest",
     system_message: SystemMessage = SystemMessage(REASONER_SYSTEM_MESSAGE),
     memory: bool = True,
     debug: bool = False,
@@ -71,6 +72,7 @@ def create_codebase_agent(
         # SemanticEditTool(codebase),
         ReplacementEditTool(codebase),
         RelaceEditTool(codebase),
+        ReflectionTool(codebase),
         # SemanticSearchTool(codebase),
         # =====[ Github Integration ]=====
         # Enable Github integration
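
The net effect is that create_codebase_agent now registers ReflectionTool in its default toolset and also defaults to claude-3-7-sonnet-latest. A short sketch of building the agent with these defaults; the Codebase construction is illustrative:

    from codegen.extensions.langchain.agent import create_codebase_agent
    from codegen.sdk.core.codebase import Codebase

    codebase = Codebase("path/to/repo")  # hypothetical path

    # ReflectionTool ships in the default tool list, so no extra wiring is needed.
    agent = create_codebase_agent(codebase, memory=True, debug=False)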

src/codegen/extensions/langchain/tools.py

Lines changed: 38 additions & 0 deletions
@@ -16,6 +16,7 @@
     linear_search_issues_tool,
 )
 from codegen.extensions.tools.link_annotation import add_links_to_message
+from codegen.extensions.tools.reflection import perform_reflection
 from codegen.extensions.tools.relace_edit import relace_edit
 from codegen.extensions.tools.replacement_edit import replacement_edit
 from codegen.extensions.tools.reveal_symbol import reveal_symbol
@@ -742,6 +743,7 @@ def get_workspace_tools(codebase: Codebase) -> list["BaseTool"]:
         SemanticSearchTool(codebase),
         ViewFileTool(codebase),
         RelaceEditTool(codebase),
+        ReflectionTool(codebase),
         # Github
         GithubCreatePRTool(codebase),
         GithubCreatePRCommentTool(codebase),
@@ -842,3 +844,39 @@ def __init__(self, codebase: Codebase) -> None:
     def _run(self, filepath: str, edit_snippet: str) -> str:
         result = relace_edit(self.codebase, filepath, edit_snippet)
         return result.render()
+
+
+class ReflectionInput(BaseModel):
+    """Input for agent reflection."""
+
+    context_summary: str = Field(..., description="Summary of the current context and problem being solved")
+    findings_so_far: str = Field(..., description="Key information and insights gathered so far")
+    current_challenges: str = Field(default="", description="Current obstacles or questions that need to be addressed")
+    reflection_focus: Optional[str] = Field(default=None, description="Optional specific aspect to focus reflection on (e.g., 'architecture', 'performance', 'next steps')")
+
+
+class ReflectionTool(BaseTool):
+    """Tool for agent self-reflection and planning."""
+
+    name: ClassVar[str] = "reflect"
+    description: ClassVar[str] = """
+    Reflect on current understanding and plan next steps.
+    This tool helps organize thoughts, identify knowledge gaps, and create a strategic plan.
+    Use this when you need to consolidate information or when facing complex decisions.
+    """
+    args_schema: ClassVar[type[BaseModel]] = ReflectionInput
+    codebase: Codebase = Field(exclude=True)

+    def __init__(self, codebase: Codebase) -> None:
+        super().__init__(codebase=codebase)
+
+    def _run(
+        self,
+        context_summary: str,
+        findings_so_far: str,
+        current_challenges: str = "",
+        reflection_focus: Optional[str] = None,
+    ) -> str:
+        result = perform_reflection(context_summary=context_summary, findings_so_far=findings_so_far, current_challenges=current_challenges, reflection_focus=reflection_focus, codebase=self.codebase)
+
+        return result.render()
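
ReflectionTool is a thin LangChain BaseTool wrapper: ReflectionInput validates the arguments and _run delegates to perform_reflection, returning the rendered observation. A sketch of invoking the tool directly outside an agent; the argument values are illustrative, and the call goes through the Anthropic-backed LLM wrapper, so API credentials are assumed:

    from codegen.extensions.langchain.tools import ReflectionTool
    from codegen.sdk.core.codebase import Codebase

    codebase = Codebase("path/to/repo")  # hypothetical path
    tool = ReflectionTool(codebase)

    # Standard LangChain invocation; the keys mirror the ReflectionInput schema.
    report = tool.invoke(
        {
            "context_summary": "Fixing a failing import in the CLI entrypoint.",
            "findings_so_far": "The module was renamed; two call sites still use the old path.",
            "current_challenges": "Unsure whether a compatibility shim is needed.",
            "reflection_focus": "next steps",
        }
    )
    print(report)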

src/codegen/extensions/swebench/harness.py

Lines changed: 6 additions & 0 deletions
@@ -79,7 +79,13 @@ def run_agent_on_entry(entry: SweBenchExample, codebase: Codebase | None = None)
 Filenames, directory names, file contents, etc may be different than what you're used to.

 Propose changes to update the repo to fix the problem below.
+*** IMPORTANT: *** DO NOT MODIFY ANY TESTS!
+*** IMPORTANT: *** DO NOT ADD ANY TESTS!

+Before committing to any modifications, double-check your work with the Reflection tool.
+You can also use that tool to check your work after you think you are done.
+If you ever get stuck using other tools, use the Reflection tool to reassess your situation.
+After every file edit, use the Reflection tool to check your work and sanity-check yourself.
 """
     message += problem_statement

src/codegen/extensions/tools/__init__.py

Lines changed: 3 additions & 0 deletions
@@ -16,6 +16,7 @@
 )
 from .list_directory import list_directory
 from .move_symbol import move_symbol
+from .reflection import perform_reflection
 from .rename_file import rename_file
 from .replacement_edit import replacement_edit
 from .reveal_symbol import reveal_symbol
@@ -43,6 +44,8 @@
     "list_directory",
     # Symbol operations
     "move_symbol",
+    # Reflection
+    "perform_reflection",
     "rename_file",
     "replacement_edit",
     "reveal_symbol",
src/codegen/extensions/tools/reflection.py

Lines changed: 217 additions & 0 deletions
@@ -0,0 +1,217 @@
+"""Tool for agent self-reflection and planning."""
+
+from typing import ClassVar, Optional
+
+from langchain_core.messages import HumanMessage, SystemMessage
+from langchain_core.output_parsers import StrOutputParser
+from langchain_core.prompts import ChatPromptTemplate
+from pydantic import Field
+
+from codegen.extensions.langchain.llm import LLM
+from codegen.sdk.core.codebase import Codebase
+
+from .observation import Observation
+
+
+class ReflectionSection(Observation):
+    """A section of the reflection output."""
+
+    title: str = Field(description="Title of the section")
+    content: str = Field(description="Content of the section")
+
+    str_template: ClassVar[str] = "{title}:\n{content}"
+
+
+class ReflectionObservation(Observation):
+    """Response from agent reflection."""
+
+    context_summary: str = Field(description="Summary of the current context")
+    findings: str = Field(description="Key information and insights gathered")
+    challenges: Optional[str] = Field(None, description="Current obstacles or questions")
+    focus: Optional[str] = Field(None, description="Specific aspect focused on")
+    sections: list[ReflectionSection] = Field(description="Structured reflection sections")
+
+    str_template: ClassVar[str] = "Reflection on: {focus}"
+
+    def _get_details(self) -> dict[str, str]:
+        """Get details for string representation."""
+        return {
+            "focus": self.focus or "current understanding and next steps",
+        }
+
+    def render(self) -> str:
+        """Render the reflection as a formatted string."""
+        output = []
+
+        # Add header
+        if self.focus:
+            output.append(f"# Reflection on: {self.focus}")
+        else:
+            output.append("# Agent Reflection")
+
+        # Add each section
+        for section in self.sections:
+            output.append(f"\n## {section.title}")
+            output.append(section.content)
+
+        return "\n".join(output)
+
+
+# System prompt for the reflection LLM
+REFLECTION_SYSTEM_PROMPT = """You are an expert AI assistant specialized in reflection and strategic planning.
+Your task is to help organize thoughts, identify knowledge gaps, and create a strategic plan based on the information provided.
+
+You will be given:
+1. A summary of the current context and problem being solved
+2. Key information and insights gathered so far
+3. Current obstacles or questions that need to be addressed (if any)
+4. A specific aspect to focus the reflection on (if any)
+
+Your response should be structured into the following sections:
+1. Current Understanding - Summarize what you understand about the problem and context
+2. Key Insights - Highlight the most important findings and their implications
+3. Knowledge Gaps (if challenges are provided) - Identify what information is still missing
+4. Action Plan - Recommend specific next steps to move forward
+5. Alternative Approaches - Suggest other ways to tackle the problem
+
+Your reflection should be clear, insightful, and actionable. Focus on helping the agent make progress and double check its own work.
+You will not suggest the agent writes new tests or modifies existing tests.
+"""
+
+
+def parse_reflection_response(response: str) -> list[ReflectionSection]:
+    """Parse the LLM response into structured reflection sections.
+
+    Args:
+        response: Raw LLM response text
+
+    Returns:
+        List of ReflectionSection objects
+    """
+    sections = []
+    current_section = None
+    current_content = []
+
+    # Split the response into lines
+    lines = response.strip().split("\n")
+
+    for line in lines:
+        # Check if this is a section header (starts with ## or #)
+        if line.startswith("## ") or (line.startswith("# ") and not line.startswith("# Reflection")):
+            # If we have a current section, save it before starting a new one
+            if current_section:
+                sections.append(ReflectionSection(title=current_section, content="\n".join(current_content).strip()))
+                current_content = []
+
+            # Extract the new section title
+            current_section = line.lstrip("#").strip()
+        elif current_section:
+            # Add content to the current section
+            current_content.append(line)
+
+    # Add the last section if there is one
+    if current_section and current_content:
+        sections.append(ReflectionSection(title=current_section, content="\n".join(current_content).strip()))
+
+    return sections
+
+
+def perform_reflection(
+    context_summary: str,
+    findings_so_far: str,
+    current_challenges: str = "",
+    reflection_focus: Optional[str] = None,
+    codebase: Optional[Codebase] = None,
+) -> ReflectionObservation:
+    """Perform agent reflection to organize thoughts and plan next steps.
+
+    This function helps the agent consolidate its understanding, identify knowledge gaps,
+    and create a strategic plan for moving forward.
+
+    Args:
+        context_summary: Summary of the current context and problem being solved
+        findings_so_far: Key information and insights gathered so far
+        current_challenges: Current obstacles or questions that need to be addressed
+        reflection_focus: Optional specific aspect to focus reflection on
+        codebase: Optional codebase context for code-specific reflections
+
+    Returns:
+        ReflectionObservation containing structured reflection sections
+    """
+    try:
+        # Create the prompt for the LLM
+        system_message = SystemMessage(content=REFLECTION_SYSTEM_PROMPT)
+
+        # Construct the human message with all the context
+        human_message_content = f"""
+Context Summary:
+{context_summary}
+
+Key Findings:
+{findings_so_far}
+"""
+
+        # Add challenges if provided
+        if current_challenges:
+            human_message_content += f"""
+Current Challenges:
+{current_challenges}
+"""
+
+        # Add reflection focus if provided
+        if reflection_focus:
+            human_message_content += f"""
+Reflection Focus:
+{reflection_focus}
+"""
+
+        # Add codebase context if available and relevant
+        if codebase and (reflection_focus and "code" in reflection_focus.lower()):
+            # In a real implementation, you might add relevant codebase context here
+            # For example, listing key files or symbols related to the reflection focus
+            human_message_content += f"""
+Codebase Context:
+- Working with codebase at: {codebase.root}
+"""
+
+        human_message = HumanMessage(content=human_message_content)
+        prompt = ChatPromptTemplate.from_messages([system_message, human_message])
+
+        # Initialize the LLM
+        llm = LLM(
+            model_provider="anthropic",
+            model_name="claude-3-5-sonnet-latest",
+            temperature=0.2,  # Slightly higher temperature for more creative reflection
+            max_tokens=4000,
+        )
+
+        # Create and execute the chain
+        chain = prompt | llm | StrOutputParser()
+        response = chain.invoke({})
+
+        # Parse the response into sections
+        sections = parse_reflection_response(response)
+
+        # If no sections were parsed, create a default section with the full response
+        if not sections:
+            sections = [ReflectionSection(title="Reflection", content=response)]
+
+        return ReflectionObservation(
+            status="success",
+            context_summary=context_summary,
+            findings=findings_so_far,
+            challenges=current_challenges,
+            focus=reflection_focus,
+            sections=sections,
+        )
+
+    except Exception as e:
+        return ReflectionObservation(
+            status="error",
+            error=f"Failed to perform reflection: {e!s}",
+            context_summary=context_summary,
+            findings=findings_so_far,
+            challenges=current_challenges,
+            focus=reflection_focus,
+            sections=[],
+        )
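
perform_reflection assembles a system + human prompt, runs it through the project's LLM wrapper (note the reflection call itself still targets claude-3-5-sonnet-latest), and parses the markdown-style headings in the response back into ReflectionSection objects; any exception is converted into an error observation rather than raised. A hedged sketch of calling it directly, assuming Anthropic credentials are configured and that the Observation base class exposes the status and error fields set above; the input strings are illustrative:

    from codegen.extensions.tools.reflection import perform_reflection

    # No codebase is passed, so the codebase-context branch is skipped.
    obs = perform_reflection(
        context_summary="Migrating the config loader from YAML to TOML.",
        findings_so_far="Three modules read the YAML file directly; one fixture depends on it.",
        current_challenges="Unclear whether the old format must keep working during the transition.",
        reflection_focus="next steps",
    )

    if obs.status == "success":
        print(obs.render())  # "# Reflection on: next steps" plus one "## ..." block per parsed section
    else:
        print(obs.error)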
