Commit 40f5a8f

Reflection step for agent + claude 3.7 + extender recursion (#689)
# Motivation

# Content

# Testing

# Please check the following before marking your PR as ready for review
- [x] I have added tests for my changes
- [x] I have updated the documentation or added new documentation as needed
1 parent 8ad361c commit 40f5a8f

File tree

6 files changed: +269 −3 lines changed

src/codegen/agents/code_agent.py

Lines changed: 2 additions & 2 deletions
@@ -13,7 +13,7 @@
 class CodeAgent:
     """Agent for interacting with a codebase."""

-    def __init__(self, codebase: "Codebase", model_provider: str = "anthropic", model_name: str = "claude-3-5-sonnet-latest", memory: bool = True, tools: Optional[list[BaseTool]] = None, **kwargs):
+    def __init__(self, codebase: "Codebase", model_provider: str = "anthropic", model_name: str = "claude-3-7-sonnet-latest", memory: bool = True, tools: Optional[list[BaseTool]] = None, **kwargs):
         """Initialize a CodeAgent.

         Args:
@@ -49,7 +49,7 @@ def run(self, prompt: str, thread_id: Optional[str] = None) -> str:
         input = {"messages": [("user", prompt)]}

         # we stream the steps instead of invoke because it allows us to access intermediate nodes
-        stream = self.agent.stream(input, config={"configurable": {"thread_id": thread_id}}, stream_mode="values")
+        stream = self.agent.stream(input, config={"configurable": {"thread_id": thread_id}, "recursion_limit": 100}, stream_mode="values")

         for s in stream:
             message = s["messages"][-1]
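
Together, these two hunks switch the default model to claude-3-7-sonnet-latest and pass recursion_limit=100 to LangGraph so longer tool-call loops are not cut off by the framework's lower default. A minimal usage sketch, assuming a Codebase is already available; the repository path and prompt below are illustrative, not taken from this commit:

    from codegen.agents.code_agent import CodeAgent
    from codegen.sdk.core.codebase import Codebase

    # Hypothetical setup: how the Codebase is constructed is outside this diff.
    codebase = Codebase("path/to/repo")

    # Defaults now resolve to claude-3-7-sonnet-latest, and each run may take
    # up to 100 graph steps before LangGraph stops the stream.
    agent = CodeAgent(codebase)
    result = agent.run("Find where the recursion limit is configured and summarize it.")
    print(result)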

src/codegen/extensions/langchain/agent.py

Lines changed: 3 additions & 1 deletion
@@ -15,6 +15,7 @@
     DeleteFileTool,
     ListDirectoryTool,
     MoveSymbolTool,
+    ReflectionTool,
     RelaceEditTool,
     RenameFileTool,
     ReplacementEditTool,
@@ -31,7 +32,7 @@
 def create_codebase_agent(
     codebase: "Codebase",
     model_provider: str = "anthropic",
-    model_name: str = "claude-3-5-sonnet-latest",
+    model_name: str = "claude-3-7-sonnet-latest",
     system_message: SystemMessage = SystemMessage(REASONER_SYSTEM_MESSAGE),
     memory: bool = True,
     debug: bool = False,
@@ -71,6 +72,7 @@ def create_codebase_agent(
         # SemanticEditTool(codebase),
         ReplacementEditTool(codebase),
         RelaceEditTool(codebase),
+        ReflectionTool(codebase),
         # SemanticSearchTool(codebase),
         # =====[ Github Integration ]=====
         # Enable Github integration
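
The net effect is that create_codebase_agent now registers ReflectionTool in its default toolset and also defaults to claude-3-7-sonnet-latest. A short sketch of building the agent with these defaults; the Codebase construction is illustrative:

    from codegen.extensions.langchain.agent import create_codebase_agent
    from codegen.sdk.core.codebase import Codebase

    codebase = Codebase("path/to/repo")  # hypothetical path

    # ReflectionTool ships in the default tool list, so no extra wiring is needed.
    agent = create_codebase_agent(codebase, memory=True, debug=False)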

src/codegen/extensions/langchain/tools.py

Lines changed: 38 additions & 0 deletions
@@ -16,6 +16,7 @@
     linear_search_issues_tool,
 )
 from codegen.extensions.tools.link_annotation import add_links_to_message
+from codegen.extensions.tools.reflection import perform_reflection
 from codegen.extensions.tools.relace_edit import relace_edit
 from codegen.extensions.tools.replacement_edit import replacement_edit
 from codegen.extensions.tools.reveal_symbol import reveal_symbol
@@ -742,6 +743,7 @@ def get_workspace_tools(codebase: Codebase) -> list["BaseTool"]:
         SemanticSearchTool(codebase),
         ViewFileTool(codebase),
         RelaceEditTool(codebase),
+        ReflectionTool(codebase),
         # Github
         GithubCreatePRTool(codebase),
         GithubCreatePRCommentTool(codebase),
@@ -842,3 +844,39 @@ def __init__(self, codebase: Codebase) -> None:
     def _run(self, filepath: str, edit_snippet: str) -> str:
         result = relace_edit(self.codebase, filepath, edit_snippet)
         return result.render()
+
+
+class ReflectionInput(BaseModel):
+    """Input for agent reflection."""
+
+    context_summary: str = Field(..., description="Summary of the current context and problem being solved")
+    findings_so_far: str = Field(..., description="Key information and insights gathered so far")
+    current_challenges: str = Field(default="", description="Current obstacles or questions that need to be addressed")
+    reflection_focus: Optional[str] = Field(default=None, description="Optional specific aspect to focus reflection on (e.g., 'architecture', 'performance', 'next steps')")
+
+
+class ReflectionTool(BaseTool):
+    """Tool for agent self-reflection and planning."""
+
+    name: ClassVar[str] = "reflect"
+    description: ClassVar[str] = """
+    Reflect on current understanding and plan next steps.
+    This tool helps organize thoughts, identify knowledge gaps, and create a strategic plan.
+    Use this when you need to consolidate information or when facing complex decisions.
+    """
+    args_schema: ClassVar[type[BaseModel]] = ReflectionInput
+    codebase: Codebase = Field(exclude=True)

+    def __init__(self, codebase: Codebase) -> None:
+        super().__init__(codebase=codebase)
+
+    def _run(
+        self,
+        context_summary: str,
+        findings_so_far: str,
+        current_challenges: str = "",
+        reflection_focus: Optional[str] = None,
+    ) -> str:
+        result = perform_reflection(context_summary=context_summary, findings_so_far=findings_so_far, current_challenges=current_challenges, reflection_focus=reflection_focus, codebase=self.codebase)
+
+        return result.render()
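
ReflectionTool is a thin LangChain BaseTool wrapper: ReflectionInput validates the arguments and _run delegates to perform_reflection, returning the rendered observation. A sketch of invoking the tool directly outside an agent; the argument values are illustrative, and the call goes through the Anthropic-backed LLM wrapper, so API credentials are assumed:

    from codegen.extensions.langchain.tools import ReflectionTool
    from codegen.sdk.core.codebase import Codebase

    codebase = Codebase("path/to/repo")  # hypothetical path
    tool = ReflectionTool(codebase)

    # Standard LangChain invocation; the keys mirror the ReflectionInput schema.
    report = tool.invoke(
        {
            "context_summary": "Fixing a failing import in the CLI entrypoint.",
            "findings_so_far": "The module was renamed; two call sites still use the old path.",
            "current_challenges": "Unsure whether a compatibility shim is needed.",
            "reflection_focus": "next steps",
        }
    )
    print(report)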

src/codegen/extensions/swebench/harness.py

Lines changed: 6 additions & 0 deletions
@@ -79,7 +79,13 @@ def run_agent_on_entry(entry: SweBenchExample, codebase: Codebase | None = None)
 Filenames, directory names, file contents, etc may be different than what you're used to.

 Propose changes to update the repo to fix the problem below.
+*** IMPORTANT: *** DO NOT MODIFY ANY TESTS!
+*** IMPORTANT: *** DO NOT ADD ANY TESTS!

+Before committing to any modifications, double-check your work with the Reflection tool.
+You can also use that tool to check your work after you think you are done.
+If you ever get stuck using other tools, use the Reflection tool to reassess your situation.
+After every file edit, use the Reflection tool to check your work and sanity-check yourself.
 """
     message += problem_statement

src/codegen/extensions/tools/__init__.py

Lines changed: 3 additions & 0 deletions
@@ -16,6 +16,7 @@
 )
 from .list_directory import list_directory
 from .move_symbol import move_symbol
+from .reflection import perform_reflection
 from .rename_file import rename_file
 from .replacement_edit import replacement_edit
 from .reveal_symbol import reveal_symbol
@@ -43,6 +44,8 @@
     "list_directory",
     # Symbol operations
     "move_symbol",
+    # Reflection
+    "perform_reflection",
     "rename_file",
     "replacement_edit",
     "reveal_symbol",
src/codegen/extensions/tools/reflection.py

Lines changed: 217 additions & 0 deletions
@@ -0,0 +1,217 @@
+"""Tool for agent self-reflection and planning."""
+
+from typing import ClassVar, Optional
+
+from langchain_core.messages import HumanMessage, SystemMessage
+from langchain_core.output_parsers import StrOutputParser
+from langchain_core.prompts import ChatPromptTemplate
+from pydantic import Field
+
+from codegen.extensions.langchain.llm import LLM
+from codegen.sdk.core.codebase import Codebase
+
+from .observation import Observation
+
+
+class ReflectionSection(Observation):
+    """A section of the reflection output."""
+
+    title: str = Field(description="Title of the section")
+    content: str = Field(description="Content of the section")
+
+    str_template: ClassVar[str] = "{title}:\n{content}"
+
+
+class ReflectionObservation(Observation):
+    """Response from agent reflection."""
+
+    context_summary: str = Field(description="Summary of the current context")
+    findings: str = Field(description="Key information and insights gathered")
+    challenges: Optional[str] = Field(None, description="Current obstacles or questions")
+    focus: Optional[str] = Field(None, description="Specific aspect focused on")
+    sections: list[ReflectionSection] = Field(description="Structured reflection sections")
+
+    str_template: ClassVar[str] = "Reflection on: {focus}"
+
+    def _get_details(self) -> dict[str, str]:
+        """Get details for string representation."""
+        return {
+            "focus": self.focus or "current understanding and next steps",
+        }
+
+    def render(self) -> str:
+        """Render the reflection as a formatted string."""
+        output = []
+
+        # Add header
+        if self.focus:
+            output.append(f"# Reflection on: {self.focus}")
+        else:
+            output.append("# Agent Reflection")
+
+        # Add each section
+        for section in self.sections:
+            output.append(f"\n## {section.title}")
+            output.append(section.content)
+
+        return "\n".join(output)
+
+
+# System prompt for the reflection LLM
+REFLECTION_SYSTEM_PROMPT = """You are an expert AI assistant specialized in reflection and strategic planning.
+Your task is to help organize thoughts, identify knowledge gaps, and create a strategic plan based on the information provided.
+
+You will be given:
+1. A summary of the current context and problem being solved
+2. Key information and insights gathered so far
+3. Current obstacles or questions that need to be addressed (if any)
+4. A specific aspect to focus the reflection on (if any)
+
+Your response should be structured into the following sections:
+1. Current Understanding - Summarize what you understand about the problem and context
+2. Key Insights - Highlight the most important findings and their implications
+3. Knowledge Gaps (if challenges are provided) - Identify what information is still missing
+4. Action Plan - Recommend specific next steps to move forward
+5. Alternative Approaches - Suggest other ways to tackle the problem
+
+Your reflection should be clear, insightful, and actionable. Focus on helping the agent make progress and double check its own work.
+You will not suggest the agent writes new tests or modifies existing tests.
+"""
+
+
+def parse_reflection_response(response: str) -> list[ReflectionSection]:
+    """Parse the LLM response into structured reflection sections.
+
+    Args:
+        response: Raw LLM response text
+
+    Returns:
+        List of ReflectionSection objects
+    """
+    sections = []
+    current_section = None
+    current_content = []
+
+    # Split the response into lines
+    lines = response.strip().split("\n")
+
+    for line in lines:
+        # Check if this is a section header (starts with ## or #)
+        if line.startswith("## ") or (line.startswith("# ") and not line.startswith("# Reflection")):
+            # If we have a current section, save it before starting a new one
+            if current_section:
+                sections.append(ReflectionSection(title=current_section, content="\n".join(current_content).strip()))
+                current_content = []
+
+            # Extract the new section title
+            current_section = line.lstrip("#").strip()
+        elif current_section:
+            # Add content to the current section
+            current_content.append(line)
+
+    # Add the last section if there is one
+    if current_section and current_content:
+        sections.append(ReflectionSection(title=current_section, content="\n".join(current_content).strip()))
+
+    return sections
+
+
+def perform_reflection(
+    context_summary: str,
+    findings_so_far: str,
+    current_challenges: str = "",
+    reflection_focus: Optional[str] = None,
+    codebase: Optional[Codebase] = None,
+) -> ReflectionObservation:
+    """Perform agent reflection to organize thoughts and plan next steps.
+
+    This function helps the agent consolidate its understanding, identify knowledge gaps,
+    and create a strategic plan for moving forward.
+
+    Args:
+        context_summary: Summary of the current context and problem being solved
+        findings_so_far: Key information and insights gathered so far
+        current_challenges: Current obstacles or questions that need to be addressed
+        reflection_focus: Optional specific aspect to focus reflection on
+        codebase: Optional codebase context for code-specific reflections
+
+    Returns:
+        ReflectionObservation containing structured reflection sections
+    """
+    try:
+        # Create the prompt for the LLM
+        system_message = SystemMessage(content=REFLECTION_SYSTEM_PROMPT)
+
+        # Construct the human message with all the context
+        human_message_content = f"""
+Context Summary:
+{context_summary}
+
+Key Findings:
+{findings_so_far}
+"""
+
+        # Add challenges if provided
+        if current_challenges:
+            human_message_content += f"""
+Current Challenges:
+{current_challenges}
+"""
+
+        # Add reflection focus if provided
+        if reflection_focus:
+            human_message_content += f"""
+Reflection Focus:
+{reflection_focus}
+"""
+
+        # Add codebase context if available and relevant
+        if codebase and (reflection_focus and "code" in reflection_focus.lower()):
+            # In a real implementation, you might add relevant codebase context here
+            # For example, listing key files or symbols related to the reflection focus
+            human_message_content += f"""
+Codebase Context:
+- Working with codebase at: {codebase.root}
+"""
+
+        human_message = HumanMessage(content=human_message_content)
+        prompt = ChatPromptTemplate.from_messages([system_message, human_message])
+
+        # Initialize the LLM
+        llm = LLM(
+            model_provider="anthropic",
+            model_name="claude-3-5-sonnet-latest",
+            temperature=0.2,  # Slightly higher temperature for more creative reflection
+            max_tokens=4000,
+        )
+
+        # Create and execute the chain
+        chain = prompt | llm | StrOutputParser()
+        response = chain.invoke({})
+
+        # Parse the response into sections
+        sections = parse_reflection_response(response)
+
+        # If no sections were parsed, create a default section with the full response
+        if not sections:
+            sections = [ReflectionSection(title="Reflection", content=response)]
+
+        return ReflectionObservation(
+            status="success",
+            context_summary=context_summary,
+            findings=findings_so_far,
+            challenges=current_challenges,
+            focus=reflection_focus,
+            sections=sections,
+        )
+
+    except Exception as e:
+        return ReflectionObservation(
+            status="error",
+            error=f"Failed to perform reflection: {e!s}",
+            context_summary=context_summary,
+            findings=findings_so_far,
+            challenges=current_challenges,
+            focus=reflection_focus,
+            sections=[],
+        )
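
perform_reflection assembles a system + human prompt, runs it through the project's LLM wrapper (note the reflection call itself still targets claude-3-5-sonnet-latest), and parses the markdown-style headings in the response back into ReflectionSection objects; any exception is converted into an error observation rather than raised. A hedged sketch of calling it directly, assuming Anthropic credentials are configured and that the Observation base class exposes the status and error fields set above; the input strings are illustrative:

    from codegen.extensions.tools.reflection import perform_reflection

    # No codebase is passed, so the codebase-context branch is skipped.
    obs = perform_reflection(
        context_summary="Migrating the config loader from YAML to TOML.",
        findings_so_far="Three modules read the YAML file directly; one fixture depends on it.",
        current_challenges="Unclear whether the old format must keep working during the transition.",
        reflection_focus="next steps",
    )

    if obs.status == "success":
        print(obs.render())  # "# Reflection on: next steps" plus one "## ..." block per parsed section
    else:
        print(obs.error)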
