Skip to content

Commit 99ecc82

Browse files
feat: Add AI-powered exploration workflow with map-reduce pattern (#539)
Implement intelligent task decomposition and parallel exploration system:

- Add `ai_split` operation for AI-powered task decomposition using Claude
  - Uses CLIExecutor to intelligently analyze goals and generate focused sub-tasks
  - Includes fallback mechanism for graceful degradation
  - Configurable task count limits (min_tasks, max_tasks)
- Add `exploration` operation with ExploreAgent integration
  - Supports 5 exploration types: question, implementation, structure, usage, flow
  - Stores findings in session files (JSON format) for persistence
  - Proper error handling with failed status reporting
- Add `summarize` operation for result aggregation
  - Three summary formats: detailed, concise, structured
  - Reads findings from session files or direct input
  - Groups findings by exploration type
- Include deterministic `split` operation as alternative
  - Four strategies: by_items, by_count, by_chunk_size, custom
  - Simple algorithmic splitting without AI
- Add comprehensive test coverage (35 tests, all passing)
  - Mock AI calls for fast, deterministic testing (~0.6s execution)
  - Test all operations: split, ai_split, exploration, summarize
  - Integration test for full map-reduce workflow
- Create workflow examples
  - ai_exploration_workflow.yaml: AI-driven exploration
  - exploration_mapreduce.yaml: Manual task specification
- Update documentation
  - Add map-reduce operations section to README
  - Document all operations with examples
  - Update architecture diagram

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-authored-by: Claude <[email protected]>
1 parent 8e3ea0d commit 99ecc82

File tree

11 files changed

+2648
-4
lines changed

11 files changed

+2648
-4
lines changed
Lines changed: 383 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,383 @@
1+
"""Tests for AI-powered split operation."""
2+
3+
import pytest
4+
import json
5+
from unittest.mock import AsyncMock, MagicMock, patch
6+
7+
from plugins.automation.workflows.steps.operations import AISplitOperation
8+
9+
10+
class TestAISplitOperation:
    """Test AI-powered split operation.

    All AI interactions (the `_call_ai_for_split` helper and the underlying
    CLIExecutor) are mocked, so these tests are fast and deterministic —
    no real CLI or LLM calls are made.
    """

    def test_validation_success(self):
        """Test validation with valid config."""
        config = {"max_tasks": 5, "min_tasks": 2}
        inputs = {"goal": "Understand authentication"}

        operation = AISplitOperation(config, inputs)
        # validate() returns None when config/inputs are acceptable.
        assert operation.validate() is None

    def test_validation_missing_goal(self):
        """Test validation fails without goal."""
        config = {}
        inputs = {}

        operation = AISplitOperation(config, inputs)
        error = operation.validate()
        assert error is not None
        # The error message should mention the missing "goal" input.
        assert "goal" in error.lower()

    def test_validation_invalid_task_counts(self):
        """Test validation fails when max_tasks < min_tasks."""
        config = {"max_tasks": 2, "min_tasks": 5}
        inputs = {"goal": "Test"}

        operation = AISplitOperation(config, inputs)
        error = operation.validate()
        assert error is not None
        assert "max_tasks" in error

    def test_build_split_prompt_basic(self):
        """Test prompt building with basic inputs."""
        config = {"min_tasks": 2, "max_tasks": 5}
        inputs = {"goal": "Understand authentication"}

        operation = AISplitOperation(config, inputs)
        prompt = operation._build_split_prompt(
            goal="Understand authentication",
            codebase_path=None,
            focus_areas=[],
            constraints=None,
            context="",
            min_tasks=2,
            max_tasks=5,
        )

        assert "Understand authentication" in prompt
        # The prompt must advertise the task-count range and JSON output format.
        assert "2-5" in prompt
        assert "JSON" in prompt

    def test_build_split_prompt_comprehensive(self):
        """Test prompt building with all optional fields."""
        config = {}
        inputs = {"goal": "Test"}

        operation = AISplitOperation(config, inputs)
        prompt = operation._build_split_prompt(
            goal="Understand security",
            codebase_path="/path/to/code",
            focus_areas=["authentication", "authorization"],
            constraints="Focus on backend only",
            context="Legacy system",
            min_tasks=3,
            max_tasks=8,
        )

        # Every optional field must surface in the generated prompt.
        assert "Understand security" in prompt
        assert "/path/to/code" in prompt
        assert "authentication" in prompt
        assert "Focus on backend only" in prompt
        assert "Legacy system" in prompt

    @pytest.mark.asyncio
    async def test_execute_with_valid_ai_response(self):
        """Test execution with valid AI JSON response."""
        config = {"model": "haiku", "max_tasks": 5, "min_tasks": 2}
        inputs = {"goal": "Understand auth", "codebase_path": "/code"}

        operation = AISplitOperation(config, inputs)

        # Mock AI response
        mock_ai_response = {
            "reasoning": "Split into login, session, and security",
            "tasks": [
                {
                    "title": "Login Flow",
                    "query": "Trace user login",
                    "type": "flow",
                    "priority": "high",
                    "estimated_complexity": "moderate",
                },
                {
                    "title": "Session Management",
                    "query": "Find session handling",
                    "type": "implementation",
                    "priority": "high",
                    "estimated_complexity": "complex",
                },
            ],
        }

        # Mock the AI call
        with patch.object(operation, '_call_ai_for_split', new_callable=AsyncMock) as mock_call:
            mock_call.return_value = mock_ai_response

            result = await operation.execute()

            assert result["task_count"] == 2
            assert len(result["tasks"]) == 2
            assert result["reasoning"] == "Split into login, session, and security"
            assert result["tasks"][0]["title"] == "Login Flow"
            # execute() enriches each task with its positional index.
            assert result["tasks"][0]["index"] == 0
            assert result["metadata"]["goal"] == "Understand auth"

    @pytest.mark.asyncio
    async def test_execute_with_too_many_tasks(self):
        """Test execution truncates when AI generates too many tasks."""
        config = {"max_tasks": 3, "min_tasks": 2}
        inputs = {"goal": "Test"}

        operation = AISplitOperation(config, inputs)

        # Mock AI response with too many tasks
        mock_ai_response = {
            "reasoning": "Detailed split",
            "tasks": [
                {"title": f"Task {i}", "query": f"Q{i}", "type": "question", "priority": "medium"}
                for i in range(10)  # Generate 10 tasks
            ],
        }

        with patch.object(operation, '_call_ai_for_split', new_callable=AsyncMock) as mock_call:
            mock_call.return_value = mock_ai_response

            result = await operation.execute()

            # Should be truncated to max_tasks
            assert result["task_count"] == 3
            assert len(result["tasks"]) == 3

    @pytest.mark.asyncio
    async def test_call_ai_for_split_with_clean_json(self):
        """Test AI call with clean JSON response."""
        config = {}
        inputs = {"goal": "Test"}
        operation = AISplitOperation(config, inputs)

        mock_response = json.dumps({
            "reasoning": "Test reasoning",
            "tasks": [
                {"title": "Task 1", "query": "Q1", "type": "question", "priority": "high"}
            ],
        })

        # NOTE(review): patches CLIExecutor at its definition site; assumes
        # the operation imports it lazily from utils.agent.cli_executor —
        # confirm against the operations module's import style.
        with patch('utils.agent.cli_executor.CLIExecutor') as MockExecutor:
            mock_executor = MockExecutor.return_value
            mock_executor.execute = AsyncMock(return_value=mock_response)

            result = await operation._call_ai_for_split("test prompt", "haiku")

            assert result["reasoning"] == "Test reasoning"
            assert len(result["tasks"]) == 1

    @pytest.mark.asyncio
    async def test_call_ai_for_split_with_json_in_text(self):
        """Test AI call when JSON is embedded in text."""
        config = {}
        inputs = {"goal": "Test"}
        operation = AISplitOperation(config, inputs)

        # AI response with explanation before/after JSON
        mock_response = """Here's my analysis:

{
    "reasoning": "Embedded JSON",
    "tasks": [
        {"title": "Task", "query": "Q", "type": "question", "priority": "high"}
    ]
}

Hope this helps!"""

        with patch('utils.agent.cli_executor.CLIExecutor') as MockExecutor:
            mock_executor = MockExecutor.return_value
            mock_executor.execute = AsyncMock(return_value=mock_response)

            result = await operation._call_ai_for_split("test prompt", "haiku")

            # The JSON payload should be extracted despite surrounding prose.
            assert result["reasoning"] == "Embedded JSON"
            assert len(result["tasks"]) == 1

    @pytest.mark.asyncio
    async def test_call_ai_for_split_fallback_on_error(self):
        """Test fallback split when AI call fails."""
        config = {}
        inputs = {"goal": "Test authentication"}
        operation = AISplitOperation(config, inputs)

        with patch('utils.agent.cli_executor.CLIExecutor') as MockExecutor:
            mock_executor = MockExecutor.return_value
            mock_executor.execute = AsyncMock(side_effect=Exception("API error"))

            result = await operation._call_ai_for_split("test prompt", "haiku")

            # Should return fallback split
            assert "Fallback split" in result["reasoning"]
            assert len(result["tasks"]) == 3  # Default fallback has 3 tasks
            assert "Test authentication" in result["tasks"][0]["query"]

    @pytest.mark.asyncio
    async def test_call_ai_for_split_fallback_on_invalid_json(self):
        """Test fallback when AI returns invalid JSON."""
        config = {}
        inputs = {"goal": "Test"}
        operation = AISplitOperation(config, inputs)

        # AI returns text without JSON
        mock_response = "I cannot parse this request as JSON"

        with patch('utils.agent.cli_executor.CLIExecutor') as MockExecutor:
            mock_executor = MockExecutor.return_value
            mock_executor.execute = AsyncMock(return_value=mock_response)

            result = await operation._call_ai_for_split("test prompt", "haiku")

            # Should use fallback
            assert "Fallback split" in result["reasoning"]
            assert len(result["tasks"]) >= 3

    def test_create_fallback_split(self):
        """Test fallback split generation."""
        config = {}
        inputs = {"goal": "Understand database operations"}
        operation = AISplitOperation(config, inputs)

        result = operation._create_fallback_split("Some error")

        assert "Fallback split" in result["reasoning"]
        assert len(result["tasks"]) == 3
        assert "database operations" in result["tasks"][0]["query"]
        # Every fallback task carries the minimal required fields.
        assert all("title" in task for task in result["tasks"])
        assert all("query" in task for task in result["tasks"])
        assert all("type" in task for task in result["tasks"])

    def test_parse_ai_response_valid(self):
        """Test parsing valid AI response."""
        config = {}
        inputs = {"goal": "Test"}
        operation = AISplitOperation(config, inputs)

        ai_response = {
            "reasoning": "Test",
            "tasks": [
                {"title": "T1", "query": "Q1", "type": "question", "priority": "high"},
                {"title": "T2", "query": "Q2", "type": "implementation", "priority": "medium"},
            ],
        }

        tasks = operation._parse_ai_response(ai_response, min_tasks=2, max_tasks=5)

        assert len(tasks) == 2
        # Parsing assigns sequential indices to the tasks.
        assert tasks[0]["index"] == 0
        assert tasks[1]["index"] == 1

    @pytest.mark.asyncio
    async def test_execute_with_focus_areas(self):
        """Test execution with focus areas."""
        config = {"max_tasks": 5}
        inputs = {
            "goal": "Understand system",
            "focus_areas": ["performance", "security"],
            "constraints": "Backend only",
        }

        operation = AISplitOperation(config, inputs)

        mock_response = {
            "reasoning": "Focused on performance and security",
            "tasks": [
                {"title": "Security", "query": "Security aspects", "type": "question", "priority": "high"}
            ],
        }

        with patch.object(operation, '_call_ai_for_split', new_callable=AsyncMock) as mock_call:
            mock_call.return_value = mock_response

            result = await operation.execute()

            # Verify focus areas were included in the call
            call_args = mock_call.call_args[0][0]  # Get prompt
            assert "performance" in call_args
            assert "security" in call_args
            assert "Backend only" in call_args
306+
class TestAISplitIntegration:
    """Integration tests for AI split with CLIExecutor.

    The CLIExecutor is still mocked (no real LLM call), but the full
    execute() flow — prompt construction, executor invocation, and
    response parsing — runs end to end.
    """

    @pytest.mark.asyncio
    @pytest.mark.integration
    async def test_full_ai_split_execution(self):
        """
        Integration test with real CLIExecutor (mocked).

        This tests the full flow including CLIExecutor integration.
        """
        config = {"model": "haiku", "max_tasks": 5, "min_tasks": 3}
        inputs = {
            "goal": "Understand authentication flow in the application",
            "codebase_path": "/path/to/code",
            "focus_areas": ["security", "user management"],
        }

        operation = AISplitOperation(config, inputs)

        # Mock CLIExecutor at a higher level
        mock_cli_response = json.dumps({
            "reasoning": "Authentication involves login, session, and security. Splitting into focused areas.",
            "tasks": [
                {
                    "title": "User Login Flow",
                    "query": "Trace the user login process from form submission to session creation",
                    "type": "flow",
                    "priority": "high",
                    "estimated_complexity": "complex",
                },
                {
                    "title": "Session Management",
                    "query": "Investigate how user sessions are stored and validated",
                    "type": "implementation",
                    "priority": "high",
                    "estimated_complexity": "moderate",
                },
                {
                    "title": "Security Mechanisms",
                    "query": "Find password hashing, encryption, and security measures",
                    "type": "structure",
                    "priority": "high",
                    "estimated_complexity": "moderate",
                },
                {
                    "title": "User Management",
                    "query": "Explore user creation, updates, and permission management",
                    "type": "implementation",
                    "priority": "medium",
                    "estimated_complexity": "simple",
                },
            ],
        })

        with patch('utils.agent.cli_executor.CLIExecutor') as MockExecutor:
            mock_executor = MockExecutor.return_value
            mock_executor.execute = AsyncMock(return_value=mock_cli_response)

            result = await operation.execute()

            # Verify execution
            assert mock_executor.execute.called
            prompt = mock_executor.execute.call_args[0][0]
            assert "Understand authentication flow" in prompt
            assert "security" in prompt

            # Verify results
            assert result["task_count"] == 4
            assert len(result["tasks"]) == 4
            assert "Authentication involves" in result["reasoning"]

            # Verify task structure
            first_task = result["tasks"][0]
            assert first_task["title"] == "User Login Flow"
            assert first_task["type"] == "flow"
            assert first_task["priority"] == "high"
            assert "index" in first_task

0 commit comments

Comments
 (0)