Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
6412a5d
feat(gepa): add tool description optimization for multi-agent systems
Ju-usc Oct 10, 2025
cf0be4f
style: fix ruff formatting (trailing whitespace)
Ju-usc Oct 10, 2025
aa53fe2
style: apply ruff formatting fixes
Ju-usc Oct 10, 2025
045c6cf
feat(gepa): implement tool-specific proposer for tool descriptions
Ju-usc Oct 10, 2025
c4f2041
docs(gepa): clean up multi-agent example code
Ju-usc Oct 10, 2025
260ca80
refactor(gepa): simplify tool reflective dataset with ReAct context r…
Ju-usc Oct 11, 2025
04f7e3d
fix(gepa): unify custom proposer routing for tools
Ju-usc Oct 12, 2025
f92e184
docs(gepa): clarify tool reflection prompt
Ju-usc Oct 12, 2025
7178869
test: streamline GEPA tool optimization tests
Ju-usc Oct 12, 2025
e34703b
fix(gepa): streamline tool proposer formatting
Ju-usc Oct 12, 2025
3f05311
test(gepa): drop legacy dummy tool fixture
Ju-usc Oct 12, 2025
4df9ce5
docs(gepa): add tool-specific reflection prompt and metric example
Ju-usc Oct 12, 2025
4296ccf
docs(gepa): fix implementation details with accurate code flow
Ju-usc Oct 13, 2025
ea1204a
docs(gepa): remove backward compatibility note
Ju-usc Oct 13, 2025
48d5cd6
docs(gepa): improve usage examples with optimization visualization
Ju-usc Oct 13, 2025
548d9b6
docs(gepa): add design rationale comments for tool context sharing
Ju-usc Oct 13, 2025
e61d0a1
docs(gepa): add tool optimization links to overview and parameter docs
Ju-usc Oct 13, 2025
5c95412
docs(gepa): refine tool optimization scenarios and remove implementat…
Ju-usc Oct 13, 2025
19d7717
docs(gepa): clarify future work section in code comments
Ju-usc Oct 13, 2025
9ce5fe4
refactor(gepa): unify ReAct optimization as single module
Ju-usc Oct 24, 2025
91331d0
test(gepa): add end-to-end ReAct module optimization test
Ju-usc Oct 24, 2025
3418b59
fix(gepa): enable arg description optimization for ReAct tools
Ju-usc Oct 24, 2025
b26d39a
chore: remove legacy test_gepa_tool_optimization.py
Ju-usc Oct 24, 2025
2791b5c
fix: restore accidentally removed score mismatch warning
Ju-usc Oct 24, 2025
8e63c62
test: update fixture after arg description optimization fix
Ju-usc Oct 25, 2025
7a9d2f3
fix(test): use JSON-based hashing for cross-version fixture stability
Ju-usc Oct 25, 2025
cd0de57
refactor(gepa): rename optimize_tool_descriptions to optimize_react_c…
Ju-usc Oct 26, 2025
67bb739
docs(gepa): improve 'What is optimize_react_components?' section
Ju-usc Oct 26, 2025
b3026a7
docs(gepa): replace outdated tool-specific prompt with actual ReAct o…
Ju-usc Oct 26, 2025
4e107aa
docs(gepa): simplify 'How It Works' section with accurate routing beh…
Ju-usc Oct 26, 2025
78547e7
docs(gepa): remove outdated Implementation Details section
Ju-usc Oct 26, 2025
7fa829b
docs(gepa): replace theoretical scenarios with real user pain points
Ju-usc Oct 26, 2025
da0e7bc
docs(gepa): fix usage examples reference to match updated scenarios
Ju-usc Oct 26, 2025
e51158d
docs(gepa): update inspect section to show all 4 ReAct components wit…
Ju-usc Oct 26, 2025
776ab9b
docs(gepa): rewrite Section 8 with accurate custom proposer behavior …
Ju-usc Oct 26, 2025
ec6bb7b
fix(gepa): fix top-level ReAct module lookup and remove tool name san…
Ju-usc Oct 27, 2025
b6cc67b
refactor(gepa): unify ReAct module key handling and use constant
Ju-usc Oct 28, 2025
1206f38
test(gepa): add ReAct module detection tests for nested structures
Ju-usc Oct 28, 2025
333cbbf
test(gepa): add comprehensive ReAct detection and reconstruction tests
Ju-usc Oct 28, 2025
a50552a
test(gepa): add reflective dataset tests for multi-agent trajectory v…
Ju-usc Oct 28, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
406 changes: 406 additions & 0 deletions docs/docs/api/optimizers/GEPA/GEPA_Advanced.md

Large diffs are not rendered by default.

6 changes: 6 additions & 0 deletions docs/docs/api/optimizers/GEPA/overview.md
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,12 @@ Practical Recipe for GEPA-Friendly Feedback:
- **Multi-Objective Tasks** (e.g., PUPA): Decompose aggregate scores to reveal contributions from each objective, highlighting tradeoffs (e.g., quality vs. privacy).
- **Stacked Pipelines** (e.g., code generation: parse → compile → run → profile → evaluate): Expose stage-specific failures; natural-language traces often suffice for LLM self-correction.

## ReAct Component Optimization

GEPA can optimize ReAct modules holistically. When `optimize_react_components=True`, GEPA jointly optimizes all four components of each ReAct module: react instructions, extract instructions, tool descriptions, and tool argument descriptions. By learning from execution traces how these components work together, GEPA helps agents make better decisions.

For details on how ReAct optimization works, when to use it, and usage examples, see [ReAct Component Optimization](GEPA_Advanced.md#react-component-optimization) in the Advanced Features guide.

## Custom Instruction Proposal

For advanced customization of GEPA's instruction proposal mechanism, including custom instruction proposers and component selectors, see [Advanced Features](GEPA_Advanced.md).
Expand Down
59 changes: 57 additions & 2 deletions dspy/teleprompt/gepa/gepa.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import inspect
import json
import logging
import random
from dataclasses import dataclass
Expand All @@ -9,8 +10,9 @@
from gepa.proposer.reflective_mutation.base import ReflectionComponentSelector

from dspy.clients.lm import LM
from dspy.predict.react import ReAct
from dspy.primitives import Example, Module, Prediction
from dspy.teleprompt.gepa.gepa_utils import DspyAdapter, DSPyTrace, PredictorFeedbackFn, ScoreWithFeedback
from dspy.teleprompt.gepa.gepa_utils import DspyAdapter, DSPyTrace, PredictorFeedbackFn, REACT_MODULE_PREFIX, ScoreWithFeedback
from dspy.teleprompt.teleprompt import Teleprompter
from dspy.utils.annotation import experimental

Expand Down Expand Up @@ -273,6 +275,11 @@ def metric(
warn_on_score_mismatch: GEPA (currently) expects the metric to return the same module-level score when
called with and without the pred_name. This flag (defaults to True) determines whether a warning is
raised if a mismatch in module-level and predictor-level score is detected.
optimize_react_components: Whether to optimize ReAct module components including react
instructions, extract instructions, tool descriptions, and tool argument descriptions.
When enabled, GEPA jointly optimizes all four components of ReAct modules. See the
[ReAct Component Optimization guide](https://dspy.ai/api/optimizers/GEPA/GEPA_Advanced/#react-component-optimization)
for details on when to use this feature and how it works. Default is False.
seed: The random seed to use for reproducibility. Default is 0.
gepa_kwargs: (Optional) provide additional kwargs to be passed to [gepa.optimize](https://github.com/gepa-ai/gepa/blob/main/src/gepa/api.py) method

Expand Down Expand Up @@ -328,6 +335,7 @@ def __init__(
wandb_init_kwargs: dict[str, Any] | None = None,
track_best_outputs: bool = False,
warn_on_score_mismatch: bool = True,
optimize_react_components: bool = False,
use_mlflow: bool = False,
# Reproducibility
seed: int | None = 0,
Expand Down Expand Up @@ -390,6 +398,7 @@ def __init__(
self.wandb_api_key = wandb_api_key
self.wandb_init_kwargs = wandb_init_kwargs
self.warn_on_score_mismatch = warn_on_score_mismatch
self.optimize_react_components = optimize_react_components
self.use_mlflow = use_mlflow

if track_best_outputs:
Expand Down Expand Up @@ -518,11 +527,57 @@ def feedback_fn(
rng=rng,
reflection_lm=self.reflection_lm,
custom_instruction_proposer=self.custom_instruction_proposer,
warn_on_score_mismatch=self.warn_on_score_mismatch
warn_on_score_mismatch=self.warn_on_score_mismatch,
optimize_react_components=self.optimize_react_components,
)

# Instantiate GEPA with the simpler adapter-based API
base_program = {name: pred.signature.instructions for name, pred in student.named_predictors()}

if self.optimize_react_components:
for module_path, module in student.named_sub_modules():
# Only process ReAct modules
if not isinstance(module, ReAct):
continue
normalized_path = module_path.removeprefix("self.") if module_path != "self" else ""

# Only the first predictor is visited (the loop breaks after one pass); its name locates the key to replace in base_program
for pred_name, _ in module.named_predictors():
comp_name = pred_name if not normalized_path else f"{normalized_path}.{pred_name}"
# Use full normalized path to avoid collapsing nested modules
# e.g., "multi_agent.coordinator" not "multi_agent"
module_key = f"{REACT_MODULE_PREFIX}:{normalized_path}" if normalized_path else REACT_MODULE_PREFIX

# Build JSON config with tool args for reflection
config = {
"react": module.react.signature.instructions,
"extract": module.extract.predict.signature.instructions,
"tools": {
tool_name: {
"desc": tool.desc,
"args": tool.args,
"arg_desc": tool.arg_desc or {}
}
for tool_name, tool in module.tools.items()
if tool_name != "finish"
}
}

# Remove the per-predictor entries (first predictor and extract) and store one module-level entry instead, to prevent duplicates
base_program.pop(comp_name, None)
extract_key = f"{normalized_path}.extract.predict" if normalized_path else "extract.predict"
base_program.pop(extract_key, None)
base_program[module_key] = json.dumps(config, indent=2)
break

# Log base_program keys for debugging
logger.info(f"Initialized base_program with {len(base_program)} components:")
for key in sorted(base_program.keys()):
if key.startswith(REACT_MODULE_PREFIX):
logger.info(f" {key}: <ReAct module JSON config>")
else:
logger.info(f" {key}: <instruction>")

gepa_result: GEPAResult = optimize(
seed_candidate=base_program,
trainset=trainset,
Expand Down
Loading