stanfordnlp · Ju-usc · Oct 10, 2025 · Oct 10, 2025 · Oct 10, 2025 · Oct 10, 2025
diff --git a/docs/docs/api/optimizers/GEPA/GEPA_Advanced.md b/docs/docs/api/optimizers/GEPA/GEPA_Advanced.md
diff --git a/docs/docs/api/optimizers/GEPA/overview.md b/docs/docs/api/optimizers/GEPA/overview.md
@@ -117,6 +117,12 @@ Practical Recipe for GEPA-Friendly Feedback:
 - **Multi-Objective Tasks** (e.g., PUPA): Decompose aggregate scores to reveal contributions from each objective, highlighting tradeoffs (e.g., quality vs. privacy).
 - **Stacked Pipelines** (e.g., code generation: parse → compile → run → profile → evaluate): Expose stage-specific failures; natural-language traces often suffice for LLM self-correction.
 
+## ReAct Component Optimization
+
+GEPA can optimize ReAct modules holistically. When `optimize_react_components=True`, GEPA jointly optimizes all four components of each ReAct module: react instructions, extract instructions, tool descriptions, and tool argument descriptions. Learning from execution traces how these components interact helps agents make better decisions.
+
+For details on how ReAct optimization works, when to use it, and usage examples, see [ReAct Component Optimization](GEPA_Advanced.md#react-component-optimization) in the Advanced Features guide.
+
 ## Custom Instruction Proposal
 
 For advanced customization of GEPA's instruction proposal mechanism, including custom instruction proposers and component selectors, see [Advanced Features](GEPA_Advanced.md).

diff --git a/dspy/teleprompt/gepa/gepa.py b/dspy/teleprompt/gepa/gepa.py
@@ -1,4 +1,5 @@
 import inspect
+import json
 import logging
 import random
 from dataclasses import dataclass
@@ -9,8 +10,9 @@
 from gepa.proposer.reflective_mutation.base import ReflectionComponentSelector
 
 from dspy.clients.lm import LM
+from dspy.predict.react import ReAct
 from dspy.primitives import Example, Module, Prediction
-from dspy.teleprompt.gepa.gepa_utils import DspyAdapter, DSPyTrace, PredictorFeedbackFn, ScoreWithFeedback
+from dspy.teleprompt.gepa.gepa_utils import DspyAdapter, DSPyTrace, PredictorFeedbackFn, REACT_MODULE_PREFIX, ScoreWithFeedback
 from dspy.teleprompt.teleprompt import Teleprompter
 from dspy.utils.annotation import experimental
 
@@ -273,6 +275,11 @@ def metric(
         warn_on_score_mismatch: GEPA (currently) expects the metric to return the same module-level score when 
             called with and without the pred_name. This flag (defaults to True) determines whether a warning is 
             raised if a mismatch in module-level and predictor-level score is detected.
+        optimize_react_components: Whether to optimize ReAct module components including react 
+            instructions, extract instructions, tool descriptions, and tool argument descriptions. 
+            When enabled, GEPA jointly optimizes all four components of ReAct modules. See the 
+            [ReAct Component Optimization guide](https://dspy.ai/api/optimizers/GEPA/GEPA_Advanced/#react-component-optimization) 
+            for details on when to use this feature and how it works. Default is False.
         seed: The random seed to use for reproducibility. Default is 0.
         gepa_kwargs: (Optional) provide additional kwargs to be passed to [gepa.optimize](https://github.com/gepa-ai/gepa/blob/main/src/gepa/api.py) method
 
@@ -328,6 +335,7 @@ def __init__(
         wandb_init_kwargs: dict[str, Any] | None = None,
         track_best_outputs: bool = False,
         warn_on_score_mismatch: bool = True,
+        optimize_react_components: bool = False,
         use_mlflow: bool = False,
         # Reproducibility
         seed: int | None = 0,
@@ -390,6 +398,7 @@ def __init__(
         self.wandb_api_key = wandb_api_key
         self.wandb_init_kwargs = wandb_init_kwargs
         self.warn_on_score_mismatch = warn_on_score_mismatch
+        self.optimize_react_components = optimize_react_components
         self.use_mlflow = use_mlflow
 
         if track_best_outputs:
@@ -518,11 +527,57 @@ def feedback_fn(
             rng=rng,
             reflection_lm=self.reflection_lm,
             custom_instruction_proposer=self.custom_instruction_proposer,
-            warn_on_score_mismatch=self.warn_on_score_mismatch
+            warn_on_score_mismatch=self.warn_on_score_mismatch,
+            optimize_react_components=self.optimize_react_components,
         )
 
         # Instantiate GEPA with the simpler adapter-based API
         base_program = {name: pred.signature.instructions for name, pred in student.named_predictors()}
+
+        if self.optimize_react_components:
+            for module_path, module in student.named_sub_modules():
+                # Only process ReAct modules
+                if not isinstance(module, ReAct):
+                    continue
+                normalized_path = module_path.removeprefix("self.") if module_path != "self" else ""
+
+                # Use only the module's first predictor name to locate its base_program entry (the loop breaks after one pass)
+                for pred_name, _ in module.named_predictors():
+                    comp_name = pred_name if not normalized_path else f"{normalized_path}.{pred_name}"
+                    # Use full normalized path to avoid collapsing nested modules
+                    # e.g., "multi_agent.coordinator" not "multi_agent"
+                    module_key = f"{REACT_MODULE_PREFIX}:{normalized_path}" if normalized_path else REACT_MODULE_PREFIX
+
+                    # Build JSON config with tool args for reflection
+                    config = {
+                        "react": module.react.signature.instructions,
+                        "extract": module.extract.predict.signature.instructions,
+                        "tools": {
+                            tool_name: {
+                                "desc": tool.desc,
+                                "args": tool.args,
+                                "arg_desc": tool.arg_desc or {}
+                            }
+                            for tool_name, tool in module.tools.items()
+                            if tool_name != "finish"
+                        }
+                    }
+
+                    # Drop the first-predictor and extract keys, then store a single module-level JSON entry to prevent duplicates
+                    base_program.pop(comp_name, None)
+                    extract_key = f"{normalized_path}.extract.predict" if normalized_path else "extract.predict"
+                    base_program.pop(extract_key, None)
+                    base_program[module_key] = json.dumps(config, indent=2)
+                    break
+
+        # Log base_program keys for debugging
+        logger.info(f"Initialized base_program with {len(base_program)} components:")
+        for key in sorted(base_program.keys()):
+            if key.startswith(REACT_MODULE_PREFIX):
+                logger.info(f"  {key}: <ReAct module JSON config>")
+            else:
+                logger.info(f"  {key}: <instruction>")
+
         gepa_result: GEPAResult = optimize(
             seed_candidate=base_program,
             trainset=trainset,