stanfordnlp · TomeHirata · May 15, 2025 · May 7, 2025
diff --git a/dspy/propose/dataset_summary_generator.py b/dspy/propose/dataset_summary_generator.py
@@ -3,6 +3,7 @@
 import dspy
 from dspy.propose.utils import strip_prefix
 
+
 class ObservationSummarizer(dspy.Signature):
     ("""Given a series of observations I have made about my dataset, please summarize them into a brief 2-3 sentence summary which highlights only the most important details.""")
     observations = dspy.InputField(desc="Observations I have made about my dataset")
@@ -12,7 +13,7 @@ class DatasetDescriptor(dspy.Signature):
     ("""Given several examples from a dataset please write observations about trends that hold for most or all of the samples. """
     """Some areas you may consider in your observations: topics, content, syntax, conciceness, etc. """
     """It will be useful to make an educated guess as to the nature of the task this dataset will enable. Don't be afraid to be creative""")
-    
+
     examples = dspy.InputField(desc="Sample data points from the dataset")
     observations = dspy.OutputField(desc="Somethings that holds true for most or all of the data you observed")
 
@@ -21,7 +22,7 @@ class DatasetDescriptorWithPriorObservations(dspy.Signature):
     """I will also provide you with a few observations I have already made.  Please add your own observations or if you feel the observations are comprehensive say 'COMPLETE' """
     """Some areas you may consider in your observations: topics, content, syntax, conciceness, etc. """
     """It will be useful to make an educated guess as to the nature of the task this dataset will enable. Don't be afraid to be creative""")
-    
+
     examples = dspy.InputField(desc="Sample data points from the dataset")
     prior_observations = dspy.InputField(desc="Some prior observations I made about the data")
     observations = dspy.OutputField(desc="Somethings that holds true for most or all of the data you observed or COMPLETE if you have nothing to add")
@@ -75,8 +76,8 @@ def create_dataset_summary(trainset, view_data_batch_size, prompt_model, log_fil
                     break
                 continue
             observations += output["observations"]
-            
-            if log_file: 
+
+            if log_file:
                 log_file.write(f"observations {observations}\n")
     except Exception as e:
         if verbose:
@@ -91,7 +92,7 @@ def create_dataset_summary(trainset, view_data_batch_size, prompt_model, log_fil
         print(f"summary: {summary}")
     if log_file:
         log_file.write(f"summary: {summary}\n")
-    
+
     if verbose:
         print(f"\nGenerated summary: {strip_prefix(summary.summary)}\n")
 

diff --git a/dspy/propose/grounded_proposer.py b/dspy/propose/grounded_proposer.py
@@ -2,9 +2,14 @@
 
 import dspy
 from dspy.propose.dataset_summary_generator import create_dataset_summary
-from dspy.propose.utils import create_example_string, create_predictor_level_history_string, strip_prefix, get_dspy_source_code
-from dspy.teleprompt.utils import get_signature, get_prompt_model
 from dspy.propose.propose_base import Proposer
+from dspy.propose.utils import (
+    create_example_string,
+    create_predictor_level_history_string,
+    get_dspy_source_code,
+    strip_prefix,
+)
+from dspy.teleprompt.utils import get_prompt_model, get_signature
 
 # Hardcoded variables (TODO: update)
 MAX_INSTRUCT_IN_HISTORY = 5  # 10
@@ -184,15 +189,15 @@ def gather_examples_from_sets(candidate_sets, max_examples):
         # Construct full program demo or single module demo depending on settings
         basic_instruction = get_signature(program.predictors()[pred_i]).instructions
         task_demos = ""
-        
+
         if self.use_task_demos:
             # Combine current and adjacent sets
             adjacent_sets = (
                 [demo_candidates[pred_i][demo_set_i]] +
                 demo_candidates[pred_i][demo_set_i + 1:] +
                 demo_candidates[pred_i][:demo_set_i]
             )
-            
+
             # Gather examples up to the required count
             example_strings = gather_examples_from_sets(adjacent_sets, num_demos_in_context)
             task_demos = "\n\n".join(example_strings) + "\n\n"
@@ -220,7 +225,7 @@ def gather_examples_from_sets(candidate_sets, max_examples):
                 for field_name, field in get_signature(program.predictors()[pred_i]).fields.items():
                     # Access the '__dspy_field_type' from the extra metadata
                     dspy_field_type = field.json_schema_extra.get('__dspy_field_type')
-                    
+
                     # Based on the '__dspy_field_type', append to the respective list
                     if dspy_field_type == "input":
                         inputs.append(field_name)
@@ -236,9 +241,9 @@ def gather_examples_from_sets(candidate_sets, max_examples):
                     module=module_code,
                     max_depth=10,
                 ).module_description
-            except:
+            except Exception as e:
                 if self.verbose:
-                    print("Error getting program description. Running without program aware proposer.")
+                    print(f"Error getting program description. Running without program aware proposer. Error: {e}")
                 self.program_aware = False
 
         # Generate an instruction for our chosen module
@@ -324,12 +329,12 @@ def propose_instructions_for_program(
         program,
         demo_candidates,
         trial_logs,
-        N,
-        T,
+        N, # noqa: N803
+        T, # noqa: N803
     ) -> list[str]:
         """This method is responsible for returning the full set of new instructions for our program, given the specified criteria."""
 
-        proposed_instructions = {}      
+        proposed_instructions = {}
 
         if self.set_history_randomly:
             # Randomly select whether or not we're using instruction history
@@ -347,7 +352,7 @@ def propose_instructions_for_program(
         else:
             num_demos = max(len(demo_candidates[0]), 1)
 
-        # Create an instruction for each predictor 
+        # Create an instruction for each predictor
         for pred_i, predictor in enumerate(program.predictors()):
             for demo_set_i in range(num_demos)[:min(N, num_demos)]:
                 if pred_i not in proposed_instructions:
@@ -377,15 +382,15 @@ def propose_instructions_for_program(
                         tip=selected_tip,
                     ),
                 )
-        
+
         return proposed_instructions
 
     def propose_instruction_for_predictor(
         self,
         program,
         predictor,
         pred_i,
-        T,
+        T, # noqa: N803
         demo_candidates,
         demo_set_i,
         trial_logs,

diff --git a/dspy/propose/instruction_proposal.py b/dspy/propose/instruction_proposal.py
diff --git a/dspy/propose/propose_base.py b/dspy/propose/propose_base.py
@@ -10,4 +10,4 @@ def propose_instructions_for_program(self):
         pass
 
     def propose_instruction_for_predictor(self):
-        pass
+        pass
diff --git a/dspy/propose/utils.py b/dspy/propose/utils.py
@@ -1,16 +1,18 @@
+import inspect
 import json
 import re
+
 import dspy
-import inspect
+
 try:
     from IPython.core.magics.code import extract_symbols
 except ImportError:
     # Won't be able to read code from juptyer notebooks
     extract_symbols = None
 
 from dspy.predict.parameter import Parameter
+from dspy.teleprompt.utils import get_signature, new_getfile
 
-from dspy.teleprompt.utils import get_signature
 
 def strip_prefix(text):
     pattern = r'^[\*\s]*(([\w\'\-]+\s+){0,4}[\w\'\-]+):\s*'
@@ -38,7 +40,7 @@ def create_instruction_set_history_string(base_program, trial_logs, top_n):
         if instruction_set not in seen_programs:
             seen_programs.add(instruction_set)
             unique_program_history.append(entry)
-    
+
     # Get the top n programs from program history
     top_n_program_history = sorted(unique_program_history, key=lambda x: x['score'], reverse=True)[:top_n]
     top_n_program_history.reverse()
@@ -50,7 +52,7 @@ def create_instruction_set_history_string(base_program, trial_logs, top_n):
         score = entry["score"]
         instruction_set = get_program_instruction_set_string(program)
         instruction_set_history_string += instruction_set + f" | Score: {score}\n\n"
-    
+
     return instruction_set_history_string
 
 def parse_list_of_instructions(instruction_string):
@@ -60,7 +62,7 @@ def parse_list_of_instructions(instruction_string):
         return instructions
     except json.JSONDecodeError:
         pass
-    
+
     # If JSON decoding fails, extract strings within quotes
     instructions = re.findall(r'"([^"]*)"', instruction_string)
     return instructions
@@ -76,7 +78,7 @@ def get_program_instruction_set_string(program):
 def create_predictor_level_history_string(base_program, predictor_i, trial_logs, top_n):
     instruction_aggregate = {}
     instruction_history = []
-    
+
     # Load trial programs
     for trial_num in trial_logs:
         trial = trial_logs[trial_num]
@@ -93,19 +95,19 @@ def create_predictor_level_history_string(base_program, predictor_i, trial_logs,
         predictor = history_item["program"].predictors()[predictor_i]
         instruction = get_signature(predictor).instructions
         score = history_item["score"]
-        
+
         if instruction in instruction_aggregate:
             instruction_aggregate[instruction]['total_score'] += score
             instruction_aggregate[instruction]['count'] += 1
         else:
             instruction_aggregate[instruction] = {'total_score': score, 'count': 1}
-    
+
     # Calculate average score for each instruction and prepare for sorting
     predictor_history = []
     for instruction, data in instruction_aggregate.items():
         average_score = data['total_score'] / data['count']
         predictor_history.append((instruction, average_score))
-    
+
     # Deduplicate and sort by average score, then select top N
     seen_instructions = set()
     unique_predictor_history = []
@@ -116,12 +118,12 @@ def create_predictor_level_history_string(base_program, predictor_i, trial_logs,
 
     top_instructions = sorted(unique_predictor_history, key=lambda x: x[1], reverse=True)[:top_n]
     top_instructions.reverse()
-    
+
     # Create formatted history string
     predictor_history_string = ""
     for instruction, score in top_instructions:
         predictor_history_string += instruction + f" | Score: {score}\n\n"
-    
+
     return predictor_history_string
 
 def create_example_string(fields, example):
@@ -180,5 +182,5 @@ def get_dspy_source_code(module):
                     header.append(code)
                     completed_set.add(code)
             completed_set.add(item)
-        
-    return '\n\n'.join(header) + '\n\n' + base_code
+
+    return '\n\n'.join(header) + '\n\n' + base_code
-Original file line number
+Diff line change
@@ -10,4 +10,4 @@ def propose_instructions_for_program(self):
             pass
         def propose_instruction_for_predictor(self):
-            pass
+            pass