Skip to content

Fix style errors of dspy/propose #8185

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 15, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 6 additions & 5 deletions dspy/propose/dataset_summary_generator.py
Original file line number Diff line number Diff line change
@@ -3,6 +3,7 @@
import dspy
from dspy.propose.utils import strip_prefix


class ObservationSummarizer(dspy.Signature):
("""Given a series of observations I have made about my dataset, please summarize them into a brief 2-3 sentence summary which highlights only the most important details.""")
observations = dspy.InputField(desc="Observations I have made about my dataset")
@@ -12,7 +13,7 @@ class DatasetDescriptor(dspy.Signature):
("""Given several examples from a dataset please write observations about trends that hold for most or all of the samples. """
"""Some areas you may consider in your observations: topics, content, syntax, conciceness, etc. """
"""It will be useful to make an educated guess as to the nature of the task this dataset will enable. Don't be afraid to be creative""")

examples = dspy.InputField(desc="Sample data points from the dataset")
observations = dspy.OutputField(desc="Somethings that holds true for most or all of the data you observed")

@@ -21,7 +22,7 @@ class DatasetDescriptorWithPriorObservations(dspy.Signature):
"""I will also provide you with a few observations I have already made. Please add your own observations or if you feel the observations are comprehensive say 'COMPLETE' """
"""Some areas you may consider in your observations: topics, content, syntax, conciceness, etc. """
"""It will be useful to make an educated guess as to the nature of the task this dataset will enable. Don't be afraid to be creative""")

examples = dspy.InputField(desc="Sample data points from the dataset")
prior_observations = dspy.InputField(desc="Some prior observations I made about the data")
observations = dspy.OutputField(desc="Somethings that holds true for most or all of the data you observed or COMPLETE if you have nothing to add")
@@ -75,8 +76,8 @@ def create_dataset_summary(trainset, view_data_batch_size, prompt_model, log_fil
break
continue
observations += output["observations"]
if log_file:

if log_file:
log_file.write(f"observations {observations}\n")
except Exception as e:
if verbose:
@@ -91,7 +92,7 @@ def create_dataset_summary(trainset, view_data_batch_size, prompt_model, log_fil
print(f"summary: {summary}")
if log_file:
log_file.write(f"summary: {summary}\n")

if verbose:
print(f"\nGenerated summary: {strip_prefix(summary.summary)}\n")

31 changes: 18 additions & 13 deletions dspy/propose/grounded_proposer.py
Original file line number Diff line number Diff line change
@@ -2,9 +2,14 @@

import dspy
from dspy.propose.dataset_summary_generator import create_dataset_summary
from dspy.propose.utils import create_example_string, create_predictor_level_history_string, strip_prefix, get_dspy_source_code
from dspy.teleprompt.utils import get_signature, get_prompt_model
from dspy.propose.propose_base import Proposer
from dspy.propose.utils import (
create_example_string,
create_predictor_level_history_string,
get_dspy_source_code,
strip_prefix,
)
from dspy.teleprompt.utils import get_prompt_model, get_signature

# Hardcoded variables (TODO: update)
MAX_INSTRUCT_IN_HISTORY = 5 # 10
@@ -184,15 +189,15 @@ def gather_examples_from_sets(candidate_sets, max_examples):
# Construct full program demo or single module demo depending on settings
basic_instruction = get_signature(program.predictors()[pred_i]).instructions
task_demos = ""

if self.use_task_demos:
# Combine current and adjacent sets
adjacent_sets = (
[demo_candidates[pred_i][demo_set_i]] +
demo_candidates[pred_i][demo_set_i + 1:] +
demo_candidates[pred_i][:demo_set_i]
)

# Gather examples up to the required count
example_strings = gather_examples_from_sets(adjacent_sets, num_demos_in_context)
task_demos = "\n\n".join(example_strings) + "\n\n"
@@ -220,7 +225,7 @@ def gather_examples_from_sets(candidate_sets, max_examples):
for field_name, field in get_signature(program.predictors()[pred_i]).fields.items():
# Access the '__dspy_field_type' from the extra metadata
dspy_field_type = field.json_schema_extra.get('__dspy_field_type')

# Based on the '__dspy_field_type', append to the respective list
if dspy_field_type == "input":
inputs.append(field_name)
@@ -236,9 +241,9 @@ def gather_examples_from_sets(candidate_sets, max_examples):
module=module_code,
max_depth=10,
).module_description
except:
except Exception as e:
if self.verbose:
print("Error getting program description. Running without program aware proposer.")
print(f"Error getting program description. Running without program aware proposer. Error: {e}")
self.program_aware = False

# Generate an instruction for our chosen module
@@ -324,12 +329,12 @@ def propose_instructions_for_program(
program,
demo_candidates,
trial_logs,
N,
T,
N, # noqa: N803
T, # noqa: N803
) -> list[str]:
"""This method is responsible for returning the full set of new instructions for our program, given the specified criteria."""

proposed_instructions = {}
proposed_instructions = {}

if self.set_history_randomly:
# Randomly select whether or not we're using instruction history
@@ -347,7 +352,7 @@ def propose_instructions_for_program(
else:
num_demos = max(len(demo_candidates[0]), 1)

# Create an instruction for each predictor
# Create an instruction for each predictor
for pred_i, predictor in enumerate(program.predictors()):
for demo_set_i in range(num_demos)[:min(N, num_demos)]:
if pred_i not in proposed_instructions:
@@ -377,15 +382,15 @@ def propose_instructions_for_program(
tip=selected_tip,
),
)

return proposed_instructions

def propose_instruction_for_predictor(
self,
program,
predictor,
pred_i,
T,
T, # noqa: N803
demo_candidates,
demo_set_i,
trial_logs,
126 changes: 0 additions & 126 deletions dspy/propose/instruction_proposal.py

This file was deleted.

2 changes: 1 addition & 1 deletion dspy/propose/propose_base.py
Original file line number Diff line number Diff line change
@@ -10,4 +10,4 @@ def propose_instructions_for_program(self):
pass

def propose_instruction_for_predictor(self):
pass
pass
28 changes: 15 additions & 13 deletions dspy/propose/utils.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,18 @@
import inspect
import json
import re

import dspy
import inspect

try:
from IPython.core.magics.code import extract_symbols
except ImportError:
# Won't be able to read code from Jupyter notebooks
extract_symbols = None

from dspy.predict.parameter import Parameter
from dspy.teleprompt.utils import get_signature, new_getfile

from dspy.teleprompt.utils import get_signature

def strip_prefix(text):
pattern = r'^[\*\s]*(([\w\'\-]+\s+){0,4}[\w\'\-]+):\s*'
@@ -38,7 +40,7 @@ def create_instruction_set_history_string(base_program, trial_logs, top_n):
if instruction_set not in seen_programs:
seen_programs.add(instruction_set)
unique_program_history.append(entry)

# Get the top n programs from program history
top_n_program_history = sorted(unique_program_history, key=lambda x: x['score'], reverse=True)[:top_n]
top_n_program_history.reverse()
@@ -50,7 +52,7 @@ def create_instruction_set_history_string(base_program, trial_logs, top_n):
score = entry["score"]
instruction_set = get_program_instruction_set_string(program)
instruction_set_history_string += instruction_set + f" | Score: {score}\n\n"

return instruction_set_history_string

def parse_list_of_instructions(instruction_string):
@@ -60,7 +62,7 @@ def parse_list_of_instructions(instruction_string):
return instructions
except json.JSONDecodeError:
pass

# If JSON decoding fails, extract strings within quotes
instructions = re.findall(r'"([^"]*)"', instruction_string)
return instructions
@@ -76,7 +78,7 @@ def get_program_instruction_set_string(program):
def create_predictor_level_history_string(base_program, predictor_i, trial_logs, top_n):
instruction_aggregate = {}
instruction_history = []

# Load trial programs
for trial_num in trial_logs:
trial = trial_logs[trial_num]
@@ -93,19 +95,19 @@ def create_predictor_level_history_string(base_program, predictor_i, trial_logs,
predictor = history_item["program"].predictors()[predictor_i]
instruction = get_signature(predictor).instructions
score = history_item["score"]

if instruction in instruction_aggregate:
instruction_aggregate[instruction]['total_score'] += score
instruction_aggregate[instruction]['count'] += 1
else:
instruction_aggregate[instruction] = {'total_score': score, 'count': 1}

# Calculate average score for each instruction and prepare for sorting
predictor_history = []
for instruction, data in instruction_aggregate.items():
average_score = data['total_score'] / data['count']
predictor_history.append((instruction, average_score))

# Deduplicate and sort by average score, then select top N
seen_instructions = set()
unique_predictor_history = []
@@ -116,12 +118,12 @@ def create_predictor_level_history_string(base_program, predictor_i, trial_logs,

top_instructions = sorted(unique_predictor_history, key=lambda x: x[1], reverse=True)[:top_n]
top_instructions.reverse()

# Create formatted history string
predictor_history_string = ""
for instruction, score in top_instructions:
predictor_history_string += instruction + f" | Score: {score}\n\n"

return predictor_history_string

def create_example_string(fields, example):
@@ -180,5 +182,5 @@ def get_dspy_source_code(module):
header.append(code)
completed_set.add(code)
completed_set.add(item)
return '\n\n'.join(header) + '\n\n' + base_code

return '\n\n'.join(header) + '\n\n' + base_code