Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat add json plugin and support low, medium, high reasoning efforts for thinking models #163

Merged
merged 64 commits into from
Feb 27, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
64 commits
Select commit Hold shift + click to select a range
2b811ba
add initial implementation
codelion Jan 30, 2025
b40e011
Update inference.py
codelion Jan 30, 2025
75e386c
add think deeper plugin
codelion Jan 30, 2025
dd3661a
Update thinkdeeper_plugin.py
codelion Jan 30, 2025
3935597
Update thinkdeeper_plugin.py
codelion Jan 30, 2025
57f1cf3
Update thinkdeeper_plugin.py
codelion Jan 30, 2025
c23c294
Update thinkdeeper_plugin.py
codelion Jan 30, 2025
fde8ea3
Update thinkdeeper_plugin.py
codelion Jan 30, 2025
bbe663b
Update thinkdeeper_plugin.py
codelion Jan 30, 2025
298a02c
Update thinkdeeper_plugin.py
codelion Jan 30, 2025
4f09a37
Update thinkdeeper_plugin.py
codelion Jan 30, 2025
982cd77
move code around
codelion Jan 31, 2025
d5e7f6c
fix config
codelion Jan 31, 2025
c630dcc
return response
codelion Jan 31, 2025
8223736
singleton cache
codelion Jan 31, 2025
ffc1ade
Update thinkdeeper.py
codelion Jan 31, 2025
8c3466d
singleton cache
codelion Jan 31, 2025
74b3bc2
add tip
codelion Feb 2, 2025
5bf0c0e
add num_traces
codelion Feb 5, 2025
4bc73b5
fix bugs
codelion Feb 6, 2025
c5c8d83
fix tokens
codelion Feb 7, 2025
d56a6b6
Update thinkdeeper.py
codelion Feb 7, 2025
6c06d47
Update eval_aime_benchmark.py
codelion Feb 7, 2025
3f4eb0d
removing num traces
codelion Feb 7, 2025
403416d
Update eval_math500_benchmark.py
codelion Feb 11, 2025
0479f85
Update eval_math500_benchmark.py
codelion Feb 11, 2025
c2523ee
Update eval_math500_benchmark.py
codelion Feb 11, 2025
c5a8ba7
Update eval_math500_benchmark.py
codelion Feb 11, 2025
63f1a2a
Update eval_math500_benchmark.py
codelion Feb 13, 2025
e309310
Update eval_math500_benchmark.py
codelion Feb 13, 2025
f1f33d6
Update eval_math500_benchmark.py
codelion Feb 13, 2025
052e102
Update eval_math500_benchmark.py
codelion Feb 13, 2025
d8ded8a
fixes
codelion Feb 13, 2025
354fd32
Update eval_math500_benchmark.py
codelion Feb 13, 2025
f72baea
Update eval_aime_benchmark.py
codelion Feb 13, 2025
da5be26
Update eval_math500_benchmark.py
codelion Feb 13, 2025
2c70537
Update eval_math500_benchmark.py
codelion Feb 13, 2025
ad95968
Update eval_math500_benchmark.py
codelion Feb 14, 2025
befbfc3
Update eval_math500_benchmark.py
codelion Feb 14, 2025
ee744a4
Merge branch 'feat-add-json-plugin' of https://github.com/codelion/op…
codelion Feb 14, 2025
7330140
Update eval_math500_benchmark.py
codelion Feb 14, 2025
7f957bc
Update eval_math500_benchmark.py
codelion Feb 14, 2025
91ec172
Update eval_math500_benchmark.py
codelion Feb 14, 2025
527df53
Update eval_math500_benchmark.py
codelion Feb 14, 2025
edc67ad
Update eval_math500_benchmark.py
codelion Feb 14, 2025
4068976
Update eval_math500_benchmark.py
codelion Feb 14, 2025
839e5d6
Update eval_math500_benchmark.py
codelion Feb 14, 2025
dca4b6f
Create gen_optillmbench.py
codelion Feb 16, 2025
98fd9e4
Update gen_optillmbench.py
codelion Feb 16, 2025
f89f89e
add eval script
codelion Feb 16, 2025
4b85f01
removed TIP added max_thoughts token
codelion Feb 23, 2025
aad3062
support reasoning_effort parameter for reasoning models
codelion Feb 23, 2025
1bd01f4
Update inference.py
codelion Feb 23, 2025
4f9bdc6
Update thinkdeeper.py
codelion Feb 23, 2025
3159768
fixes
codelion Feb 23, 2025
6524e18
Update inference.py
codelion Feb 24, 2025
5e78652
Update eval_aime_benchmark.py
codelion Feb 25, 2025
62e4948
Update eval_aime_benchmark.py
codelion Feb 25, 2025
120ad87
Update eval_aime_benchmark.py
codelion Feb 26, 2025
50ac90e
Update eval_aime_benchmark.py
codelion Feb 26, 2025
718b678
init
codelion Feb 26, 2025
3a7d0f0
Update thinkdeeper.py
codelion Feb 27, 2025
7755946
fix scripts
codelion Feb 27, 2025
2e380b6
Update inference.py
codelion Feb 27, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -167,3 +167,5 @@ cython_debug/

# VS Code
.vscode/

scripts/results/
48 changes: 34 additions & 14 deletions optillm.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,7 @@ def parse_combined_approach(model: str, known_approaches: list, plugin_approache

return operation, approaches, actual_model

def execute_single_approach(approach, system_prompt, initial_query, client, model):
def execute_single_approach(approach, system_prompt, initial_query, client, model, request_config: dict = None):
if approach in known_approaches:
if approach == 'none':
# Extract kwargs from the request data
Expand Down Expand Up @@ -313,31 +313,42 @@ def execute_single_approach(approach, system_prompt, initial_query, client, mode
elif approach == 're2':
return re2_approach(system_prompt, initial_query, client, model, n=server_config['n'])
elif approach == 'cepo':
return cepo(system_prompt, initial_query, client, model, cepo_config)
return cepo(system_prompt, initial_query, client, model, cepo_config)
elif approach in plugin_approaches:
return plugin_approaches[approach](system_prompt, initial_query, client, model)
# Check if the plugin accepts request_config
plugin_func = plugin_approaches[approach]
import inspect
sig = inspect.signature(plugin_func)

if 'request_config' in sig.parameters:
# Plugin supports request_config
return plugin_func(system_prompt, initial_query, client, model, request_config=request_config)
else:
# Legacy plugin without request_config support
return plugin_func(system_prompt, initial_query, client, model)
else:
raise ValueError(f"Unknown approach: {approach}")

def execute_combined_approaches(approaches, system_prompt, initial_query, client, model):
def execute_combined_approaches(approaches, system_prompt, initial_query, client, model, request_config: dict = None):
final_response = initial_query
total_tokens = 0
for approach in approaches:
response, tokens = execute_single_approach(approach, system_prompt, final_response, client, model)
response, tokens = execute_single_approach(approach, system_prompt, final_response, client, model, request_config)
final_response = response
total_tokens += tokens
return final_response, total_tokens

async def execute_parallel_approaches(approaches, system_prompt, initial_query, client, model):
async def execute_parallel_approaches(approaches, system_prompt, initial_query, client, model, request_config: dict = None):
async def run_approach(approach):
return await asyncio.to_thread(execute_single_approach, approach, system_prompt, initial_query, client, model)
return await asyncio.to_thread(execute_single_approach, approach, system_prompt, initial_query, client, model, request_config)

tasks = [run_approach(approach) for approach in approaches]
results = await asyncio.gather(*tasks)
responses, tokens = zip(*results)
return list(responses), sum(tokens)

def execute_n_times(n: int, approaches, operation: str, system_prompt: str, initial_query: str, client: Any, model: str) -> Tuple[Union[str, List[str]], int]:
def execute_n_times(n: int, approaches, operation: str, system_prompt: str, initial_query: str, client: Any, model: str,
request_config: dict = None) -> Tuple[Union[str, List[str]], int]:
"""
Execute the pipeline n times and return n responses.

Expand All @@ -358,13 +369,13 @@ def execute_n_times(n: int, approaches, operation: str, system_prompt: str, init

for _ in range(n):
if operation == 'SINGLE':
response, tokens = execute_single_approach(approaches[0], system_prompt, initial_query, client, model)
response, tokens = execute_single_approach(approaches[0], system_prompt, initial_query, client, model, request_config)
elif operation == 'AND':
response, tokens = execute_combined_approaches(approaches, system_prompt, initial_query, client, model)
response, tokens = execute_combined_approaches(approaches, system_prompt, initial_query, client, model, request_config)
elif operation == 'OR':
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
response, tokens = loop.run_until_complete(execute_parallel_approaches(approaches, system_prompt, initial_query, client, model))
response, tokens = loop.run_until_complete(execute_parallel_approaches(approaches, system_prompt, initial_query, client, model, request_config))
loop.close()
else:
raise ValueError(f"Unknown operation: {operation}")
Expand Down Expand Up @@ -534,6 +545,15 @@ def proxy():
messages = data.get('messages', [])
model = data.get('model', server_config['model'])
n = data.get('n', server_config['n']) # Get n value from request or config
# Extract response_format if present
response_format = data.get("response_format", None)

# Create request config with all parameters
request_config = {
"stream": stream,
"n": n,
"response_format": response_format # Add response_format to config
}

optillm_approach = data.get('optillm_approach', server_config['approach'])
logger.debug(data)
Expand Down Expand Up @@ -574,12 +594,12 @@ def proxy():
responses = []
completion_tokens = 0
for _ in range(n):
result, tokens = execute_single_approach(approaches[0], system_prompt, initial_query, client, model)
result, tokens = execute_single_approach(approaches[0], system_prompt, initial_query, client, model, request_config)
responses.append(result)
completion_tokens += tokens
result = responses
else:
result, completion_tokens = execute_single_approach(approaches[0], system_prompt, initial_query, client, model)
result, completion_tokens = execute_single_approach(approaches[0], system_prompt, initial_query, client, model, request_config)

logger.debug(f'Direct proxy response: {result}')

Expand All @@ -593,7 +613,7 @@ def proxy():
raise ValueError("'none' approach cannot be combined with other approaches")

# Handle non-none approaches with n attempts
response, completion_tokens = execute_n_times(n, approaches, operation, system_prompt, initial_query, client, model)
response, completion_tokens = execute_n_times(n, approaches, operation, system_prompt, initial_query, client, model, request_config)

except Exception as e:
logger.error(f"Error processing request: {str(e)}")
Expand Down
Loading