Skip to content

Commit 7be1f50

Browse files
authored
Merge pull request #163 from codelion/feat-add-json-plugin
Feat add json plugin and support low, medium, high reasoning efforts for thinking models
2 parents 2ad6e5e + 2e380b6 commit 7be1f50

12 files changed

+2833
-260
lines changed

.gitignore

+2
Original file line number | Diff line number | Diff line change
@@ -167,3 +167,5 @@ cython_debug/
167167

168168
# VS Code
169169
.vscode/
170+
171+
scripts/results/

optillm.py

+34 -14
Original file line number | Diff line number | Diff line change
@@ -268,7 +268,7 @@ def parse_combined_approach(model: str, known_approaches: list, plugin_approache
268268

269269
return operation, approaches, actual_model
270270

271-
def execute_single_approach(approach, system_prompt, initial_query, client, model):
271+
def execute_single_approach(approach, system_prompt, initial_query, client, model, request_config: dict = None):
272272
if approach in known_approaches:
273273
if approach == 'none':
274274
# Extract kwargs from the request data
@@ -313,31 +313,42 @@ def execute_single_approach(approach, system_prompt, initial_query, client, mode
313313
elif approach == 're2':
314314
return re2_approach(system_prompt, initial_query, client, model, n=server_config['n'])
315315
elif approach == 'cepo':
316-
return cepo(system_prompt, initial_query, client, model, cepo_config)
316+
return cepo(system_prompt, initial_query, client, model, cepo_config)
317317
elif approach in plugin_approaches:
318-
return plugin_approaches[approach](system_prompt, initial_query, client, model)
318+
# Check if the plugin accepts request_config
319+
plugin_func = plugin_approaches[approach]
320+
import inspect
321+
sig = inspect.signature(plugin_func)
322+
323+
if 'request_config' in sig.parameters:
324+
# Plugin supports request_config
325+
return plugin_func(system_prompt, initial_query, client, model, request_config=request_config)
326+
else:
327+
# Legacy plugin without request_config support
328+
return plugin_func(system_prompt, initial_query, client, model)
319329
else:
320330
raise ValueError(f"Unknown approach: {approach}")
321331

322-
def execute_combined_approaches(approaches, system_prompt, initial_query, client, model):
332+
def execute_combined_approaches(approaches, system_prompt, initial_query, client, model, request_config: dict = None):
323333
final_response = initial_query
324334
total_tokens = 0
325335
for approach in approaches:
326-
response, tokens = execute_single_approach(approach, system_prompt, final_response, client, model)
336+
response, tokens = execute_single_approach(approach, system_prompt, final_response, client, model, request_config)
327337
final_response = response
328338
total_tokens += tokens
329339
return final_response, total_tokens
330340

331-
async def execute_parallel_approaches(approaches, system_prompt, initial_query, client, model):
341+
async def execute_parallel_approaches(approaches, system_prompt, initial_query, client, model, request_config: dict = None):
332342
async def run_approach(approach):
333-
return await asyncio.to_thread(execute_single_approach, approach, system_prompt, initial_query, client, model)
343+
return await asyncio.to_thread(execute_single_approach, approach, system_prompt, initial_query, client, model, request_config)
334344

335345
tasks = [run_approach(approach) for approach in approaches]
336346
results = await asyncio.gather(*tasks)
337347
responses, tokens = zip(*results)
338348
return list(responses), sum(tokens)
339349

340-
def execute_n_times(n: int, approaches, operation: str, system_prompt: str, initial_query: str, client: Any, model: str) -> Tuple[Union[str, List[str]], int]:
350+
def execute_n_times(n: int, approaches, operation: str, system_prompt: str, initial_query: str, client: Any, model: str,
351+
request_config: dict = None) -> Tuple[Union[str, List[str]], int]:
341352
"""
342353
Execute the pipeline n times and return n responses.
343354
@@ -358,13 +369,13 @@ def execute_n_times(n: int, approaches, operation: str, system_prompt: str, init
358369

359370
for _ in range(n):
360371
if operation == 'SINGLE':
361-
response, tokens = execute_single_approach(approaches[0], system_prompt, initial_query, client, model)
372+
response, tokens = execute_single_approach(approaches[0], system_prompt, initial_query, client, model, request_config)
362373
elif operation == 'AND':
363-
response, tokens = execute_combined_approaches(approaches, system_prompt, initial_query, client, model)
374+
response, tokens = execute_combined_approaches(approaches, system_prompt, initial_query, client, model, request_config)
364375
elif operation == 'OR':
365376
loop = asyncio.new_event_loop()
366377
asyncio.set_event_loop(loop)
367-
response, tokens = loop.run_until_complete(execute_parallel_approaches(approaches, system_prompt, initial_query, client, model))
378+
response, tokens = loop.run_until_complete(execute_parallel_approaches(approaches, system_prompt, initial_query, client, model, request_config))
368379
loop.close()
369380
else:
370381
raise ValueError(f"Unknown operation: {operation}")
@@ -534,6 +545,15 @@ def proxy():
534545
messages = data.get('messages', [])
535546
model = data.get('model', server_config['model'])
536547
n = data.get('n', server_config['n']) # Get n value from request or config
548+
# Extract response_format if present
549+
response_format = data.get("response_format", None)
550+
551+
# Create request config with all parameters
552+
request_config = {
553+
"stream": stream,
554+
"n": n,
555+
"response_format": response_format # Add response_format to config
556+
}
537557

538558
optillm_approach = data.get('optillm_approach', server_config['approach'])
539559
logger.debug(data)
@@ -574,12 +594,12 @@ def proxy():
574594
responses = []
575595
completion_tokens = 0
576596
for _ in range(n):
577-
result, tokens = execute_single_approach(approaches[0], system_prompt, initial_query, client, model)
597+
result, tokens = execute_single_approach(approaches[0], system_prompt, initial_query, client, model, request_config)
578598
responses.append(result)
579599
completion_tokens += tokens
580600
result = responses
581601
else:
582-
result, completion_tokens = execute_single_approach(approaches[0], system_prompt, initial_query, client, model)
602+
result, completion_tokens = execute_single_approach(approaches[0], system_prompt, initial_query, client, model, request_config)
583603

584604
logger.debug(f'Direct proxy response: {result}')
585605

@@ -593,7 +613,7 @@ def proxy():
593613
raise ValueError("'none' approach cannot be combined with other approaches")
594614

595615
# Handle non-none approaches with n attempts
596-
response, completion_tokens = execute_n_times(n, approaches, operation, system_prompt, initial_query, client, model)
616+
response, completion_tokens = execute_n_times(n, approaches, operation, system_prompt, initial_query, client, model, request_config)
597617

598618
except Exception as e:
599619
logger.error(f"Error processing request: {str(e)}")

0 commit comments

Comments
 (0)