@@ -268,7 +268,7 @@ def parse_combined_approach(model: str, known_approaches: list, plugin_approache
268
268
269
269
return operation , approaches , actual_model
270
270
271
- def execute_single_approach (approach , system_prompt , initial_query , client , model ):
271
+ def execute_single_approach (approach , system_prompt , initial_query , client , model , request_config : dict = None ):
272
272
if approach in known_approaches :
273
273
if approach == 'none' :
274
274
# Extract kwargs from the request data
@@ -313,31 +313,42 @@ def execute_single_approach(approach, system_prompt, initial_query, client, mode
313
313
elif approach == 're2' :
314
314
return re2_approach (system_prompt , initial_query , client , model , n = server_config ['n' ])
315
315
elif approach == 'cepo' :
316
- return cepo (system_prompt , initial_query , client , model , cepo_config )
316
+ return cepo (system_prompt , initial_query , client , model , cepo_config )
317
317
elif approach in plugin_approaches :
318
- return plugin_approaches [approach ](system_prompt , initial_query , client , model )
318
+ # Check if the plugin accepts request_config
319
+ plugin_func = plugin_approaches [approach ]
320
+ import inspect
321
+ sig = inspect .signature (plugin_func )
322
+
323
+ if 'request_config' in sig .parameters :
324
+ # Plugin supports request_config
325
+ return plugin_func (system_prompt , initial_query , client , model , request_config = request_config )
326
+ else :
327
+ # Legacy plugin without request_config support
328
+ return plugin_func (system_prompt , initial_query , client , model )
319
329
else :
320
330
raise ValueError (f"Unknown approach: { approach } " )
321
331
322
def execute_combined_approaches(approaches, system_prompt, initial_query, client, model, request_config: dict = None):
    """Chain the given approaches, feeding each one's output into the next.

    The first approach receives ``initial_query``; every later approach
    receives the response produced by the approach before it.

    Args:
        approaches: Iterable of approach identifiers, applied in order.
        system_prompt: System prompt forwarded unchanged to every approach.
        initial_query: Query handed to the first approach.
        client: Client object forwarded to ``execute_single_approach``.
        model: Model identifier forwarded to ``execute_single_approach``.
        request_config: Optional per-request settings forwarded unchanged to
            every approach.

    Returns:
        A ``(final_response, total_tokens)`` tuple: the output of the last
        approach (or ``initial_query`` if ``approaches`` is empty) and the sum
        of the token counts reported by each step.
    """
    current_text = initial_query
    token_count = 0
    for step in approaches:
        # The previous step's output becomes this step's query.
        current_text, used = execute_single_approach(
            step, system_prompt, current_text, client, model, request_config
        )
        token_count += used
    return current_text, token_count
330
340
331
async def execute_parallel_approaches(approaches, system_prompt, initial_query, client, model, request_config: dict = None):
    """Run every approach concurrently on the same query and collect the results.

    Each approach is executed through ``asyncio.to_thread`` so that the
    synchronous ``execute_single_approach`` calls can overlap.

    Args:
        approaches: Iterable of approach identifiers to run.
        system_prompt: System prompt forwarded unchanged to every approach.
        initial_query: Query given to every approach (outputs are not chained).
        client: Client object forwarded to ``execute_single_approach``.
        model: Model identifier forwarded to ``execute_single_approach``.
        request_config: Optional per-request settings forwarded unchanged to
            every approach.

    Returns:
        A ``(responses, total_tokens)`` tuple: the list of responses, in the
        same order as ``approaches``, and the sum of their token counts.
        Returns ``([], 0)`` when there are no approaches to run.
    """
    async def run_approach(approach):
        return await asyncio.to_thread(
            execute_single_approach,
            approach, system_prompt, initial_query, client, model, request_config,
        )

    tasks = [run_approach(approach) for approach in approaches]
    if not tasks:
        # zip(*[]) yields nothing, so the two-name unpack below would raise
        # ValueError; an empty run has no responses and no tokens.
        return [], 0

    results = await asyncio.gather(*tasks)
    # Split the (response, tokens) pairs into two parallel sequences.
    responses, tokens = zip(*results)
    return list(responses), sum(tokens)
339
349
340
- def execute_n_times (n : int , approaches , operation : str , system_prompt : str , initial_query : str , client : Any , model : str ) -> Tuple [Union [str , List [str ]], int ]:
350
+ def execute_n_times (n : int , approaches , operation : str , system_prompt : str , initial_query : str , client : Any , model : str ,
351
+ request_config : dict = None ) -> Tuple [Union [str , List [str ]], int ]:
341
352
"""
342
353
Execute the pipeline n times and return n responses.
343
354
@@ -358,13 +369,13 @@ def execute_n_times(n: int, approaches, operation: str, system_prompt: str, init
358
369
359
370
for _ in range (n ):
360
371
if operation == 'SINGLE' :
361
- response , tokens = execute_single_approach (approaches [0 ], system_prompt , initial_query , client , model )
372
+ response , tokens = execute_single_approach (approaches [0 ], system_prompt , initial_query , client , model , request_config )
362
373
elif operation == 'AND' :
363
- response , tokens = execute_combined_approaches (approaches , system_prompt , initial_query , client , model )
374
+ response , tokens = execute_combined_approaches (approaches , system_prompt , initial_query , client , model , request_config )
364
375
elif operation == 'OR' :
365
376
loop = asyncio .new_event_loop ()
366
377
asyncio .set_event_loop (loop )
367
- response , tokens = loop .run_until_complete (execute_parallel_approaches (approaches , system_prompt , initial_query , client , model ))
378
+ response , tokens = loop .run_until_complete (execute_parallel_approaches (approaches , system_prompt , initial_query , client , model , request_config ))
368
379
loop .close ()
369
380
else :
370
381
raise ValueError (f"Unknown operation: { operation } " )
@@ -534,6 +545,15 @@ def proxy():
534
545
messages = data .get ('messages' , [])
535
546
model = data .get ('model' , server_config ['model' ])
536
547
n = data .get ('n' , server_config ['n' ]) # Get n value from request or config
548
+ # Extract response_format if present
549
+ response_format = data .get ("response_format" , None )
550
+
551
+ # Create request config with all parameters
552
+ request_config = {
553
+ "stream" : stream ,
554
+ "n" : n ,
555
+ "response_format" : response_format # Add response_format to config
556
+ }
537
557
538
558
optillm_approach = data .get ('optillm_approach' , server_config ['approach' ])
539
559
logger .debug (data )
@@ -574,12 +594,12 @@ def proxy():
574
594
responses = []
575
595
completion_tokens = 0
576
596
for _ in range (n ):
577
- result , tokens = execute_single_approach (approaches [0 ], system_prompt , initial_query , client , model )
597
+ result , tokens = execute_single_approach (approaches [0 ], system_prompt , initial_query , client , model , request_config )
578
598
responses .append (result )
579
599
completion_tokens += tokens
580
600
result = responses
581
601
else :
582
- result , completion_tokens = execute_single_approach (approaches [0 ], system_prompt , initial_query , client , model )
602
+ result , completion_tokens = execute_single_approach (approaches [0 ], system_prompt , initial_query , client , model , request_config )
583
603
584
604
logger .debug (f'Direct proxy response: { result } ' )
585
605
@@ -593,7 +613,7 @@ def proxy():
593
613
raise ValueError ("'none' approach cannot be combined with other approaches" )
594
614
595
615
# Handle non-none approaches with n attempts
596
- response , completion_tokens = execute_n_times (n , approaches , operation , system_prompt , initial_query , client , model )
616
+ response , completion_tokens = execute_n_times (n , approaches , operation , system_prompt , initial_query , client , model , request_config )
597
617
598
618
except Exception as e :
599
619
logger .error (f"Error processing request: { str (e )} " )
0 commit comments