@@ -6,6 +6,7 @@
 from transformers import AutoTokenizer  # type: ignore[import-untyped]
 
 from guidellm.backend import Backend, BackendType
+from guidellm.benchmark.scenario import Scenario, ScenarioManager
 from guidellm.core import GuidanceReport, TextGenerationBenchmarkReport
 from guidellm.executor import Executor, ProfileGenerationMode
 from guidellm.request import (
@@ -19,7 +20,7 @@
 __all__ = ["generate_benchmark_report"]
 
 # FIXME: Remove
-SCENARIOS = Literal["rag", "short"]
+SCENARIOS = ScenarioManager()
 
 @click.command()
 @click.option(
@@ -33,7 +34,7 @@
 )
 @click.option(
     "--scenario",
-    type=cli_params.Union(click.File(mode='r'), click.Choice(get_args(SCENARIOS))),
+    type=cli_params.Union(click.File(mode='r'), click.Choice(SCENARIOS.list())),
     default=None,
     help=(
         "TODO: A scenario or path to config"
@@ -42,7 +43,7 @@
 @click.option(
     "--backend",
     type=click.Choice(get_args(BackendType)),
-    default="openai_http",
+    default=None,
     help=(
         "The backend to use for benchmarking. "
         "The default is OpenAI Server enabling compatability with any server that "
@@ -61,7 +62,7 @@
 @click.option(
     "--data",
     type=str,
-    required=True,
+    default=None,
     help=(
         "The data source to use for benchmarking. "
         "Depending on the data-type, it should be a "
@@ -74,7 +75,7 @@
 @click.option(
     "--data-type",
     type=click.Choice(["emulated", "file", "transformers"]),
-    required=True,
+    default=None,
     help=(
         "The type of data to use for benchmarking. "
         "Use 'emulated' for synthetic data, 'file' for a file, or 'transformers' "
@@ -96,7 +97,7 @@
 @click.option(
     "--rate-type",
     type=click.Choice(get_args(ProfileGenerationMode)),
-    default="sweep",
+    default=None,
     help=(
         "The type of request rate to use for benchmarking. "
         "Use sweep to run a full range from synchronous to throughput (default), "
@@ -119,7 +120,7 @@
 @click.option(
     "--max-seconds",
     type=int,
-    default=120,
+    default=None,
     help=(
         "The maximum number of seconds for each benchmark run. "
        "Either max-seconds, max-requests, or both must be set. "
@@ -164,25 +165,35 @@
 )
 def generate_benchmark_report_cli(
     target: str,
-    scenario: Optional[Union[IO[Any], SCENARIOS]],
-    backend: BackendType,
+    scenario: Optional[Union[IO[Any], str]],
+    backend: Optional[BackendType],
     model: Optional[str],
     data: Optional[str],
-    data_type: Literal["emulated", "file", "transformers"],
+    data_type: Optional[Literal["emulated", "file", "transformers"]],
     tokenizer: Optional[str],
-    rate_type: ProfileGenerationMode,
+    rate_type: Optional[ProfileGenerationMode],
     rate: Optional[float],
     max_seconds: Optional[int],
     max_requests: Union[Literal["dataset"], int, None],
-    output_path: str,
+    output_path: Optional[str],
     enable_continuous_refresh: bool,
 ):
     """
     Generate a benchmark report for a specified backend and dataset.
     """
-    generate_benchmark_report(
-        target=target,
-        scenario=scenario,
+
+    if isinstance(scenario, str):
+        defaults = SCENARIOS[scenario]
+    elif isinstance(scenario, IO):
+        defaults = Scenario.from_json(scenario.read())
+        SCENARIOS["custom"] = defaults
+    elif scenario is None:
+        defaults = Scenario()
+    else:
+        raise ValueError("Invalid scenario type")
+
+    # Update defaults with CLI args
+    defaults.update(
         backend=backend,
         model=model,
         data=data,
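
Review note: the file branch above (`Scenario.from_json(scenario.read())`) implies a JSON document whose keys mirror the scenario fields. Assuming that schema (keys inferred from this file, values purely illustrative), loading a custom scenario would look like:

    # Keys inferred from the scenario fields used in this file; values illustrative.
    custom = Scenario.from_json(
        '{"backend": "openai_http", "data_type": "file", "data": "prompts.txt", '
        '"rate_type": "constant", "rate": 2.0, "max_seconds": 60}'
    )
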
@@ -191,25 +202,20 @@ def generate_benchmark_report_cli(
         rate_type=rate_type,
         rate=rate,
         max_seconds=max_seconds,
-        max_requests=max_requests,
+        max_requests=max_requests
+    )
+
+    generate_benchmark_report(
+        target=target,
+        scenario=defaults,
         output_path=output_path,
         cont_refresh_table=enable_continuous_refresh,
     )
 
 
 def generate_benchmark_report(
     target: str,
-    data: Optional[str],
-    data_type: Literal["emulated", "file", "transformers"],
-    scenario: Optional[Union[IO[Any], SCENARIOS]],
-    backend: BackendType = "openai_http",
-    backend_kwargs: Optional[Mapping[str, Any]] = None,
-    model: Optional[str] = None,
-    tokenizer: Optional[str] = None,
-    rate_type: ProfileGenerationMode = "sweep",
-    rate: Optional[float] = None,
-    max_seconds: Optional[int] = 120,
-    max_requests: Union[Literal["dataset"], int, None] = None,
+    scenario: Scenario,
     output_path: Optional[str] = None,
     cont_refresh_table: bool = False,
 ) -> GuidanceReport:
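
Review note: library callers now hand `generate_benchmark_report` a single `Scenario` instead of eleven keyword arguments. A minimal sketch of the new call shape (target URL and data config are illustrative):

    # Illustrative call against the refactored signature.
    scenario = Scenario()
    scenario.update(
        data_type="emulated",
        data='{"prompt_tokens": 512, "generated_tokens": 128}',
        rate_type="sweep",
    )
    report = generate_benchmark_report(
        target="http://localhost:8000/v1",
        scenario=scenario,
        output_path="benchmark_report.json",
    )
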
@@ -236,22 +242,22 @@ def generate_benchmark_report(
     :param backend_kwargs: Additional keyword arguments for the backend.
     """
     logger.info(
-        "Generating benchmark report with target: {}, backend: {}", target, backend
+        "Generating benchmark report with target: {}, backend: {}", target, scenario.backend
     )
 
     # Create backend
     backend_inst = Backend.create(
-        type_=backend,
+        type_=scenario.backend,
         target=target,
-        model=model,
-        **(backend_kwargs or {}),
+        model=scenario.model,
+        **(scenario.backend_kwargs or {}),
     )
     backend_inst.validate()
 
     request_generator: RequestGenerator
 
     # Create tokenizer and request generator
-    tokenizer_inst = tokenizer
+    tokenizer_inst = scenario.tokenizer
     if not tokenizer_inst:
         try:
             tokenizer_inst = AutoTokenizer.from_pretrained(backend_inst.model)
@@ -261,44 +267,44 @@
                 "--tokenizer must be provided for request generation"
             ) from err
 
-    if data_type == "emulated":
+    if scenario.data_type == "emulated":
         request_generator = EmulatedRequestGenerator(
-            config=data, tokenizer=tokenizer_inst
+            config=scenario.data, tokenizer=tokenizer_inst
         )
-    elif data_type == "file":
-        request_generator = FileRequestGenerator(path=data, tokenizer=tokenizer_inst)
-    elif data_type == "transformers":
+    elif scenario.data_type == "file":
+        request_generator = FileRequestGenerator(path=scenario.data, tokenizer=tokenizer_inst)
+    elif scenario.data_type == "transformers":
         request_generator = TransformersDatasetRequestGenerator(
-            dataset=data, tokenizer=tokenizer_inst
+            dataset=scenario.data, tokenizer=tokenizer_inst
         )
     else:
-        raise ValueError(f"Unknown data type: {data_type}")
+        raise ValueError(f"Unknown data type: {scenario.data_type}")
 
-    if data_type == "emulated" and max_requests == "dataset":
+    if scenario.data_type == "emulated" and scenario.max_requests == "dataset":
         raise ValueError("Cannot use 'dataset' for emulated data")
 
     # Create executor
     executor = Executor(
         backend=backend_inst,
         request_generator=request_generator,
-        mode=rate_type,
-        rate=rate if rate_type in ("constant", "poisson") else None,
+        mode=scenario.rate_type,
+        rate=scenario.rate if scenario.rate_type in ("constant", "poisson") else None,
         max_number=(
-            len(request_generator) if max_requests == "dataset" else max_requests
+            len(request_generator) if scenario.max_requests == "dataset" else scenario.max_requests
         ),
-        max_duration=max_seconds,
+        max_duration=scenario.max_seconds,
     )
 
     # Run executor
     logger.debug(
         "Running executor with args: {}",
         {
-            "backend": backend,
+            "backend": scenario.backend,
             "request_generator": request_generator,
-            "mode": rate_type,
-            "rate": rate,
-            "max_number": max_requests,
-            "max_duration": max_seconds,
+            "mode": scenario.rate_type,
+            "rate": scenario.rate,
+            "max_number": scenario.max_requests,
+            "max_duration": scenario.max_seconds,
         },
     )
     report = asyncio.run(_run_executor_for_result(executor))
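
Review note: end to end, `--scenario` now accepts either a registered scenario name or a path to a JSON config, and any flag passed explicitly still wins over the scenario's value. Assuming the console entry point is exposed as `guidellm` and that the manager pre-registers named scenarios (replacing the removed `Literal["rag", "short"]`), usage would look like:

    guidellm --target "http://localhost:8000/v1" --scenario rag
    guidellm --target "http://localhost:8000/v1" --scenario my-scenario.json --max-seconds 60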