Skip to content

Commit aa1f1d7

Browse files
committed
[https://nvbugs/5575913][fix] Use separate thresholds for 120b/20b gptoss
Signed-off-by: Dongfeng Yu <[email protected]>
1 parent e47c787 commit aa1f1d7

File tree

2 files changed

+18
-6
lines changed

2 files changed

+18
-6
lines changed

tests/integration/defs/accuracy/references/gsm8k.yaml

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -212,7 +212,7 @@ GPT-OSS/BF16:
212212
- accuracy: 90.3
213213
- kv_cache_quant_algo: FP8
214214
accuracy: 90.3
215-
GPT-OSS/MXFP4:
215+
GPT-OSS/120B-MXFP4:
216216
- accuracy: 90.3
217217
- quant_algo: W4A8_MXFP4_MXFP8
218218
accuracy: 90.3
@@ -229,6 +229,18 @@ GPT-OSS/MXFP4:
229229
- quant_algo: W4A16_MXFP4
230230
kv_cache_quant_algo: FP8
231231
accuracy: 90.3
232+
GPT-OSS/20B-MXFP4:
233+
- accuracy: 85.0
234+
- quant_algo: W4A8_MXFP4_MXFP8
235+
accuracy: 85.0
236+
- quant_algo: W4A8_MXFP4_MXFP8
237+
kv_cache_quant_algo: FP8
238+
accuracy: 85.0
239+
- quant_algo: W4A16_MXFP4
240+
accuracy: 85.0
241+
- quant_algo: W4A16_MXFP4
242+
kv_cache_quant_algo: FP8
243+
accuracy: 85.0
232244
LGAI-EXAONE/EXAONE-4.0-32B:
233245
- accuracy: 88.36
234246
ByteDance-Seed/Seed-OSS-36B-Instruct:

tests/integration/defs/accuracy/test_llm_api_pytorch.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3450,7 +3450,7 @@ def test_w4_1gpu(self, kv_cache_dtype, moe_backend, cuda_graph,
34503450
moe_config=MoeConfig(backend=moe_backend))
34513451

34523452
with llm:
3453-
model_name = "GPT-OSS/MXFP4"
3453+
model_name = "GPT-OSS/20B-MXFP4"
34543454
task = GSM8K(model_name)
34553455
task.evaluate(llm,
34563456
extra_evaluator_kwargs=self.extra_evaluator_kwargs)
@@ -3461,7 +3461,7 @@ def test_dummy_load_format(self):
34613461
load_format="dummy",
34623462
)
34633463
with llm:
3464-
model_name = "GPT-OSS/MXFP4"
3464+
model_name = "GPT-OSS/20B-MXFP4"
34653465
task = GSM8K(model_name)
34663466
task.evaluate(llm, is_integration_test=True)
34673467

@@ -3508,7 +3508,7 @@ def test_w4_4gpus(self, kv_cache_dtype, moe_backend, tp_size, pp_size,
35083508
moe_config=MoeConfig(backend=moe_backend))
35093509

35103510
with llm:
3511-
model_name = "GPT-OSS/MXFP4"
3511+
model_name = "GPT-OSS/120B-MXFP4"
35123512
task = GSM8K(model_name)
35133513
task.evaluate(llm,
35143514
extra_evaluator_kwargs=self.extra_evaluator_kwargs)
@@ -3595,7 +3595,7 @@ def test_w4_2gpus(self, kv_cache_dtype, moe_backend, tp_size, pp_size,
35953595
moe_config=MoeConfig(backend=moe_backend))
35963596

35973597
with llm:
3598-
model_name = "GPT-OSS/MXFP4"
3598+
model_name = "GPT-OSS/20B-MXFP4"
35993599
task = GSM8K(model_name)
36003600
mocker.patch.object(GSM8K, "MAX_OUTPUT_LEN", 8192)
36013601
mocker.patch.dict(GSM8K.EVALUATE_KWARGS,
@@ -3622,7 +3622,7 @@ def test_w4_chunked_prefill(self, kv_cache_dtype, moe_backend, mocker):
36223622
kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.6,
36233623
dtype=kv_cache_dtype)
36243624

3625-
model_name = "GPT-OSS/MXFP4"
3625+
model_name = "GPT-OSS/120B-MXFP4"
36263626
with LLM(self.MODEL_PATH,
36273627
tensor_parallel_size=4,
36283628
pipeline_parallel_size=1,

0 commit comments

Comments
 (0)