Skip to content

Commit 57e50b1

Browse files
qwen 3 vl with apply_chat_template true
Signed-off-by: Brian Dellabetta <[email protected]>
1 parent 175d562 commit 57e50b1

File tree

4 files changed

+18
-18
lines changed

4 files changed

+18
-18
lines changed
Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,17 @@
11
cadence: weekly
2-
model: Qwen/Qwen2.5-VL-7B-Instruct
3-
model_class: Qwen2_5_VLForConditionalGeneration
2+
model: Qwen/Qwen3-VL-8B-Instruct
3+
model_class: Qwen3VLForConditionalGeneration
44
scheme: FP8_DYNAMIC
55
recipe: tests/e2e/vLLM/recipes/FP8/recipe_fp8_dynamic.yaml
66
lmeval:
77
model: "hf-multimodal"
88
model_args:
99
dtype: bfloat16
10-
add_bos_token: True
1110
convert_img_format: True
1211
task: mmmu_val_literature
12+
apply_chat_template: True
1313
num_fewshot: 0
1414
batch_size: 8
15-
# dense model achieves accuracy of 0.9 +/ 0.0557
15+
# dense model achieves accuracy of 0.833
1616
metrics:
17-
acc,none: 0.8333
18-
acc_stderr,none: 0.0557
17+
acc,none: 0.833
Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
cadence: "weekly"
2-
model: Qwen/Qwen2.5-VL-7B-Instruct
3-
model_class: Qwen2_5_VLForConditionalGeneration
2+
model: Qwen/Qwen3-VL-8B-Instruct
3+
model_class: Qwen3VLForConditionalGeneration
44
scheme: INT8_dyn_per_token
55
recipe: tests/e2e/vLLM/recipes/INT8/recipe_int8_channel_weight_dynamic_per_token.yaml
66
dataset_id: lmms-lab/flickr30k
@@ -9,12 +9,11 @@ lmeval:
99
model: "hf-multimodal"
1010
model_args:
1111
dtype: bfloat16
12-
add_bos_token: True
1312
convert_img_format: True
1413
task: mmmu_val_literature
14+
apply_chat_template: True
1515
num_fewshot: 0
1616
batch_size: 8
17-
# dense model achieves accuracy of 0.9 +/ 0.0557
17+
# dense model achieves accuracy of 0.833
1818
metrics:
19-
acc,none: 0.833
20-
acc_stderr,none: 0.0557
19+
acc,none: 0.833
Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
cadence: "weekly"
2-
model: Qwen/Qwen2.5-VL-7B-Instruct
3-
model_class: Qwen2_5_VLForConditionalGeneration
2+
model: Qwen/Qwen3-VL-8B-Instruct
3+
model_class: Qwen3VLForConditionalGeneration
44
scheme: W4A16_actorder_weight
55
recipe: tests/e2e/vLLM/recipes/actorder/recipe_w4a16_actorder_weight.yaml
66
dataset_id: lmms-lab/flickr30k
@@ -9,12 +9,11 @@ lmeval:
99
model: "hf-multimodal"
1010
model_args:
1111
dtype: bfloat16
12-
add_bos_token: True
1312
convert_img_format: True
1413
task: mmmu_val_literature
14+
apply_chat_template: True
1515
num_fewshot: 0
1616
batch_size: 8
17-
# dense model achieves accuracy of 0.9 +/ 0.0557
17+
# dense model achieves accuracy of 0.8333
1818
metrics:
19-
acc,none: 0.8333
20-
acc_stderr,none: 0.0557
19+
acc,none: 0.800

tests/lmeval/test_lmeval.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ class LmEvalConfig(BaseModel):
2525
num_fewshot: int = 5
2626
limit: int = 1000
2727
batch_size: int = 100
28+
apply_chat_template: bool = False
2829
# Recovery testing (default): compare against base model performance
2930
# Default threshold is 0.95 (retain ≥95% of base), can be overridden
3031
recovery_threshold: Union[float, dict] = 0.95
@@ -160,6 +161,7 @@ def _eval_base_model(self):
160161
num_fewshot=self.lmeval.num_fewshot,
161162
limit=self.lmeval.limit,
162163
device="cuda:0",
164+
apply_chat_template=self.lmeval.apply_chat_template,
163165
batch_size=self.lmeval.batch_size,
164166
)
165167

@@ -190,6 +192,7 @@ def _run_lm_eval(self):
190192
num_fewshot=self.lmeval.num_fewshot,
191193
limit=self.lmeval.limit,
192194
device="cuda:0",
195+
apply_chat_template=self.lmeval.apply_chat_template,
193196
batch_size=self.lmeval.batch_size,
194197
)
195198

0 commit comments

Comments
 (0)