File tree Expand file tree Collapse file tree 4 files changed +18
-18
lines changed Expand file tree Collapse file tree 4 files changed +18
-18
lines changed Original file line number Diff line number Diff line change 11cadence : weekly 
2- model : Qwen/Qwen2.5-VL-7B-Instruct 
3- model_class : Qwen2_5_VLForConditionalGeneration 
2+ model : Qwen/Qwen3-VL-8B-Instruct 
3+ model_class : Qwen3VLForConditionalGeneration 
44scheme : FP8_DYNAMIC 
55recipe : tests/e2e/vLLM/recipes/FP8/recipe_fp8_dynamic.yaml 
66lmeval :
77  model : "hf-multimodal" 
88  model_args :
99    dtype : bfloat16 
10-     add_bos_token : True 
1110    convert_img_format : True 
1211  task : mmmu_val_literature 
12+   apply_chat_template : True 
1313  num_fewshot : 0 
1414  batch_size : 8 
15-   #  dense model achieves accuracy of 0.9 +/ 0.0557 
15+   #  dense model achieves accuracy of 0.833 
1616  metrics :
17-     acc,none : 0.8333 
18-     acc_stderr,none : 0.0557 
17+     acc,none : 0.833 
Original file line number Diff line number Diff line change 11cadence : "weekly" 
2- model : Qwen/Qwen2.5-VL-7B-Instruct 
3- model_class : Qwen2_5_VLForConditionalGeneration 
2+ model : Qwen/Qwen3-VL-8B-Instruct 
3+ model_class : Qwen3VLForConditionalGeneration 
44scheme : INT8_dyn_per_token 
55recipe : tests/e2e/vLLM/recipes/INT8/recipe_int8_channel_weight_dynamic_per_token.yaml 
66dataset_id : lmms-lab/flickr30k 
@@ -9,12 +9,11 @@ lmeval:
99  model : "hf-multimodal" 
1010  model_args :
1111    dtype : bfloat16 
12-     add_bos_token : True 
1312    convert_img_format : True 
1413  task : mmmu_val_literature 
14+   apply_chat_template : True 
1515  num_fewshot : 0 
1616  batch_size : 8 
17-   #  dense model achieves accuracy of 0.9 +/ 0.0557 
17+   #  dense model achieves accuracy of 0.833 
1818  metrics :
19-     acc,none : 0.833 
20-     acc_stderr,none : 0.0557 
19+     acc,none : 0.833 
Original file line number Diff line number Diff line change 11cadence : "weekly" 
2- model : Qwen/Qwen2.5-VL-7B-Instruct 
3- model_class : Qwen2_5_VLForConditionalGeneration 
2+ model : Qwen/Qwen3-VL-8B-Instruct 
3+ model_class : Qwen3VLForConditionalGeneration 
44scheme : W4A16_actorder_weight 
55recipe : tests/e2e/vLLM/recipes/actorder/recipe_w4a16_actorder_weight.yaml 
66dataset_id : lmms-lab/flickr30k 
@@ -9,12 +9,11 @@ lmeval:
99  model : "hf-multimodal" 
1010  model_args :
1111    dtype : bfloat16 
12-     add_bos_token : True 
1312    convert_img_format : True 
1413  task : mmmu_val_literature 
14+   apply_chat_template : True 
1515  num_fewshot : 0 
1616  batch_size : 8 
17-   #  dense model achieves accuracy of 0.9 +/ 0.0557 
17+   #  dense model achieves accuracy of 0.8333 
1818  metrics :
19-     acc,none : 0.8333 
20-     acc_stderr,none : 0.0557 
19+     acc,none : 0.800 
Original file line number Diff line number Diff line change @@ -25,6 +25,7 @@ class LmEvalConfig(BaseModel):
2525    num_fewshot : int  =  5 
2626    limit : int  =  1000 
2727    batch_size : int  =  100 
28+     apply_chat_template : bool  =  False 
2829    # Recovery testing (default): compare against base model performance 
2930    # Default threshold is 0.95 (retain ≥95% of base), can be overridden 
3031    recovery_threshold : Union [float , dict ] =  0.95 
@@ -160,6 +161,7 @@ def _eval_base_model(self):
160161            num_fewshot = self .lmeval .num_fewshot ,
161162            limit = self .lmeval .limit ,
162163            device = "cuda:0" ,
164+             apply_chat_template = self .lmeval .apply_chat_template ,
163165            batch_size = self .lmeval .batch_size ,
164166        )
165167
@@ -190,6 +192,7 @@ def _run_lm_eval(self):
190192            num_fewshot = self .lmeval .num_fewshot ,
191193            limit = self .lmeval .limit ,
192194            device = "cuda:0" ,
195+             apply_chat_template = self .lmeval .apply_chat_template ,
193196            batch_size = self .lmeval .batch_size ,
194197        )
195198
 
 
   
 
     
   
   
          
    
    
     
    
      
     
     
    You can’t perform that action at this time.
  
 
    
  
    
      
        
     
       
      
     
   
 
    
    
  
 
  
 
     
    
0 commit comments