@@ -576,7 +576,7 @@ def test_int4wo_offload(self):
576576 "model.layers.18" : 0 ,
577577 "model.layers.19" : "cpu" ,
578578 "model.layers.20" : "cpu" ,
579- "model.layers.21" : "disk " ,
579+ "model.layers.21" : "cpu " ,
580580 "model.norm" : 0 ,
581581 "model.rotary_emb" : 0 ,
582582 "lm_head" : 0 ,
@@ -599,7 +599,7 @@ def test_int4wo_offload(self):
599599 EXPECTED_OUTPUTS = Expectations (
600600 {
601601 ("xpu" , 3 ): "What are we having for dinner?\n \n Jessica: (smiling)" ,
602- ("cuda" , 7 ): "What are we having for dinner?\n - 2 . What is the temperature outside" ,
602+ ("cuda" , 7 ): "What are we having for dinner?\n - 1 . What is the temperature outside" ,
603603 }
604604 )
605605 # fmt: on
@@ -712,7 +712,7 @@ def check_serialization_expected_output(self, device, expected_output, safe_seri
712712 dtype = torch .bfloat16 if isinstance (self .quant_scheme , Int4WeightOnlyConfig ) else "auto"
713713 with tempfile .TemporaryDirectory () as tmpdirname :
714714 self .quantized_model .save_pretrained (tmpdirname , safe_serialization = safe_serialization )
715- loaded_quantized_model = AutoModelForCausalLM .from_pretrained (tmpdirname , dtype = dtype , device_map = device )
715+ loaded_quantized_model = AutoModelForCausalLM .from_pretrained (tmpdirname , dtype = dtype , device_map = device , use_safetensors = safe_serialization )
716716 input_ids = self .tokenizer (self .input_text , return_tensors = "pt" ).to (device )
717717
718718 output = loaded_quantized_model .generate (** input_ids , max_new_tokens = self .max_new_tokens )
@@ -729,7 +729,7 @@ class TorchAoSafeSerializationTest(TorchAoSerializationTest):
729729 @classmethod
730730 def setUpClass (cls ):
731731 cls .tokenizer = AutoTokenizer .from_pretrained (cls .model_name )
732- cls .EXPECTED_OUTPUT = "What are we having for dinner?\n - 1. What is the temperature outside "
732+ cls .EXPECTED_OUTPUT = "What are we having for dinner?\n \n Jessica: (smiling) "
733733 # placeholder
734734 cls .quant_scheme = torchao .quantization .Float8WeightOnlyConfig ()
735735
0 commit comments