
Commit 28bbf40

[VLM]fix bs and grad reset (#344)

Authored by n1ck-guo
Signed-off-by: n1ck-guo <[email protected]>
1 parent: 61e04a8

File tree: 8 files changed, +39 -22 lines

auto_round/autoround.py  (+8 -2)

@@ -294,6 +294,7 @@ def quantize(self):
         accelerate.hooks.remove_hook_from_submodules(self.model)  ## self.model.hf_device_map has not been changed
         self.model = mv_module_from_gpu(self.model, self.low_cpu_mem_usage)
         logger.info("caching done")
+        pbar = tqdm(range(0, sum([len(i) for i in all_blocks]), self.nblocks))
         for block_names in all_blocks:
             inputs = all_inputs[block_names[0]]
             all_inputs.pop(block_names[0])
@@ -318,6 +319,7 @@ def quantize(self):
                 block_names,
                 nblocks=self.nblocks,
                 device=self.device,
+                pbar=pbar
             )

         self.quant_layers(layer_names, all_inputs)
@@ -1124,6 +1126,7 @@ def quant_blocks(
             block_names,
             nblocks=1,
             device=torch.device("cpu"),
+            pbar=None
     ):
         """Quantize and dequantize the weights of the specified blocks in the model.

@@ -1162,8 +1165,10 @@ def quant_blocks(
                     to_dtype(input_others[key][i], tmp_dtype)
         quant_block = compile_func(self.quant_block, device, self.enable_torch_compile)

-        pbar = tqdm(range(0, len(block_names), nblocks))
-        for i in pbar:
+        if pbar is None:
+            pbar = tqdm(range(0, len(block_names), nblocks))
+        # for i in pbar:
+        for i in range(len(block_names)):
             if nblocks == 1:
                 n = block_names[i]
                 pbar.set_description(f"Quantizing {n}")
@@ -1184,6 +1189,7 @@ def quant_blocks(
                 q_input=q_input,
                 device=device,
             )
+            pbar.update(1)

         self.model = mv_module_from_gpu(self.model, self.low_cpu_mem_usage)
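The autoround.py hunks above replace the per-call progress bar in quant_blocks with a single bar created once in quantize and threaded through every call, so progress is reported over all blocks of the run instead of resetting for each block group. A minimal sketch of that pattern, assuming simplified stand-in functions (the real per-block quantization work is elided):

from tqdm import tqdm

def quant_blocks(block_names, nblocks=1, pbar=None):
    # Fall back to a local bar so the function still works when called on its own.
    if pbar is None:
        pbar = tqdm(range(0, len(block_names), nblocks))
    for i in range(len(block_names)):
        pbar.set_description(f"Quantizing {block_names[i]}")
        # ... per-block quantize/dequantize would happen here ...
        pbar.update(1)

def quantize(all_blocks, nblocks=1):
    # One bar sized to the total number of blocks, shared by every quant_blocks call.
    pbar = tqdm(range(0, sum(len(b) for b in all_blocks), nblocks))
    for block_names in all_blocks:
        quant_blocks(block_names, nblocks=nblocks, pbar=pbar)

# Hypothetical block names, purely for illustration.
quantize([["model.layers.0", "model.layers.1"], ["visual.blocks.0"]])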

auto_round/mllm/autoround_mllm.py  (+21 -11)

@@ -120,7 +120,7 @@ def __init__(
         low_gpu_mem_usage: bool = False,
         low_cpu_mem_usage: bool = False,
         iters: int = 200,
-        seqlen: int = 2048,
+        seqlen: int = None,
         nsamples: int = 128,
         sampler: str = "rand",
         seed: int = 42,
@@ -136,7 +136,7 @@ def __init__(
         act_dynamic: bool = True,
         to_quant_block_names: Union[str, list] = None,
         enable_norm_bias_tuning: bool = False,
-        truncation: bool = False,
+        truncation: bool = None,
         enable_torch_compile: bool = None,
         **kwargs,
     ):
@@ -152,10 +152,6 @@ def __init__(

         dataset = self.template.default_dataset if dataset is None else dataset

-        if nsamples % batch_size != 0:
-            nsamples = (nsamples // batch_size + 1) * batch_size
-            logger.warning(f"'nsamples' is not divisible by 'batch_size', will adjusted to {nsamples}")
-
         from ..calib_dataset import CALIB_DATASETS
         from .mllm_dataset import MLLM_DATASET
         if isinstance(dataset, str):
@@ -170,17 +166,31 @@ def __init__(

         if dataset in MLLM_DATASET.keys():
             truncation = False
-            batch_size = 1
             seqlen = 512 if seqlen is None else seqlen
+            if batch_size != 1:
+                logger.warning(
+                    f"rest batch_size({batch_size}) to 1 and "
+                    f"gradient_accumulate_steps({gradient_accumulate_steps}) "
+                    f"to {batch_size * gradient_accumulate_steps}, "
+                    f"cause batch_size={batch_size} cannot be used for {dataset}")
+                gradient_accumulate_steps = batch_size * gradient_accumulate_steps
+                batch_size = 1
         if quant_nontext_module and batch_size != 1:
-            logger.warning(f"batch_size({batch_size}) cannot be used for calibrating non-text modules,"
-                           "reset to 1")
+            logger.warning(
+                f"rest batch_size({batch_size}) to 1 and "
+                f"gradient_accumulate_steps({gradient_accumulate_steps}) "
+                f"to {batch_size * gradient_accumulate_steps}, "
+                f"cause batch_size={batch_size} cannot be used for calibrating non-text modules.")
             gradient_accumulate_steps = batch_size * gradient_accumulate_steps
             batch_size = 1
         seqlen = 2048 if seqlen is None else seqlen
         truncation = True if truncation is None else truncation
         self.truncation = truncation

+        if nsamples % batch_size != 0:
+            nsamples = (nsamples // batch_size + 1) * batch_size
+            logger.warning(f"'nsamples' is not divisible by 'batch_size', will adjusted to {nsamples}")
+
         super(AutoRoundMLLM, self).__init__(
             model=model,
             tokenizer=tokenizer,
@@ -259,7 +269,7 @@ def calib(self, nsamples, bs):
                 m = m.to(self.device)

         total = nsamples if not hasattr(self.dataloader, "len") else min(nsamples, len(self.dataloader))
-        with tqdm(range(1, total + 1), desc="calib") as pbar:
+        with tqdm(range(1, total + 1), desc="cache block inputs") as pbar:
             for data in self.dataloader:
                 if data is None:
                     pbar.update(1)
@@ -337,7 +347,7 @@ def calib(self, nsamples, bs):
                     exit(-1)
                 elif total_cnt < nsamples:
                     logger.warning(
-                        f"Insufficient number of samples collected may affect the quantification. "
+                        f"Insufficient number of samples collected may affect the quantization. "
                         f"target samples count is {nsamples}, while valid samples count is {total_cnt}"
                     )
                 if total_cnt < self.batch_size:
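The autoround_mllm.py hunks do two related things: when the calibration dataset (or calibration of non-text modules) only supports batch_size=1, the supplied batch size is now folded into gradient_accumulate_steps instead of being silently dropped, and the nsamples padding is moved below that reset so it uses the final batch size. A minimal sketch of the adjustment logic, with hypothetical names (force_bs_one stands in for the two conditions checked in the diff):

import math

def resolve_calib_batching(batch_size, gradient_accumulate_steps, nsamples, force_bs_one):
    # Fold an unsupported batch size into gradient accumulation so the
    # effective batch (batch_size * gradient_accumulate_steps) is preserved.
    if force_bs_one and batch_size != 1:
        gradient_accumulate_steps = batch_size * gradient_accumulate_steps
        batch_size = 1
    # Pad nsamples up to a multiple of the *final* batch size, which is why
    # the commit moves this check after the dataset-specific handling.
    if nsamples % batch_size != 0:
        nsamples = math.ceil(nsamples / batch_size) * batch_size
    return batch_size, gradient_accumulate_steps, nsamples

print(resolve_calib_batching(8, 1, 128, force_bs_one=True))   # (1, 8, 128)
print(resolve_calib_batching(3, 2, 128, force_bs_one=False))  # (3, 2, 129)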

auto_round/script/llm.py  (+3 -3)

@@ -156,7 +156,7 @@ def setup_parser():
     parser.add_argument("--group_size", default=128, type=int,
                         help="group size")

-    parser.add_argument("--batch_size", "--train_bs", default=8, type=int,
+    parser.add_argument("--batch_size", "--train_bs", "--bs", default=8, type=int,
                         help="train batch size")

     parser.add_argument("--iters", "--iter", default=200, type=int,
@@ -178,7 +178,7 @@ def setup_best_parser():
     parser.add_argument("--group_size", default=128, type=int,
                         help="group size")

-    parser.add_argument("--batch_size", "--train_bs", default=8, type=int,
+    parser.add_argument("--batch_size", "--train_bs", "--bs", default=8, type=int,
                         help="train batch size")

     parser.add_argument("--iters", "--iter", default=1000, type=int,
@@ -202,7 +202,7 @@ def setup_fast_parser():
     parser.add_argument("--group_size", default=128, type=int,
                         help="group size")

-    parser.add_argument("--batch_size", "--train_bs", default=4, type=int,
+    parser.add_argument("--batch_size", "--train_bs", "--bs", default=4, type=int,
                         help="train batch size")

     parser.add_argument("--iters", default=200, type=int,

auto_round/script/mllm.py  (+2 -1)

@@ -170,7 +170,7 @@ def setup_parser():
     parser.add_argument("--group_size", default=128, type=int,
                         help="group size")

-    parser.add_argument("--batch_size", "--train_bs", default=8, type=int,
+    parser.add_argument("--batch_size", "--train_bs", "--bs", default=8, type=int,
                         help="train batch size")

     parser.add_argument("--iters", "--iter", default=200, type=int,
@@ -450,6 +450,7 @@ def setup_lmms_parser():
     )
     parser.add_argument(
         "--batch_size",
+        "--bs",
         "-b",
         type=str,
         default=1,

examples/language-modeling/main.py  (+2 -2)

@@ -20,7 +20,7 @@
     parser.add_argument("--group_size", default=128, type=int,
                         help="group size")

-    parser.add_argument("--batch_size", "--train_bs", default=8, type=int,
+    parser.add_argument("--batch_size", "--bs", "--train_bs", default=8, type=int,
                         help="train batch size")

     parser.add_argument("--eval_bs", default=None, type=int,
@@ -136,7 +136,7 @@
     args = parser.parse_args()

     print(
-        "Warning, examples/language-modeling/main.py is deprecated, please use auto-round cmd line instead. The file will be deleted in the V0.4.1 release ")
+        "Warning, examples/language-modeling/main.py is deprecated, please use auto-round cmd line instead. The file will be deleted in the V0.4.2 release ")

     if args.enable_minmax_tuning:
         print(

examples/multimodal-modeling/Common_model/main.py  (+1 -1)

@@ -277,7 +277,7 @@ def get_train_dataloader(train_dataset, model, data_collator=default_data_collat
     parser.add_argument("--group_size", default=128, type=int,
                         help="group size")

-    parser.add_argument("--batch_size", default=1, type=int,
+    parser.add_argument("--batch_size", "--bs", default=1, type=int,
                         help="train batch size")

     parser.add_argument("--eval_bs", default=4, type=int,

examples/multimodal-modeling/Llava/main.py  (+1 -1)

@@ -113,7 +113,7 @@ def save_tower(model, save_path, quant_nontext_module: bool = False, max_shard_s
     parser.add_argument("--group_size", default=128, type=int,
                         help="group size")

-    parser.add_argument("--batch_size", default=1, type=int,
+    parser.add_argument("--batch_size", "--bs", default=1, type=int,
                         help="train batch size")

     parser.add_argument("--eval_bs", default=4, type=int,

examples/multimodal-modeling/Phi-3-vision/main.py  (+1 -1)

@@ -167,7 +167,7 @@ def create_data_loader(dataset, batch_size=1, data_collator=None):
     parser.add_argument("--group_size", default=128, type=int,
                         help="group size")

-    parser.add_argument("--batch_size", default=1, type=int,
+    parser.add_argument("--batch_size", "--bs", default=1, type=int,
                         help="train batch size")

     parser.add_argument("--eval_bs", default=4, type=int,
