quic · quic-akuruvil · Jul 7, 2025 · Jul 8, 2025 · Jul 29, 2025 · Jul 29, 2025
@@ -124,10 +124,9 @@ def train(
 
         if train_config.use_peft and train_config.from_peft_checkpoint:
             intermediate_epoch = int(train_config.from_peft_checkpoint.split("/")[-2].split("_")[-1]) - 1
+            intermediate_step = int(train_config.from_peft_checkpoint.split("/")[-1].split("_")[-1])
             if epoch < intermediate_epoch:
                 logger.log_rank_zero(f"Skipping epoch {epoch + 1} since fine tuning has already completed for it.")
-                # to bring the count of train_step in sync with where it left off
-                total_train_steps += len(train_dataloader)
                 continue
 
         logger.log_rank_zero(f"Starting epoch {epoch + 1}/{train_config.num_epochs}")
@@ -149,20 +148,18 @@ def train(
 
         num_dummy_samples = 0
         for step, batch in enumerate(train_dataloader):
+            # total_train_steps indicates the cumulative number of training steps completed across all epochs.
+            # When resuming fine-tuning from previously saved checkpoints, total_train_steps indicates the total number of steps trained across the earlier session and the ongoing one.
+            total_train_steps = (epoch) * len(train_dataloader) + step
             # resume training from a particular checkpoint, assuming the dataset is not shuffled
             if train_config.use_peft and train_config.from_peft_checkpoint:
-                intermediate_step = int(train_config.from_peft_checkpoint.split("/")[-1].split("_")[-1])
-                intermediate_epoch = int(train_config.from_peft_checkpoint.split("/")[-2].split("_")[-1]) - 1
                 # log once, at the first step of the resumed epoch, how many already-trained steps get skipped
                 if epoch == intermediate_epoch and step == 0:
-                    total_train_steps += intermediate_step
                     logger.log_rank_zero(
                         f"Skipping first {intermediate_step} steps for epoch {epoch + 1}, since fine tuning has already completed for it."
                     )
                 if epoch == intermediate_epoch and step < intermediate_step:
-                    total_train_steps += 1
                     continue
-            total_train_steps += 1
 
             if train_config.max_train_step > 0 and total_train_steps >= train_config.max_train_step:
                 max_steps_reached = True
@@ -235,12 +232,12 @@ def train(
             else:
                 num_samples_in_cur_update = len(train_dataloader) % train_config.gradient_accumulation_steps
 
-            loss = loss / num_samples_in_cur_update
+            normalized_loss = loss / num_samples_in_cur_update
 
             if train_config.grad_scaler:
-                scaler.scale(loss).backward()  # backward pass
+                scaler.scale(normalized_loss).backward()  # backward pass
             else:
-                loss.backward()  # backward pass
+                normalized_loss.backward()  # backward pass
 
             if is_optimizer_step:
                 if train_config.grad_scaler:

@@ -648,6 +648,17 @@ def forward(self, input_ids, position_ids, pixel_values, image_idx, past_key_val
         image_idx = (indices1.max() + 1).unsqueeze(0).unsqueeze(0)
         return outputs.logits, pixel_values, image_idx, outputs.past_key_values
 
+    def get_npi_file(self, model_name: str, **compiler_options):
+        """Add the default node-precision-info (NPI) file for ``model_name`` to the compiler options.
+
+        Args:
+            model_name: Model identifier. Only ``google/gemma-3-4b-it`` and
+                ``google/gemma-3-27b-it`` have a default NPI file.
+            **compiler_options: Existing compiler options. They are collected into a
+                new dict, so the caller must use the returned value; the caller's
+                own dict is not modified.
+
+        Returns:
+            dict: The compiler options with ``node_precision_info`` set to the default
+            NPI path for the model (a value passed in under that key is replaced).
+
+        Raises:
+            ValueError: If ``model_name`` has no default NPI file.
+        """
+        if model_name == "google/gemma-3-4b-it":
+            compiler_options["node_precision_info"] = constants.DEFAULT_GEMMA3_4B_NODE_PRECISION_INFO
+        elif model_name == "google/gemma-3-27b-it":
+            compiler_options["node_precision_info"] = constants.DEFAULT_GEMMA3_27B_NODE_PRECISION_INFO
+        else:
+            # Report the name that was actually checked, not an instance attribute that
+            # may differ from it (or may not exist on this object).
+            raise ValueError(
+                f"For Model {model_name} default NPI file is not supported/added. Please use one of the following: google/gemma-3-4b-it, google/gemma-3-27b-it"
+            )
+        return compiler_options
+
     def get_specializations(
         self,
         batch_size: int,

@@ -681,6 +681,9 @@ def compile(
             **compiler_options,
         )
 
+        if hasattr(self.model, "get_npi_file"):
+            compiler_options = self.model.get_npi_file(self.model.pretrained_model_name_or_path, **compiler_options)
+
         custom_io_vision = {}
         kv_cache_dtype = "mxint8" if mxint8_kv_cache else "float16"
         custom_io_vision["pixel_values"] = "float16"
@@ -1030,6 +1033,9 @@ def compile(
             **compiler_options,
         )
 
+        if hasattr(self.model, "get_npi_file"):
+            # get_npi_file receives the options as **kwargs (a copy) and returns the updated
+            # dict, so the current options must be passed in and the result captured;
+            # otherwise the default NPI file never reaches the compiler.
+            compiler_options = self.model.get_npi_file(self.pretrained_model_name_or_path, **compiler_options)
+
         custom_io = {}
         kv_cache_dtype = "mxint8" if mxint8_kv_cache else "float16"
         # inputs

@@ -7,6 +7,9 @@
 
 import os
 from dataclasses import dataclass
+from pathlib import Path
+
+from QEfficient.transformers.models import gemma3 as gemma3
 
 UTILS_DIR = os.path.dirname(os.path.abspath(__file__))
 QEFF_DIR = os.path.dirname(UTILS_DIR)
@@ -100,7 +103,12 @@ def get_models_dir():
 LLAMA4_MAX_POSITION_EMBEDDINGS = 65536
 
 # Gemma3 Constant
-GEMMA3_MAX_POSITION_EMBEDDINGS = 32768
+DEFAULT_GEMMA3_4B_NODE_PRECISION_INFO = (
+    Path(__file__).resolve().parent.parent / "transformers" / "models" / "gemma3" / "fp32_nodes_gemma3_4b_mm.yaml"
+)
+DEFAULT_GEMMA3_27B_NODE_PRECISION_INFO = (
+    Path(__file__).resolve().parent.parent / "transformers" / "models" / "gemma3" / "fp32_nodes_gemma3_27b_mm.yaml"
+)
 
 
 class Constants: