From 960330f97ed67fa65f095f2fadfea6395a085326 Mon Sep 17 00:00:00 2001 From: Dipankar Sarkar Date: Wed, 30 Apr 2025 06:15:03 +0000 Subject: [PATCH 1/8] Adding model name to hash Signed-off-by: Dipankar Sarkar --- .../transformers/models/modeling_auto.py | 18 +++++++++++++++--- .../models/test_causal_lm_models.py | 8 ++++---- .../models/test_embedding_models.py | 2 +- .../models/test_speech_seq2seq_models.py | 2 +- 4 files changed, 21 insertions(+), 9 deletions(-) diff --git a/QEfficient/transformers/models/modeling_auto.py b/QEfficient/transformers/models/modeling_auto.py index f181ee5eb..6dbc3ad99 100644 --- a/QEfficient/transformers/models/modeling_auto.py +++ b/QEfficient/transformers/models/modeling_auto.py @@ -156,10 +156,11 @@ class QEFFAutoModel(QEFFTransformersBase): _pytorch_transforms = [CustomOpsTransform, AwqToMatmulNbitsTransform, GPTQToMatmulNbitsTransform] _onnx_transforms = [FP16ClipTransform, SplitTensorsTransform] - def __init__(self, model: nn.Module, **kwargs): + def __init__(self, model: nn.Module, model_name: str, **kwargs): super().__init__(model) self.model.config.use_cache = True self.num_layers = model.config.num_hidden_layers + self.model.model_name = model_name @classmethod @with_replaced_quantizers @@ -226,6 +227,7 @@ def model_hash(self) -> str: mhash = hashlib.sha256() mhash.update(to_hashable(self.model.config.to_diff_dict())) mhash.update(to_hashable(self._transform_names())) + mhash.update(to_hashable(self.model.model_name)) mhash = mhash.hexdigest()[:16] return mhash @@ -533,7 +535,6 @@ def __init__( self.config = model.config self.vision_model = QEffVisionEncoderForTextImageToTextModel(model) self.lang_model = QEffCausalLMForTextImageToTextModel(model) - self.input_shapes, self.output_names = None, None @property @@ -1299,6 +1300,8 @@ def __init__( model: nn.Module, continuous_batching: bool = False, qaic_config: Optional[dict] = None, + is_tlm: bool = False, + model_name: str = None, **kwargs, ): model_class_name = model.__class__.__name__ @@ -1326,6 +1329,12 @@ def __init__( self.continuous_batching = continuous_batching self.model, transformed = SpDTransform.apply(self.model, qaic_config, **kwargs) self.is_tlm = transformed + self.model.model_name = model_name + + if is_tlm: + # TODO: It is possible to always apply this transform and make value of indices as last indices by default in PyTorch + self.model, transformed = SpDTransform.apply(self.model) + self.is_tlm = is_tlm @property def model_name(self) -> str: @@ -1415,6 +1424,7 @@ def model_hash(self) -> str: mhash.update(to_hashable({"continuous_batching": self.continuous_batching})) mhash.update(to_hashable({"is_tlm": self.is_tlm})) mhash.update(to_hashable(self._transform_names())) + mhash.update(to_hashable(self.model.model_name)) mhash = mhash.hexdigest()[:16] return mhash @@ -1747,7 +1757,7 @@ class QEFFAutoModelForSpeechSeq2Seq(QEFFTransformersBase, MultimodalUtilityMixin _pytorch_transforms = [CustomOpsTransform, AwqToMatmulNbitsTransform, GPTQToMatmulNbitsTransform, KVCacheTransform] _onnx_transforms = [FP16ClipTransform, SplitTensorsTransform] - def __init__(self, model: nn.Module, **kwargs): + def __init__(self, model: nn.Module, model_name, **kwargs): model_class_name = model.__class__.__name__ if not (model_class_name.endswith("ForConditionalGeneration")): raise TypeError(f"Required pytorch module with ForConditionalGeneration, got {model_class_name}") @@ -1755,6 +1765,7 @@ def __init__(self, model: nn.Module, **kwargs): super().__init__(model) self.model.config.use_cache = True 
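# Aside: why the model name belongs in the hash. `model_hash` keys the cached
# export/compile artifacts, so two checkpoints whose configs yield identical
# `to_diff_dict()` output (e.g. a base model and a same-config fine-tune) would
# previously collide on one cache entry — the apparent motivation for this
# series. A minimal sketch of the scheme, with `to_hashable` as a stand-in for
# the library helper (assumed to serialize its argument deterministically to
# bytes):
import hashlib
import json

def to_hashable(obj):
    # Stand-in: deterministic byte serialization of dicts, lists, and strings.
    return json.dumps(obj, sort_keys=True).encode("utf-8")

def model_hash(config_diff: dict, transform_names: list, model_name: str) -> str:
    mhash = hashlib.sha256()
    mhash.update(to_hashable(config_diff))      # architecture/config contribution
    mhash.update(to_hashable(transform_names))  # applied PyTorch/ONNX transforms
    mhash.update(to_hashable(model_name))       # new in this patch: disambiguates same-config checkpoints
    return mhash.hexdigest()[:16]               # 16-hex-char cache key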
self.num_layers = model.config.num_hidden_layers + self.model.model_name = model_name @property def model_hash(self) -> str: @@ -1768,6 +1779,7 @@ def model_hash(self) -> str: mhash = hashlib.sha256() mhash.update(to_hashable(self.model.config.to_diff_dict())) mhash.update(to_hashable(self._transform_names())) + mhash.update(to_hashable(self.model.mode)) mhash = mhash.hexdigest()[:16] return mhash diff --git a/tests/transformers/models/test_causal_lm_models.py b/tests/transformers/models/test_causal_lm_models.py index efa2187b7..cf28332b3 100644 --- a/tests/transformers/models/test_causal_lm_models.py +++ b/tests/transformers/models/test_causal_lm_models.py @@ -32,7 +32,7 @@ "Felladrin/Minueza-32M-Base", "wtang06/mpt-125m-c4", "hakurei/gpt-j-random-tinier", - "mistralai/Mixtral-8x7B-Instruct-v0.1", + # "mistralai/Mixtral-8x7B-Instruct-v0.1", "meta-llama/Llama-3.2-1B", "unsloth/gemma-2b", "unsloth/gemma-2-2b", @@ -123,7 +123,7 @@ def check_causal_lm_pytorch_vs_kv_vs_ort_vs_ai100( pytorch_hf_tokens = api_runner.run_hf_model_on_pytorch(model_hf) is_tlm = False if num_speculative_tokens is None else True - qeff_model = QEFFAutoModelForCausalLM(model_hf, is_tlm=is_tlm) + qeff_model = QEFFAutoModelForCausalLM(model_hf, is_tlm=is_tlm, model_name=model_name) pytorch_kv_tokens = api_runner.run_kv_model_on_pytorch(qeff_model.model) @@ -183,7 +183,7 @@ def check_causal_lm_pytorch_vs_kv_vs_ort_vs_ai100( pytorch_hf_tokens = api_runner.run_hf_model_on_pytorch_CB(model_hf) pytorch_hf_tokens = np.vstack(pytorch_hf_tokens) - qeff_model = QEFFAutoModelForCausalLM(model_hf, continuous_batching=True, is_tlm=is_tlm) + qeff_model = QEFFAutoModelForCausalLM(model_hf, continuous_batching=True, is_tlm=is_tlm, model_name=model_name) onnx_model_path = qeff_model.export() if not get_available_device_id(): @@ -219,7 +219,7 @@ def test_causal_lm_export_with_deprecated_api(model_name): model_config["n_layer"] = 1 model, _ = load_causal_lm_model(model_config) tokenizer = load_hf_tokenizer(pretrained_model_name_or_path=model_name) - qeff_model = QEFFAutoModelForCausalLM(model) + qeff_model = QEFFAutoModelForCausalLM(model, model_name=model_name) new_api_onnx_model_path = qeff_model.export() _, old_api_onnx_model_path = qualcomm_efficient_converter( model_name=model_name, model_kv=qeff_model, tokenizer=tokenizer diff --git a/tests/transformers/models/test_embedding_models.py b/tests/transformers/models/test_embedding_models.py index 22f4bd580..62c872443 100644 --- a/tests/transformers/models/test_embedding_models.py +++ b/tests/transformers/models/test_embedding_models.py @@ -47,7 +47,7 @@ def check_embed_pytorch_vs_ort_vs_ai100( pt_outputs = pt_model(**inputs) pt_embeddings = pt_outputs[0][0].detach().numpy() # Pytorch transformed model - qeff_model = QEFFAutoModel(pt_model) + qeff_model = QEFFAutoModel(pt_model, model_name) qeff_pt_outputs = qeff_model.generate(inputs=inputs, runtime_ai100=False) qeff_pt_embeddings = qeff_pt_outputs[0][0].detach().numpy() mad = np.mean(np.abs(pt_embeddings - qeff_pt_embeddings)) diff --git a/tests/transformers/models/test_speech_seq2seq_models.py b/tests/transformers/models/test_speech_seq2seq_models.py index 63fd318c7..981430729 100644 --- a/tests/transformers/models/test_speech_seq2seq_models.py +++ b/tests/transformers/models/test_speech_seq2seq_models.py @@ -314,7 +314,7 @@ def check_seq2seq_pytorch_vs_kv_vs_ort_vs_ai100( pytorch_hf_tokens = run_seq2seq_pytorch_hf(model_hf, processor, data, sample_rate, ctx_len) - qeff_model = QEFFAutoModelForSpeechSeq2Seq(model_hf) + qeff_model = 
QEFFAutoModelForSpeechSeq2Seq(model_hf, model_name) pytorch_kv_tokens = run_seq2seq_pytorch_with_kv(qeff_model, processor, data, sample_rate, ctx_len) From 81fb24c4b6c5f399bf8863e4cf299ce22e6ab1d7 Mon Sep 17 00:00:00 2001 From: Dipankar Sarkar Date: Wed, 30 Apr 2025 06:24:24 +0000 Subject: [PATCH 2/8] Cleaning and Minor Fix 1 Signed-off-by: Dipankar Sarkar --- tests/transformers/models/test_causal_lm_models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/transformers/models/test_causal_lm_models.py b/tests/transformers/models/test_causal_lm_models.py index cf28332b3..0e264406b 100644 --- a/tests/transformers/models/test_causal_lm_models.py +++ b/tests/transformers/models/test_causal_lm_models.py @@ -32,7 +32,7 @@ "Felladrin/Minueza-32M-Base", "wtang06/mpt-125m-c4", "hakurei/gpt-j-random-tinier", - # "mistralai/Mixtral-8x7B-Instruct-v0.1", + "mistralai/Mixtral-8x7B-Instruct-v0.1", "meta-llama/Llama-3.2-1B", "unsloth/gemma-2b", "unsloth/gemma-2-2b", From 1588cc619e315ac3e77dd62bbbb7477d3b6a5460 Mon Sep 17 00:00:00 2001 From: Dipankar Sarkar Date: Mon, 12 May 2025 17:55:12 +0000 Subject: [PATCH 3/8] Implementing the model card hash through from_pretrained method Signed-off-by: Dipankar Sarkar --- .../transformers/models/modeling_auto.py | 25 +++++++++---------- .../models/test_causal_lm_models.py | 8 +++--- .../models/test_embedding_models.py | 2 +- .../models/test_speech_seq2seq_models.py | 2 +- 4 files changed, 19 insertions(+), 18 deletions(-) diff --git a/QEfficient/transformers/models/modeling_auto.py b/QEfficient/transformers/models/modeling_auto.py index 6dbc3ad99..72a890c4d 100644 --- a/QEfficient/transformers/models/modeling_auto.py +++ b/QEfficient/transformers/models/modeling_auto.py @@ -85,7 +85,7 @@ def from_pretrained(cls, pretrained_model_name_or_path: str, is_tlm: bool = Fals kwargs.update({"attn_implementation": "eager", "low_cpu_mem_usage": False}) model = cls._hf_auto_class.from_pretrained(pretrained_model_name_or_path, *args, **kwargs) - return cls(model, is_tlm=is_tlm) + return cls(model, is_tlm=is_tlm, pretrained_model_name_or_path=pretrained_model_name_or_path) @property def model_name(self) -> str: @@ -156,11 +156,11 @@ class QEFFAutoModel(QEFFTransformersBase): _pytorch_transforms = [CustomOpsTransform, AwqToMatmulNbitsTransform, GPTQToMatmulNbitsTransform] _onnx_transforms = [FP16ClipTransform, SplitTensorsTransform] - def __init__(self, model: nn.Module, model_name: str, **kwargs): + def __init__(self, model: nn.Module, **kwargs): super().__init__(model) self.model.config.use_cache = True self.num_layers = model.config.num_hidden_layers - self.model.model_name = model_name + self.pretrained_model_name_or_path = kwargs.get("pretrained_model_name_or_path", None) @classmethod @with_replaced_quantizers @@ -213,7 +213,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, *args, **kwargs): model, kv_offload=kv_offload ) - return cls(model) + return cls(model, pretrained_model_name_or_path=pretrained_model_name_or_path) @property def model_hash(self) -> str: @@ -227,7 +227,7 @@ def model_hash(self) -> str: mhash = hashlib.sha256() mhash.update(to_hashable(self.model.config.to_diff_dict())) mhash.update(to_hashable(self._transform_names())) - mhash.update(to_hashable(self.model.model_name)) + mhash.update(to_hashable(self.pretrained_model_name_or_path)) mhash = mhash.hexdigest()[:16] return mhash @@ -1301,7 +1301,6 @@ def __init__( continuous_batching: bool = False, qaic_config: Optional[dict] = None, is_tlm: bool = False, - model_name: str 
= None, **kwargs, ): model_class_name = model.__class__.__name__ @@ -1322,14 +1321,14 @@ def __init__( ) super().__init__(model) - + # breakpoint() # Set use_cache=True to get KV values as output during ONNX export self.model.config.use_cache = True self.num_layers = model.config.num_hidden_layers self.continuous_batching = continuous_batching self.model, transformed = SpDTransform.apply(self.model, qaic_config, **kwargs) self.is_tlm = transformed - self.model.model_name = model_name + self.pretrained_model_name_or_path = kwargs.get("pretrained_model_name_or_path", None) if is_tlm: # TODO: It is possible to always apply this transform and make value of indices as last indices by default in PyTorch @@ -1408,11 +1407,11 @@ def from_pretrained( return MISCLASSIFIED_CAUSAL_LM_TO_QEFF_AUTO_CLASS_MAP[model.__class__.__name__]( model, kv_offload=kv_offload ) - return cls( model, continuous_batching=continuous_batching, qaic_config=qaic_config, + pretrained_model_name_or_path=pretrained_model_name_or_path, **kwargs, ) @@ -1424,7 +1423,7 @@ def model_hash(self) -> str: mhash.update(to_hashable({"continuous_batching": self.continuous_batching})) mhash.update(to_hashable({"is_tlm": self.is_tlm})) mhash.update(to_hashable(self._transform_names())) - mhash.update(to_hashable(self.model.model_name)) + mhash.update(to_hashable(self.pretrained_model_name_or_path)) mhash = mhash.hexdigest()[:16] return mhash @@ -1757,7 +1756,7 @@ class QEFFAutoModelForSpeechSeq2Seq(QEFFTransformersBase, MultimodalUtilityMixin _pytorch_transforms = [CustomOpsTransform, AwqToMatmulNbitsTransform, GPTQToMatmulNbitsTransform, KVCacheTransform] _onnx_transforms = [FP16ClipTransform, SplitTensorsTransform] - def __init__(self, model: nn.Module, model_name, **kwargs): + def __init__(self, model: nn.Module, **kwargs): model_class_name = model.__class__.__name__ if not (model_class_name.endswith("ForConditionalGeneration")): raise TypeError(f"Required pytorch module with ForConditionalGeneration, got {model_class_name}") @@ -1765,7 +1764,7 @@ def __init__(self, model: nn.Module, model_name, **kwargs): super().__init__(model) self.model.config.use_cache = True self.num_layers = model.config.num_hidden_layers - self.model.model_name = model_name + self.pretrained_model_name_or_path = kwargs.get("pretrained_model_name_or_path", None) @property def model_hash(self) -> str: @@ -1779,7 +1778,7 @@ def model_hash(self) -> str: mhash = hashlib.sha256() mhash.update(to_hashable(self.model.config.to_diff_dict())) mhash.update(to_hashable(self._transform_names())) - mhash.update(to_hashable(self.model.mode)) + mhash.update(to_hashable(self.pretrained_model_name_or_path)) mhash = mhash.hexdigest()[:16] return mhash diff --git a/tests/transformers/models/test_causal_lm_models.py b/tests/transformers/models/test_causal_lm_models.py index 0e264406b..1302b45d9 100644 --- a/tests/transformers/models/test_causal_lm_models.py +++ b/tests/transformers/models/test_causal_lm_models.py @@ -123,7 +123,7 @@ def check_causal_lm_pytorch_vs_kv_vs_ort_vs_ai100( pytorch_hf_tokens = api_runner.run_hf_model_on_pytorch(model_hf) is_tlm = False if num_speculative_tokens is None else True - qeff_model = QEFFAutoModelForCausalLM(model_hf, is_tlm=is_tlm, model_name=model_name) + qeff_model = QEFFAutoModelForCausalLM(model_hf, is_tlm=is_tlm, pretrained_model_name_or_path=model_name) pytorch_kv_tokens = api_runner.run_kv_model_on_pytorch(qeff_model.model) @@ -183,7 +183,9 @@ def check_causal_lm_pytorch_vs_kv_vs_ort_vs_ai100( pytorch_hf_tokens = 
api_runner.run_hf_model_on_pytorch_CB(model_hf) pytorch_hf_tokens = np.vstack(pytorch_hf_tokens) - qeff_model = QEFFAutoModelForCausalLM(model_hf, continuous_batching=True, is_tlm=is_tlm, model_name=model_name) + qeff_model = QEFFAutoModelForCausalLM( + model_hf, continuous_batching=True, is_tlm=is_tlm, pretrained_model_name_or_path=model_name + ) onnx_model_path = qeff_model.export() if not get_available_device_id(): @@ -219,7 +221,7 @@ def test_causal_lm_export_with_deprecated_api(model_name): model_config["n_layer"] = 1 model, _ = load_causal_lm_model(model_config) tokenizer = load_hf_tokenizer(pretrained_model_name_or_path=model_name) - qeff_model = QEFFAutoModelForCausalLM(model, model_name=model_name) + qeff_model = QEFFAutoModelForCausalLM(model, model_name=model_name, pretrained_model_name_or_path=model_name) new_api_onnx_model_path = qeff_model.export() _, old_api_onnx_model_path = qualcomm_efficient_converter( model_name=model_name, model_kv=qeff_model, tokenizer=tokenizer diff --git a/tests/transformers/models/test_embedding_models.py b/tests/transformers/models/test_embedding_models.py index 62c872443..14d96221e 100644 --- a/tests/transformers/models/test_embedding_models.py +++ b/tests/transformers/models/test_embedding_models.py @@ -47,7 +47,7 @@ def check_embed_pytorch_vs_ort_vs_ai100( pt_outputs = pt_model(**inputs) pt_embeddings = pt_outputs[0][0].detach().numpy() # Pytorch transformed model - qeff_model = QEFFAutoModel(pt_model, model_name) + qeff_model = QEFFAutoModel(pt_model, pretrained_model_name_or_path=model_name) qeff_pt_outputs = qeff_model.generate(inputs=inputs, runtime_ai100=False) qeff_pt_embeddings = qeff_pt_outputs[0][0].detach().numpy() mad = np.mean(np.abs(pt_embeddings - qeff_pt_embeddings)) diff --git a/tests/transformers/models/test_speech_seq2seq_models.py b/tests/transformers/models/test_speech_seq2seq_models.py index 981430729..df0180a7e 100644 --- a/tests/transformers/models/test_speech_seq2seq_models.py +++ b/tests/transformers/models/test_speech_seq2seq_models.py @@ -314,7 +314,7 @@ def check_seq2seq_pytorch_vs_kv_vs_ort_vs_ai100( pytorch_hf_tokens = run_seq2seq_pytorch_hf(model_hf, processor, data, sample_rate, ctx_len) - qeff_model = QEFFAutoModelForSpeechSeq2Seq(model_hf, model_name) + qeff_model = QEFFAutoModelForSpeechSeq2Seq(model_hf, pretrained_model_name_or_path=model_name) pytorch_kv_tokens = run_seq2seq_pytorch_with_kv(qeff_model, processor, data, sample_rate, ctx_len) From 564e3ad691480153d59b98a997ea819d2d7be484 Mon Sep 17 00:00:00 2001 From: Dipankar Sarkar Date: Tue, 13 May 2025 04:45:44 +0000 Subject: [PATCH 4/8] Comment Addressed 1 and CI issue resolved Signed-off-by: Dipankar Sarkar --- QEfficient/transformers/models/modeling_auto.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/QEfficient/transformers/models/modeling_auto.py b/QEfficient/transformers/models/modeling_auto.py index 72a890c4d..f07bdc991 100644 --- a/QEfficient/transformers/models/modeling_auto.py +++ b/QEfficient/transformers/models/modeling_auto.py @@ -1321,7 +1321,6 @@ def __init__( ) super().__init__(model) - # breakpoint() # Set use_cache=True to get KV values as output during ONNX export self.model.config.use_cache = True self.num_layers = model.config.num_hidden_layers @@ -1330,11 +1329,6 @@ def __init__( self.is_tlm = transformed self.pretrained_model_name_or_path = kwargs.get("pretrained_model_name_or_path", None) - if is_tlm: - # TODO: It is possible to always apply this transform and make value of indices as last indices by default in PyTorch - 
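# Aside: what this removal fixes. Patch 1 left two SpDTransform call sites: the
# qaic_config-driven one above, plus this unconditional re-apply whenever
# is_tlm=True, which transformed the model a second time and overwrote
# self.is_tlm regardless of whether the rewrite actually fired. Only the single
# qaic_config-driven application survives. A self-contained sketch of that
# surviving flow (SpDTransform here is a stand-in; apply() is assumed to return
# (model, applied) and to be a no-op without a speculative config, and the
# "speculative_model_type" key is an assumed config field):
from typing import Optional, Tuple

class SpDTransform:  # stand-in for the library transform
    @classmethod
    def apply(cls, model, qaic_config: Optional[dict] = None, **kwargs) -> Tuple[object, bool]:
        if qaic_config and qaic_config.get("speculative_model_type"):
            return model, True  # pretend the TLM rewrite ran
        return model, False

model, applied = SpDTransform.apply(object(), qaic_config=None)
is_tlm = applied  # True only when the rewrite ran; no second apply, no overwrite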
self.model, transformed = SpDTransform.apply(self.model) - self.is_tlm = is_tlm - @property def model_name(self) -> str: mname = self.model.__class__.__name__ From a439bdb70486c0beb22375215e38f4fb3d762258 Mon Sep 17 00:00:00 2001 From: Dipankar Sarkar Date: Tue, 13 May 2025 04:53:53 +0000 Subject: [PATCH 5/8] Cleaning1 done Signed-off-by: Dipankar Sarkar --- QEfficient/transformers/models/modeling_auto.py | 1 - 1 file changed, 1 deletion(-) diff --git a/QEfficient/transformers/models/modeling_auto.py b/QEfficient/transformers/models/modeling_auto.py index f07bdc991..35a852a30 100644 --- a/QEfficient/transformers/models/modeling_auto.py +++ b/QEfficient/transformers/models/modeling_auto.py @@ -1300,7 +1300,6 @@ def __init__( model: nn.Module, continuous_batching: bool = False, qaic_config: Optional[dict] = None, - is_tlm: bool = False, **kwargs, ): model_class_name = model.__class__.__name__ From 30266bcdb2d0c4c62edafba836313b926725a3b9 Mon Sep 17 00:00:00 2001 From: Dipankar Sarkar Date: Wed, 14 May 2025 16:38:29 +0000 Subject: [PATCH 6/8] Hash change for vision models added Signed-off-by: Dipankar Sarkar --- QEfficient/transformers/models/modeling_auto.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/QEfficient/transformers/models/modeling_auto.py b/QEfficient/transformers/models/modeling_auto.py index 35a852a30..d1a24cec0 100644 --- a/QEfficient/transformers/models/modeling_auto.py +++ b/QEfficient/transformers/models/modeling_auto.py @@ -409,6 +409,7 @@ def __init__(self, model: nn.modules): super().__init__(model) self.model = model.get_qeff_vision_encoder() + def export(self, inputs, output_names, dynamic_axes, export_dir=None): return self._export(inputs, output_names, dynamic_axes, export_dir) @@ -443,6 +444,8 @@ def model_hash(self) -> str: mhash.update(to_hashable(self.model.model.config.to_diff_dict())) mhash.update(to_hashable(self._transform_names())) mhash.update(to_hashable({"QEffVisionEncoderForTextImageToTextModel": True})) + breakpoint() + mhash.update(to_hashable(self.model.model.pretrained_model_name_or_path)) mhash = mhash.hexdigest()[:16] return mhash @@ -506,6 +509,8 @@ def model_hash(self) -> str: mhash.update(to_hashable(self.model.config.to_diff_dict())) mhash.update(to_hashable(self._transform_names())) mhash.update(to_hashable({"QEffCausalLMForTextImageToTextModel": True})) + breakpoint() + mhash.update(to_hashable(self.model.model.pretrained_model_name_or_path)) mhash = mhash.hexdigest()[:16] return mhash @@ -533,9 +538,11 @@ def __init__( raise NotImplementedError("Continuous batching is not supported for image-text-to-text models yet.") self.model = model self.config = model.config + self.model.pretrained_model_name_or_path=kwargs.get("pretrained_model_name_or_path",None) self.vision_model = QEffVisionEncoderForTextImageToTextModel(model) self.lang_model = QEffCausalLMForTextImageToTextModel(model) self.input_shapes, self.output_names = None, None + @property def model_name(self) -> str: @@ -554,7 +561,7 @@ def from_pretrained(cls, pretrained_model_name_or_path: str, **kwargs): kwargs.update({"attn_implementation": "eager", "low_cpu_mem_usage": False}) model = cls._hf_auto_class.from_pretrained(pretrained_model_name_or_path, **kwargs) - return cls(model, **kwargs) + return cls(model, pretrained_model_name_or_path=pretrained_model_name_or_path,**kwargs) @property def onnx_path(self): @@ -901,7 +908,7 @@ def from_pretrained( config.vision_config.use_flash_attn = "false" model = 
cls._hf_auto_class.from_pretrained(pretrained_model_name_or_path, config, *args, **kwargs) - return cls(model, **kwargs) + return cls(model, pretrained_model_name_or_path=pretrained_model_name_or_path,**kwargs) def export( self, @@ -1255,7 +1262,7 @@ def from_pretrained(cls, pretrained_model_name_or_path: str, kv_offload: Optiona kwargs.update({"attn_implementation": "eager", "low_cpu_mem_usage": False}) model = cls._hf_auto_class.from_pretrained(pretrained_model_name_or_path, **kwargs) - return cls(model, kv_offload=kv_offload, **kwargs) + return cls(model, kv_offload=kv_offload, pretrained_model_name_or_path=pretrained_model_name_or_path,**kwargs) MISCLASSIFIED_CAUSAL_LM_TO_QEFF_AUTO_CLASS_MAP = {"InternVLChatModel": QEFFAutoModelForImageTextToText} From d1e02b76cd0e5b90b4c8f21505f8fc88a72b6f3d Mon Sep 17 00:00:00 2001 From: Dipankar Sarkar Date: Wed, 14 May 2025 16:40:35 +0000 Subject: [PATCH 7/8] Cleaning 2 done Signed-off-by: Dipankar Sarkar --- QEfficient/transformers/models/modeling_auto.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/QEfficient/transformers/models/modeling_auto.py b/QEfficient/transformers/models/modeling_auto.py index d1a24cec0..c870c9632 100644 --- a/QEfficient/transformers/models/modeling_auto.py +++ b/QEfficient/transformers/models/modeling_auto.py @@ -409,7 +409,6 @@ def __init__(self, model: nn.modules): super().__init__(model) self.model = model.get_qeff_vision_encoder() - def export(self, inputs, output_names, dynamic_axes, export_dir=None): return self._export(inputs, output_names, dynamic_axes, export_dir) @@ -444,7 +443,6 @@ def model_hash(self) -> str: mhash.update(to_hashable(self.model.model.config.to_diff_dict())) mhash.update(to_hashable(self._transform_names())) mhash.update(to_hashable({"QEffVisionEncoderForTextImageToTextModel": True})) - breakpoint() mhash.update(to_hashable(self.model.model.pretrained_model_name_or_path)) mhash = mhash.hexdigest()[:16] return mhash @@ -509,7 +507,6 @@ def model_hash(self) -> str: mhash.update(to_hashable(self.model.config.to_diff_dict())) mhash.update(to_hashable(self._transform_names())) mhash.update(to_hashable({"QEffCausalLMForTextImageToTextModel": True})) - breakpoint() mhash.update(to_hashable(self.model.model.pretrained_model_name_or_path)) mhash = mhash.hexdigest()[:16] return mhash @@ -538,11 +535,10 @@ def __init__( raise NotImplementedError("Continuous batching is not supported for image-text-to-text models yet.") self.model = model self.config = model.config - self.model.pretrained_model_name_or_path=kwargs.get("pretrained_model_name_or_path",None) + self.model.pretrained_model_name_or_path = kwargs.get("pretrained_model_name_or_path", None) self.vision_model = QEffVisionEncoderForTextImageToTextModel(model) self.lang_model = QEffCausalLMForTextImageToTextModel(model) self.input_shapes, self.output_names = None, None - @property def model_name(self) -> str: @@ -561,7 +557,7 @@ def from_pretrained(cls, pretrained_model_name_or_path: str, **kwargs): kwargs.update({"attn_implementation": "eager", "low_cpu_mem_usage": False}) model = cls._hf_auto_class.from_pretrained(pretrained_model_name_or_path, **kwargs) - return cls(model, pretrained_model_name_or_path=pretrained_model_name_or_path,**kwargs) + return cls(model, pretrained_model_name_or_path=pretrained_model_name_or_path, **kwargs) @property def onnx_path(self): @@ -908,7 +904,7 @@ def from_pretrained( config.vision_config.use_flash_attn = "false" model = 
cls._hf_auto_class.from_pretrained(pretrained_model_name_or_path, config, *args, **kwargs) - return cls(model, pretrained_model_name_or_path=pretrained_model_name_or_path,**kwargs) + return cls(model, pretrained_model_name_or_path=pretrained_model_name_or_path, **kwargs) def export( self, @@ -1262,7 +1258,7 @@ def from_pretrained(cls, pretrained_model_name_or_path: str, kv_offload: Optiona kwargs.update({"attn_implementation": "eager", "low_cpu_mem_usage": False}) model = cls._hf_auto_class.from_pretrained(pretrained_model_name_or_path, **kwargs) - return cls(model, kv_offload=kv_offload, pretrained_model_name_or_path=pretrained_model_name_or_path,**kwargs) + return cls(model, kv_offload=kv_offload, pretrained_model_name_or_path=pretrained_model_name_or_path, **kwargs) MISCLASSIFIED_CAUSAL_LM_TO_QEFF_AUTO_CLASS_MAP = {"InternVLChatModel": QEFFAutoModelForImageTextToText} From 6263312a5bccabf32ecd5c71e428002802aa5ec6 Mon Sep 17 00:00:00 2001 From: Dipankar Sarkar Date: Wed, 14 May 2025 16:52:54 +0000 Subject: [PATCH 8/8] Cleaning 3 done Signed-off-by: Dipankar Sarkar --- QEfficient/transformers/models/modeling_auto.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/QEfficient/transformers/models/modeling_auto.py b/QEfficient/transformers/models/modeling_auto.py index c870c9632..512f343bf 100644 --- a/QEfficient/transformers/models/modeling_auto.py +++ b/QEfficient/transformers/models/modeling_auto.py @@ -882,6 +882,7 @@ def __init__( self.model.config.vision_config.use_flash_attn = "false" else: self.model.config.text_config.use_cache = True + self.pretrained_model_name_or_path = kwargs.get("pretrained_model_name_or_path", None) @classmethod def from_pretrained( @@ -1143,6 +1144,7 @@ def model_hash(self) -> str: mhash.update(to_hashable(self.model.config.to_diff_dict())) mhash.update(to_hashable(self._transform_names())) mhash.update(to_hashable({"QEFFAutoModelForImageTextToText1QPC": True})) + mhash.update(to_hashable(self.pretrained_model_name_or_path)) mhash = mhash.hexdigest()[:16] return mhash
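Net effect of the series, as a usage sketch (the model cards below are
hypothetical placeholders, and `from_pretrained` is assumed to reach the
Hugging Face Hub): because `pretrained_model_name_or_path` is now folded into
`model_hash`, two checkpoints with identical `config.to_diff_dict()` output no
longer share a cache entry.

from QEfficient import QEFFAutoModelForCausalLM

base = QEFFAutoModelForCausalLM.from_pretrained("gpt2")
tuned = QEFFAutoModelForCausalLM.from_pretrained("my-org/gpt2-finetune")  # hypothetical card
# Same architecture and config, different model cards -> different cache keys.
assert base.model_hash != tuned.model_hash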