From 960330f97ed67fa65f095f2fadfea6395a085326 Mon Sep 17 00:00:00 2001 From: Dipankar Sarkar Date: Wed, 30 Apr 2025 06:15:03 +0000 Subject: [PATCH 1/8] Adding model name to hash Signed-off-by: Dipankar Sarkar --- .../transformers/models/modeling_auto.py | 18 +++++++++++++++--- .../models/test_causal_lm_models.py | 8 ++++---- .../models/test_embedding_models.py | 2 +- .../models/test_speech_seq2seq_models.py | 2 +- 4 files changed, 21 insertions(+), 9 deletions(-) diff --git a/QEfficient/transformers/models/modeling_auto.py b/QEfficient/transformers/models/modeling_auto.py index f181ee5eb..6dbc3ad99 100644 --- a/QEfficient/transformers/models/modeling_auto.py +++ b/QEfficient/transformers/models/modeling_auto.py @@ -156,10 +156,11 @@ class QEFFAutoModel(QEFFTransformersBase): _pytorch_transforms = [CustomOpsTransform, AwqToMatmulNbitsTransform, GPTQToMatmulNbitsTransform] _onnx_transforms = [FP16ClipTransform, SplitTensorsTransform] - def __init__(self, model: nn.Module, **kwargs): + def __init__(self, model: nn.Module, model_name: str, **kwargs): super().__init__(model) self.model.config.use_cache = True self.num_layers = model.config.num_hidden_layers + self.model.model_name = model_name @classmethod @with_replaced_quantizers @@ -226,6 +227,7 @@ def model_hash(self) -> str: mhash = hashlib.sha256() mhash.update(to_hashable(self.model.config.to_diff_dict())) mhash.update(to_hashable(self._transform_names())) + mhash.update(to_hashable(self.model.model_name)) mhash = mhash.hexdigest()[:16] return mhash @@ -533,7 +535,6 @@ def __init__( self.config = model.config self.vision_model = QEffVisionEncoderForTextImageToTextModel(model) self.lang_model = QEffCausalLMForTextImageToTextModel(model) - self.input_shapes, self.output_names = None, None @property @@ -1299,6 +1300,8 @@ def __init__( model: nn.Module, continuous_batching: bool = False, qaic_config: Optional[dict] = None, + is_tlm: bool = False, + model_name: str = None, **kwargs, ): model_class_name = model.__class__.__name__ @@ -1326,6 +1329,12 @@ def __init__( self.continuous_batching = continuous_batching self.model, transformed = SpDTransform.apply(self.model, qaic_config, **kwargs) self.is_tlm = transformed + self.model.model_name = model_name + + if is_tlm: + # TODO: It is possible to always apply this transform and make value of indices as last indices by default in PyTorch + self.model, transformed = SpDTransform.apply(self.model) + self.is_tlm = is_tlm @property def model_name(self) -> str: @@ -1415,6 +1424,7 @@ def model_hash(self) -> str: mhash.update(to_hashable({"continuous_batching": self.continuous_batching})) mhash.update(to_hashable({"is_tlm": self.is_tlm})) mhash.update(to_hashable(self._transform_names())) + mhash.update(to_hashable(self.model.model_name)) mhash = mhash.hexdigest()[:16] return mhash @@ -1747,7 +1757,7 @@ class QEFFAutoModelForSpeechSeq2Seq(QEFFTransformersBase, MultimodalUtilityMixin _pytorch_transforms = [CustomOpsTransform, AwqToMatmulNbitsTransform, GPTQToMatmulNbitsTransform, KVCacheTransform] _onnx_transforms = [FP16ClipTransform, SplitTensorsTransform] - def __init__(self, model: nn.Module, **kwargs): + def __init__(self, model: nn.Module, model_name, **kwargs): model_class_name = model.__class__.__name__ if not (model_class_name.endswith("ForConditionalGeneration")): raise TypeError(f"Required pytorch module with ForConditionalGeneration, got {model_class_name}") @@ -1755,6 +1765,7 @@ def __init__(self, model: nn.Module, **kwargs): super().__init__(model) self.model.config.use_cache = True 
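# Aside: why the model name belongs in the hash. `model_hash` keys the cached
# export/compile artifacts, so two checkpoints whose configs yield identical
# `to_diff_dict()` output (e.g. a base model and a same-config fine-tune) would
# previously collide on one cache entry — the apparent motivation for this
# series. A minimal sketch of the scheme, with `to_hashable` as a stand-in for
# the library helper (assumed to serialize its argument deterministically to
# bytes):
import hashlib
import json

def to_hashable(obj):
    # Stand-in: deterministic byte serialization of dicts, lists, and strings.
    return json.dumps(obj, sort_keys=True).encode("utf-8")

def model_hash(config_diff: dict, transform_names: list, model_name: str) -> str:
    mhash = hashlib.sha256()
    mhash.update(to_hashable(config_diff))      # architecture/config contribution
    mhash.update(to_hashable(transform_names))  # applied PyTorch/ONNX transforms
    mhash.update(to_hashable(model_name))       # new in this patch: disambiguates same-config checkpoints
    return mhash.hexdigest()[:16]               # 16-hex-char cache key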
self.num_layers = model.config.num_hidden_layers + self.model.model_name = model_name @property def model_hash(self) -> str: @@ -1768,6 +1779,7 @@ def model_hash(self) -> str: mhash = hashlib.sha256() mhash.update(to_hashable(self.model.config.to_diff_dict())) mhash.update(to_hashable(self._transform_names())) + mhash.update(to_hashable(self.model.mode)) mhash = mhash.hexdigest()[:16] return mhash diff --git a/tests/transformers/models/test_causal_lm_models.py b/tests/transformers/models/test_causal_lm_models.py index efa2187b7..cf28332b3 100644 --- a/tests/transformers/models/test_causal_lm_models.py +++ b/tests/transformers/models/test_causal_lm_models.py @@ -32,7 +32,7 @@ "Felladrin/Minueza-32M-Base", "wtang06/mpt-125m-c4", "hakurei/gpt-j-random-tinier", - "mistralai/Mixtral-8x7B-Instruct-v0.1", + # "mistralai/Mixtral-8x7B-Instruct-v0.1", "meta-llama/Llama-3.2-1B", "unsloth/gemma-2b", "unsloth/gemma-2-2b", @@ -123,7 +123,7 @@ def check_causal_lm_pytorch_vs_kv_vs_ort_vs_ai100( pytorch_hf_tokens = api_runner.run_hf_model_on_pytorch(model_hf) is_tlm = False if num_speculative_tokens is None else True - qeff_model = QEFFAutoModelForCausalLM(model_hf, is_tlm=is_tlm) + qeff_model = QEFFAutoModelForCausalLM(model_hf, is_tlm=is_tlm, model_name=model_name) pytorch_kv_tokens = api_runner.run_kv_model_on_pytorch(qeff_model.model) @@ -183,7 +183,7 @@ def check_causal_lm_pytorch_vs_kv_vs_ort_vs_ai100( pytorch_hf_tokens = api_runner.run_hf_model_on_pytorch_CB(model_hf) pytorch_hf_tokens = np.vstack(pytorch_hf_tokens) - qeff_model = QEFFAutoModelForCausalLM(model_hf, continuous_batching=True, is_tlm=is_tlm) + qeff_model = QEFFAutoModelForCausalLM(model_hf, continuous_batching=True, is_tlm=is_tlm, model_name=model_name) onnx_model_path = qeff_model.export() if not get_available_device_id(): @@ -219,7 +219,7 @@ def test_causal_lm_export_with_deprecated_api(model_name): model_config["n_layer"] = 1 model, _ = load_causal_lm_model(model_config) tokenizer = load_hf_tokenizer(pretrained_model_name_or_path=model_name) - qeff_model = QEFFAutoModelForCausalLM(model) + qeff_model = QEFFAutoModelForCausalLM(model, model_name=model_name) new_api_onnx_model_path = qeff_model.export() _, old_api_onnx_model_path = qualcomm_efficient_converter( model_name=model_name, model_kv=qeff_model, tokenizer=tokenizer diff --git a/tests/transformers/models/test_embedding_models.py b/tests/transformers/models/test_embedding_models.py index 22f4bd580..62c872443 100644 --- a/tests/transformers/models/test_embedding_models.py +++ b/tests/transformers/models/test_embedding_models.py @@ -47,7 +47,7 @@ def check_embed_pytorch_vs_ort_vs_ai100( pt_outputs = pt_model(**inputs) pt_embeddings = pt_outputs[0][0].detach().numpy() # Pytorch transformed model - qeff_model = QEFFAutoModel(pt_model) + qeff_model = QEFFAutoModel(pt_model, model_name) qeff_pt_outputs = qeff_model.generate(inputs=inputs, runtime_ai100=False) qeff_pt_embeddings = qeff_pt_outputs[0][0].detach().numpy() mad = np.mean(np.abs(pt_embeddings - qeff_pt_embeddings)) diff --git a/tests/transformers/models/test_speech_seq2seq_models.py b/tests/transformers/models/test_speech_seq2seq_models.py index 63fd318c7..981430729 100644 --- a/tests/transformers/models/test_speech_seq2seq_models.py +++ b/tests/transformers/models/test_speech_seq2seq_models.py @@ -314,7 +314,7 @@ def check_seq2seq_pytorch_vs_kv_vs_ort_vs_ai100( pytorch_hf_tokens = run_seq2seq_pytorch_hf(model_hf, processor, data, sample_rate, ctx_len) - qeff_model = QEFFAutoModelForSpeechSeq2Seq(model_hf) + qeff_model = 
QEFFAutoModelForSpeechSeq2Seq(model_hf, model_name) pytorch_kv_tokens = run_seq2seq_pytorch_with_kv(qeff_model, processor, data, sample_rate, ctx_len) From 81fb24c4b6c5f399bf8863e4cf299ce22e6ab1d7 Mon Sep 17 00:00:00 2001 From: Dipankar Sarkar Date: Wed, 30 Apr 2025 06:24:24 +0000 Subject: [PATCH 2/8] Cleaning and Minor Fix 1 Signed-off-by: Dipankar Sarkar --- tests/transformers/models/test_causal_lm_models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/transformers/models/test_causal_lm_models.py b/tests/transformers/models/test_causal_lm_models.py index cf28332b3..0e264406b 100644 --- a/tests/transformers/models/test_causal_lm_models.py +++ b/tests/transformers/models/test_causal_lm_models.py @@ -32,7 +32,7 @@ "Felladrin/Minueza-32M-Base", "wtang06/mpt-125m-c4", "hakurei/gpt-j-random-tinier", - # "mistralai/Mixtral-8x7B-Instruct-v0.1", + "mistralai/Mixtral-8x7B-Instruct-v0.1", "meta-llama/Llama-3.2-1B", "unsloth/gemma-2b", "unsloth/gemma-2-2b", From 1588cc619e315ac3e77dd62bbbb7477d3b6a5460 Mon Sep 17 00:00:00 2001 From: Dipankar Sarkar Date: Mon, 12 May 2025 17:55:12 +0000 Subject: [PATCH 3/8] Implementing the model card hash through from_pretrained method Signed-off-by: Dipankar Sarkar --- .../transformers/models/modeling_auto.py | 25 +++++++++---------- .../models/test_causal_lm_models.py | 8 +++--- .../models/test_embedding_models.py | 2 +- .../models/test_speech_seq2seq_models.py | 2 +- 4 files changed, 19 insertions(+), 18 deletions(-) diff --git a/QEfficient/transformers/models/modeling_auto.py b/QEfficient/transformers/models/modeling_auto.py index 6dbc3ad99..72a890c4d 100644 --- a/QEfficient/transformers/models/modeling_auto.py +++ b/QEfficient/transformers/models/modeling_auto.py @@ -85,7 +85,7 @@ def from_pretrained(cls, pretrained_model_name_or_path: str, is_tlm: bool = Fals kwargs.update({"attn_implementation": "eager", "low_cpu_mem_usage": False}) model = cls._hf_auto_class.from_pretrained(pretrained_model_name_or_path, *args, **kwargs) - return cls(model, is_tlm=is_tlm) + return cls(model, is_tlm=is_tlm, pretrained_model_name_or_path=pretrained_model_name_or_path) @property def model_name(self) -> str: @@ -156,11 +156,11 @@ class QEFFAutoModel(QEFFTransformersBase): _pytorch_transforms = [CustomOpsTransform, AwqToMatmulNbitsTransform, GPTQToMatmulNbitsTransform] _onnx_transforms = [FP16ClipTransform, SplitTensorsTransform] - def __init__(self, model: nn.Module, model_name: str, **kwargs): + def __init__(self, model: nn.Module, **kwargs): super().__init__(model) self.model.config.use_cache = True self.num_layers = model.config.num_hidden_layers - self.model.model_name = model_name + self.pretrained_model_name_or_path = kwargs.get("pretrained_model_name_or_path", None) @classmethod @with_replaced_quantizers @@ -213,7 +213,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, *args, **kwargs): model, kv_offload=kv_offload ) - return cls(model) + return cls(model, pretrained_model_name_or_path=pretrained_model_name_or_path) @property def model_hash(self) -> str: @@ -227,7 +227,7 @@ def model_hash(self) -> str: mhash = hashlib.sha256() mhash.update(to_hashable(self.model.config.to_diff_dict())) mhash.update(to_hashable(self._transform_names())) - mhash.update(to_hashable(self.model.model_name)) + mhash.update(to_hashable(self.pretrained_model_name_or_path)) mhash = mhash.hexdigest()[:16] return mhash @@ -1301,7 +1301,6 @@ def __init__( continuous_batching: bool = False, qaic_config: Optional[dict] = None, is_tlm: bool = False, - model_name: str 
= None, **kwargs, ): model_class_name = model.__class__.__name__ @@ -1322,14 +1321,14 @@ def __init__( ) super().__init__(model) - + # breakpoint() # Set use_cache=True to get KV values as output during ONNX export self.model.config.use_cache = True self.num_layers = model.config.num_hidden_layers self.continuous_batching = continuous_batching self.model, transformed = SpDTransform.apply(self.model, qaic_config, **kwargs) self.is_tlm = transformed - self.model.model_name = model_name + self.pretrained_model_name_or_path = kwargs.get("pretrained_model_name_or_path", None) if is_tlm: # TODO: It is possible to always apply this transform and make value of indices as last indices by default in PyTorch @@ -1408,11 +1407,11 @@ def from_pretrained( return MISCLASSIFIED_CAUSAL_LM_TO_QEFF_AUTO_CLASS_MAP[model.__class__.__name__]( model, kv_offload=kv_offload ) - return cls( model, continuous_batching=continuous_batching, qaic_config=qaic_config, + pretrained_model_name_or_path=pretrained_model_name_or_path, **kwargs, ) @@ -1424,7 +1423,7 @@ def model_hash(self) -> str: mhash.update(to_hashable({"continuous_batching": self.continuous_batching})) mhash.update(to_hashable({"is_tlm": self.is_tlm})) mhash.update(to_hashable(self._transform_names())) - mhash.update(to_hashable(self.model.model_name)) + mhash.update(to_hashable(self.pretrained_model_name_or_path)) mhash = mhash.hexdigest()[:16] return mhash @@ -1757,7 +1756,7 @@ class QEFFAutoModelForSpeechSeq2Seq(QEFFTransformersBase, MultimodalUtilityMixin _pytorch_transforms = [CustomOpsTransform, AwqToMatmulNbitsTransform, GPTQToMatmulNbitsTransform, KVCacheTransform] _onnx_transforms = [FP16ClipTransform, SplitTensorsTransform] - def __init__(self, model: nn.Module, model_name, **kwargs): + def __init__(self, model: nn.Module, **kwargs): model_class_name = model.__class__.__name__ if not (model_class_name.endswith("ForConditionalGeneration")): raise TypeError(f"Required pytorch module with ForConditionalGeneration, got {model_class_name}") @@ -1765,7 +1764,7 @@ def __init__(self, model: nn.Module, model_name, **kwargs): super().__init__(model) self.model.config.use_cache = True self.num_layers = model.config.num_hidden_layers - self.model.model_name = model_name + self.pretrained_model_name_or_path = kwargs.get("pretrained_model_name_or_path", None) @property def model_hash(self) -> str: @@ -1779,7 +1778,7 @@ def model_hash(self) -> str: mhash = hashlib.sha256() mhash.update(to_hashable(self.model.config.to_diff_dict())) mhash.update(to_hashable(self._transform_names())) - mhash.update(to_hashable(self.model.mode)) + mhash.update(to_hashable(self.pretrained_model_name_or_path)) mhash = mhash.hexdigest()[:16] return mhash diff --git a/tests/transformers/models/test_causal_lm_models.py b/tests/transformers/models/test_causal_lm_models.py index 0e264406b..1302b45d9 100644 --- a/tests/transformers/models/test_causal_lm_models.py +++ b/tests/transformers/models/test_causal_lm_models.py @@ -123,7 +123,7 @@ def check_causal_lm_pytorch_vs_kv_vs_ort_vs_ai100( pytorch_hf_tokens = api_runner.run_hf_model_on_pytorch(model_hf) is_tlm = False if num_speculative_tokens is None else True - qeff_model = QEFFAutoModelForCausalLM(model_hf, is_tlm=is_tlm, model_name=model_name) + qeff_model = QEFFAutoModelForCausalLM(model_hf, is_tlm=is_tlm, pretrained_model_name_or_path=model_name) pytorch_kv_tokens = api_runner.run_kv_model_on_pytorch(qeff_model.model) @@ -183,7 +183,9 @@ def check_causal_lm_pytorch_vs_kv_vs_ort_vs_ai100( pytorch_hf_tokens = 
api_runner.run_hf_model_on_pytorch_CB(model_hf) pytorch_hf_tokens = np.vstack(pytorch_hf_tokens) - qeff_model = QEFFAutoModelForCausalLM(model_hf, continuous_batching=True, is_tlm=is_tlm, model_name=model_name) + qeff_model = QEFFAutoModelForCausalLM( + model_hf, continuous_batching=True, is_tlm=is_tlm, pretrained_model_name_or_path=model_name + ) onnx_model_path = qeff_model.export() if not get_available_device_id(): @@ -219,7 +221,7 @@ def test_causal_lm_export_with_deprecated_api(model_name): model_config["n_layer"] = 1 model, _ = load_causal_lm_model(model_config) tokenizer = load_hf_tokenizer(pretrained_model_name_or_path=model_name) - qeff_model = QEFFAutoModelForCausalLM(model, model_name=model_name) + qeff_model = QEFFAutoModelForCausalLM(model, model_name=model_name, pretrained_model_name_or_path=model_name) new_api_onnx_model_path = qeff_model.export() _, old_api_onnx_model_path = qualcomm_efficient_converter( model_name=model_name, model_kv=qeff_model, tokenizer=tokenizer diff --git a/tests/transformers/models/test_embedding_models.py b/tests/transformers/models/test_embedding_models.py index 62c872443..14d96221e 100644 --- a/tests/transformers/models/test_embedding_models.py +++ b/tests/transformers/models/test_embedding_models.py @@ -47,7 +47,7 @@ def check_embed_pytorch_vs_ort_vs_ai100( pt_outputs = pt_model(**inputs) pt_embeddings = pt_outputs[0][0].detach().numpy() # Pytorch transformed model - qeff_model = QEFFAutoModel(pt_model, model_name) + qeff_model = QEFFAutoModel(pt_model, pretrained_model_name_or_path=model_name) qeff_pt_outputs = qeff_model.generate(inputs=inputs, runtime_ai100=False) qeff_pt_embeddings = qeff_pt_outputs[0][0].detach().numpy() mad = np.mean(np.abs(pt_embeddings - qeff_pt_embeddings)) diff --git a/tests/transformers/models/test_speech_seq2seq_models.py b/tests/transformers/models/test_speech_seq2seq_models.py index 981430729..df0180a7e 100644 --- a/tests/transformers/models/test_speech_seq2seq_models.py +++ b/tests/transformers/models/test_speech_seq2seq_models.py @@ -314,7 +314,7 @@ def check_seq2seq_pytorch_vs_kv_vs_ort_vs_ai100( pytorch_hf_tokens = run_seq2seq_pytorch_hf(model_hf, processor, data, sample_rate, ctx_len) - qeff_model = QEFFAutoModelForSpeechSeq2Seq(model_hf, model_name) + qeff_model = QEFFAutoModelForSpeechSeq2Seq(model_hf, pretrained_model_name_or_path=model_name) pytorch_kv_tokens = run_seq2seq_pytorch_with_kv(qeff_model, processor, data, sample_rate, ctx_len) From 564e3ad691480153d59b98a997ea819d2d7be484 Mon Sep 17 00:00:00 2001 From: Dipankar Sarkar Date: Tue, 13 May 2025 04:45:44 +0000 Subject: [PATCH 4/8] Comment Addressed 1 and CI issue resolved Signed-off-by: Dipankar Sarkar --- QEfficient/transformers/models/modeling_auto.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/QEfficient/transformers/models/modeling_auto.py b/QEfficient/transformers/models/modeling_auto.py index 72a890c4d..f07bdc991 100644 --- a/QEfficient/transformers/models/modeling_auto.py +++ b/QEfficient/transformers/models/modeling_auto.py @@ -1321,7 +1321,6 @@ def __init__( ) super().__init__(model) - # breakpoint() # Set use_cache=True to get KV values as output during ONNX export self.model.config.use_cache = True self.num_layers = model.config.num_hidden_layers @@ -1330,11 +1329,6 @@ def __init__( self.is_tlm = transformed self.pretrained_model_name_or_path = kwargs.get("pretrained_model_name_or_path", None) - if is_tlm: - # TODO: It is possible to always apply this transform and make value of indices as last indices by default in PyTorch - 
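# Aside: what this removal fixes. Patch 1 left two SpDTransform call sites: the
# qaic_config-driven one above, plus this unconditional re-apply whenever
# is_tlm=True, which transformed the model a second time and overwrote
# self.is_tlm regardless of whether the rewrite actually fired. Only the single
# qaic_config-driven application survives. A self-contained sketch of that
# surviving flow (SpDTransform here is a stand-in; apply() is assumed to return
# (model, applied) and to be a no-op without a speculative config, and the
# "speculative_model_type" key is an assumed config field):
from typing import Optional, Tuple

class SpDTransform:  # stand-in for the library transform
    @classmethod
    def apply(cls, model, qaic_config: Optional[dict] = None, **kwargs) -> Tuple[object, bool]:
        if qaic_config and qaic_config.get("speculative_model_type"):
            return model, True  # pretend the TLM rewrite ran
        return model, False

model, applied = SpDTransform.apply(object(), qaic_config=None)
is_tlm = applied  # True only when the rewrite ran; no second apply, no overwrite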
self.model, transformed = SpDTransform.apply(self.model) - self.is_tlm = is_tlm - @property def model_name(self) -> str: mname = self.model.__class__.__name__ From a439bdb70486c0beb22375215e38f4fb3d762258 Mon Sep 17 00:00:00 2001 From: Dipankar Sarkar Date: Tue, 13 May 2025 04:53:53 +0000 Subject: [PATCH 5/8] Cleaning1 done Signed-off-by: Dipankar Sarkar --- QEfficient/transformers/models/modeling_auto.py | 1 - 1 file changed, 1 deletion(-) diff --git a/QEfficient/transformers/models/modeling_auto.py b/QEfficient/transformers/models/modeling_auto.py index f07bdc991..35a852a30 100644 --- a/QEfficient/transformers/models/modeling_auto.py +++ b/QEfficient/transformers/models/modeling_auto.py @@ -1300,7 +1300,6 @@ def __init__( model: nn.Module, continuous_batching: bool = False, qaic_config: Optional[dict] = None, - is_tlm: bool = False, **kwargs, ): model_class_name = model.__class__.__name__ From 30266bcdb2d0c4c62edafba836313b926725a3b9 Mon Sep 17 00:00:00 2001 From: Dipankar Sarkar Date: Wed, 14 May 2025 16:38:29 +0000 Subject: [PATCH 6/8] Hash change for vision models added Signed-off-by: Dipankar Sarkar --- QEfficient/transformers/models/modeling_auto.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/QEfficient/transformers/models/modeling_auto.py b/QEfficient/transformers/models/modeling_auto.py index 35a852a30..d1a24cec0 100644 --- a/QEfficient/transformers/models/modeling_auto.py +++ b/QEfficient/transformers/models/modeling_auto.py @@ -409,6 +409,7 @@ def __init__(self, model: nn.modules): super().__init__(model) self.model = model.get_qeff_vision_encoder() + def export(self, inputs, output_names, dynamic_axes, export_dir=None): return self._export(inputs, output_names, dynamic_axes, export_dir) @@ -443,6 +444,8 @@ def model_hash(self) -> str: mhash.update(to_hashable(self.model.model.config.to_diff_dict())) mhash.update(to_hashable(self._transform_names())) mhash.update(to_hashable({"QEffVisionEncoderForTextImageToTextModel": True})) + breakpoint() + mhash.update(to_hashable(self.model.model.pretrained_model_name_or_path)) mhash = mhash.hexdigest()[:16] return mhash @@ -506,6 +509,8 @@ def model_hash(self) -> str: mhash.update(to_hashable(self.model.config.to_diff_dict())) mhash.update(to_hashable(self._transform_names())) mhash.update(to_hashable({"QEffCausalLMForTextImageToTextModel": True})) + breakpoint() + mhash.update(to_hashable(self.model.model.pretrained_model_name_or_path)) mhash = mhash.hexdigest()[:16] return mhash @@ -533,9 +538,11 @@ def __init__( raise NotImplementedError("Continuous batching is not supported for image-text-to-text models yet.") self.model = model self.config = model.config + self.model.pretrained_model_name_or_path=kwargs.get("pretrained_model_name_or_path",None) self.vision_model = QEffVisionEncoderForTextImageToTextModel(model) self.lang_model = QEffCausalLMForTextImageToTextModel(model) self.input_shapes, self.output_names = None, None + @property def model_name(self) -> str: @@ -554,7 +561,7 @@ def from_pretrained(cls, pretrained_model_name_or_path: str, **kwargs): kwargs.update({"attn_implementation": "eager", "low_cpu_mem_usage": False}) model = cls._hf_auto_class.from_pretrained(pretrained_model_name_or_path, **kwargs) - return cls(model, **kwargs) + return cls(model, pretrained_model_name_or_path=pretrained_model_name_or_path,**kwargs) @property def onnx_path(self): @@ -901,7 +908,7 @@ def from_pretrained( config.vision_config.use_flash_attn = "false" model = 
cls._hf_auto_class.from_pretrained(pretrained_model_name_or_path, config, *args, **kwargs) - return cls(model, **kwargs) + return cls(model, pretrained_model_name_or_path=pretrained_model_name_or_path,**kwargs) def export( self, @@ -1255,7 +1262,7 @@ def from_pretrained(cls, pretrained_model_name_or_path: str, kv_offload: Optiona kwargs.update({"attn_implementation": "eager", "low_cpu_mem_usage": False}) model = cls._hf_auto_class.from_pretrained(pretrained_model_name_or_path, **kwargs) - return cls(model, kv_offload=kv_offload, **kwargs) + return cls(model, kv_offload=kv_offload, pretrained_model_name_or_path=pretrained_model_name_or_path,**kwargs) MISCLASSIFIED_CAUSAL_LM_TO_QEFF_AUTO_CLASS_MAP = {"InternVLChatModel": QEFFAutoModelForImageTextToText} From d1e02b76cd0e5b90b4c8f21505f8fc88a72b6f3d Mon Sep 17 00:00:00 2001 From: Dipankar Sarkar Date: Wed, 14 May 2025 16:40:35 +0000 Subject: [PATCH 7/8] Cleaning 2 done Signed-off-by: Dipankar Sarkar --- QEfficient/transformers/models/modeling_auto.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/QEfficient/transformers/models/modeling_auto.py b/QEfficient/transformers/models/modeling_auto.py index d1a24cec0..c870c9632 100644 --- a/QEfficient/transformers/models/modeling_auto.py +++ b/QEfficient/transformers/models/modeling_auto.py @@ -409,7 +409,6 @@ def __init__(self, model: nn.modules): super().__init__(model) self.model = model.get_qeff_vision_encoder() - def export(self, inputs, output_names, dynamic_axes, export_dir=None): return self._export(inputs, output_names, dynamic_axes, export_dir) @@ -444,7 +443,6 @@ def model_hash(self) -> str: mhash.update(to_hashable(self.model.model.config.to_diff_dict())) mhash.update(to_hashable(self._transform_names())) mhash.update(to_hashable({"QEffVisionEncoderForTextImageToTextModel": True})) - breakpoint() mhash.update(to_hashable(self.model.model.pretrained_model_name_or_path)) mhash = mhash.hexdigest()[:16] return mhash @@ -509,7 +507,6 @@ def model_hash(self) -> str: mhash.update(to_hashable(self.model.config.to_diff_dict())) mhash.update(to_hashable(self._transform_names())) mhash.update(to_hashable({"QEffCausalLMForTextImageToTextModel": True})) - breakpoint() mhash.update(to_hashable(self.model.model.pretrained_model_name_or_path)) mhash = mhash.hexdigest()[:16] return mhash @@ -538,11 +535,10 @@ def __init__( raise NotImplementedError("Continuous batching is not supported for image-text-to-text models yet.") self.model = model self.config = model.config - self.model.pretrained_model_name_or_path=kwargs.get("pretrained_model_name_or_path",None) + self.model.pretrained_model_name_or_path = kwargs.get("pretrained_model_name_or_path", None) self.vision_model = QEffVisionEncoderForTextImageToTextModel(model) self.lang_model = QEffCausalLMForTextImageToTextModel(model) self.input_shapes, self.output_names = None, None - @property def model_name(self) -> str: @@ -561,7 +557,7 @@ def from_pretrained(cls, pretrained_model_name_or_path: str, **kwargs): kwargs.update({"attn_implementation": "eager", "low_cpu_mem_usage": False}) model = cls._hf_auto_class.from_pretrained(pretrained_model_name_or_path, **kwargs) - return cls(model, pretrained_model_name_or_path=pretrained_model_name_or_path,**kwargs) + return cls(model, pretrained_model_name_or_path=pretrained_model_name_or_path, **kwargs) @property def onnx_path(self): @@ -908,7 +904,7 @@ def from_pretrained( config.vision_config.use_flash_attn = "false" model = 
cls._hf_auto_class.from_pretrained(pretrained_model_name_or_path, config, *args, **kwargs) - return cls(model, pretrained_model_name_or_path=pretrained_model_name_or_path,**kwargs) + return cls(model, pretrained_model_name_or_path=pretrained_model_name_or_path, **kwargs) def export( self, @@ -1262,7 +1258,7 @@ def from_pretrained(cls, pretrained_model_name_or_path: str, kv_offload: Optiona kwargs.update({"attn_implementation": "eager", "low_cpu_mem_usage": False}) model = cls._hf_auto_class.from_pretrained(pretrained_model_name_or_path, **kwargs) - return cls(model, kv_offload=kv_offload, pretrained_model_name_or_path=pretrained_model_name_or_path,**kwargs) + return cls(model, kv_offload=kv_offload, pretrained_model_name_or_path=pretrained_model_name_or_path, **kwargs) MISCLASSIFIED_CAUSAL_LM_TO_QEFF_AUTO_CLASS_MAP = {"InternVLChatModel": QEFFAutoModelForImageTextToText} From 6263312a5bccabf32ecd5c71e428002802aa5ec6 Mon Sep 17 00:00:00 2001 From: Dipankar Sarkar Date: Wed, 14 May 2025 16:52:54 +0000 Subject: [PATCH 8/8] Cleaning 3 done Signed-off-by: Dipankar Sarkar --- QEfficient/transformers/models/modeling_auto.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/QEfficient/transformers/models/modeling_auto.py b/QEfficient/transformers/models/modeling_auto.py index c870c9632..512f343bf 100644 --- a/QEfficient/transformers/models/modeling_auto.py +++ b/QEfficient/transformers/models/modeling_auto.py @@ -882,6 +882,7 @@ def __init__( self.model.config.vision_config.use_flash_attn = "false" else: self.model.config.text_config.use_cache = True + self.pretrained_model_name_or_path = kwargs.get("pretrained_model_name_or_path", None) @classmethod def from_pretrained( @@ -1143,6 +1144,7 @@ def model_hash(self) -> str: mhash.update(to_hashable(self.model.config.to_diff_dict())) mhash.update(to_hashable(self._transform_names())) mhash.update(to_hashable({"QEFFAutoModelForImageTextToText1QPC": True})) + mhash.update(to_hashable(self.pretrained_model_name_or_path)) mhash = mhash.hexdigest()[:16] return mhash
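Net effect of the series, as a usage sketch (the model cards below are
hypothetical placeholders, and `from_pretrained` is assumed to reach the
Hugging Face Hub): because `pretrained_model_name_or_path` is now folded into
`model_hash`, two checkpoints with identical `config.to_diff_dict()` output no
longer share a cache entry.

from QEfficient import QEFFAutoModelForCausalLM

base = QEFFAutoModelForCausalLM.from_pretrained("gpt2")
tuned = QEFFAutoModelForCausalLM.from_pretrained("my-org/gpt2-finetune")  # hypothetical card
# Same architecture and config, different model cards -> different cache keys.
assert base.model_hash != tuned.model_hash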