
Commit 6116377

Incorporated all subfunction changes
1 parent 19a9b91 commit 6116377

File tree

6 files changed (+145, -13 lines changed)

QEfficient/__init__.py
Lines changed: 1 addition & 0 deletions

@@ -27,6 +27,7 @@
 # TODO: Find a better way to do this, this is temp. fix.
 apply_torch_patches()
 
+
 def check_qaic_sdk():
     """Check if QAIC SDK is installed"""
     try:
QEfficient/base/modeling_qeff.py
Lines changed: 4 additions & 3 deletions

@@ -251,7 +251,7 @@ def _export(
        CustomOpTransform.register_custom_op("CtxGatherFunc", CtxGatherFunc, CtxGather)
        decoder_layer_classes = get_decoder_layer_classes_for_export(self.model)
        export_kwargs = {} if export_kwargs is None else export_kwargs
-
+
        torch.onnx.export(
            self.model,
            (example_inputs,),
@@ -269,10 +269,11 @@ def _export(
 
        _ = self._offload_model_weights(offload_pt_weights)
        model = onnx.load(tmp_onnx_path, load_external_data=False)
-        model,transformed = rename_function_outputs(model)
-
+        model, transformed = rename_function_outputs(model)
+
        transform_kwargs = {
            "onnx_base_dir": str(tmp_onnx_dir),
+            "temp_onnx_path": tmp_onnx_path,
            "model_name": self.model_name,
        }
        if onnx_transform_kwargs is not None:
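For orientation, here is a minimal sketch of how the `transform_kwargs` dict assembled above could be fanned out to the ONNX transform classes. The driver function and its name are assumptions for illustration, not code from this commit; only the kwargs keys and the `apply(model, **kwargs)` classmethod convention come from the diff.

    from onnx import ModelProto

    def run_onnx_transforms(model: ModelProto, transforms, tmp_onnx_dir, tmp_onnx_path, model_name):
        # Hypothetical driver: mirrors how _export shares one kwargs dict
        # across every transform's classmethod apply(model, **kwargs).
        transform_kwargs = {
            "onnx_base_dir": str(tmp_onnx_dir),
            "temp_onnx_path": tmp_onnx_path,  # new key, consumed by OnnxSlimTransform
            "model_name": model_name,
        }
        for transform_cls in transforms:
            model, transformed = transform_cls.apply(model, **transform_kwargs)
        return model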

QEfficient/base/onnx_transforms.py
Lines changed: 129 additions & 5 deletions

@@ -5,9 +5,12 @@
 #
 # ----------------------------------------------------------------------------
 
-from typing import Optional, Tuple
+from typing import Any, Dict, List, Optional, Tuple
 
 import numpy as np
+import onnx
+import onnxslim
+import torch
 from onnx import ModelProto, external_data_helper, numpy_helper
 
 
@@ -100,9 +103,130 @@ def apply(
         external_data_helper.set_external_data(tensor, f"{model_name}_{file_num}.onnx.data")
         return model, transformed
 
+
+class OnnxSlimTransform(OnnxTransform):
+    """
+    Applies onnx-slim transformations on the given ONNX graph.
+    """
+
+    @classmethod
+    def apply(
+        cls,
+        model: ModelProto,
+        *,
+        onnx_base_dir: Optional[str] = None,
+        **kwargs,
+    ) -> Tuple[ModelProto, bool]:
+        """
+        :param enable_onnx_slim_transform: If True, applies onnx-slim transformations.
+        :param temp_onnx_path: Path to save the slimmed ONNX model.
+        """
+        transformed = False
+        onnx_slim_transform = True  # kwargs.get("enable_onnx_slim_transform", False)
+        temp_onnx_path = kwargs.get("temp_onnx_path", None)
+        if not temp_onnx_path:
+            err_str = "temp_onnx_path is required for onnx-slim transform."
+            raise RuntimeError(err_str)
+        if onnx_slim_transform:
+            transformed = True
+            slimmed_model = onnxslim.slim(model)
+            onnx.save(slimmed_model, temp_onnx_path)
+            return slimmed_model, transformed
+        return model, transformed
+
+
+class CustomOpTransform(OnnxTransform):
+    """
+    Transform to register custom operations and add their function protos to the ONNX model.
+    """
+
+    # Registry of custom operations
+    _custom_ops: Dict[str, Tuple[Any, Any]] = {}  # op_name -> (func_class, onnxscript_func)
+
+    @classmethod
+    def register_custom_op(cls, op_name: str, func_class: Any, onnxscript_func: Any):
+        """Register a custom operation."""
+        cls._custom_ops[op_name] = (func_class, onnxscript_func)
+
+    @classmethod
+    def apply(cls, model: ModelProto, *, opset_version: int = 17, **kwargs) -> Tuple[ModelProto, bool]:
+        """
+        Apply custom op registration and add function protos to the model.
+
+        :param model: The ONNX model to transform
+        :param opset_version: ONNX opset version for symbolic registration
+        :returns: Transformed model and success flag
+        """
+        transformed = False
+
+        # Register all custom op symbolic functions with torch.onnx
+        for op_name, (func_class, _) in cls._custom_ops.items():
+            if hasattr(func_class, "symbolic"):
+                torch.onnx.register_custom_op_symbolic(f"::{op_name}", func_class.symbolic, opset_version)
+
+        # Add function protos for custom ops that are used in the model
+        used_protos = cls._get_function_protos_for_model(model)
+
+        for proto in used_protos:
+            # Check if proto already exists to avoid duplicates
+            proto_name = proto.name
+            if not any(func.name == proto_name for func in model.functions):
+                model.functions.append(proto)
+                transformed = True
+
+        return model, transformed
+
+    @classmethod
+    def _get_function_protos_for_model(cls, model: ModelProto) -> List[Any]:
+        """Get function protos for custom ops that are actually used in the model."""
+        used_protos = []
+
+        # Get all node op_types in the model
+        used_op_types = set()
+        for node in model.graph.node:
+            used_op_types.add(node.op_type)
+
+        # Also check function calls
+        for func in model.functions:
+            for node in func.node:
+                used_op_types.add(node.op_type)
+
+        # Check which custom ops are actually used
+        for op_name, (func_class, onnxscript_func) in cls._custom_ops.items():
+            # Check if the custom op is referenced in the model
+            if cls._is_custom_op_used(model, op_name, used_op_types):
+                proto = onnxscript_func.to_function_proto()
+                used_protos.append(proto)
+
+        return used_protos
+
+    @classmethod
+    def _is_custom_op_used(cls, model: ModelProto, op_name: str, used_op_types: set) -> bool:
+        """Check if a custom op is used in the model."""
+        # Check if the op_name appears in node op_types
+        if op_name in used_op_types:
+            return True
+
+        # Check for domain-specific ops (e.g., "com.qti.aisw.onnx::CustomRMSNorm")
+        custom_op_pattern = f"com.qti.aisw.onnx::{op_name.replace('Func', '')}"
+        if custom_op_pattern in used_op_types:
+            return True
+
+        # Heuristic checks based on op type
+        if "RMSNorm" in op_name:
+            # Check if any RMSNorm-related ops are present
+            return any("RMSNorm" in op_type for op_type in used_op_types)
+
+        if "Ctx" in op_name:
+            # Check if Gather/Scatter operations are present (indicating KV cache usage)
+            return any(op_type in ["Gather", "GatherND", "Scatter", "ScatterND"] for op_type in used_op_types)
+
+        return False
+
+
 def rename_function_outputs(model):
     graph = model.graph
-    op_type_to_func_map = {func.name:func for func in model.functions}
+    op_type_to_func_map = {func.name: func for func in model.functions}
     decoder_layer_patterns = ["DecoderLayer", "Block", "Layer"]
     transformed = False
     model_graph_outputs = [val.name for val in model.graph.output]
@@ -117,11 +241,11 @@ def rename_function_outputs(model):
             if "key" in func.output[i]:
                 new_name = f"past_key.{node_count}_RetainedState"
             elif "value" in func.output[i]:
-                new_name= f"past_value.{node_count}_RetainedState"
+                new_name = f"past_value.{node_count}_RetainedState"
             else:
                 raise NotImplementedError()
             print(f"renaming {node.output[i]} to {new_name}")
             node.output[i] = new_name
             model.graph.output[model_graph_outputs.index(tmp)].name = new_name
-            node_count+=1
-    return model, transformed
+            node_count += 1
+    return model, transformed
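A usage sketch of the two new transforms follows; the model path is a placeholder, not a file from this repo. Note that `OnnxSlimTransform.apply` raises `RuntimeError` without `temp_onnx_path`, since it writes the slimmed graph back to disk as a side effect, and that `CustomOpTransform.apply` is a no-op until ops are registered (the commit registers "CtxGatherFunc" inside `_export`).

    import onnx

    from QEfficient.base.onnx_transforms import CustomOpTransform, OnnxSlimTransform

    model = onnx.load("model.onnx", load_external_data=False)  # placeholder path

    # Adds function protos only for registered ops the graph actually uses,
    # skipping any already present in model.functions.
    model, ops_added = CustomOpTransform.apply(model, opset_version=17)

    # Slims the graph via onnxslim and saves the result to temp_onnx_path.
    model, slimmed = OnnxSlimTransform.apply(model, temp_onnx_path="model.onnx")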

QEfficient/transformers/models/modeling_auto.py
Lines changed: 8 additions & 3 deletions

@@ -26,7 +26,12 @@
 
 import QEfficient
 from QEfficient.base.modeling_qeff import QEFFBaseModel
-from QEfficient.base.onnx_transforms import FP16ClipTransform, SplitTensorsTransform
+from QEfficient.base.onnx_transforms import (
+    CustomOpTransform,
+    FP16ClipTransform,
+    OnnxSlimTransform,
+    SplitTensorsTransform,
+)
 from QEfficient.base.pytorch_transforms import SplitGateUpWeightsTransform
 from QEfficient.generation.cloud_infer import QAICInferenceSession
 from QEfficient.generation.text_generation_inference import (
@@ -347,7 +352,7 @@ def export(self, export_dir: Optional[str] = None) -> str:
             dynamic_axes,
             export_dir=export_dir,
         )
-
+
     def compile(
         self,
         onnx_path: Optional[str] = None,
@@ -2037,7 +2042,7 @@ class QEFFAutoModelForCausalLM(QEFFBaseModel):
         SplitGateUpWeightsTransform,
         KVCacheExternalModuleMapperTransform,
     ]
-    _onnx_transforms = [FP16ClipTransform, SplitTensorsTransform]
+    _onnx_transforms = [FP16ClipTransform, CustomOpTransform, OnnxSlimTransform, SplitTensorsTransform]
 
     def __init__(
         self,
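The two new transforms slot between FP16 clipping and tensor splitting in `_onnx_transforms`, presumably applied in list order during export. An illustrative end-to-end call (the model card is a placeholder):

    from QEfficient import QEFFAutoModelForCausalLM

    # export() runs the ONNX transform pipeline: FP16ClipTransform,
    # CustomOpTransform, OnnxSlimTransform, then SplitTensorsTransform.
    model = QEFFAutoModelForCausalLM.from_pretrained("gpt2")
    onnx_path = model.export()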

QEfficient/transformers/models/pytorch_transforms.py
Lines changed: 2 additions & 1 deletion

@@ -789,6 +789,7 @@ def apply(cls, model: nn.Module, pooling: Union[str, Callable]) -> Tuple[nn.Module, bool]:
         warnings.warn("Pooling is applied to the model.")
         return model, transformed
 
+
 def get_decoder_layer_classes_for_export(model: nn.Module) -> set:
     """
     Dynamically determine which DecoderLayer classes should be exported as functions
@@ -812,4 +813,4 @@ def get_decoder_layer_classes_for_export(model: nn.Module) -> set:
         if module.__class__ in decoder_layer_classes:
             model_decoder_classes.add(module.__class__)
 
-    return model_decoder_classes
+    return model_decoder_classes
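Read together with the `_export` diff above, the returned class set plausibly feeds `torch.onnx.export`'s `export_modules_as_functions`, so each decoder layer becomes a single ONNX function (the functions that `rename_function_outputs` later walks). A sketch under that assumption; the wrapper function is hypothetical:

    import torch

    from QEfficient.transformers.models.pytorch_transforms import (
        get_decoder_layer_classes_for_export,
    )

    def export_layers_as_functions(pt_model: torch.nn.Module, example_inputs, onnx_path: str):
        # Assumption: the class set is passed to export_modules_as_functions,
        # which emits one ONNX function per matching nn.Module class.
        decoder_layer_classes = get_decoder_layer_classes_for_export(pt_model)
        torch.onnx.export(
            pt_model,
            (example_inputs,),
            onnx_path,
            export_modules_as_functions=decoder_layer_classes,
            opset_version=17,
        )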

QEfficient/utils/patches.py
Lines changed: 1 addition & 1 deletion

@@ -117,4 +117,4 @@ def apply_torch_patches():
 
 def is_patched():
     """Check if patches have been applied."""
-    return onnx_utils._setup_trace_module_map == _setup_trace_module_map_patched
+    return onnx_utils._setup_trace_module_map == _setup_trace_module_map_patched
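For completeness, the pair of helpers can be exercised as below; `is_patched` simply compares the live `onnx_utils` hook against the patched function object.

    from QEfficient.utils.patches import apply_torch_patches, is_patched

    apply_torch_patches()  # installs the torch.onnx tracing patches
    assert is_patched()    # equality check against the patched hook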
