diff --git a/QEfficient/transformers/models/falcon/modeling_falcon.py b/QEfficient/transformers/models/falcon/modeling_falcon.py
index 1cfdf88e1..63c80d501 100644
--- a/QEfficient/transformers/models/falcon/modeling_falcon.py
+++ b/QEfficient/transformers/models/falcon/modeling_falcon.py
@@ -8,9 +8,10 @@
 """PyTorch Falcon model."""
 
 import math
-from typing import Optional, Tuple, Union
+from typing import Optional, Set, Tuple, Type, Union
 
 import torch
+import torch.nn as nn
 import torch.utils.checkpoint
 from torch.nn import functional as F
 from transformers.cache_utils import Cache
@@ -353,6 +354,16 @@ class QEffFalconForCausalLM(FalconForCausalLM):
     - update the hidden_states, and fix for onnx model
     """
 
+    def get_repeated_layer_class(self) -> Set[Type[nn.Module]]:
+        """
+        Return the set of classes used as the repeated layers across the model for subfunction extraction.
+
+        Notes:
+            This method should return *class objects* (not instances).
+            Downstream code can use this to find/build subfunctions for repeated blocks.
+        """
+        return {QEffFalconDecoderLayer}
+
     def forward(
         self,
         input_ids: torch.LongTensor = None,
diff --git a/QEfficient/transformers/models/gemma/modeling_gemma.py b/QEfficient/transformers/models/gemma/modeling_gemma.py
index 1edb8ef53..ee4671ca9 100644
--- a/QEfficient/transformers/models/gemma/modeling_gemma.py
+++ b/QEfficient/transformers/models/gemma/modeling_gemma.py
@@ -5,7 +5,7 @@
 #
 # -----------------------------------------------------------------------------
 
-from typing import List, Optional, Tuple, Union
+from typing import List, Optional, Set, Tuple, Type, Union
 
 import torch
 from torch import nn
@@ -336,6 +336,16 @@ class QEffGemmaForCausalLM(GemmaForCausalLM):
     - add new args cache idx for the kv retention
     """
 
+    def get_repeated_layer_class(self) -> Set[Type[nn.Module]]:
+        """
+        Return the set of classes used as the repeated layers across the model for subfunction extraction.
+
+        Notes:
+            This method should return *class objects* (not instances).
+            Downstream code can use this to find/build subfunctions for repeated blocks.
+        """
+        return {QEffGemmaDecoderLayer}
+
     def forward(
         self,
         input_ids: torch.LongTensor = None,
diff --git a/QEfficient/transformers/models/gemma2/modeling_gemma2.py b/QEfficient/transformers/models/gemma2/modeling_gemma2.py
index 2944601c9..ce3e1cd99 100644
--- a/QEfficient/transformers/models/gemma2/modeling_gemma2.py
+++ b/QEfficient/transformers/models/gemma2/modeling_gemma2.py
@@ -5,7 +5,7 @@
 #
 # -----------------------------------------------------------------------------
 
-from typing import Callable, List, Optional, Tuple, Union
+from typing import Callable, List, Optional, Set, Tuple, Type, Union
 
 import torch
 from torch import nn
@@ -388,6 +388,16 @@ class QEffGemma2ForCausalLM(Gemma2ForCausalLM, GenerationMixin):
     - add new args cache idx for the kv retention
     """
 
+    def get_repeated_layer_class(self) -> Set[Type[nn.Module]]:
+        """
+        Return the set of classes used as the repeated layers across the model for subfunction extraction.
+
+        Notes:
+            This method should return *class objects* (not instances).
+            Downstream code can use this to find/build subfunctions for repeated blocks.
+ """ + return {QEffGemma2DecoderLayer} + def forward( self, input_ids: torch.LongTensor = None, diff --git a/QEfficient/transformers/models/gemma3/modeling_gemma3.py b/QEfficient/transformers/models/gemma3/modeling_gemma3.py index a6e451bec..5747389ed 100644 --- a/QEfficient/transformers/models/gemma3/modeling_gemma3.py +++ b/QEfficient/transformers/models/gemma3/modeling_gemma3.py @@ -6,7 +6,7 @@ # ----------------------------------------------------------------------------- import copy -from typing import List, Optional, Tuple, Union +from typing import List, Optional, Tuple, Type, Union import torch from torch import nn @@ -589,6 +589,16 @@ def __init__(self, model): self.model = model self.model.vision_model = self.model.vision_tower + def get_repeated_layer_class(self) -> Type[nn.Module]: + """ + Return the set of class used as the repeated layer across the model for subfunction extraction. + + Notes: + This method should return the *class object* (not an instance). + Downstream code can use this to find/build subfunctions for repeated blocks. + """ + return {self.model.vision_tower.vision_model.encoder.layers[0].__class__} + def forward(self, pixel_values): image_features = self.model.get_image_features(pixel_values=pixel_values) return image_features @@ -602,6 +612,16 @@ def __init__(self, model): self.config = self.model.config self.lm_head = self.model.lm_head + def get_repeated_layer_class(self) -> Type[nn.Module]: + """ + Return the set of class used as the repeated layer across the model for subfunction extraction. + + Notes: + This method should return the *class object* (not an instance). + Downstream code can use this to find/build subfunctions for repeated blocks. + """ + return {QEffGemma3DecoderLayer} + def forward( self, input_ids, diff --git a/QEfficient/transformers/models/gpt2/modeling_gpt2.py b/QEfficient/transformers/models/gpt2/modeling_gpt2.py index 6136a2c5d..60424c851 100644 --- a/QEfficient/transformers/models/gpt2/modeling_gpt2.py +++ b/QEfficient/transformers/models/gpt2/modeling_gpt2.py @@ -5,7 +5,7 @@ # # ----------------------------------------------------------------------------- -from typing import Callable, Optional, Tuple, Union +from typing import Callable, Optional, Tuple, Type, Union import torch from torch import nn @@ -397,6 +397,16 @@ class QEffGPT2LMHeadModel(GPT2LMHeadModel): - add new args position idx for the cache_kwargs for kv retention """ + def get_repeated_layer_class(self) -> Type[nn.Module]: + """ + Return the set of class used as the repeated layer across the model for subfunction extraction. + + Notes: + This method should return the *class object* (not an instance). + Downstream code can use this to find/build subfunctions for repeated blocks. 
+ """ + return {QEffGPT2Block} + def forward( self, input_ids: Optional[torch.LongTensor] = None, diff --git a/QEfficient/transformers/models/gpt_bigcode/modeling_gpt_bigcode.py b/QEfficient/transformers/models/gpt_bigcode/modeling_gpt_bigcode.py index 85ea42674..0354ec34c 100644 --- a/QEfficient/transformers/models/gpt_bigcode/modeling_gpt_bigcode.py +++ b/QEfficient/transformers/models/gpt_bigcode/modeling_gpt_bigcode.py @@ -7,7 +7,7 @@ """PyTorch GPTBigCode model.""" -from typing import Optional, Tuple, Union +from typing import Optional, Tuple, Type, Union import torch import torch.utils.checkpoint @@ -378,6 +378,16 @@ def forward( class QEffGPTBigCodeForCausalLM(GPTBigCodeForCausalLM): + def get_repeated_layer_class(self) -> Type[nn.Module]: + """ + Return the set of class used as the repeated layer across the model for subfunction extraction. + + Notes: + This method should return the *class object* (not an instance). + Downstream code can use this to find/build subfunctions for repeated blocks. + """ + return {QEffGPTBigCodeBlock} + def forward( self, input_ids: Optional[torch.Tensor] = None, diff --git a/QEfficient/transformers/models/gpt_oss/modeling_gpt_oss.py b/QEfficient/transformers/models/gpt_oss/modeling_gpt_oss.py index 3efe890b8..b31e88651 100644 --- a/QEfficient/transformers/models/gpt_oss/modeling_gpt_oss.py +++ b/QEfficient/transformers/models/gpt_oss/modeling_gpt_oss.py @@ -1205,6 +1205,16 @@ def forward( class QEffGptOssForCausalLM(GptOssForCausalLM): + def get_repeated_layer_class(self) -> Type[nn.Module]: + """ + Return the set of class used as the repeated layer across the model for subfunction extraction. + + Notes: + This method should return the *class object* (not an instance). + Downstream code can use this to find/build subfunctions for repeated blocks. + """ + return {QEffGptOssDecoderLayer} + def forward( self, input_ids: Optional[torch.LongTensor] = None, diff --git a/QEfficient/transformers/models/gptj/modeling_gptj.py b/QEfficient/transformers/models/gptj/modeling_gptj.py index 1a9e45e97..3fad774ef 100644 --- a/QEfficient/transformers/models/gptj/modeling_gptj.py +++ b/QEfficient/transformers/models/gptj/modeling_gptj.py @@ -7,7 +7,7 @@ """PyTorch GPT-J model.""" -from typing import Optional, Tuple, Union +from typing import Optional, Tuple, Type, Union import torch from torch import nn @@ -318,6 +318,16 @@ class QEffGPTJForCausalLM(GPTJForCausalLM): - update the hidden_states, and fix for onnx model """ + def get_repeated_layer_class(self) -> Type[nn.Module]: + """ + Return the set of class used as the repeated layer across the model for subfunction extraction. + + Notes: + This method should return the *class object* (not an instance). + Downstream code can use this to find/build subfunctions for repeated blocks. 
+ """ + return {QEffGPTJBlock} + def forward( self, input_ids: Optional[torch.LongTensor] = None, diff --git a/QEfficient/transformers/models/granite/modeling_granite.py b/QEfficient/transformers/models/granite/modeling_granite.py index 62be5f54d..f15b6b9af 100644 --- a/QEfficient/transformers/models/granite/modeling_granite.py +++ b/QEfficient/transformers/models/granite/modeling_granite.py @@ -5,7 +5,7 @@ # # ----------------------------------------------------------------------------- -from typing import Callable, List, Optional, Tuple, Union +from typing import Callable, List, Optional, Tuple, Type, Union import torch from torch import nn @@ -347,6 +347,16 @@ class QEffGraniteForCausalLM(GraniteForCausalLM): Copied from GraniteForCausalLM: https://github.com/huggingface/transformers/blob/main/src/transformers/models/granite/modeling_granite.py """ + def get_repeated_layer_class(self) -> Type[nn.Module]: + """ + Return the set of class used as the repeated layer across the model for subfunction extraction. + + Notes: + This method should return the *class object* (not an instance). + Downstream code can use this to find/build subfunctions for repeated blocks. + """ + return {QEffGraniteDecoderLayer} + def forward( self, input_ids: torch.LongTensor = None, diff --git a/QEfficient/transformers/models/granitemoe/modeling_granitemoe.py b/QEfficient/transformers/models/granitemoe/modeling_granitemoe.py index b158b4046..7147ab667 100644 --- a/QEfficient/transformers/models/granitemoe/modeling_granitemoe.py +++ b/QEfficient/transformers/models/granitemoe/modeling_granitemoe.py @@ -5,7 +5,7 @@ # # ----------------------------------------------------------------------------- -from typing import List, Optional, Tuple, Union +from typing import List, Optional, Tuple, Type, Union import torch import torch.nn.functional as F @@ -493,6 +493,16 @@ class QEffGraniteMoeForCausalLM(GraniteMoeForCausalLM): Copied from GraniteForCausalLM: https://github.com/huggingface/transformers/blob/main/src/transformers/models/granite/modeling_granite.py """ + def get_repeated_layer_class(self) -> Type[nn.Module]: + """ + Return the set of class used as the repeated layer across the model for subfunction extraction. + + Notes: + This method should return the *class object* (not an instance). + Downstream code can use this to find/build subfunctions for repeated blocks. + """ + return {self.model.layers[0].__class__} + def forward( self, input_ids: torch.LongTensor = None, diff --git a/QEfficient/transformers/models/grok_1/modeling_grok1.py b/QEfficient/transformers/models/grok_1/modeling_grok1.py index 2d8fc412d..a930f2383 100644 --- a/QEfficient/transformers/models/grok_1/modeling_grok1.py +++ b/QEfficient/transformers/models/grok_1/modeling_grok1.py @@ -5,7 +5,7 @@ # # ---------------------------------------------------------------------------- -from typing import List, Optional, Tuple, Union +from typing import List, Optional, Tuple, Type, Union import torch import torch.nn as nn @@ -397,6 +397,16 @@ class QEffGrok1ModelForCausalLM(nn.Module): Grok model for causal language modeling. """ + def get_repeated_layer_class(self) -> Type[nn.Module]: + """ + Return the set of class used as the repeated layer across the model for subfunction extraction. + + Notes: + This method should return the *class object* (not an instance). + Downstream code can use this to find/build subfunctions for repeated blocks. 
+ """ + return {QEffGrok1DecoderLayer} + def forward( self, input_ids: torch.LongTensor = None, diff --git a/QEfficient/transformers/models/internvl/modeling_internvl.py b/QEfficient/transformers/models/internvl/modeling_internvl.py index b47db7eda..2f626ba14 100644 --- a/QEfficient/transformers/models/internvl/modeling_internvl.py +++ b/QEfficient/transformers/models/internvl/modeling_internvl.py @@ -5,7 +5,7 @@ # # ----------------------------------------------------------------------------- -from typing import List, Optional +from typing import List, Optional, Type import torch import torch.nn as nn @@ -21,6 +21,16 @@ def __init__(self, model): super().__init__() self.model = model + def get_repeated_layer_class(self) -> Type[nn.Module]: + """ + Return the set of class used as the repeated layer across the model for subfunction extraction. + + Notes: + This method should return the *class object* (not an instance). + Downstream code can use this to find/build subfunctions for repeated blocks. + """ + return {self.model.vision_model.encoder.layers[0].__class__} + def forward(self, pixel_values): vision_embeds = self.model.extract_feature(pixel_values) # Reshape from [num_patches, 256, hidden_dim] -> [1, num_patches*256, head_dim] @@ -36,6 +46,16 @@ def __init__(self, model): self.config = self.model.language_model.config self.language_model = self.model.language_model + def get_repeated_layer_class(self) -> Type[nn.Module]: + """ + Return the set of class used as the repeated layer across the model for subfunction extraction. + + Notes: + This method should return the *class object* (not an instance). + Downstream code can use this to find/build subfunctions for repeated blocks. + """ + return {self.model.language_model.model.layers[0].__class__} + def forward( self, input_ids, diff --git a/QEfficient/transformers/models/llama/modeling_llama.py b/QEfficient/transformers/models/llama/modeling_llama.py index fb3aed556..56dbaaa16 100644 --- a/QEfficient/transformers/models/llama/modeling_llama.py +++ b/QEfficient/transformers/models/llama/modeling_llama.py @@ -5,7 +5,7 @@ # # ----------------------------------------------------------------------------- -from typing import Any, Dict, List, Optional, Tuple, Union +from typing import Any, Dict, List, Optional, Tuple, Type, Union import torch from torch import nn @@ -404,6 +404,16 @@ class QEffLlamaForCausalLM(LlamaForCausalLM): Copied from LlamaForCausalLM: https://github.com/huggingface/transformers/blob/main/src/transformers/models/llama/modeling_llama.py """ + def get_repeated_layer_class(self) -> Type[nn.Module]: + """ + Return the set of class used as the repeated layer across the model for subfunction extraction. + + Notes: + This method should return the *class object* (not an instance). + Downstream code can use this to find/build subfunctions for repeated blocks. 
+ """ + return {QEffLlamaDecoderLayer} + def forward( self, input_ids: torch.LongTensor = None, diff --git a/QEfficient/transformers/models/llama4/modeling_llama4.py b/QEfficient/transformers/models/llama4/modeling_llama4.py index 834ee8880..0e18e22f5 100644 --- a/QEfficient/transformers/models/llama4/modeling_llama4.py +++ b/QEfficient/transformers/models/llama4/modeling_llama4.py @@ -6,7 +6,7 @@ # ----------------------------------------------------------------------------- import math -from typing import Callable, List, Optional, Tuple, Union +from typing import Callable, List, Optional, Tuple, Type, Union import torch from torch import nn @@ -822,6 +822,16 @@ def __init__(self, model): super().__init__() self.model = model + def get_repeated_layer_class(self) -> Type[nn.Module]: + """ + Return the set of class used as the repeated layer across the model for subfunction extraction. + + Notes: + This method should return the *class object* (not an instance). + Downstream code can use this to find/build subfunctions for repeated blocks. + """ + return {self.model.vision_model.model.layers[0].__class__} + def forward(self, pixel_values): vision_feature_layer = self.model.config.vision_config.vision_feature_layer vision_feature_select_strategy = self.model.config.vision_config.vision_feature_select_strategy @@ -849,6 +859,16 @@ def __init__(self, model): self.language_model = self.model.language_model self.config = self.model.config + def get_repeated_layer_class(self) -> Type[nn.Module]: + """ + Return the set of class used as the repeated layer across the model for subfunction extraction. + + Notes: + This method should return the *class object* (not an instance). + Downstream code can use this to find/build subfunctions for repeated blocks. + """ + return {QEffLlama4TextDecoderLayer} + def forward( self, input_ids, diff --git a/QEfficient/transformers/models/llama_swiftkv/modeling_llama_swiftkv.py b/QEfficient/transformers/models/llama_swiftkv/modeling_llama_swiftkv.py index fa42b3f96..67879a894 100644 --- a/QEfficient/transformers/models/llama_swiftkv/modeling_llama_swiftkv.py +++ b/QEfficient/transformers/models/llama_swiftkv/modeling_llama_swiftkv.py @@ -11,7 +11,7 @@ """Inference-only LLaMA model compatible with HuggingFace weights.""" import math -from typing import List, Optional, Tuple, Union +from typing import List, Optional, Tuple, Type, Union import torch from torch import nn @@ -416,6 +416,16 @@ def __init__(self, config: QEffLlamaSwiftKVConfig): self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False) self.config = config + def get_repeated_layer_class(self) -> Type[nn.Module]: + """ + Return the set of class used as the repeated layer across the model for subfunction extraction. + + Notes: + This method should return the *class object* (not an instance). + Downstream code can use this to find/build subfunctions for repeated blocks. 
+ """ + return {QEffLlamaSwiftKVDecoderLayer} + def forward( self, input_ids: torch.Tensor, diff --git a/QEfficient/transformers/models/llava/modeling_llava.py b/QEfficient/transformers/models/llava/modeling_llava.py index abdb77ea5..08b668f31 100644 --- a/QEfficient/transformers/models/llava/modeling_llava.py +++ b/QEfficient/transformers/models/llava/modeling_llava.py @@ -5,7 +5,7 @@ # # ----------------------------------------------------------------------------- -from typing import List, Optional +from typing import List, Optional, Type import torch import torch.nn as nn @@ -30,6 +30,16 @@ def __init__(self, model): self.model = model self.model.vision_model = self.model.vision_tower + def get_repeated_layer_class(self) -> Type[nn.Module]: + """ + Return the set of class used as the repeated layer across the model for subfunction extraction. + + Notes: + This method should return the *class object* (not an instance). + Downstream code can use this to find/build subfunctions for repeated blocks. + """ + return {self.model.vision_tower.vision_model.encoder.layers[0].__class__} + def forward(self, pixel_values): # Image features image_outputs = self.model.vision_tower(pixel_values, output_hidden_states=True) @@ -54,6 +64,16 @@ def __init__(self, model): self.language_model = self.model.language_model self.lm_head = self.model.lm_head + def get_repeated_layer_class(self) -> Type[nn.Module]: + """ + Return the set of class used as the repeated layer across the model for subfunction extraction. + + Notes: + This method should return the *class object* (not an instance). + Downstream code can use this to find/build subfunctions for repeated blocks. + """ + return {self.model.language_model.layers[0].__class__} + def forward( self, input_ids, diff --git a/QEfficient/transformers/models/llava_next/modeling_llava_next.py b/QEfficient/transformers/models/llava_next/modeling_llava_next.py index 627f7393e..a1f43a59b 100755 --- a/QEfficient/transformers/models/llava_next/modeling_llava_next.py +++ b/QEfficient/transformers/models/llava_next/modeling_llava_next.py @@ -6,7 +6,7 @@ # ----------------------------------------------------------------------------- -from typing import List, Optional +from typing import List, Optional, Type import numpy as np import torch @@ -30,6 +30,16 @@ def __init__(self, model): self.model = model self.model.vision_model = self.model.vision_tower + def get_repeated_layer_class(self) -> Type[nn.Module]: + """ + Return the set of class used as the repeated layer across the model for subfunction extraction. + + Notes: + This method should return the *class object* (not an instance). + Downstream code can use this to find/build subfunctions for repeated blocks. + """ + return {self.model.vision_tower.vision_model.encoder.layers[0].__class__} + def forward(self, pixel_values, image_sizes): if pixel_values.dim() == constants.GRANITEVISION_PIXEL_VALUE_DIM: pixel_values_new = pixel_values.squeeze(0) @@ -128,6 +138,16 @@ def __init__(self, model): self.language_model = self.model.language_model self.lm_head = self.model.lm_head + def get_repeated_layer_class(self) -> Type[nn.Module]: + """ + Return the set of class used as the repeated layer across the model for subfunction extraction. + + Notes: + This method should return the *class object* (not an instance). + Downstream code can use this to find/build subfunctions for repeated blocks. 
+ """ + return {self.model.language_model.layers[0].__class__} + def forward( self, input_ids, diff --git a/QEfficient/transformers/models/mistral/modeling_mistral.py b/QEfficient/transformers/models/mistral/modeling_mistral.py index 5edfb8f3a..6d96b87d9 100644 --- a/QEfficient/transformers/models/mistral/modeling_mistral.py +++ b/QEfficient/transformers/models/mistral/modeling_mistral.py @@ -7,7 +7,7 @@ """PyTorch Mistral model.""" -from typing import Callable, List, Optional, Tuple, Union +from typing import Callable, List, Optional, Tuple, Type, Union import torch import torch.utils.checkpoint @@ -356,6 +356,16 @@ class QEffMistralForCausalLM(MistralForCausalLM): - add new args cache idx for the kv retention """ + def get_repeated_layer_class(self) -> Type[nn.Module]: + """ + Return the set of class used as the repeated layer across the model for subfunction extraction. + + Notes: + This method should return the *class object* (not an instance). + Downstream code can use this to find/build subfunctions for repeated blocks. + """ + return {QEffMistralDecoderLayer} + def forward( self, input_ids: torch.LongTensor = None, diff --git a/QEfficient/transformers/models/mistral3/modeling_mistral3.py b/QEfficient/transformers/models/mistral3/modeling_mistral3.py index d2149b6bd..92593ce27 100644 --- a/QEfficient/transformers/models/mistral3/modeling_mistral3.py +++ b/QEfficient/transformers/models/mistral3/modeling_mistral3.py @@ -5,7 +5,7 @@ # # ----------------------------------------------------------------------------- -from typing import List, Optional, Tuple, Union +from typing import List, Optional, Tuple, Type, Union import torch import torch.nn as nn @@ -151,6 +151,16 @@ def __init__(self, model): self.model = model self.model.vision_model = self.model.vision_tower + def get_repeated_layer_class(self) -> Type[nn.Module]: + """ + Return the set of class used as the repeated layer across the model for subfunction extraction. + + Notes: + This method should return the *class object* (not an instance). + Downstream code can use this to find/build subfunctions for repeated blocks. + """ + return {self.model.vision_tower.transformer.layers[0].__class__} + def forward(self, pixel_values): image_sizes = torch.tensor([[pixel_values.shape[2], pixel_values.shape[3]]]).repeat(pixel_values.shape[0], 1) image_features = self.model.get_image_features( @@ -168,6 +178,16 @@ def __init__(self, model): self.config = self.model.config self.language_model = self.model.language_model + def get_repeated_layer_class(self) -> Type[nn.Module]: + """ + Return the class used as the repeated layer across the model for subfunction extraction. + + Notes: + This method should return the *class object* (not an instance). + Downstream code can use this to find/build subfunctions for repeated blocks. 
+ """ + return self.model.language_model.layers[0].__class__ + def forward( self, input_ids, diff --git a/QEfficient/transformers/models/mixtral_moe/modeling_mixtral.py b/QEfficient/transformers/models/mixtral_moe/modeling_mixtral.py index 862714fea..d89ffa8a6 100644 --- a/QEfficient/transformers/models/mixtral_moe/modeling_mixtral.py +++ b/QEfficient/transformers/models/mixtral_moe/modeling_mixtral.py @@ -7,7 +7,7 @@ """PyTorch Mixtral model.""" -from typing import List, Optional, Tuple, Union +from typing import List, Optional, Tuple, Type, Union import torch import torch.nn.functional as F @@ -414,6 +414,16 @@ class QEffMixtralForCausalLM(MixtralForCausalLM): - update the hidden_states, and fix for onnx model """ + def get_repeated_layer_class(self) -> Type[nn.Module]: + """ + Return the set of class used as the repeated layer across the model for subfunction extraction. + + Notes: + This method should return the *class object* (not an instance). + Downstream code can use this to find/build subfunctions for repeated blocks. + """ + return {QeffMixtralDecoderLayer} + def forward( self, input_ids: torch.LongTensor = None, diff --git a/QEfficient/transformers/models/molmo/modeling_molmo.py b/QEfficient/transformers/models/molmo/modeling_molmo.py index b686e6aed..d9db80800 100644 --- a/QEfficient/transformers/models/molmo/modeling_molmo.py +++ b/QEfficient/transformers/models/molmo/modeling_molmo.py @@ -6,7 +6,7 @@ # ----------------------------------------------------------------------------- import math -from typing import Callable, List, Optional, Tuple, Union +from typing import Callable, List, Optional, Tuple, Type, Union import torch import torch.nn as nn @@ -568,6 +568,16 @@ def __init__(self, model): super().__init__() self.model = model + def get_repeated_layer_class(self) -> Type[nn.Module]: + """ + Return the set of class used as the repeated layer across the model for subfunction extraction. + + Notes: + This method should return the *class object* (not an instance). + Downstream code can use this to find/build subfunctions for repeated blocks. + """ + return {self.model.model.transformer.blocks[0].__class__} + def forward(self, pixel_values, image_masks, image_input_idx, valid_idx): image_features, _ = self.model.model.vision_backbone(pixel_values, image_masks) num_image, num_patch = image_features.shape[1:3] @@ -588,6 +598,16 @@ def __init__(self, model): # self.language_model = self.model.language_model self.config = self.model.config + def get_repeated_layer_class(self) -> Type[nn.Module]: + """ + Return the set of class used as the repeated layer across the model for subfunction extraction. + + Notes: + This method should return the *class object* (not an instance). + Downstream code can use this to find/build subfunctions for repeated blocks. 
+ """ + return {self.model.model.vision_backbone.image_vit.transformer.resblocks[0].__class__} + def forward( self, input_ids, diff --git a/QEfficient/transformers/models/mpt/modeling_mpt.py b/QEfficient/transformers/models/mpt/modeling_mpt.py index c1d98c1f8..4013a27f2 100644 --- a/QEfficient/transformers/models/mpt/modeling_mpt.py +++ b/QEfficient/transformers/models/mpt/modeling_mpt.py @@ -7,7 +7,7 @@ """PyTorch MPT model.""" -from typing import Optional, Tuple, Union +from typing import Optional, Tuple, Type, Union import torch import torch.utils.checkpoint @@ -254,6 +254,16 @@ class QEffMptForCausalLM(MptForCausalLM): - add new args cache idx for the kv retention """ + def get_repeated_layer_class(self) -> Type[nn.Module]: + """ + Return the set of class used as the repeated layer across the model for subfunction extraction. + + Notes: + This method should return the *class object* (not an instance). + Downstream code can use this to find/build subfunctions for repeated blocks. + """ + return {QEffMptBlock} + def forward( self, input_ids: Optional[torch.LongTensor] = None, diff --git a/QEfficient/transformers/models/olmo2/modeling_olmo2.py b/QEfficient/transformers/models/olmo2/modeling_olmo2.py index 00755cae5..d5b72219c 100644 --- a/QEfficient/transformers/models/olmo2/modeling_olmo2.py +++ b/QEfficient/transformers/models/olmo2/modeling_olmo2.py @@ -5,7 +5,7 @@ # # ----------------------------------------------------------------------------- -from typing import Callable, List, Optional, Tuple, Union +from typing import Callable, List, Optional, Tuple, Type, Union import torch from torch import nn @@ -324,6 +324,16 @@ class QEffOlmo2ForCausalLM(Olmo2ForCausalLM): - add new args cache idx for the kv retention """ + def get_repeated_layer_class(self) -> Type[nn.Module]: + """ + Return the set of class used as the repeated layer across the model for subfunction extraction. + + Notes: + This method should return the *class object* (not an instance). + Downstream code can use this to find/build subfunctions for repeated blocks. + """ + return {QEffOlmo2DecoderLayer} + def forward( self, input_ids: torch.LongTensor = None, diff --git a/QEfficient/transformers/models/phi/modeling_phi.py b/QEfficient/transformers/models/phi/modeling_phi.py index 4bf2e8785..7b1382c25 100644 --- a/QEfficient/transformers/models/phi/modeling_phi.py +++ b/QEfficient/transformers/models/phi/modeling_phi.py @@ -7,7 +7,7 @@ """PyTorch Phi model.""" -from typing import Callable, List, Optional, Tuple, Union +from typing import Callable, List, Optional, Tuple, Type, Union import torch from torch import nn @@ -323,6 +323,16 @@ class QEffPhiForCausalLM(PhiForCausalLM): - update the hidden_states, and fix for onnx model """ + def get_repeated_layer_class(self) -> Type[nn.Module]: + """ + Return the set of class used as the repeated layer across the model for subfunction extraction. + + Notes: + This method should return the *class object* (not an instance). + Downstream code can use this to find/build subfunctions for repeated blocks. 
+ """ + return {QEffPhiDecoderLayer} + def forward( self, input_ids: torch.LongTensor = None, diff --git a/QEfficient/transformers/models/phi3/modeling_phi3.py b/QEfficient/transformers/models/phi3/modeling_phi3.py index b97a0ab8d..eed2a27bd 100644 --- a/QEfficient/transformers/models/phi3/modeling_phi3.py +++ b/QEfficient/transformers/models/phi3/modeling_phi3.py @@ -7,7 +7,7 @@ """PyTorch Phi-3 model.""" -from typing import Callable, Optional, Tuple, Union +from typing import Callable, Optional, Tuple, Type, Union import torch import torch.utils.checkpoint @@ -351,6 +351,16 @@ class QEffPhi3ForCausalLM(Phi3ForCausalLM): - update the hidden_states, and fix for onnx model """ + def get_repeated_layer_class(self) -> Type[nn.Module]: + """ + Return the set of class used as the repeated layer across the model for subfunction extraction. + + Notes: + This method should return the *class object* (not an instance). + Downstream code can use this to find/build subfunctions for repeated blocks. + """ + return {QEffPhi3DecoderLayer} + def forward( self, input_ids: torch.LongTensor = None, diff --git a/QEfficient/transformers/models/pytorch_transforms.py b/QEfficient/transformers/models/pytorch_transforms.py index b978b6193..abb364d0a 100644 --- a/QEfficient/transformers/models/pytorch_transforms.py +++ b/QEfficient/transformers/models/pytorch_transforms.py @@ -893,32 +893,6 @@ def apply(cls, model: nn.Module, pooling: Union[str, Callable]) -> Tuple[nn.Modu return model, transformed -def get_decoder_layer_classes_for_export(model: nn.Module) -> set: - """ - Dynamically determine which DecoderLayer classes should be exported as functions - based on the model's architecture using the existing KVCacheTransform mapping. - """ - # Define patterns that identify decoder layer classes - DECODER_LAYER_PATTERNS = ["DecoderLayer", "Block", "Layer"] - - # Get all QEff classes that are decoder layers from the existing mapping - decoder_layer_classes = set() - - for original_class, qeff_class in KVCacheTransform._module_mapping.items(): - # Check if the QEff class name contains decoder layer patterns - qeff_class_name = qeff_class.__name__ - if any(pattern in qeff_class_name for pattern in DECODER_LAYER_PATTERNS): - decoder_layer_classes.add(qeff_class) - - # Filter to only include classes that are actually used in the current model - model_decoder_classes = set() - for module in model.modules(): - if module.__class__ in decoder_layer_classes: - model_decoder_classes.add(module.__class__) - - return model_decoder_classes - - class BlockedKVAttentionTransform: _module_mapping = { QEffLlamaAttention, diff --git a/QEfficient/transformers/models/qwen2/modeling_qwen2.py b/QEfficient/transformers/models/qwen2/modeling_qwen2.py index 7c093a4b0..c9b22d261 100644 --- a/QEfficient/transformers/models/qwen2/modeling_qwen2.py +++ b/QEfficient/transformers/models/qwen2/modeling_qwen2.py @@ -7,7 +7,7 @@ """PyTorch Qwen2 model.""" -from typing import List, Optional, Tuple, Union +from typing import List, Optional, Tuple, Type, Union import torch import torch.utils.checkpoint @@ -350,6 +350,16 @@ class QEffQwen2ForCausalLM(Qwen2ForCausalLM): - update the hidden_states, and fix for onnx model """ + def get_repeated_layer_class(self) -> Type[nn.Module]: + """ + Return the set of class used as the repeated layer across the model for subfunction extraction. + + Notes: + This method should return the *class object* (not an instance). + Downstream code can use this to find/build subfunctions for repeated blocks. 
+ """ + return {QEffQwen2DecoderLayer} + def forward( self, input_ids: torch.LongTensor = None, diff --git a/QEfficient/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py b/QEfficient/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py index 21d2e026e..056f2981b 100644 --- a/QEfficient/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +++ b/QEfficient/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py @@ -7,7 +7,7 @@ import math import os -from typing import Any, Callable, Dict, List, Optional, Tuple, Union +from typing import Any, Callable, Dict, List, Optional, Tuple, Type, Union import torch import torch.nn as nn @@ -73,14 +73,10 @@ def qeff_apply_rotary_pos_emb(q, k, cos, sin, position_ids, mrope_section, unsqu Returns: `tuple(torch.Tensor)` comprising of the query and key tensors rotated using the Rotary Position Embedding. """ - - mrope_section = mrope_section * 2 cos = cos[position_ids] sin = sin[position_ids] - - cos = torch.cat([m[i % 3] for i, m in enumerate(cos.split(mrope_section, dim=-1))], dim=-1).unsqueeze(unsqueeze_dim) - sin = torch.cat([m[i % 3] for i, m in enumerate(sin.split(mrope_section, dim=-1))], dim=-1).unsqueeze(unsqueeze_dim) - + cos = torch.cat([cos[0, ..., 0:32], cos[0, ..., 32:80], cos[0, ..., 80:128]], dim=-1).unsqueeze(0) + sin = torch.cat([sin[0, ..., 0:32], sin[0, ..., 32:80], sin[0, ..., 80:128]], dim=-1).unsqueeze(0) q_embed = (q * cos) + (rotate_half(q) * sin) k_embed = (k * cos) + (rotate_half(k) * sin) @@ -872,6 +868,16 @@ def __init__(self, model): self.model = model self.model.vision_model = self.model.visual + def get_repeated_layer_class(self) -> Type[nn.Module]: + """ + Return the set of class used as the repeated layer across the model for subfunction extraction. + + Notes: + This method should return the *class object* (not an instance). + Downstream code can use this to find/build subfunctions for repeated blocks. + """ + return {self.model.visual.blocks[0].__class__} + def forward(self, pixel_values, image_grid_thw): image_embeds = self.model.visual(pixel_values, grid_thw=image_grid_thw) bs = image_grid_thw.shape[0] @@ -887,6 +893,16 @@ def __init__(self, model): self.model = model self.language_model = self.model.model.language_model + def get_repeated_layer_class(self) -> Type[nn.Module]: + """ + Return the set of class used as the repeated layer across the model for subfunction extraction. + + Notes: + This method should return the *class object* (not an instance). + Downstream code can use this to find/build subfunctions for repeated blocks. + """ + return {QEffQwen2_5_VLDecoderLayer} + def forward( self, input_ids, diff --git a/QEfficient/transformers/models/qwen3/modeling_qwen3.py b/QEfficient/transformers/models/qwen3/modeling_qwen3.py index 540bad4c7..919f49166 100644 --- a/QEfficient/transformers/models/qwen3/modeling_qwen3.py +++ b/QEfficient/transformers/models/qwen3/modeling_qwen3.py @@ -7,7 +7,7 @@ """PyTorch Qwen3 model.""" -from typing import List, Optional, Tuple, Union +from typing import List, Optional, Tuple, Type, Union import torch import torch.utils.checkpoint @@ -351,6 +351,16 @@ class QEffQwen3ForCausalLM(Qwen3ForCausalLM): - update the hidden_states, and fix for onnx model """ + def get_repeated_layer_class(self) -> Type[nn.Module]: + """ + Return the set of class used as the repeated layer across the model for subfunction extraction. + + Notes: + This method should return the *class object* (not an instance). + Downstream code can use this to find/build subfunctions for repeated blocks. 
+ """ + return {QEffQwen3DecoderLayer} + def forward( self, input_ids: torch.LongTensor = None, diff --git a/QEfficient/transformers/models/qwen3_moe/modeling_qwen3_moe.py b/QEfficient/transformers/models/qwen3_moe/modeling_qwen3_moe.py index cbd80d8ca..8140056c5 100644 --- a/QEfficient/transformers/models/qwen3_moe/modeling_qwen3_moe.py +++ b/QEfficient/transformers/models/qwen3_moe/modeling_qwen3_moe.py @@ -5,7 +5,7 @@ # # ----------------------------------------------------------------------------- -from typing import List, Optional, Tuple +from typing import List, Optional, Tuple, Type import torch import torch.nn.functional as F @@ -371,6 +371,16 @@ def forward( class QEffQwen3MoeForCausalLM(Qwen3MoeForCausalLM): + def get_repeated_layer_class(self) -> Type[nn.Module]: + """ + Return the set of class used as the repeated layer across the model for subfunction extraction. + + Notes: + This method should return the *class object* (not an instance). + Downstream code can use this to find/build subfunctions for repeated blocks. + """ + return {QEffQwen3MoeDecoderLayer} + def forward( self, input_ids: Optional[torch.LongTensor] = None, diff --git a/QEfficient/transformers/models/starcoder2/modeling_starcoder2.py b/QEfficient/transformers/models/starcoder2/modeling_starcoder2.py index c86e7478b..310c31254 100644 --- a/QEfficient/transformers/models/starcoder2/modeling_starcoder2.py +++ b/QEfficient/transformers/models/starcoder2/modeling_starcoder2.py @@ -7,7 +7,7 @@ """PyTorch Starcoder2 model.""" -from typing import List, Optional, Tuple, Union +from typing import List, Optional, Tuple, Type, Union import torch from torch import nn @@ -275,6 +275,16 @@ class QEffStarcoder2ForCausalLM(Starcoder2ForCausalLM): - update the hidden_states, and fix for onnx model """ + def get_repeated_layer_class(self) -> Type[nn.Module]: + """ + Return the set of class used as the repeated layer across the model for subfunction extraction. + + Notes: + This method should return the *class object* (not an instance). + Downstream code can use this to find/build subfunctions for repeated blocks. + """ + return {QEFFStarcoder2DecoderLayer} + def forward( self, input_ids: torch.LongTensor = None, diff --git a/QEfficient/transformers/models/whisper/modeling_whisper.py b/QEfficient/transformers/models/whisper/modeling_whisper.py index a03ffecf7..2257ec06b 100644 --- a/QEfficient/transformers/models/whisper/modeling_whisper.py +++ b/QEfficient/transformers/models/whisper/modeling_whisper.py @@ -5,7 +5,7 @@ # # ---------------------------------------------------------------------------- -from typing import Optional, Tuple, Union +from typing import Optional, Tuple, Type, Union import torch from torch import nn @@ -718,6 +718,16 @@ class QEffWhisperForConditionalGeneration(WhisperForConditionalGeneration): - changed forward inputs decoder_input_ids and decoder_position_ids to input_ids and position_ids """ + def get_repeated_layer_class(self) -> Type[nn.Module]: + """ + Return the set of class used as the repeated layer across the model for subfunction extraction. + + Notes: + This method should return the *class object* (not an instance). + Downstream code can use this to find/build subfunctions for repeated blocks. 
+ """ + return {self.model.encoder.layers[0].__class__, QEffWhisperDecoderLayer} + def forward( self, input_features: Optional[torch.FloatTensor] = None, diff --git a/QEfficient/utils/export_utils.py b/QEfficient/utils/export_utils.py index 33ba694cf..7637da8ab 100644 --- a/QEfficient/utils/export_utils.py +++ b/QEfficient/utils/export_utils.py @@ -164,20 +164,27 @@ def _setup_onnx_subfunctions(qeff_model, args, kwargs): # Transform output names for subfunction compatibility if "output_names" in kwargs: kwargs["output_names"] = [ - re.sub("_RetainedState", "_InternalRetainedState", name) for name in kwargs["output_names"] + re.sub("_RetainedState", "_InternalRetainedState", name) + if name.endswith("_RetainedState") and ("key" in name or "value" in name) + else name + for name in kwargs["output_names"] ] else: args = list(args) - args[1] = [re.sub("_RetainedState", "_InternalRetainedState", name) for name in args[1]] + args[1] = [ + re.sub("_RetainedState", "_InternalRetainedState", name) + if name.endswith("_RetainedState") and ("key" in name or "value" in name) + else name + for name in args[1] + ] args = tuple(args) + # Add subfunction-specific ONNX transforms qeff_model._onnx_transforms.append(RenameFunctionOutputsTransform) qeff_model._onnx_transforms.append(CustomOpTransform) # TODO: Handle this in the modelling class QEFFTransformersBase,remove from here. Refer diffusers implementation - decoder_layer_classes = get_decoder_layer_classes_for_export(qeff_model.model) - if decoder_layer_classes: - kwargs["export_modules_as_functions"] = decoder_layer_classes + kwargs["export_modules_as_functions"] = qeff_model.model.get_repeated_layer_class() return args, kwargs diff --git a/QEfficient/utils/torch_patches.py b/QEfficient/utils/torch_patches.py index 0b9b37afa..241b32fbf 100644 --- a/QEfficient/utils/torch_patches.py +++ b/QEfficient/utils/torch_patches.py @@ -7,6 +7,8 @@ """Monkey patches for torch.onnx.utils to fix ONNX export issues.""" +import warnings + import torch import torch.onnx.utils as onnx_utils from torch import _C @@ -37,9 +39,13 @@ def _track_module_attributes_forward_hook(module, input, output): if hasattr(module, attr_name): onnx_attrs = getattr(module, attr_name) delattr(module, attr_name) + # FIX: use empty dict to avoid type mismatch - onnx_attrs = {} - _C._jit_pass_onnx_track_scope_attributes(graph, onnx_attrs) + # onnx_attrs = {} + try: + _C._jit_pass_onnx_track_scope_attributes(graph, onnx_attrs) + except Exception as e: + warnings.warn(f"Failed to track ONNX scope attributes: {e}. 
Skipping this step.") for m in model.modules(): m.register_forward_hook(_track_module_attributes_forward_hook) diff --git a/examples/disagg_serving/without_subfunc_npi_120b.yaml b/examples/disagg_serving/without_subfunc_npi_120b.yaml new file mode 100644 index 000000000..ec6cf034f --- /dev/null +++ b/examples/disagg_serving/without_subfunc_npi_120b.yaml @@ -0,0 +1,148 @@ +FP32NodeInstanceNames: + - /model/layers.0/Add_1_output_0 + - /model/layers.0/Add_output_0 + - /model/layers.0/input_layernorm/CustomRMSNorm_output_0 + - /model/layers.0/post_attention_layernorm/CustomRMSNorm_output_0 + - /model/layers.1/Add_1_output_0 + - /model/layers.1/Add_output_0 + - /model/layers.1/input_layernorm/CustomRMSNorm_output_0 + - /model/layers.1/post_attention_layernorm/CustomRMSNorm_output_0 + - /model/layers.10/Add_1_output_0 + - /model/layers.10/Add_output_0 + - /model/layers.10/input_layernorm/CustomRMSNorm_output_0 + - /model/layers.10/post_attention_layernorm/CustomRMSNorm_output_0 + - /model/layers.11/Add_1_output_0 + - /model/layers.11/Add_output_0 + - /model/layers.11/input_layernorm/CustomRMSNorm_output_0 + - /model/layers.11/post_attention_layernorm/CustomRMSNorm_output_0 + - /model/layers.12/Add_1_output_0 + - /model/layers.12/Add_output_0 + - /model/layers.12/input_layernorm/CustomRMSNorm_output_0 + - /model/layers.12/post_attention_layernorm/CustomRMSNorm_output_0 + - /model/layers.13/Add_1_output_0 + - /model/layers.13/Add_output_0 + - /model/layers.13/input_layernorm/CustomRMSNorm_output_0 + - /model/layers.13/post_attention_layernorm/CustomRMSNorm_output_0 + - /model/layers.14/Add_1_output_0 + - /model/layers.14/Add_output_0 + - /model/layers.14/input_layernorm/CustomRMSNorm_output_0 + - /model/layers.14/post_attention_layernorm/CustomRMSNorm_output_0 + - /model/layers.15/Add_1_output_0 + - /model/layers.15/Add_output_0 + - /model/layers.15/input_layernorm/CustomRMSNorm_output_0 + - /model/layers.15/post_attention_layernorm/CustomRMSNorm_output_0 + - /model/layers.16/Add_1_output_0 + - /model/layers.16/Add_output_0 + - /model/layers.16/input_layernorm/CustomRMSNorm_output_0 + - /model/layers.16/post_attention_layernorm/CustomRMSNorm_output_0 + - /model/layers.17/Add_1_output_0 + - /model/layers.17/Add_output_0 + - /model/layers.17/input_layernorm/CustomRMSNorm_output_0 + - /model/layers.17/post_attention_layernorm/CustomRMSNorm_output_0 + - /model/layers.18/Add_1_output_0 + - /model/layers.18/Add_output_0 + - /model/layers.18/input_layernorm/CustomRMSNorm_output_0 + - /model/layers.18/post_attention_layernorm/CustomRMSNorm_output_0 + - /model/layers.19/Add_1_output_0 + - /model/layers.19/Add_output_0 + - /model/layers.19/input_layernorm/CustomRMSNorm_output_0 + - /model/layers.19/post_attention_layernorm/CustomRMSNorm_output_0 + - /model/layers.2/Add_1_output_0 + - /model/layers.2/Add_output_0 + - /model/layers.2/input_layernorm/CustomRMSNorm_output_0 + - /model/layers.2/post_attention_layernorm/CustomRMSNorm_output_0 + - /model/layers.20/Add_1_output_0 + - /model/layers.20/Add_output_0 + - /model/layers.20/input_layernorm/CustomRMSNorm_output_0 + - /model/layers.20/post_attention_layernorm/CustomRMSNorm_output_0 + - /model/layers.21/Add_1_output_0 + - /model/layers.21/Add_output_0 + - /model/layers.21/input_layernorm/CustomRMSNorm_output_0 + - /model/layers.21/post_attention_layernorm/CustomRMSNorm_output_0 + - /model/layers.22/Add_1_output_0 + - /model/layers.22/Add_output_0 + - /model/layers.22/input_layernorm/CustomRMSNorm_output_0 + - 
+  - /model/layers.22/post_attention_layernorm/CustomRMSNorm_output_0
+  - /model/layers.23/Add_1_output_0
+  - /model/layers.23/Add_output_0
+  - /model/layers.23/input_layernorm/CustomRMSNorm_output_0
+  - /model/layers.23/post_attention_layernorm/CustomRMSNorm_output_0
+  - /model/layers.24/Add_1_output_0
+  - /model/layers.24/Add_output_0
+  - /model/layers.24/input_layernorm/CustomRMSNorm_output_0
+  - /model/layers.24/post_attention_layernorm/CustomRMSNorm_output_0
+  - /model/layers.25/Add_1_output_0
+  - /model/layers.25/Add_output_0
+  - /model/layers.25/input_layernorm/CustomRMSNorm_output_0
+  - /model/layers.25/post_attention_layernorm/CustomRMSNorm_output_0
+  - /model/layers.26/Add_1_output_0
+  - /model/layers.26/Add_output_0
+  - /model/layers.26/input_layernorm/CustomRMSNorm_output_0
+  - /model/layers.26/post_attention_layernorm/CustomRMSNorm_output_0
+  - /model/layers.27/Add_1_output_0
+  - /model/layers.27/Add_output_0
+  - /model/layers.27/input_layernorm/CustomRMSNorm_output_0
+  - /model/layers.27/post_attention_layernorm/CustomRMSNorm_output_0
+  - /model/layers.28/Add_1_output_0
+  - /model/layers.28/Add_output_0
+  - /model/layers.28/input_layernorm/CustomRMSNorm_output_0
+  - /model/layers.28/post_attention_layernorm/CustomRMSNorm_output_0
+  - /model/layers.29/Add_1_output_0
+  - /model/layers.29/Add_output_0
+  - /model/layers.29/input_layernorm/CustomRMSNorm_output_0
+  - /model/layers.29/post_attention_layernorm/CustomRMSNorm_output_0
+  - /model/layers.3/Add_1_output_0
+  - /model/layers.3/Add_output_0
+  - /model/layers.3/input_layernorm/CustomRMSNorm_output_0
+  - /model/layers.3/post_attention_layernorm/CustomRMSNorm_output_0
+  - /model/layers.30/Add_1_output_0
+  - /model/layers.30/Add_output_0
+  - /model/layers.30/input_layernorm/CustomRMSNorm_output_0
+  - /model/layers.30/post_attention_layernorm/CustomRMSNorm_output_0
+  - /model/layers.31/Add_1_output_0
+  - /model/layers.31/Add_output_0
+  - /model/layers.31/input_layernorm/CustomRMSNorm_output_0
+  - /model/layers.31/post_attention_layernorm/CustomRMSNorm_output_0
+  - /model/layers.32/Add_1_output_0
+  - /model/layers.32/Add_output_0
+  - /model/layers.32/input_layernorm/CustomRMSNorm_output_0
+  - /model/layers.32/post_attention_layernorm/CustomRMSNorm_output_0
+  - /model/layers.33/Add_1_output_0
+  - /model/layers.33/Add_output_0
+  - /model/layers.33/input_layernorm/CustomRMSNorm_output_0
+  - /model/layers.33/post_attention_layernorm/CustomRMSNorm_output_0
+  - /model/layers.34/Add_1_output_0
+  - /model/layers.34/Add_output_0
+  - /model/layers.34/input_layernorm/CustomRMSNorm_output_0
+  - /model/layers.34/post_attention_layernorm/CustomRMSNorm_output_0
+  - /model/layers.35/Add_1_output_0
+  - /model/layers.35/Add_output_0
+  - /model/norm/Add_output_0
+  - /model/layers.35/input_layernorm/CustomRMSNorm_output_0
+  - /model/layers.35/post_attention_layernorm/CustomRMSNorm_output_0
+  - /model/layers.4/Add_1_output_0
+  - /model/layers.4/Add_output_0
+  - /model/layers.4/input_layernorm/CustomRMSNorm_output_0
+  - /model/layers.4/post_attention_layernorm/CustomRMSNorm_output_0
+  - /model/layers.5/Add_1_output_0
+  - /model/layers.5/Add_output_0
+  - /model/layers.5/input_layernorm/CustomRMSNorm_output_0
+  - /model/layers.5/post_attention_layernorm/CustomRMSNorm_output_0
+  - /model/layers.6/Add_1_output_0
+  - /model/layers.6/Add_output_0
+  - /model/layers.6/input_layernorm/CustomRMSNorm_output_0
+  - /model/layers.6/post_attention_layernorm/CustomRMSNorm_output_0
+  - /model/layers.7/Add_1_output_0
+  - /model/layers.7/Add_output_0
+  - /model/layers.7/input_layernorm/CustomRMSNorm_output_0
+  - /model/layers.7/post_attention_layernorm/CustomRMSNorm_output_0
+  - /model/layers.8/Add_1_output_0
+  - /model/layers.8/Add_output_0
+  - /model/layers.8/input_layernorm/CustomRMSNorm_output_0
+  - /model/layers.8/post_attention_layernorm/CustomRMSNorm_output_0
+  - /model/layers.9/Add_1_output_0
+  - /model/layers.9/Add_output_0
+  - /model/layers.9/input_layernorm/CustomRMSNorm_output_0
+  - /model/layers.9/post_attention_layernorm/CustomRMSNorm_output_0
+  - /model/norm/CustomRMSNorm_output_0
\ No newline at end of file
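The node list above is fully mechanical: four FP32 node instance names per decoder layer (layers 0 through 35), plus the two final-norm outputs. A short sketch that regenerates an equivalent file with PyYAML, assuming the 36-layer naming shown above (adjust the layer count for other models):

```python
import yaml

NUM_LAYERS = 36  # matches layers.0 .. layers.35 enumerated above

nodes = []
for i in range(NUM_LAYERS):
    nodes += [
        f"/model/layers.{i}/Add_1_output_0",
        f"/model/layers.{i}/Add_output_0",
        f"/model/layers.{i}/input_layernorm/CustomRMSNorm_output_0",
        f"/model/layers.{i}/post_attention_layernorm/CustomRMSNorm_output_0",
    ]
nodes += ["/model/norm/Add_output_0", "/model/norm/CustomRMSNorm_output_0"]

# sorted() reproduces the lexicographic ordering used by the hand-written file.
with open("without_subfunc_npi_120b.yaml", "w") as f:
    yaml.safe_dump({"FP32NodeInstanceNames": sorted(nodes)}, f, default_flow_style=False)
```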
+""" +# Run prefill +config = AutoConfig.from_pretrained(model_id) +tokenizer = AutoTokenizer.from_pretrained(model_id) +PREFILL_SEQ_LEN = 128 +CTX_LEN = 128 * 3 + +qeff_model = QEFFAutoModelForCausalLM.from_pretrained(model_id) + +decode_qpc_path = qeff_model.compile( + prefill_seq_len=1, + ctx_len=CTX_LEN, + num_cores=16, + mxfp6_matmul=True, + mxint8_kv_cache=True, + num_devices=1, + mos=1, + aic_enable_depth_first=True, + num_speculative_tokens=None, + offload_pt_weights=False, # Need the weights in memory for prefill-model export/compilation in the next step + retain_full_kv=True, +) + + +# Following command errors out by default, the user is supposed to run the printed command and provide the generated qpc path as prefill_qpc_path commenting out lines 55-68 +# prefill_qpc_path = "provide path here" +prefill_qpc_path = qeff_model.compile( + prefill_seq_len=PREFILL_SEQ_LEN, + ctx_len=CTX_LEN, + num_cores=16, + mxfp6_matmul=True, + mxint8_kv_cache=True, + num_devices=1, + mos=1, + aic_enable_depth_first=True, + num_speculative_tokens=None, + prefill_only=True, + enable_chunking=True, + use_onnx_subfunctions=True, +) + + +inputs = tokenizer(prompt, return_tensors="np", padding=True) +position_ids = inputs["attention_mask"].sum(1, keepdims=True) +generation_len = CTX_LEN - position_ids.max() +padded_len = inputs["input_ids"].shape[1] +num_chunks = -(padded_len // -PREFILL_SEQ_LEN) # ceil divide without float +padded_len = num_chunks * PREFILL_SEQ_LEN # Convert to a multiple of prompt_len +inputs = tokenizer(prompt, return_tensors="np", padding="max_length", max_length=padded_len) +inputs["position_ids"] = np.where(inputs.pop("attention_mask"), np.arange(padded_len), -1) +inputs.pop("token_type_ids", None) +inputs = {k: torch.from_numpy(v) for k, v in inputs.items()} +inputs.pop("past_key_values", None) +inputs = {k: v.detach().numpy() for k, v in inputs.items()} + + +decode_session = QAICInferenceSession(decode_qpc_path) +prefill_session = QAICInferenceSession(prefill_qpc_path) + +all_outputs = [] +for i in range(num_chunks): + chunk_inputs = inputs.copy() + chunk_inputs["input_ids"] = inputs["input_ids"][:, i * PREFILL_SEQ_LEN : (i + 1) * PREFILL_SEQ_LEN] + chunk_inputs["position_ids"] = inputs["position_ids"][:, i * PREFILL_SEQ_LEN : (i + 1) * PREFILL_SEQ_LEN] + ins = time.time() + qpc_out = prefill_session.run(chunk_inputs) + print(f"time for this run={time.time() - ins}") + for i in range(config.num_hidden_layers): + inputs[f"past_key.{i}"] = qpc_out[f"past_key.{i}_RetainedState"] + inputs[f"past_value.{i}"] = qpc_out[f"past_value.{i}_RetainedState"] + +all_outputs.append(np.argmax(qpc_out["logits"])) +decode_inputs = { + "input_ids": np.argmax(qpc_out["logits"]).reshape(1, 1), + "position_ids": np.max(inputs["position_ids"]).reshape(1, 1) + 1, +} +for i in range(config.num_hidden_layers): + decode_inputs[f"past_key.{i}"] = qpc_out[f"past_key.{i}_RetainedState"] + decode_inputs[f"past_value.{i}"] = qpc_out[f"past_value.{i}_RetainedState"] + +st = time.time() +decode_out = decode_session.run(decode_inputs) +print(f"time for first run of decode with KV as input = {time.time() - st} sec\n") +all_outputs.append(np.argmax(decode_out["logits"])) +pos_id = np.max(decode_inputs["position_ids"]).reshape(1, 1) + 1 +loop_decode_inputs = { + "input_ids": np.argmax(decode_out["logits"]).reshape(1, 1), + "position_ids": pos_id, +} + +for i in range(config.num_hidden_layers): + loop_decode_inputs[f"past_key.{i}"] = decode_out[f"past_key.{i}_RetainedState"] + loop_decode_inputs[f"past_value.{i}"] = 
decode_out[f"past_value.{i}_RetainedState"] + +st = time.time() +for i in range(generation_len - 2): + decode_out = decode_session.run(loop_decode_inputs) + all_outputs.append(np.argmax(decode_out["logits"])) + pos_id += 1 + for i in range(config.num_hidden_layers): + loop_decode_inputs[f"past_key.{i}"] = decode_out[f"past_key.{i}_RetainedState"] + loop_decode_inputs[f"past_value.{i}"] = decode_out[f"past_value.{i}_RetainedState"] + + loop_decode_inputs.update( + { + "input_ids": np.argmax(decode_out["logits"]).reshape(1, 1), + "position_ids": pos_id, + } + ) +ft = time.time() + +print(f"decode tok/sec={(generation_len - 2) / (ft - st)}") +print(f"input\n{prompt}\noutput\n{tokenizer.decode(all_outputs)}") diff --git a/scripts/Jenkinsfile b/scripts/Jenkinsfile index 3420c025b..414851eca 100644 --- a/scripts/Jenkinsfile +++ b/scripts/Jenkinsfile @@ -22,6 +22,7 @@ pipeline { . preflight_qeff/bin/activate && pip install --upgrade pip setuptools && pip install .[test] && + pip install .[diffusers] && pip install junitparser pytest-xdist && pip install librosa==0.10.2 soundfile==0.13.1 && #packages needed to load example for whisper testing pip install --extra-index-url https://download.pytorch.org/whl/cpu timm==1.0.14 torchvision==0.22.0+cpu einops==0.8.1 && #packages to load VLMs