13 changes: 12 additions & 1 deletion QEfficient/transformers/models/falcon/modeling_falcon.py
@@ -8,9 +8,10 @@
"""PyTorch Falcon model."""

import math
from typing import Optional, Tuple, Union
from typing import Optional, Tuple, Type, Union

import torch
import torch.nn as nn
import torch.utils.checkpoint
from torch.nn import functional as F
from transformers.cache_utils import Cache
@@ -353,6 +354,16 @@ class QEffFalconForCausalLM(FalconForCausalLM):
- update the hidden_states, and fix for onnx model
"""

def get_repeated_layer_class(self) -> Type[nn.Module]:
"""
Return the set of classes used as repeated layers across the model, for subfunction extraction.

Notes:
This method should return *class objects* (not instances).
Downstream code can use this to find/build subfunctions for repeated blocks.
"""
return {QEffFalconDecoderLayer}
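
Every override added in this PR returns the repeated block's class wrapped in a one-element set (the annotation reads Type[nn.Module], but each body returns a set), so a consumer only has to walk the module tree and match instances of those classes. Below is a minimal sketch of such a consumer; the helper name iter_repeated_blocks is illustrative and not part of this change.

import torch.nn as nn

def iter_repeated_blocks(model: nn.Module):
    # Yield (qualified_name, module) for every instance of the model's repeated
    # layer classes, as reported by the get_repeated_layer_class() hook.
    repeated_classes = tuple(model.get_repeated_layer_class())
    for name, module in model.named_modules():
        if isinstance(module, repeated_classes):
            yield name, module

Each yielded block could then be exported once as a shared subfunction instead of being inlined per layer, which is the stated purpose of the hook.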

def forward(
self,
input_ids: torch.LongTensor = None,
12 changes: 11 additions & 1 deletion QEfficient/transformers/models/gemma/modeling_gemma.py
@@ -5,7 +5,7 @@
#
# -----------------------------------------------------------------------------

from typing import List, Optional, Tuple, Union
from typing import List, Optional, Tuple, Type, Union

import torch
from torch import nn
@@ -336,6 +336,16 @@ class QEffGemmaForCausalLM(GemmaForCausalLM):
- add new args cache idx for the kv retention
"""

def get_repeated_layer_class(self) -> Type[nn.Module]:
"""
Return the set of classes used as repeated layers across the model, for subfunction extraction.

Notes:
This method should return *class objects* (not instances).
Downstream code can use this to find/build subfunctions for repeated blocks.
"""
return {QEffGemmaDecoderLayer}

def forward(
self,
input_ids: torch.LongTensor = None,
12 changes: 11 additions & 1 deletion QEfficient/transformers/models/gemma2/modeling_gemma2.py
@@ -5,7 +5,7 @@
#
# -----------------------------------------------------------------------------

from typing import Callable, List, Optional, Tuple, Union
from typing import Callable, List, Optional, Tuple, Type, Union

import torch
from torch import nn
@@ -388,6 +388,16 @@ class QEffGemma2ForCausalLM(Gemma2ForCausalLM, GenerationMixin):
- add new args cache idx for the kv retention
"""

def get_repeated_layer_class(self) -> Type[nn.Module]:
"""
Return the set of classes used as repeated layers across the model, for subfunction extraction.

Notes:
This method should return *class objects* (not instances).
Downstream code can use this to find/build subfunctions for repeated blocks.
"""
return {QEffGemma2DecoderLayer}

def forward(
self,
input_ids: torch.LongTensor = None,
22 changes: 21 additions & 1 deletion QEfficient/transformers/models/gemma3/modeling_gemma3.py
@@ -6,7 +6,7 @@
# -----------------------------------------------------------------------------

import copy
from typing import List, Optional, Tuple, Union
from typing import List, Optional, Tuple, Type, Union

import torch
from torch import nn
@@ -589,6 +589,16 @@ def __init__(self, model):
self.model = model
self.model.vision_model = self.model.vision_tower

def get_repeated_layer_class(self) -> Type[nn.Module]:
"""
Return the set of classes used as repeated layers across the model, for subfunction extraction.

Notes:
This method should return *class objects* (not instances).
Downstream code can use this to find/build subfunctions for repeated blocks.
"""
return {self.model.vision_tower.vision_model.encoder.layers[0].__class__}
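
Unlike the text models, this vision wrapper has no QEff-specific layer class to name statically, so it reports the runtime class of the first encoder layer. That covers every block only if the encoder is homogeneous; the sanity check below is an illustrative sketch under that assumption and is not part of this change.

from torch import nn

def repeated_vision_layer_class(vision_tower: nn.Module) -> type:
    # Uses the attribute path shown above; raises if the encoder mixes layer classes,
    # in which case subfunction extraction would silently skip some blocks.
    layers = vision_tower.vision_model.encoder.layers
    repeated_cls = layers[0].__class__
    if not all(isinstance(layer, repeated_cls) for layer in layers):
        raise TypeError("Vision encoder layers are not all the same class.")
    return repeated_cls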

def forward(self, pixel_values):
image_features = self.model.get_image_features(pixel_values=pixel_values)
return image_features
@@ -602,6 +612,16 @@ def __init__(self, model):
self.config = self.model.config
self.lm_head = self.model.lm_head

def get_repeated_layer_class(self) -> Type[nn.Module]:
"""
Return the set of classes used as repeated layers across the model, for subfunction extraction.

Notes:
This method should return *class objects* (not instances).
Downstream code can use this to find/build subfunctions for repeated blocks.
"""
return {QEffGemma3DecoderLayer}

def forward(
self,
input_ids,
12 changes: 11 additions & 1 deletion QEfficient/transformers/models/gpt2/modeling_gpt2.py
@@ -5,7 +5,7 @@
#
# -----------------------------------------------------------------------------

from typing import Callable, Optional, Tuple, Union
from typing import Callable, Optional, Tuple, Type, Union

import torch
from torch import nn
@@ -397,6 +397,16 @@ class QEffGPT2LMHeadModel(GPT2LMHeadModel):
- add new args position idx for the cache_kwargs for kv retention
"""

def get_repeated_layer_class(self) -> Type[nn.Module]:
"""
Return the set of classes used as repeated layers across the model, for subfunction extraction.

Notes:
This method should return *class objects* (not instances).
Downstream code can use this to find/build subfunctions for repeated blocks.
"""
return {QEffGPT2Block}

def forward(
self,
input_ids: Optional[torch.LongTensor] = None,
@@ -7,7 +7,7 @@

"""PyTorch GPTBigCode model."""

from typing import Optional, Tuple, Union
from typing import Optional, Tuple, Type, Union

import torch
import torch.utils.checkpoint
@@ -378,6 +378,16 @@ def forward(


class QEffGPTBigCodeForCausalLM(GPTBigCodeForCausalLM):
def get_repeated_layer_class(self) -> Type[nn.Module]:
"""
Return the set of classes used as repeated layers across the model, for subfunction extraction.

Notes:
This method should return *class objects* (not instances).
Downstream code can use this to find/build subfunctions for repeated blocks.
"""
return {QEffGPTBigCodeBlock}

def forward(
self,
input_ids: Optional[torch.Tensor] = None,
10 changes: 10 additions & 0 deletions QEfficient/transformers/models/gpt_oss/modeling_gpt_oss.py
@@ -1205,6 +1205,16 @@


class QEffGptOssForCausalLM(GptOssForCausalLM):
def get_repeated_layer_class(self) -> Type[nn.Module]:

Check failure on line 1208 (GitHub Actions / lint, Ruff F821): QEfficient/transformers/models/gpt_oss/modeling_gpt_oss.py:1208:43: F821 Undefined name `Type`
"""
Return the set of classes used as repeated layers across the model, for subfunction extraction.

Notes:
This method should return *class objects* (not instances).
Downstream code can use this to find/build subfunctions for repeated blocks.
"""
return {QEffGptOssDecoderLayer}
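
The lint failure flagged above is the import that the other files in this PR add but this one does not: Type is used in the new annotation without being imported. A minimal fix, assuming the file's existing typing import is extended rather than duplicated:

from typing import Type  # merge into the existing `from typing import ...` line in modeling_gpt_oss.py

If the annotation is also meant to describe the returned set literally, Set[Type[nn.Module]] (with Set imported as well) would match the return {QEffGptOssDecoderLayer} body more closely.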

def forward(
self,
input_ids: Optional[torch.LongTensor] = None,
12 changes: 11 additions & 1 deletion QEfficient/transformers/models/gptj/modeling_gptj.py
@@ -7,7 +7,7 @@

"""PyTorch GPT-J model."""

from typing import Optional, Tuple, Union
from typing import Optional, Tuple, Type, Union

import torch
from torch import nn
@@ -318,6 +318,16 @@ class QEffGPTJForCausalLM(GPTJForCausalLM):
- update the hidden_states, and fix for onnx model
"""

def get_repeated_layer_class(self) -> Type[nn.Module]:
"""
Return the set of classes used as repeated layers across the model, for subfunction extraction.

Notes:
This method should return *class objects* (not instances).
Downstream code can use this to find/build subfunctions for repeated blocks.
"""
return {QEffGPTJBlock}

def forward(
self,
input_ids: Optional[torch.LongTensor] = None,
12 changes: 11 additions & 1 deletion QEfficient/transformers/models/granite/modeling_granite.py
@@ -5,7 +5,7 @@
#
# -----------------------------------------------------------------------------

from typing import Callable, List, Optional, Tuple, Union
from typing import Callable, List, Optional, Tuple, Type, Union

import torch
from torch import nn
@@ -347,6 +347,16 @@ class QEffGraniteForCausalLM(GraniteForCausalLM):
Copied from GraniteForCausalLM: https://github.com/huggingface/transformers/blob/main/src/transformers/models/granite/modeling_granite.py
"""

def get_repeated_layer_class(self) -> Type[nn.Module]:
"""
Return the set of classes used as repeated layers across the model, for subfunction extraction.

Notes:
This method should return *class objects* (not instances).
Downstream code can use this to find/build subfunctions for repeated blocks.
"""
return {QEffGraniteDecoderLayer}

def forward(
self,
input_ids: torch.LongTensor = None,
@@ -5,7 +5,7 @@
#
# -----------------------------------------------------------------------------

from typing import List, Optional, Tuple, Union
from typing import List, Optional, Tuple, Type, Union

import torch
import torch.nn.functional as F
@@ -493,6 +493,16 @@ class QEffGraniteMoeForCausalLM(GraniteMoeForCausalLM):
Copied from GraniteForCausalLM: https://github.com/huggingface/transformers/blob/main/src/transformers/models/granite/modeling_granite.py
"""

def get_repeated_layer_class(self) -> Type[nn.Module]:
"""
Return the set of classes used as repeated layers across the model, for subfunction extraction.

Notes:
This method should return *class objects* (not instances).
Downstream code can use this to find/build subfunctions for repeated blocks.
"""
return {self.model.layers[0].__class__}

def forward(
self,
input_ids: torch.LongTensor = None,
12 changes: 11 additions & 1 deletion QEfficient/transformers/models/grok_1/modeling_grok1.py
@@ -5,7 +5,7 @@
#
# ----------------------------------------------------------------------------

from typing import List, Optional, Tuple, Union
from typing import List, Optional, Tuple, Type, Union

import torch
import torch.nn as nn
@@ -397,6 +397,16 @@ class QEffGrok1ModelForCausalLM(nn.Module):
Grok model for causal language modeling.
"""

def get_repeated_layer_class(self) -> Type[nn.Module]:
"""
Return the set of classes used as repeated layers across the model, for subfunction extraction.

Notes:
This method should return *class objects* (not instances).
Downstream code can use this to find/build subfunctions for repeated blocks.
"""
return {QEffGrok1DecoderLayer}

def forward(
self,
input_ids: torch.LongTensor = None,
22 changes: 21 additions & 1 deletion QEfficient/transformers/models/internvl/modeling_internvl.py
@@ -5,7 +5,7 @@
#
# -----------------------------------------------------------------------------

from typing import List, Optional
from typing import List, Optional, Type

import torch
import torch.nn as nn
@@ -21,6 +21,16 @@ def __init__(self, model):
super().__init__()
self.model = model

def get_repeated_layer_class(self) -> Type[nn.Module]:
"""
Return the set of classes used as repeated layers across the model, for subfunction extraction.

Notes:
This method should return *class objects* (not instances).
Downstream code can use this to find/build subfunctions for repeated blocks.
"""
return {self.model.vision_model.encoder.layers[0].__class__}

def forward(self, pixel_values):
vision_embeds = self.model.extract_feature(pixel_values)
# Reshape from [num_patches, 256, hidden_dim] -> [1, num_patches*256, head_dim]
@@ -36,6 +46,16 @@ def __init__(self, model):
self.config = self.model.language_model.config
self.language_model = self.model.language_model

def get_repeated_layer_class(self) -> Type[nn.Module]:
"""
Return the set of classes used as repeated layers across the model, for subfunction extraction.

Notes:
This method should return *class objects* (not instances).
Downstream code can use this to find/build subfunctions for repeated blocks.
"""
return {self.model.language_model.model.layers[0].__class__}

def forward(
self,
input_ids,
12 changes: 11 additions & 1 deletion QEfficient/transformers/models/llama/modeling_llama.py
@@ -5,7 +5,7 @@
#
# -----------------------------------------------------------------------------

from typing import Any, Dict, List, Optional, Tuple, Union
from typing import Any, Dict, List, Optional, Tuple, Type, Union

import torch
from torch import nn
@@ -404,6 +404,16 @@ class QEffLlamaForCausalLM(LlamaForCausalLM):
Copied from LlamaForCausalLM: https://github.com/huggingface/transformers/blob/main/src/transformers/models/llama/modeling_llama.py
"""

def get_repeated_layer_class(self) -> Type[nn.Module]:
"""
Return the set of classes used as repeated layers across the model, for subfunction extraction.

Notes:
This method should return *class objects* (not instances).
Downstream code can use this to find/build subfunctions for repeated blocks.
"""
return {QEffLlamaDecoderLayer}

def forward(
self,
input_ids: torch.LongTensor = None,