Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
fd8e607
initial commit
ArthurZucker Nov 18, 2025
7990c49
up
ArthurZucker Nov 18, 2025
1a9f77a
update unexpected later on
ArthurZucker Nov 18, 2025
c82b5c8
Merge branch 'main' of github.com:huggingface/transformers into vlm-u…
ArthurZucker Nov 18, 2025
30e405a
fix
ArthurZucker Nov 18, 2025
e9fcb66
update
ArthurZucker Nov 18, 2025
4204535
simplify our lives
ArthurZucker Nov 19, 2025
1da30a6
isolate a bit more
ArthurZucker Nov 19, 2025
5c71300
fixup
ArthurZucker Nov 19, 2025
6c33dc8
small nits
ArthurZucker Nov 19, 2025
e53e1c6
style
ArthurZucker Nov 19, 2025
5e2e0c4
nit
ArthurZucker Nov 19, 2025
eb8493c
fix common cases
ArthurZucker Nov 19, 2025
526001e
Merge branch 'main' of github.com:huggingface/transformers into vlm-u…
ArthurZucker Nov 19, 2025
74c524d
fix post merge
ArthurZucker Nov 19, 2025
7c04b0f
bnb needs missing keys
ArthurZucker Nov 19, 2025
935e77f
small fix
ArthurZucker Nov 19, 2025
6c23f3e
better documentation
ArthurZucker Nov 19, 2025
b5adc5b
no veradict + base class
ArthurZucker Nov 19, 2025
2746e0f
take review comments
ArthurZucker Nov 19, 2025
b7591da
take all comments
ArthurZucker Nov 19, 2025
138d415
fix super init
ArthurZucker Nov 19, 2025
cb63300
update doc to be more real
ArthurZucker Nov 20, 2025
98aecb8
small nits
ArthurZucker Nov 20, 2025
408d194
nits
ArthurZucker Nov 20, 2025
6919f15
fix dtype
ArthurZucker Nov 20, 2025
2b352e2
Merge branch 'main' of github.com:huggingface/transformers into vlm-u…
ArthurZucker Nov 20, 2025
3d25d19
fix dtype issue
ArthurZucker Nov 20, 2025
02e02c5
remove one unused function
ArthurZucker Nov 20, 2025
9a3783b
cleanup and nits
ArthurZucker Nov 20, 2025
63cd719
up
ArthurZucker Nov 20, 2025
6fd8ec8
should be the final fix!
ArthurZucker Nov 20, 2025
da8f897
fixup
ArthurZucker Nov 20, 2025
321f16d
Merge branch 'main' of github.com:huggingface/transformers into vlm-u…
ArthurZucker Nov 20, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 8 additions & 13 deletions src/transformers/conversion_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

from copy import deepcopy

from .core_model_loading import Concatenate, MergeModulelist, WeightConverter
from .core_model_loading import Concatenate, MergeModulelist, WeightConverter, WeightRenaming
from .utils import is_torch_available


Expand All @@ -26,6 +26,7 @@
def _build_checkpoint_conversion_mapping():
mapping = {
"mixtral": [
WeightRenaming(".block_sparse_moe.gate", ".mlp.gate"),
WeightConverter(
source_keys=[
"block_sparse_moe.experts.*.w1.weight",
Expand All @@ -50,12 +51,6 @@ def _build_checkpoint_conversion_mapping():
), # each process has two lists of tensors, we cat each list. -> we end up with 2 tensors
], # we want the loading to add this shard operation here. Though we can't shard after concats and merge, needs to be first
),
# WeightConverter(
# ["self_attn.q_proj", "self_attn.k_proj", "self_attn.v_proj"],
# "self_attn.qkv_proj",
# operations=[Concatenate(dim=0)], # more like stack?
# ),
WeightConverter("*.block_sparse_moe.", "*.mlp."),
],
"qwen2_moe": [
WeightConverter(
Expand All @@ -73,34 +68,34 @@ def _build_checkpoint_conversion_mapping():
),
],
"legacy": [
WeightConverter(
WeightRenaming(
source_keys="LayerNorm.gamma",
target_keys="LayerNorm.weight",
),
WeightConverter(
WeightRenaming(
source_keys="LayerNorm.beta",
target_keys="LayerNorm.bias",
),
],
}
if hasattr(torch.nn.utils.parametrizations, "weight_norm"):
mapping["legacy"] += [
WeightConverter(
WeightRenaming(
source_keys="weight_g",
target_keys="parametrizations.weight.original0",
),
WeightConverter(
WeightRenaming(
source_keys="weight_v",
target_keys="parametrizations.weight.original1",
),
]
else:
mapping["legacy"] += [
WeightConverter(
WeightRenaming(
source_keys="parametrizations.weight.original0",
target_keys="weight_g",
),
WeightConverter(
WeightRenaming(
source_keys="parametrizations.weight.original1",
target_keys="weight_v",
),
Expand Down
Loading