Skip to content

Commit 271bb98

Browse files
committed
fix mixer name
Signed-off-by: jenchen13 <[email protected]>
1 parent 752c70a commit 271bb98

File tree

1 file changed

+8
-8
lines changed

1 file changed

+8
-8
lines changed

modelopt/torch/export/plugins/mcore_nemotron.py

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -67,11 +67,11 @@
67 67
"linear_fc2": NameRemapping("backbone.layers.{}.mixer.down_proj.", ROW_TP),
68 68
# MoE
69 69
"router": NameRemapping("model.layers.{}.mlp.gate.", REPLICATE),
70-
"local_experts.linear_fc1": NameRemapping("backbone.layers.{}.mlp.experts.{}.up_proj", COL_ETP),
71-
"local_experts.linear_fc2": NameRemapping("backbone.layers.{}.mlp.experts.{}.down_proj.", ROW_ETP),
72-
"shared_experts.linear_fc1": NameRemapping("backbone.layers.{}.mlp.shared_experts.up_proj.", COL_TP),
70+
"local_experts.linear_fc1": NameRemapping("backbone.layers.{}.mixer.experts.{}.up_proj", COL_ETP),
71+
"local_experts.linear_fc2": NameRemapping("backbone.layers.{}.mixer.experts.{}.down_proj.", ROW_ETP),
72+
"shared_experts.linear_fc1": NameRemapping("backbone.layers.{}.mixer.shared_experts.up_proj.", COL_TP),
73 73
"shared_experts.linear_fc2": NameRemapping(
74-
"backbone.layers.{}.mlp.shared_experts.down_proj.", ROW_TP
74+
"backbone.layers.{}.mixer.shared_experts.down_proj.", ROW_TP
75 75
),
76 76

77 77
}
@@ -100,9 +100,9 @@
100 100
"linear_fc2": NameRemapping("backbone.layers.{}.mixer.down_proj."),
101 101
# MoE
102 102
"router": NameRemapping("backbone.layers.{}.mlp.gate."),
103-
"local_experts.linear_fc1": NameRemapping("backbone.layers.{}.mlp.experts.{}.up_proj."),
104-
"local_experts.linear_fc2": NameRemapping("backbone.layers.{}.mlp.experts.{}.down_proj."),
105-
"shared_experts.linear_fc1": NameRemapping("backbone.layers.{}.mlp.shared_experts.up_proj."),
106-
"shared_experts.linear_fc2": NameRemapping("backbone.layers.{}.mlp.shared_experts.down_proj."),
103+
"local_experts.linear_fc1": NameRemapping("backbone.layers.{}.mixer.experts.{}.up_proj."),
104+
"local_experts.linear_fc2": NameRemapping("backbone.layers.{}.mixer.experts.{}.down_proj."),
105+
"shared_experts.linear_fc1": NameRemapping("backbone.layers.{}.mixer.shared_experts.up_proj."),
106+
"shared_experts.linear_fc2": NameRemapping("backbone.layers.{}.mixer.shared_experts.down_proj."),
107 107

108 108
}

0 commit comments

Comments
 (0)