Skip to content

Commit fa46075

Browse files
committed
Fix MoE router mapping of expert_bias to e_score_correction_bias
Signed-off-by: jenchen13 <[email protected]>
1 parent 15a8351 commit fa46075

File tree

1 file changed

+7
-2
lines changed

1 file changed

+7
-2
lines changed

modelopt/torch/export/plugins/mcore_nemotron.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,10 @@
6666
"linear_fc1": NameRemapping("backbone.layers.{}.mixer.up_proj.", COL_TP),
6767
"linear_fc2": NameRemapping("backbone.layers.{}.mixer.down_proj.", ROW_TP),
6868
# MoE
69-
"router": NameRemapping("model.layers.{}.mlp.gate.", REPLICATE),
69+
"router": NameRemapping(
70+
"backbone.layers.{}.mlp.gate.", {"mapping": {"expert_bias": "e_score_correction_bias"}}
71+
),
72+
7073
"local_experts.linear_fc1": NameRemapping(
7174
"backbone.layers.{}.mixer.experts.{}.up_proj", COL_ETP
7275
),
@@ -104,7 +107,9 @@
104107
"linear_fc1": NameRemapping("backbone.layers.{}.mixer.up_proj."),
105108
"linear_fc2": NameRemapping("backbone.layers.{}.mixer.down_proj."),
106109
# MoE
107-
"router": NameRemapping("backbone.layers.{}.mlp.gate."),
110+
"router": NameRemapping(
111+
"backbone.layers.{}.mlp.gate.", {"mapping": {"expert_bias": "e_score_correction_bias"}}
112+
),
108113
"local_experts.linear_fc1": NameRemapping("backbone.layers.{}.mixer.experts.{}.up_proj."),
109114
"local_experts.linear_fc2": NameRemapping("backbone.layers.{}.mixer.experts.{}.down_proj."),
110115
"shared_experts.linear_fc1": NameRemapping("backbone.layers.{}.mixer.shared_experts.up_proj."),

0 commit comments

Comments
 (0)