|
19 | 19 | from .mcore_custom import ( |
20 | 20 | COL_ETP, |
21 | 21 | COL_TP, |
22 | | - ROW_ETP, |
23 | 22 | REPLICATE, |
| 23 | + ROW_ETP, |
24 | 24 | ROW_TP, |
25 | 25 | CustomModuleMapping, |
26 | 26 | NameRemapping, |
|
41 | 41 | "linear_fc2": NameRemapping("model.layers.{}.mlp.down_proj."), |
42 | 42 | "final_layernorm": NameRemapping("model.norm."), |
43 | 43 | "output_layer": NameRemapping("lm_head."), |
44 | | - } |
| 44 | +} |
45 | 45 |
|
46 | 46 |
|
47 | 47 | nemotron_h_causal_lm_import: dict[str, CustomModuleMapping] = { |
|
67 | 67 | "linear_fc2": NameRemapping("backbone.layers.{}.mixer.down_proj.", ROW_TP), |
68 | 68 | # MoE |
69 | 69 | "router": NameRemapping("model.layers.{}.mlp.gate.", REPLICATE), |
70 | | - "local_experts.linear_fc1": NameRemapping("backbone.layers.{}.mixer.experts.{}.up_proj", COL_ETP), |
71 | | - "local_experts.linear_fc2": NameRemapping("backbone.layers.{}.mixer.experts.{}.down_proj.", ROW_ETP), |
72 | | - "shared_experts.linear_fc1": NameRemapping("backbone.layers.{}.mixer.shared_experts.up_proj.", COL_TP), |
| 70 | + "local_experts.linear_fc1": NameRemapping( |
| 71 | + "backbone.layers.{}.mixer.experts.{}.up_proj", COL_ETP |
| 72 | + ), |
| 73 | + "local_experts.linear_fc2": NameRemapping( |
| 74 | + "backbone.layers.{}.mixer.experts.{}.down_proj.", ROW_ETP |
| 75 | + ), |
| 76 | + "shared_experts.linear_fc1": NameRemapping( |
| 77 | + "backbone.layers.{}.mixer.shared_experts.up_proj.", COL_TP |
| 78 | + ), |
73 | 79 | "shared_experts.linear_fc2": NameRemapping( |
74 | 80 | "backbone.layers.{}.mixer.shared_experts.down_proj.", ROW_TP |
75 | 81 | ), |
76 | | - |
77 | 82 | } |
78 | 83 |
|
79 | 84 |
|
|
103 | 108 | "local_experts.linear_fc1": NameRemapping("backbone.layers.{}.mixer.experts.{}.up_proj."), |
104 | 109 | "local_experts.linear_fc2": NameRemapping("backbone.layers.{}.mixer.experts.{}.down_proj."), |
105 | 110 | "shared_experts.linear_fc1": NameRemapping("backbone.layers.{}.mixer.shared_experts.up_proj."), |
106 | | - "shared_experts.linear_fc2": NameRemapping("backbone.layers.{}.mixer.shared_experts.down_proj."), |
107 | | - |
| 111 | + "shared_experts.linear_fc2": NameRemapping( |
| 112 | + "backbone.layers.{}.mixer.shared_experts.down_proj." |
| 113 | + ), |
108 | 114 | } |
0 commit comments