|  | 
| 67 | 67 |     "linear_fc2": NameRemapping("backbone.layers.{}.mixer.down_proj.", ROW_TP), | 
| 68 | 68 |     # MoE | 
| 69 | 69 |     "router": NameRemapping("model.layers.{}.mlp.gate.", REPLICATE), | 
| 70 |  | -    "local_experts.linear_fc1": NameRemapping("backbone.layers.{}.mlp.experts.{}.up_proj", COL_ETP), | 
| 71 |  | -    "local_experts.linear_fc2": NameRemapping("backbone.layers.{}.mlp.experts.{}.down_proj.", ROW_ETP), | 
| 72 |  | -    "shared_experts.linear_fc1": NameRemapping("backbone.layers.{}.mlp.shared_experts.up_proj.", COL_TP), | 
|  | 70 | +    "local_experts.linear_fc1": NameRemapping("backbone.layers.{}.mixer.experts.{}.up_proj", COL_ETP), | 
|  | 71 | +    "local_experts.linear_fc2": NameRemapping("backbone.layers.{}.mixer.experts.{}.down_proj.", ROW_ETP), | 
|  | 72 | +    "shared_experts.linear_fc1": NameRemapping("backbone.layers.{}.mixer.shared_experts.up_proj.", COL_TP), | 
| 73 | 73 |     "shared_experts.linear_fc2": NameRemapping( | 
| 74 |  | -        "backbone.layers.{}.mlp.shared_experts.down_proj.", ROW_TP | 
|  | 74 | +        "backbone.layers.{}.mixer.shared_experts.down_proj.", ROW_TP | 
| 75 | 75 |     ), | 
| 76 | 76 | 
 | 
| 77 | 77 | } | 
|  | 
| 100 | 100 |     "linear_fc2": NameRemapping("backbone.layers.{}.mixer.down_proj."), | 
| 101 | 101 |     # MoE | 
| 102 | 102 |     "router": NameRemapping("backbone.layers.{}.mlp.gate."), | 
| 103 |  | -    "local_experts.linear_fc1": NameRemapping("backbone.layers.{}.mlp.experts.{}.up_proj."), | 
| 104 |  | -    "local_experts.linear_fc2": NameRemapping("backbone.layers.{}.mlp.experts.{}.down_proj."), | 
| 105 |  | -    "shared_experts.linear_fc1": NameRemapping("backbone.layers.{}.mlp.shared_experts.up_proj."), | 
| 106 |  | -    "shared_experts.linear_fc2": NameRemapping("backbone.layers.{}.mlp.shared_experts.down_proj."), | 
|  | 103 | +    "local_experts.linear_fc1": NameRemapping("backbone.layers.{}.mixer.experts.{}.up_proj."), | 
|  | 104 | +    "local_experts.linear_fc2": NameRemapping("backbone.layers.{}.mixer.experts.{}.down_proj."), | 
|  | 105 | +    "shared_experts.linear_fc1": NameRemapping("backbone.layers.{}.mixer.shared_experts.up_proj."), | 
|  | 106 | +    "shared_experts.linear_fc2": NameRemapping("backbone.layers.{}.mixer.shared_experts.down_proj."), | 
| 107 | 107 | 
 | 
| 108 | 108 | } | 
0 commit comments