|
26 | 26 | NameRemapping, |
27 | 27 | QKVMerging, |
28 | 28 | QKVSlicing, |
29 | | - GatedMLPSlicing, |
30 | | - GatedMLPMerging, |
31 | 29 | ) |
32 | 30 |
|
33 | 31 | # Example on adding a new CausalLM. |
|
69 | 67 | "linear_fc2": NameRemapping("backbone.layers.{}.mixer.down_proj.", ROW_TP), |
70 | 68 | # MoE |
71 | 69 | "router": NameRemapping("model.layers.{}.mlp.gate.", REPLICATE), |
72 | | - "local_experts.linear_fc1": NameRemapping("model.layers.{}.mlp.experts.{}.up_proj", COL_ETP), |
73 | | - "local_experts.linear_fc2": NameRemapping("model.layers.{}.mlp.experts.{}.down_proj.", ROW_ETP), |
74 | | - "shared_experts.linear_fc1": NameRemapping("model.layers.{}.mlp.shared_experts.up_proj.", COL_TP), |
| 70 | + "local_experts.linear_fc1": NameRemapping("backbone.layers.{}.mlp.experts.{}.up_proj", COL_ETP), |
| 71 | + "local_experts.linear_fc2": NameRemapping("backbone.layers.{}.mlp.experts.{}.down_proj.", ROW_ETP), |
| 72 | + "shared_experts.linear_fc1": NameRemapping("backbone.layers.{}.mlp.shared_experts.up_proj.", COL_TP), |
75 | 73 | "shared_experts.linear_fc2": NameRemapping( |
76 | | - "model.layers.{}.mlp.shared_experts.down_proj.", ROW_TP |
| 74 | + "backbone.layers.{}.mlp.shared_experts.down_proj.", ROW_TP |
77 | 75 | ), |
78 | 76 |
|
79 | 77 | } |
|
101 | 99 | "linear_fc1": NameRemapping("backbone.layers.{}.mixer.up_proj."), |
102 | 100 | "linear_fc2": NameRemapping("backbone.layers.{}.mixer.down_proj."), |
103 | 101 | # MoE |
104 | | - "router": NameRemapping("model.layers.{}.mlp.gate."), |
105 | | - "local_experts.linear_fc1": NameRemapping("model.layers.{}.mlp.experts.{}.up_proj."), |
106 | | - "local_experts.linear_fc2": NameRemapping("model.layers.{}.mlp.experts.{}.down_proj."), |
107 | | - "shared_experts.linear_fc1": NameRemapping("model.layers.{}.mlp.shared_experts.up_proj."), |
108 | | - "shared_experts.linear_fc2": NameRemapping("model.layers.{}.mlp.shared_experts.down_proj."), |
| 102 | + "router": NameRemapping("backbone.layers.{}.mlp.gate."), |
| 103 | + "local_experts.linear_fc1": NameRemapping("backbone.layers.{}.mlp.experts.{}.up_proj."), |
| 104 | + "local_experts.linear_fc2": NameRemapping("backbone.layers.{}.mlp.experts.{}.down_proj."), |
| 105 | + "shared_experts.linear_fc1": NameRemapping("backbone.layers.{}.mlp.shared_experts.up_proj."), |
| 106 | + "shared_experts.linear_fc2": NameRemapping("backbone.layers.{}.mlp.shared_experts.down_proj."), |
109 | 107 |
|
110 | 108 | } |
0 commit comments