
Commit

fix
liym27 committed Feb 27, 2025
1 parent 45b5012 commit c1114d6
Showing 2 changed files with 4 additions and 3 deletions.
paddlenlp/transformers/qwen/modeling_auto.py (1 addition, 1 deletion)
@@ -538,7 +538,7 @@ def __init__(self, config):
         self.recompute_granularity = config.recompute_granularity

         self.wte = nn.Embedding(self.vocab_size, self.embed_dim)
-        self.wte.weight = dist.shard_tensor(self.wte.weight, get_mesh(), [dist.Replicate(), dist.Shard(0)])
+        self.wte.weight = dist.shard_tensor(self.wte.weight, get_mesh(), [dist.Replicate(), dist.Shard(1)])
         self.drop = nn.Dropout(config.emb_dropout_prob)

         self.h = nn.LayerList(
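The substantive change: the embedding weight is now sharded along dim 1 (the embedding dimension) on the second mesh axis, rather than dim 0 (the vocabulary dimension). Below is a minimal sketch of what this placement means, using Paddle's public auto-parallel API; the 2-D mesh shape, the dim names, and the toy sizes are assumptions, and get_mesh() in modeling_auto.py is assumed to return a comparable mesh.

    # Sketch only (not from this commit): how dist.shard_tensor places the
    # embedding weight on a 2-D mesh. Mesh layout and sizes are assumptions.
    import paddle.distributed as dist
    from paddle import nn

    # 4 devices: mesh axis 0 = data parallel, axis 1 = model parallel.
    mesh = dist.ProcessMesh([[0, 1], [2, 3]], dim_names=["dp", "mp"])

    vocab_size, embed_dim = 1000, 64  # toy sizes
    wte = nn.Embedding(vocab_size, embed_dim)

    # One placement per mesh axis:
    #   axis 0: Replicate -> each data-parallel group holds a full copy
    #   axis 1: Shard(1)  -> split along dim 1 (embed_dim); before the fix
    #                        this was Shard(0), which split the vocab dim
    wte.weight = dist.shard_tensor(
        wte.weight, mesh, [dist.Replicate(), dist.Shard(1)]
    )

With Shard(1), each model-parallel rank holds a [vocab_size, embed_dim / mp_degree] slice of the embedding table instead of a slice of the vocabulary rows.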
Qwen pretraining argument JSON (file path not shown in this view; 3 additions, 2 deletions)
@@ -2,6 +2,7 @@
 "model_name_or_path": "qwen/qwen-14b",
 "tokenizer_name_or_path": "qwen/qwen-14b",
 "input_dir": "./data",
+"num_hidden_layers": 10,
 "output_dir": "./checkpoints/qwen_pretrain_ckpts",
 "per_device_train_batch_size": 1,
 "gradient_accumulation_steps": 2,
@@ -25,8 +26,8 @@
 "min_learning_rate": 5e-06,
 "scale_loss": 1024,
 "warmup_steps": 30,
-"logging_steps": 5,
-"max_steps": 200,
+"logging_steps": 1,
+"max_steps": 50,
 "save_steps": 1000,
 "eval_steps": 10000,
 "weight_decay": 0.01,
