
Commit bfe8a04

Fix shard_model_param in FP32
Parameter shards should never participate in autograd; the FP32 path was inconsistent with the FP16/BF16 paths.
1 parent: 543579d

1 file changed

Lines changed: 3 additions & 1 deletion

megatron/core/optimizer/distrib_optimizer.py
@@ -415,7 +415,9 @@ def _build_model_and_main_param_groups(

             # fp32 params.
             elif model_param.type() == 'torch.cuda.FloatTensor':
-                shard_model_param = model_param.view(-1)[param_range.start : param_range.end]
+                shard_model_param = model_param.detach().view(-1)[
+                    param_range.start : param_range.end
+                ]
                 model_fp32_params_this_group.append(model_param)
                 shard_fp32_params_this_group.append(shard_model_param)
                 tensor_parallel.copy_tensor_model_parallel_attributes(
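For context, a minimal standalone sketch (not part of the commit; the tensor shape and slice range are made up) of why the `.detach()` matters: slicing a view of a leaf parameter keeps the shard in the autograd graph, while detaching first yields a shard that aliases the same storage but never collects gradients, matching how the FP16/BF16 shards are built.

```python
import torch

# Stand-in for a model parameter handled by the distributed optimizer.
model_param = torch.nn.Parameter(torch.randn(4, 4))

# Without detach(): the shard is a view of a leaf that requires grad,
# so it participates in autograd (the pre-fix FP32 behavior).
shard_with_grad = model_param.view(-1)[0:8]
print(shard_with_grad.requires_grad)   # True

# With detach(): the shard shares storage with the parameter but is
# cut out of the autograd graph, consistent with the FP16/BF16 path.
shard_detached = model_param.detach().view(-1)[0:8]
print(shard_detached.requires_grad)    # False

# Both shards alias the parameter's storage, so in-place updates of the
# shard still update the underlying model parameter.
print(shard_detached.data_ptr() == model_param.data_ptr())  # True
```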
