@@ -296,28 +296,28 @@ def build_test_list():
296296 "hsdp" ,
297297 ngpu = 4 ,
298298 ),
299- OverrideDefinitions (
300- [
301- [
302- "--experimental.context_parallel_degree=4" ,
303- "--experimental.context_parallel_rotate_method='allgather'" ,
304- ]
305- ],
306- "CP (allgather)" ,
307- "cp_allgather" ,
308- ngpu = 4 ,
309- ),
310- OverrideDefinitions (
311- [
312- [
313- "--experimental.context_parallel_degree=4" ,
314- "--experimental.context_parallel_rotate_method='alltoall'" ,
315- ]
316- ],
317- "CP (alltoall)" ,
318- "cp_alltoall" ,
319- ngpu = 4 ,
320- ),
299+ # OverrideDefinitions(
300+ # [
301+ # [
302+ # "--experimental.context_parallel_degree=4",
303+ # "--experimental.context_parallel_rotate_method='allgather'",
304+ # ]
305+ # ],
306+ # "CP (allgather)",
307+ # "cp_allgather",
308+ # ngpu=4,
309+ # ),
310+ # OverrideDefinitions(
311+ # [
312+ # [
313+ # "--experimental.context_parallel_degree=4",
314+ # "--experimental.context_parallel_rotate_method='alltoall'",
315+ # ]
316+ # ],
317+ # "CP (alltoall)",
318+ # "cp_alltoall",
319+ # ngpu=4,
320+ # ),
321321 OverrideDefinitions (
322322 [
323323 [
@@ -330,74 +330,74 @@ def build_test_list():
330330 "hsdp+tp" ,
331331 ngpu = 8 ,
332332 ),
333- OverrideDefinitions (
334- [
335- [
336- "--training.data_parallel_shard_degree=2" ,
337- "--experimental.context_parallel_degree=2" ,
338- ]
339- ],
340- "FSDP+CP" ,
341- "fsdp+cp" ,
342- ngpu = 4 ,
343- ),
344- OverrideDefinitions (
345- [
346- [
347- "--training.data_parallel_shard_degree=1" ,
348- "--training.data_parallel_replicate_degree=2" ,
349- "--experimental.context_parallel_degree=2" ,
350- ]
351- ],
352- "HSDP+CP (with dp_shard)" ,
353- "hsdp+cp_without_dp_shard" ,
354- ngpu = 4 ,
355- ),
356- OverrideDefinitions (
357- [
358- [
359- "--training.data_parallel_shard_degree=2" ,
360- "--training.data_parallel_replicate_degree=2" ,
361- "--experimental.context_parallel_degree=2" ,
362- ]
363- ],
364- "HSDP+CP (without dp_shard)" ,
365- "hsdp+cp_with_dp_shard" ,
366- ngpu = 8 ,
367- ),
368- OverrideDefinitions (
369- [
370- [
371- "--training.data_parallel_shard_degree=2" ,
372- "--training.tensor_parallel_degree=2" ,
373- "--experimental.context_parallel_degree=2" ,
374- ]
375- ],
376- "FSDP+TP+CP" ,
377- "fsdp+tp+cp" ,
378- ngpu = 8 ,
379- ),
380- OverrideDefinitions (
381- [
382- [
383- "--checkpoint.enable_checkpoint" ,
384- "--training.tensor_parallel_degree=2" ,
385- "--experimental.context_parallel_degree=2" ,
386- "--training.enable_cpu_offload" ,
387- "--optimizer.early_step_in_backward" ,
388- ],
389- [
390- "--training.tensor_parallel_degree=2" ,
391- "--experimental.context_parallel_degree=2" ,
392- "--training.data_parallel_replicate_degree=2" ,
393- "--training.enable_cpu_offload" ,
394- "--optimizer.early_step_in_backward" ,
395- ],
396- ],
397- "Enable CPU Offload, Optimizer in backward with TP, DP, CP" ,
398- "cpu_offload+opt_in_bwd+TP+DP+CP" ,
399- ngpu = 8 ,
400- ),
333+ # OverrideDefinitions(
334+ # [
335+ # [
336+ # "--training.data_parallel_shard_degree=2",
337+ # "--experimental.context_parallel_degree=2",
338+ # ]
339+ # ],
340+ # "FSDP+CP",
341+ # "fsdp+cp",
342+ # ngpu=4,
343+ # ),
344+ # OverrideDefinitions(
345+ # [
346+ # [
347+ # "--training.data_parallel_shard_degree=1",
348+ # "--training.data_parallel_replicate_degree=2",
349+ # "--experimental.context_parallel_degree=2",
350+ # ]
351+ # ],
352+ # "HSDP+CP (without dp_shard)",
353+ # "hsdp+cp_without_dp_shard",
354+ # ngpu=4,
355+ # ),
356+ # OverrideDefinitions(
357+ # [
358+ # [
359+ # "--training.data_parallel_shard_degree=2",
360+ # "--training.data_parallel_replicate_degree=2",
361+ # "--experimental.context_parallel_degree=2",
362+ # ]
363+ # ],
364+ # "HSDP+CP (with dp_shard)",
365+ # "hsdp+cp_with_dp_shard",
366+ # ngpu=8,
367+ # ),
368+ # OverrideDefinitions(
369+ # [
370+ # [
371+ # "--training.data_parallel_shard_degree=2",
372+ # "--training.tensor_parallel_degree=2",
373+ # "--experimental.context_parallel_degree=2",
374+ # ]
375+ # ],
376+ # "FSDP+TP+CP",
377+ # "fsdp+tp+cp",
378+ # ngpu=8,
379+ # ),
380+ # OverrideDefinitions(
381+ # [
382+ # [
383+ # "--checkpoint.enable_checkpoint",
384+ # "--training.tensor_parallel_degree=2",
385+ # "--experimental.context_parallel_degree=2",
386+ # "--training.enable_cpu_offload",
387+ # "--optimizer.early_step_in_backward",
388+ # ],
389+ # [
390+ # "--training.tensor_parallel_degree=2",
391+ # "--experimental.context_parallel_degree=2",
392+ # "--training.data_parallel_replicate_degree=2",
393+ # "--training.enable_cpu_offload",
394+ # "--optimizer.early_step_in_backward",
395+ # ],
396+ # ],
397+ # "Enable CPU Offload, Optimizer in backward with TP, DP, CP",
398+ # "cpu_offload+opt_in_bwd+TP+DP+CP",
399+ # ngpu=8,
400+ # ),
401401 OverrideDefinitions (
402402 [
403403 [
0 commit comments