Skip to content

Commit 0047aa2

Browse files
authored
Temporarily disable CP tests (#898)
PyTorch trunk has a problematic PR that breaks CP. It is better to get test signals from the other parallelisms than no signals at all.
1 parent 8a92fb6 commit 0047aa2

File tree

1 file changed

+90
-90
lines changed

1 file changed

+90
-90
lines changed

tests/integration_tests.py

Lines changed: 90 additions & 90 deletions
Original file line number | Diff line number | Diff line change
@@ -296,28 +296,28 @@ def build_test_list():
296296
"hsdp",
297297
ngpu=4,
298298
),
299-
OverrideDefinitions(
300-
[
301-
[
302-
"--experimental.context_parallel_degree=4",
303-
"--experimental.context_parallel_rotate_method='allgather'",
304-
]
305-
],
306-
"CP (allgather)",
307-
"cp_allgather",
308-
ngpu=4,
309-
),
310-
OverrideDefinitions(
311-
[
312-
[
313-
"--experimental.context_parallel_degree=4",
314-
"--experimental.context_parallel_rotate_method='alltoall'",
315-
]
316-
],
317-
"CP (alltoall)",
318-
"cp_alltoall",
319-
ngpu=4,
320-
),
299+
# OverrideDefinitions(
300+
# [
301+
# [
302+
# "--experimental.context_parallel_degree=4",
303+
# "--experimental.context_parallel_rotate_method='allgather'",
304+
# ]
305+
# ],
306+
# "CP (allgather)",
307+
# "cp_allgather",
308+
# ngpu=4,
309+
# ),
310+
# OverrideDefinitions(
311+
# [
312+
# [
313+
# "--experimental.context_parallel_degree=4",
314+
# "--experimental.context_parallel_rotate_method='alltoall'",
315+
# ]
316+
# ],
317+
# "CP (alltoall)",
318+
# "cp_alltoall",
319+
# ngpu=4,
320+
# ),
321321
OverrideDefinitions(
322322
[
323323
[
@@ -330,74 +330,74 @@ def build_test_list():
330330
"hsdp+tp",
331331
ngpu=8,
332332
),
333-
OverrideDefinitions(
334-
[
335-
[
336-
"--training.data_parallel_shard_degree=2",
337-
"--experimental.context_parallel_degree=2",
338-
]
339-
],
340-
"FSDP+CP",
341-
"fsdp+cp",
342-
ngpu=4,
343-
),
344-
OverrideDefinitions(
345-
[
346-
[
347-
"--training.data_parallel_shard_degree=1",
348-
"--training.data_parallel_replicate_degree=2",
349-
"--experimental.context_parallel_degree=2",
350-
]
351-
],
352-
"HSDP+CP (with dp_shard)",
353-
"hsdp+cp_without_dp_shard",
354-
ngpu=4,
355-
),
356-
OverrideDefinitions(
357-
[
358-
[
359-
"--training.data_parallel_shard_degree=2",
360-
"--training.data_parallel_replicate_degree=2",
361-
"--experimental.context_parallel_degree=2",
362-
]
363-
],
364-
"HSDP+CP (without dp_shard)",
365-
"hsdp+cp_with_dp_shard",
366-
ngpu=8,
367-
),
368-
OverrideDefinitions(
369-
[
370-
[
371-
"--training.data_parallel_shard_degree=2",
372-
"--training.tensor_parallel_degree=2",
373-
"--experimental.context_parallel_degree=2",
374-
]
375-
],
376-
"FSDP+TP+CP",
377-
"fsdp+tp+cp",
378-
ngpu=8,
379-
),
380-
OverrideDefinitions(
381-
[
382-
[
383-
"--checkpoint.enable_checkpoint",
384-
"--training.tensor_parallel_degree=2",
385-
"--experimental.context_parallel_degree=2",
386-
"--training.enable_cpu_offload",
387-
"--optimizer.early_step_in_backward",
388-
],
389-
[
390-
"--training.tensor_parallel_degree=2",
391-
"--experimental.context_parallel_degree=2",
392-
"--training.data_parallel_replicate_degree=2",
393-
"--training.enable_cpu_offload",
394-
"--optimizer.early_step_in_backward",
395-
],
396-
],
397-
"Enable CPU Offload, Optimizer in backward with TP, DP, CP",
398-
"cpu_offload+opt_in_bwd+TP+DP+CP",
399-
ngpu=8,
400-
),
333+
# OverrideDefinitions(
334+
# [
335+
# [
336+
# "--training.data_parallel_shard_degree=2",
337+
# "--experimental.context_parallel_degree=2",
338+
# ]
339+
# ],
340+
# "FSDP+CP",
341+
# "fsdp+cp",
342+
# ngpu=4,
343+
# ),
344+
# OverrideDefinitions(
345+
# [
346+
# [
347+
# "--training.data_parallel_shard_degree=1",
348+
# "--training.data_parallel_replicate_degree=2",
349+
# "--experimental.context_parallel_degree=2",
350+
# ]
351+
# ],
352+
# "HSDP+CP (with dp_shard)",
353+
# "hsdp+cp_without_dp_shard",
354+
# ngpu=4,
355+
# ),
356+
# OverrideDefinitions(
357+
# [
358+
# [
359+
# "--training.data_parallel_shard_degree=2",
360+
# "--training.data_parallel_replicate_degree=2",
361+
# "--experimental.context_parallel_degree=2",
362+
# ]
363+
# ],
364+
# "HSDP+CP (without dp_shard)",
365+
# "hsdp+cp_with_dp_shard",
366+
# ngpu=8,
367+
# ),
368+
# OverrideDefinitions(
369+
# [
370+
# [
371+
# "--training.data_parallel_shard_degree=2",
372+
# "--training.tensor_parallel_degree=2",
373+
# "--experimental.context_parallel_degree=2",
374+
# ]
375+
# ],
376+
# "FSDP+TP+CP",
377+
# "fsdp+tp+cp",
378+
# ngpu=8,
379+
# ),
380+
# OverrideDefinitions(
381+
# [
382+
# [
383+
# "--checkpoint.enable_checkpoint",
384+
# "--training.tensor_parallel_degree=2",
385+
# "--experimental.context_parallel_degree=2",
386+
# "--training.enable_cpu_offload",
387+
# "--optimizer.early_step_in_backward",
388+
# ],
389+
# [
390+
# "--training.tensor_parallel_degree=2",
391+
# "--experimental.context_parallel_degree=2",
392+
# "--training.data_parallel_replicate_degree=2",
393+
# "--training.enable_cpu_offload",
394+
# "--optimizer.early_step_in_backward",
395+
# ],
396+
# ],
397+
# "Enable CPU Offload, Optimizer in backward with TP, DP, CP",
398+
# "cpu_offload+opt_in_bwd+TP+DP+CP",
399+
# ngpu=8,
400+
# ),
401401
OverrideDefinitions(
402402
[
403403
[

0 commit comments

Comments
 (0)