File tree Expand file tree Collapse file tree 3 files changed +4
-6
lines changed
Expand file tree Collapse file tree 3 files changed +4
-6
lines changed Original file line number Diff line number Diff line change @@ -20,6 +20,10 @@ TRAIN_FILE=${TRAIN_FILE:-"torchtitan.train"}
2020COMM_MODE=${COMM_MODE:- " " }
2121
2222TORCHFT_LIGHTHOUSE=${TORCHFT_LIGHTHOUSE:- " http://localhost:29510" }
23+ export NCCL_NVLS_ENABLE=0
24+ export NVSHMEM_DISABLE_NVLS=0
25+ export TORCH_SHOW_CPP_STACKTRACES=1
26+ export TORCH_CPP_LOG_LEVEL=INFO
2327
2428if [ -n " $COMM_MODE " ]; then
2529 # Communication mode specified: validate configuration or run in debug mode
Original file line number Diff line number Diff line change @@ -78,7 +78,6 @@ def build_features_test_list() -> list[OverrideDefinitions]:
7878 "2D compile" ,
7979 "2d_compile" ,
8080 ),
81- # TODO: re-enable this test once the async TP CI issue is fixed
8281 OverrideDefinitions (
8382 [
8483 [
@@ -89,7 +88,6 @@ def build_features_test_list() -> list[OverrideDefinitions]:
8988 ],
9089 "2D async TP compile" ,
9190 "2d_asynctp_compile" ,
92- disabled = True ,
9391 ),
9492 OverrideDefinitions (
9593 [
Original file line number Diff line number Diff line change @@ -19,7 +19,6 @@ def build_h100_tests_list() -> list[OverrideDefinitions]:
1919 same root config file.
2020 """
2121 integration_tests_flavors = [
22- # TODO: re-enable this test once the async TP issue is fixed
2322 OverrideDefinitions (
2423 [
2524 [
@@ -30,7 +29,6 @@ def build_h100_tests_list() -> list[OverrideDefinitions]:
3029 ],
3130 "2D async TP compile" ,
3231 "2d_asynctp_compile" ,
33- disabled = True ,
3432 ),
3533 OverrideDefinitions (
3634 [
@@ -43,7 +41,6 @@ def build_h100_tests_list() -> list[OverrideDefinitions]:
4341 "Float8 test" ,
4442 "float8" ,
4543 ),
46- # TODO: re-enable this test once the async TP issue is fixed
4744 OverrideDefinitions (
4845 [
4946 [
@@ -60,7 +57,6 @@ def build_h100_tests_list() -> list[OverrideDefinitions]:
6057 "FSDP+async TP+PP+torch.compile+Float8" ,
6158 "fsdp+tp+cp+compile+float8" ,
6259 ngpu = 8 ,
63- disabled = True ,
6460 ),
6561 OverrideDefinitions (
6662 [
You can’t perform that action at this time.
0 commit comments