6 changes: 5 additions & 1 deletion .github/workflows/integration_test_8gpu_simple_fsdp.yaml
@@ -50,7 +50,11 @@ jobs:
python -m pip install --force-reinstall --pre torch --index-url https://download.pytorch.org/whl/nightly/cu126

mkdir artifacts-to-be-uploaded
python -m torchtitan.experiments.simple_fsdp.tests.integration_tests artifacts-to-be-uploaded --ngpu 8
# Run front-end integration tests of SimpleFSDP
python -m torchtitan.experiments.simple_fsdp.tests.frontend_integration_tests artifacts-to-be-uploaded --ngpu 8

# Run backend pass integration tests of SimpleFSDP
python -m torchtitan.experiments.simple_fsdp.tests.compiler_pass_integration_tests artifacts-to-be-uploaded --ngpu 8

# Run the numerics unit tests of SimpleFSDP
torchrun --nproc-per-node=8 -m pytest torchtitan/experiments/simple_fsdp/tests/test_numerics.py -v
15 changes: 13 additions & 2 deletions run_train.sh
@@ -16,12 +16,23 @@ NGPU=${NGPU:-"8"}
export LOG_RANK=${LOG_RANK:-0}
CONFIG_FILE=${CONFIG_FILE:-"./torchtitan/models/llama3/train_configs/debug_model.toml"}
TRAIN_FILE=${TRAIN_FILE:-"torchtitan.train"}

set +x
copy_args=("$@")
for ((i=0; i<${#copy_args[@]}; i++)); do
if [[ ${copy_args[i]} == --comm.mode ]]; then
CONFIG_COMM_MODE="${copy_args[i+1]}"
fi
done
set -x

CONFIG_COMM_MODE=${CONFIG_COMM_MODE:-"default"}
# COMM_MODE options: "fake_backend" (dry run), "local_tensor" (debug mode), or empty for normal training
COMM_MODE=${COMM_MODE:-""}
COMM_MODE=${COMM_MODE:-$CONFIG_COMM_MODE}

TORCHFT_LIGHTHOUSE=${TORCHFT_LIGHTHOUSE:-"http://localhost:29510"}

if [ -n "$COMM_MODE" ]; then
if [ "$COMM_MODE" != "default" ]; then
# Communication mode specified: validate configuration or run in debug mode
echo "Running with comm_mode=${COMM_MODE}"
NGPU="${NGPU}" LOCAL_RANK=0 python3 -m "${TRAIN_FILE}" --job.config_file "${CONFIG_FILE}" "$@" --comm.mode=${COMM_MODE} --training.steps=1
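For reference, a minimal usage sketch of the reworked comm-mode resolution above, assuming the rest of run_train.sh is unchanged:

# "fake_backend" via the new --comm.mode flag: the arg scan picks it up as the
# default COMM_MODE, so the script takes the single-step dry-run validation path.
./run_train.sh --comm.mode fake_backend

# An explicit COMM_MODE env var still takes precedence over the flag-derived default;
# "local_tensor" is the debug mode.
COMM_MODE=local_tensor ./run_train.sh

# With neither the flag nor the env var set, COMM_MODE resolves to "default" and
# normal training runs.
./run_train.sh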
Contributor:
Not sure if splitting into two tests will incur overhead.
@wwwjn does this incur any overhead to CI?

@@ -0,0 +1,255 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import argparse
import os

from tests.integration_tests import OverrideDefinitions
from tests.integration_tests.run_tests import run_tests


def build_simple_fsdp_test_list() -> list[OverrideDefinitions]:
"""
key is the config file name and value is a list of OverrideDefinitions
that is used to generate variations of integration tests based on the
same root config file.
"""
integration_tests_flavors = [
OverrideDefinitions(
[
[
"--model.name simple_fsdp.llama3",
"--compile.enable",
"--job.custom_config_module=torchtitan.experiments.simple_fsdp.job_config",
"--compile.backend aot_eager",
"--compile.graph_passes auto_bucketing",
"--comm.mode fake_backend",
],
],
"1D+autobucketing",
"1d_autobucketing",
ngpu=8,
),
OverrideDefinitions(
[
[
"--model.name simple_fsdp.llama3",
"--compile.enable",
"--job.custom_config_module=torchtitan.experiments.simple_fsdp.job_config",
"--compile.backend aot_eager",
"--compile.graph_passes transformer_block_bucketing",
"--comm.mode fake_backend",
],
],
"1D+transformer_block_bucketing",
"1d_transformer_block_bucketing",
ngpu=8,
),
OverrideDefinitions(
[
[
"--model.name simple_fsdp.llama3",
"--parallelism.tensor_parallel_degree 2",
"--compile.enable",
"--job.custom_config_module=torchtitan.experiments.simple_fsdp.job_config",
"--compile.backend aot_eager",
"--compile.graph_passes auto_bucketing",
"--comm.mode fake_backend",
],
],
"2D+autobucketing",
"2d_autobucketing",
ngpu=8,
),
OverrideDefinitions(
[
[
"--model.name simple_fsdp.llama3",
"--parallelism.tensor_parallel_degree 2",
"--compile.enable",
"--job.custom_config_module=torchtitan.experiments.simple_fsdp.job_config",
"--compile.backend aot_eager",
"--compile.graph_passes transformer_block_bucketing",
"--comm.mode fake_backend",
],
],
"2D+transformer_block_bucketing",
"2d_transformer_block_bucketing",
ngpu=8,
),
# TODO(ruisizhang123): add back after passes + PP is supported
# OverrideDefinitions(
# [
# [
# "--model.name simple_fsdp.llama3",
# "--parallelism.tensor_parallel_degree 2",
# "--parallelism.pipeline_parallel_degree 2",
# "--compile.enable",
# "--job.custom_config_module=torchtitan.experiments.simple_fsdp.job_config",
# "--compile.backend aot_eager",
# "--compile.graph_passes auto_bucketing",
# "--comm.mode fake_backend",
# ],
# ],
# "3D+autobucketing",
# "3d_autobucketing",
# ngpu=8,
# ),
# OverrideDefinitions(
# [
# [
# "--model.name simple_fsdp.llama3",
# "--parallelism.tensor_parallel_degree 2",
# "--parallelism.pipeline_parallel_degree 2",
# "--compile.enable",
# "--job.custom_config_module=torchtitan.experiments.simple_fsdp.job_config",
# "--compile.backend aot_eager",
# "--compile.graph_passes transformer_block_bucketing",
# "--comm.mode fake_backend",
# ],
# ],
# "3D+transformer_block_bucketing",
# "3d_transformer_block_bucketing",
# ngpu=8,
# ),
# OverrideDefinitions(
# [
# [
# "--model.name simple_fsdp.llama3",
# "--parallelism.tensor_parallel_degree 2",
# "--parallelism.context_parallel_degree 2",
# "--compile.enable",
# "--job.custom_config_module=torchtitan.experiments.simple_fsdp.job_config",
# "--compile.backend aot_eager",
# "--compile.graph_passes auto_bucketing",
# "--comm.mode fake_backend",
# ],
# ],
# "FSDP+TP+CP+autobucketing",
# "fsdp+tp+cp_autobucketing",
# ngpu=8,
# ),
OverrideDefinitions(
[
[
"--model.name simple_fsdp.llama3",
"--parallelism.tensor_parallel_degree 2",
"--parallelism.context_parallel_degree 2",
"--compile.enable",
"--job.custom_config_module=torchtitan.experiments.simple_fsdp.job_config",
"--compile.backend aot_eager",
"--compile.graph_passes transformer_block_bucketing",
"--comm.mode fake_backend",
],
],
"FSDP+TP+CP+transformer_block_bucketing",
"fsdp+tp+cp_transformer_block_bucketing",
ngpu=8,
),
OverrideDefinitions(
[
[
"--model.name simple_fsdp.deepseek_v3",
"--parallelism.data_parallel_shard_degree 4",
"--parallelism.expert_parallel_degree 2",
"--job.custom_config_module=torchtitan.experiments.simple_fsdp.job_config",
"--compile.backend aot_eager",
"--compile.graph_passes auto_bucketing",
"--comm.mode fake_backend",
],
],
"FSDP+EP+autobucketing",
"fsdp+ep_autobucketing",
ngpu=4,
),
OverrideDefinitions(
[
[
"--model.name simple_fsdp.deepseek_v3",
"--parallelism.data_parallel_shard_degree 4",
"--parallelism.expert_parallel_degree 2",
"--job.custom_config_module=torchtitan.experiments.simple_fsdp.job_config",
"--compile.backend aot_eager",
"--compile.graph_passes transformer_block_bucketing",
"--comm.mode fake_backend",
],
],
"FSDP+EP+transformer_block_bucketing",
"fsdp+ep_transformer_block_bucketing",
ngpu=4,
),
OverrideDefinitions(
[
[
"--model.name simple_fsdp.deepseek_v3",
"--parallelism.data_parallel_shard_degree 2",
"--parallelism.tensor_parallel_degree 2",
"--parallelism.expert_parallel_degree 4",
"--parallelism.expert_tensor_parallel_degree 1",
"--job.custom_config_module=torchtitan.experiments.simple_fsdp.job_config",
"--compile.backend aot_eager",
"--compile.graph_passes auto_bucketing",
"--comm.mode fake_backend",
],
],
"FSDP+TP+EP+autobucketing",
"fsdp+tp+ep_autobucketing",
ngpu=4,
),
OverrideDefinitions(
[
[
"--model.name simple_fsdp.deepseek_v3",
"--parallelism.data_parallel_shard_degree 2",
"--parallelism.tensor_parallel_degree 2",
"--parallelism.expert_parallel_degree 4",
"--parallelism.expert_tensor_parallel_degree 1",
"--job.custom_config_module=torchtitan.experiments.simple_fsdp.job_config",
"--compile.backend aot_eager",
"--compile.graph_passes transformer_block_bucketing",
"--comm.mode fake_backend",
],
],
"FSDP+TP+EP+transformer_block_bucketing",
"fsdp+tp+ep_transformer_block_bucketing",
ngpu=4,
),
]
return integration_tests_flavors


_TEST_SUITES_FUNCTION = {
"simple_fsdp": build_simple_fsdp_test_list,
}


def main():
parser = argparse.ArgumentParser()
parser.add_argument("output_dir")
parser.add_argument(
"--config_path",
default="./tests/integration_tests/base_config.toml",
help="Base config path for integration tests. This is the config that will be used as a base for all tests.",
)
parser.add_argument(
"--test_name",
default="all",
help="test to run, acceptable values: `test_name` in `build_test_list` (default: all)",
)
parser.add_argument("--ngpu", default=8, type=int)
args = parser.parse_args()

if not os.path.exists(args.output_dir):
os.makedirs(args.output_dir)
if os.listdir(args.output_dir):
raise RuntimeError("Please provide an empty output directory.")

test_list = _TEST_SUITES_FUNCTION["simple_fsdp"]()
run_tests(args, test_list)


if __name__ == "__main__":
main()
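Assuming this new file is the compiler-pass test driver that the workflow change above invokes as torchtitan.experiments.simple_fsdp.tests.compiler_pass_integration_tests (the filename itself is not shown in this excerpt), a single flavor could be run locally along these lines; the lower-case identifier in each OverrideDefinitions entry appears to be the `test_name` used for filtering:

# Hypothetical local invocation; the module path is assumed from the CI workflow above.
# The positional argument is an output directory that must be empty (or not yet exist);
# --test_name selects one flavor, e.g. 1d_autobucketing, and --ngpu should match its ngpu.
python -m torchtitan.experiments.simple_fsdp.tests.compiler_pass_integration_tests \
    /tmp/simple_fsdp_artifacts --test_name 1d_autobucketing --ngpu 8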
@@ -29,32 +29,6 @@ def build_simple_fsdp_test_list() -> list[OverrideDefinitions]:
"1D",
"1d",
),
OverrideDefinitions(
[
[
"--model.name simple_fsdp.llama3",
"--compile.enable",
"--job.custom_config_module=torchtitan.experiments.simple_fsdp.job_config",
"--compile.backend aot_eager",
"--compile.graph_passes auto_bucketing",
],
],
"1D+autobucketing",
"1d_autobucketing",
),
OverrideDefinitions(
[
[
"--model.name simple_fsdp.llama3",
"--compile.enable",
"--job.custom_config_module=torchtitan.experiments.simple_fsdp.job_config",
"--compile.backend aot_eager",
"--compile.graph_passes transformer_block_bucketing",
],
],
"1D+transformer_block_bucketing",
"1d_transformer_block_bucketing",
),
OverrideDefinitions(
[
[