From bc9d66049d885f0c2125bafc8339e5b95c38c91d Mon Sep 17 00:00:00 2001 From: ruisizhang123 Date: Thu, 20 Nov 2025 15:23:28 -0800 Subject: [PATCH 1/2] add CI to guard compiler optimization passes --- .../integration_test_8gpu_simple_fsdp.yaml | 6 +- tests/integration_tests/run_tests.py | 13 +- .../tests/compiler_pass_integration_tests.py | 261 ++++++++++++++++++ ...tests.py => frontend_integration_tests.py} | 26 -- 4 files changed, 277 insertions(+), 29 deletions(-) create mode 100755 torchtitan/experiments/simple_fsdp/tests/compiler_pass_integration_tests.py rename torchtitan/experiments/simple_fsdp/tests/{integration_tests.py => frontend_integration_tests.py} (92%) diff --git a/.github/workflows/integration_test_8gpu_simple_fsdp.yaml b/.github/workflows/integration_test_8gpu_simple_fsdp.yaml index 9a1a0a2866..d0e642a7e4 100644 --- a/.github/workflows/integration_test_8gpu_simple_fsdp.yaml +++ b/.github/workflows/integration_test_8gpu_simple_fsdp.yaml @@ -50,7 +50,11 @@ jobs: python -m pip install --force-reinstall --pre torch --index-url https://download.pytorch.org/whl/nightly/cu126 mkdir artifacts-to-be-uploaded - python -m torchtitan.experiments.simple_fsdp.tests.integration_tests artifacts-to-be-uploaded --ngpu 8 + # Run front-end integration tests of SimpleFSDP + python -m torchtitan.experiments.simple_fsdp.tests.frontend_integration_tests artifacts-to-be-uploaded --ngpu 8 + + # Run backend pass integration tests of SimpleFSDP + python -m torchtitan.experiments.simple_fsdp.tests.compiler_pass_integration_tests artifacts-to-be-uploaded --ngpu 8 --comm_mode local_tensor # Run the numerics unit tests of SimpleFSDP torchrun --nproc-per-node=8 -m pytest torchtitan/experiments/simple_fsdp/tests/test_numerics.py -v diff --git a/tests/integration_tests/run_tests.py b/tests/integration_tests/run_tests.py index 7081215c83..336c7e815c 100644 --- a/tests/integration_tests/run_tests.py +++ b/tests/integration_tests/run_tests.py @@ -29,7 +29,9 @@ def _run_cmd(cmd): return subprocess.run([cmd], text=True, shell=True) -def run_single_test(test_flavor: OverrideDefinitions, full_path: str, output_dir: str): +def run_single_test( + test_flavor: OverrideDefinitions, full_path: str, output_dir: str, comm_mode: str +): # run_test supports sequence of tests. test_name = test_flavor.test_name dump_folder_arg = f"--job.dump_folder {output_dir}/{test_name}" @@ -37,7 +39,8 @@ def run_single_test(test_flavor: OverrideDefinitions, full_path: str, output_dir all_ranks = ",".join(map(str, range(test_flavor.ngpu))) for idx, override_arg in enumerate(test_flavor.override_args): - cmd = f"CONFIG_FILE={full_path} NGPU={test_flavor.ngpu} LOG_RANK={all_ranks} ./run_train.sh" + cmd = f"CONFIG_FILE={full_path} COMM_MODE={comm_mode} NGPU={test_flavor.ngpu} LOG_RANK={all_ranks} ./run_train.sh" + # dump compile trace for debugging purpose cmd = f'TORCH_TRACE="{output_dir}/{test_name}/compile_trace" ' + cmd @@ -119,6 +122,12 @@ def main(): parser.add_argument( "output_dir", help="Directory to dump results generated by tests" ) + parser.add_argument( + "comm_mode", + default="default", + choices=["default", "fake_backend", "local_tensor"], + help="Communication mode to validate tests", + ) parser.add_argument( "--gpu_arch_type", default="cuda", diff --git a/torchtitan/experiments/simple_fsdp/tests/compiler_pass_integration_tests.py b/torchtitan/experiments/simple_fsdp/tests/compiler_pass_integration_tests.py new file mode 100755 index 0000000000..f603e1e605 --- /dev/null +++ b/torchtitan/experiments/simple_fsdp/tests/compiler_pass_integration_tests.py @@ -0,0 +1,261 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +import argparse +import os + +from tests.integration_tests import OverrideDefinitions +from tests.integration_tests.run_tests import run_tests + + +def build_simple_fsdp_test_list() -> list[OverrideDefinitions]: + """ + key is the config file name and value is a list of OverrideDefinitions + that is used to generate variations of integration tests based on the + same root config file. + """ + integration_tests_flavors = [ + OverrideDefinitions( + [ + [ + "--model.name simple_fsdp.llama3", + "--model.flavor 8B", + "--compile.enable", + "--job.custom_config_module=torchtitan.experiments.simple_fsdp.job_config", + "--compile.backend aot_eager", + "--compile.graph_passes auto_bucketing", + ], + ], + "1D+autobucketing", + "1d_autobucketing", + ngpu=8, + ), + OverrideDefinitions( + [ + [ + "--model.name simple_fsdp.llama3", + "--model.flavor 8B", + "--compile.enable", + "--job.custom_config_module=torchtitan.experiments.simple_fsdp.job_config", + "--compile.backend aot_eager", + "--compile.graph_passes transformer_block_bucketing", + ], + ], + "1D+transformer_block_bucketing", + "1d_transformer_block_bucketing", + ngpu=8, + ), + OverrideDefinitions( + [ + [ + "--model.name simple_fsdp.llama3", + "--model.flavor 8B", + "--parallelism.tensor_parallel_degree 2", + "--compile.enable", + "--job.custom_config_module=torchtitan.experiments.simple_fsdp.job_config", + "--compile.backend aot_eager", + "--compile.graph_passes auto_bucketing", + ], + ], + "2D+autobucketing", + "2d_autobucketing", + ngpu=8, + ), + OverrideDefinitions( + [ + [ + "--model.name simple_fsdp.llama3", + "--model.flavor 8B", + "--parallelism.tensor_parallel_degree 2", + "--compile.enable", + "--job.custom_config_module=torchtitan.experiments.simple_fsdp.job_config", + "--compile.backend aot_eager", + "--compile.graph_passes transformer_block_bucketing", + ], + ], + "2D+transformer_block_bucketing", + "2d_transformer_block_bucketing", + ngpu=8, + ), + # TODO(ruisizhang123): add back after passes + PP is supported + # OverrideDefinitions( + # [ + # [ + # "--model.name simple_fsdp.llama3", + # "--model.flavor 8B", + # "--parallelism.tensor_parallel_degree 2", + # "--parallelism.pipeline_parallel_degree 2", + # "--compile.enable", + # "--job.custom_config_module=torchtitan.experiments.simple_fsdp.job_config", + # "--compile.backend aot_eager", + # "--compile.graph_passes auto_bucketing", + # ], + # ], + # "3D+autobucketing", + # "3d_autobucketing", + # ngpu=8, + # ), + # OverrideDefinitions( + # [ + # [ + # "--model.name simple_fsdp.llama3", + # "--model.flavor 8B", + # "--parallelism.tensor_parallel_degree 2", + # "--parallelism.pipeline_parallel_degree 2", + # "--compile.enable", + # "--job.custom_config_module=torchtitan.experiments.simple_fsdp.job_config", + # "--compile.backend aot_eager", + # "--compile.graph_passes transformer_block_bucketing", + # ], + # ], + # "3D+transformer_block_bucketing", + # "3d_transformer_block_bucketing", + # ngpu=8, + # ), + # OverrideDefinitions( + # [ + # [ + # "--model.name simple_fsdp.llama3", + # "--model.flavor 8B", + # "--parallelism.tensor_parallel_degree 2", + # "--parallelism.context_parallel_degree 2", + # "--compile.enable", + # "--job.custom_config_module=torchtitan.experiments.simple_fsdp.job_config", + # "--compile.backend aot_eager", + # "--compile.graph_passes auto_bucketing", + # ], + # ], + # "FSDP+TP+CP+autobucketing", + # "fsdp+tp+cp_autobucketing", + # ngpu=8, + # ), + OverrideDefinitions( + [ + [ + "--model.name simple_fsdp.llama3", + "--model.flavor 8B", + "--parallelism.tensor_parallel_degree 2", + "--parallelism.context_parallel_degree 2", + "--compile.enable", + "--job.custom_config_module=torchtitan.experiments.simple_fsdp.job_config", + "--compile.backend aot_eager", + "--compile.graph_passes transformer_block_bucketing", + ], + ], + "FSDP+TP+CP+transformer_block_bucketing", + "fsdp+tp+cp_transformer_block_bucketing", + ngpu=8, + ), + OverrideDefinitions( + [ + [ + "--model.name simple_fsdp.deepseek_v3", + "--model.flavor 16B", + "--parallelism.data_parallel_shard_degree 4", + "--parallelism.expert_parallel_degree 2", + "--job.custom_config_module=torchtitan.experiments.simple_fsdp.job_config", + "--compile.backend aot_eager", + "--compile.graph_passes auto_bucketing", + ], + ], + "FSDP+EP+autobucketing", + "fsdp+ep_autobucketing", + ngpu=4, + ), + OverrideDefinitions( + [ + [ + "--model.name simple_fsdp.deepseek_v3", + "--model.flavor 16B", + "--parallelism.data_parallel_shard_degree 4", + "--parallelism.expert_parallel_degree 2", + "--job.custom_config_module=torchtitan.experiments.simple_fsdp.job_config", + "--compile.backend aot_eager", + "--compile.graph_passes transformer_block_bucketing", + ], + ], + "FSDP+EP+transformer_block_bucketing", + "fsdp+ep_transformer_block_bucketing", + ngpu=4, + ), + OverrideDefinitions( + [ + [ + "--model.name simple_fsdp.deepseek_v3", + "--model.flavor 16B", + "--parallelism.data_parallel_shard_degree 2", + "--parallelism.tensor_parallel_degree 2", + "--parallelism.expert_parallel_degree 4", + "--parallelism.expert_tensor_parallel_degree 1", + "--job.custom_config_module=torchtitan.experiments.simple_fsdp.job_config", + "--compile.backend aot_eager", + "--compile.graph_passes auto_bucketing", + ], + ], + "FSDP+TP+EP+autobucketing", + "fsdp+tp+ep_autobucketing", + ngpu=4, + ), + OverrideDefinitions( + [ + [ + "--model.name simple_fsdp.deepseek_v3", + "--model.flavor 16B", + "--parallelism.data_parallel_shard_degree 2", + "--parallelism.tensor_parallel_degree 2", + "--parallelism.expert_parallel_degree 4", + "--parallelism.expert_tensor_parallel_degree 1", + "--job.custom_config_module=torchtitan.experiments.simple_fsdp.job_config", + "--compile.backend aot_eager", + "--compile.graph_passes transformer_block_bucketing", + ], + ], + "FSDP+TP+EP+transformer_block_bucketing", + "fsdp+tp+ep_transformer_block_bucketing", + ngpu=4, + ), + ] + return integration_tests_flavors + + +_TEST_SUITES_FUNCTION = { + "simple_fsdp": build_simple_fsdp_test_list, +} + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("output_dir") + parser.add_argument( + "--comm_mode", + default="default", + choices=["default", "fake_backend", "local_tensor"], + help="Communication mode to validate tests", + ) + parser.add_argument( + "--config_path", + default="./tests/integration_tests/base_config.toml", + help="Base config path for integration tests. This is the config that will be used as a base for all tests.", + ) + parser.add_argument( + "--test_name", + default="all", + help="test to run, acceptable values: `test_name` in `build_test_list` (default: all)", + ) + parser.add_argument("--ngpu", default=8, type=int) + args = parser.parse_args() + + if not os.path.exists(args.output_dir): + os.makedirs(args.output_dir) + if os.listdir(args.output_dir): + raise RuntimeError("Please provide an empty output directory.") + + test_list = _TEST_SUITES_FUNCTION["simple_fsdp"]() + run_tests(args, test_list) + + +if __name__ == "__main__": + main() diff --git a/torchtitan/experiments/simple_fsdp/tests/integration_tests.py b/torchtitan/experiments/simple_fsdp/tests/frontend_integration_tests.py similarity index 92% rename from torchtitan/experiments/simple_fsdp/tests/integration_tests.py rename to torchtitan/experiments/simple_fsdp/tests/frontend_integration_tests.py index c3cee7b52f..b6dd3dead3 100755 --- a/torchtitan/experiments/simple_fsdp/tests/integration_tests.py +++ b/torchtitan/experiments/simple_fsdp/tests/frontend_integration_tests.py @@ -29,32 +29,6 @@ def build_simple_fsdp_test_list() -> list[OverrideDefinitions]: "1D", "1d", ), - OverrideDefinitions( - [ - [ - "--model.name simple_fsdp.llama3", - "--compile.enable", - "--job.custom_config_module=torchtitan.experiments.simple_fsdp.job_config", - "--compile.backend aot_eager", - "--compile.graph_passes auto_bucketing", - ], - ], - "1D+autobucketing", - "1d_autobucketing", - ), - OverrideDefinitions( - [ - [ - "--model.name simple_fsdp.llama3", - "--compile.enable", - "--job.custom_config_module=torchtitan.experiments.simple_fsdp.job_config", - "--compile.backend aot_eager", - "--compile.graph_passes transformer_block_bucketing", - ], - ], - "1D+transformer_block_bucketing", - "1d_transformer_block_bucketing", - ), OverrideDefinitions( [ [ From 6d6f3970377cd581f74ac38899836d2fc2098e56 Mon Sep 17 00:00:00 2001 From: ruisizhang123 Date: Tue, 25 Nov 2025 19:24:43 -0800 Subject: [PATCH 2/2] add torchrun version --- .../integration_test_8gpu_simple_fsdp.yaml | 2 +- run_train.sh | 15 ++++++++-- tests/integration_tests/run_tests.py | 13 ++------ .../tests/compiler_pass_integration_tests.py | 30 ++++++++----------- 4 files changed, 28 insertions(+), 32 deletions(-) diff --git a/.github/workflows/integration_test_8gpu_simple_fsdp.yaml b/.github/workflows/integration_test_8gpu_simple_fsdp.yaml index d0e642a7e4..96220f0338 100644 --- a/.github/workflows/integration_test_8gpu_simple_fsdp.yaml +++ b/.github/workflows/integration_test_8gpu_simple_fsdp.yaml @@ -54,7 +54,7 @@ jobs: python -m torchtitan.experiments.simple_fsdp.tests.frontend_integration_tests artifacts-to-be-uploaded --ngpu 8 # Run backend pass integration tests of SimpleFSDP - python -m torchtitan.experiments.simple_fsdp.tests.compiler_pass_integration_tests artifacts-to-be-uploaded --ngpu 8 --comm_mode local_tensor + python -m torchtitan.experiments.simple_fsdp.tests.compiler_pass_integration_tests artifacts-to-be-uploaded --ngpu 8 # Run the numerics unit tests of SimpleFSDP torchrun --nproc-per-node=8 -m pytest torchtitan/experiments/simple_fsdp/tests/test_numerics.py -v diff --git a/run_train.sh b/run_train.sh index 87558a782d..391286d5df 100755 --- a/run_train.sh +++ b/run_train.sh @@ -16,12 +16,23 @@ NGPU=${NGPU:-"8"} export LOG_RANK=${LOG_RANK:-0} CONFIG_FILE=${CONFIG_FILE:-"./torchtitan/models/llama3/train_configs/debug_model.toml"} TRAIN_FILE=${TRAIN_FILE:-"torchtitan.train"} + +set +x +copy_args=("$@") +for ((i=0; i<${#copy_args[@]}; i++)); do + if [[ ${copy_args[i]} == --comm.mode ]]; then + CONFIG_COMM_MODE="${copy_args[i+1]}" + fi +done +set -x + +CONFIG_COMM_MODE=${CONFIG_COMM_MODE:-"default"} # COMM_MODE options: "fake_backend" (dry run), "local_tensor" (debug mode), or empty for normal training -COMM_MODE=${COMM_MODE:-""} +COMM_MODE=${COMM_MODE:-$CONFIG_COMM_MODE} TORCHFT_LIGHTHOUSE=${TORCHFT_LIGHTHOUSE:-"http://localhost:29510"} -if [ -n "$COMM_MODE" ]; then +if [ "$COMM_MODE" != "default" ]; then # Communication mode specified: validate configuration or run in debug mode echo "Running with comm_mode=${COMM_MODE}" NGPU="${NGPU}" LOCAL_RANK=0 python3 -m "${TRAIN_FILE}" --job.config_file "${CONFIG_FILE}" "$@" --comm.mode=${COMM_MODE} --training.steps=1 diff --git a/tests/integration_tests/run_tests.py b/tests/integration_tests/run_tests.py index 336c7e815c..7081215c83 100644 --- a/tests/integration_tests/run_tests.py +++ b/tests/integration_tests/run_tests.py @@ -29,9 +29,7 @@ def _run_cmd(cmd): return subprocess.run([cmd], text=True, shell=True) -def run_single_test( - test_flavor: OverrideDefinitions, full_path: str, output_dir: str, comm_mode: str -): +def run_single_test(test_flavor: OverrideDefinitions, full_path: str, output_dir: str): # run_test supports sequence of tests. test_name = test_flavor.test_name dump_folder_arg = f"--job.dump_folder {output_dir}/{test_name}" @@ -39,8 +37,7 @@ def run_single_test( all_ranks = ",".join(map(str, range(test_flavor.ngpu))) for idx, override_arg in enumerate(test_flavor.override_args): - cmd = f"CONFIG_FILE={full_path} COMM_MODE={comm_mode} NGPU={test_flavor.ngpu} LOG_RANK={all_ranks} ./run_train.sh" - + cmd = f"CONFIG_FILE={full_path} NGPU={test_flavor.ngpu} LOG_RANK={all_ranks} ./run_train.sh" # dump compile trace for debugging purpose cmd = f'TORCH_TRACE="{output_dir}/{test_name}/compile_trace" ' + cmd @@ -122,12 +119,6 @@ def main(): parser.add_argument( "output_dir", help="Directory to dump results generated by tests" ) - parser.add_argument( - "comm_mode", - default="default", - choices=["default", "fake_backend", "local_tensor"], - help="Communication mode to validate tests", - ) parser.add_argument( "--gpu_arch_type", default="cuda", diff --git a/torchtitan/experiments/simple_fsdp/tests/compiler_pass_integration_tests.py b/torchtitan/experiments/simple_fsdp/tests/compiler_pass_integration_tests.py index f603e1e605..b2f3b53288 100755 --- a/torchtitan/experiments/simple_fsdp/tests/compiler_pass_integration_tests.py +++ b/torchtitan/experiments/simple_fsdp/tests/compiler_pass_integration_tests.py @@ -22,11 +22,11 @@ def build_simple_fsdp_test_list() -> list[OverrideDefinitions]: [ [ "--model.name simple_fsdp.llama3", - "--model.flavor 8B", "--compile.enable", "--job.custom_config_module=torchtitan.experiments.simple_fsdp.job_config", "--compile.backend aot_eager", "--compile.graph_passes auto_bucketing", + "--comm.mode fake_backend", ], ], "1D+autobucketing", @@ -37,11 +37,11 @@ def build_simple_fsdp_test_list() -> list[OverrideDefinitions]: [ [ "--model.name simple_fsdp.llama3", - "--model.flavor 8B", "--compile.enable", "--job.custom_config_module=torchtitan.experiments.simple_fsdp.job_config", "--compile.backend aot_eager", "--compile.graph_passes transformer_block_bucketing", + "--comm.mode fake_backend", ], ], "1D+transformer_block_bucketing", @@ -52,12 +52,12 @@ def build_simple_fsdp_test_list() -> list[OverrideDefinitions]: [ [ "--model.name simple_fsdp.llama3", - "--model.flavor 8B", "--parallelism.tensor_parallel_degree 2", "--compile.enable", "--job.custom_config_module=torchtitan.experiments.simple_fsdp.job_config", "--compile.backend aot_eager", "--compile.graph_passes auto_bucketing", + "--comm.mode fake_backend", ], ], "2D+autobucketing", @@ -68,12 +68,12 @@ def build_simple_fsdp_test_list() -> list[OverrideDefinitions]: [ [ "--model.name simple_fsdp.llama3", - "--model.flavor 8B", "--parallelism.tensor_parallel_degree 2", "--compile.enable", "--job.custom_config_module=torchtitan.experiments.simple_fsdp.job_config", "--compile.backend aot_eager", "--compile.graph_passes transformer_block_bucketing", + "--comm.mode fake_backend", ], ], "2D+transformer_block_bucketing", @@ -85,13 +85,13 @@ def build_simple_fsdp_test_list() -> list[OverrideDefinitions]: # [ # [ # "--model.name simple_fsdp.llama3", - # "--model.flavor 8B", # "--parallelism.tensor_parallel_degree 2", # "--parallelism.pipeline_parallel_degree 2", # "--compile.enable", # "--job.custom_config_module=torchtitan.experiments.simple_fsdp.job_config", # "--compile.backend aot_eager", # "--compile.graph_passes auto_bucketing", + # "--comm.mode fake_backend", # ], # ], # "3D+autobucketing", @@ -102,13 +102,13 @@ def build_simple_fsdp_test_list() -> list[OverrideDefinitions]: # [ # [ # "--model.name simple_fsdp.llama3", - # "--model.flavor 8B", # "--parallelism.tensor_parallel_degree 2", # "--parallelism.pipeline_parallel_degree 2", # "--compile.enable", # "--job.custom_config_module=torchtitan.experiments.simple_fsdp.job_config", # "--compile.backend aot_eager", # "--compile.graph_passes transformer_block_bucketing", + # "--comm.mode fake_backend", # ], # ], # "3D+transformer_block_bucketing", @@ -119,13 +119,13 @@ def build_simple_fsdp_test_list() -> list[OverrideDefinitions]: # [ # [ # "--model.name simple_fsdp.llama3", - # "--model.flavor 8B", # "--parallelism.tensor_parallel_degree 2", # "--parallelism.context_parallel_degree 2", # "--compile.enable", # "--job.custom_config_module=torchtitan.experiments.simple_fsdp.job_config", # "--compile.backend aot_eager", # "--compile.graph_passes auto_bucketing", + # "--comm.mode fake_backend", # ], # ], # "FSDP+TP+CP+autobucketing", @@ -136,13 +136,13 @@ def build_simple_fsdp_test_list() -> list[OverrideDefinitions]: [ [ "--model.name simple_fsdp.llama3", - "--model.flavor 8B", "--parallelism.tensor_parallel_degree 2", "--parallelism.context_parallel_degree 2", "--compile.enable", "--job.custom_config_module=torchtitan.experiments.simple_fsdp.job_config", "--compile.backend aot_eager", "--compile.graph_passes transformer_block_bucketing", + "--comm.mode fake_backend", ], ], "FSDP+TP+CP+transformer_block_bucketing", @@ -153,12 +153,12 @@ def build_simple_fsdp_test_list() -> list[OverrideDefinitions]: [ [ "--model.name simple_fsdp.deepseek_v3", - "--model.flavor 16B", "--parallelism.data_parallel_shard_degree 4", "--parallelism.expert_parallel_degree 2", "--job.custom_config_module=torchtitan.experiments.simple_fsdp.job_config", "--compile.backend aot_eager", "--compile.graph_passes auto_bucketing", + "--comm.mode fake_backend", ], ], "FSDP+EP+autobucketing", @@ -169,12 +169,12 @@ def build_simple_fsdp_test_list() -> list[OverrideDefinitions]: [ [ "--model.name simple_fsdp.deepseek_v3", - "--model.flavor 16B", "--parallelism.data_parallel_shard_degree 4", "--parallelism.expert_parallel_degree 2", "--job.custom_config_module=torchtitan.experiments.simple_fsdp.job_config", "--compile.backend aot_eager", "--compile.graph_passes transformer_block_bucketing", + "--comm.mode fake_backend", ], ], "FSDP+EP+transformer_block_bucketing", @@ -185,7 +185,6 @@ def build_simple_fsdp_test_list() -> list[OverrideDefinitions]: [ [ "--model.name simple_fsdp.deepseek_v3", - "--model.flavor 16B", "--parallelism.data_parallel_shard_degree 2", "--parallelism.tensor_parallel_degree 2", "--parallelism.expert_parallel_degree 4", @@ -193,6 +192,7 @@ def build_simple_fsdp_test_list() -> list[OverrideDefinitions]: "--job.custom_config_module=torchtitan.experiments.simple_fsdp.job_config", "--compile.backend aot_eager", "--compile.graph_passes auto_bucketing", + "--comm.mode fake_backend", ], ], "FSDP+TP+EP+autobucketing", @@ -203,7 +203,6 @@ def build_simple_fsdp_test_list() -> list[OverrideDefinitions]: [ [ "--model.name simple_fsdp.deepseek_v3", - "--model.flavor 16B", "--parallelism.data_parallel_shard_degree 2", "--parallelism.tensor_parallel_degree 2", "--parallelism.expert_parallel_degree 4", @@ -211,6 +210,7 @@ def build_simple_fsdp_test_list() -> list[OverrideDefinitions]: "--job.custom_config_module=torchtitan.experiments.simple_fsdp.job_config", "--compile.backend aot_eager", "--compile.graph_passes transformer_block_bucketing", + "--comm.mode fake_backend", ], ], "FSDP+TP+EP+transformer_block_bucketing", @@ -229,12 +229,6 @@ def build_simple_fsdp_test_list() -> list[OverrideDefinitions]: def main(): parser = argparse.ArgumentParser() parser.add_argument("output_dir") - parser.add_argument( - "--comm_mode", - default="default", - choices=["default", "fake_backend", "local_tensor"], - help="Communication mode to validate tests", - ) parser.add_argument( "--config_path", default="./tests/integration_tests/base_config.toml",