diff --git a/tests/functional/L1_Functional_Tests_GPU.sh b/tests/functional/L1_Functional_Tests_GPU.sh
index ee26f2de37..c01dd7ae63 100644
--- a/tests/functional/L1_Functional_Tests_GPU.sh
+++ b/tests/functional/L1_Functional_Tests_GPU.sh
@@ -23,32 +23,39 @@ cd ${PROJECT_ROOT}
 time bash ./tests/functional/grpo_frozen_env.sh
 time bash ./tests/functional/test_frozen_env.sh
 
-time uv run --no-sync bash ./tests/functional/sft.sh
-time uv run --no-sync bash ./tests/functional/sft_resume_diamond.sh
+time uv run --no-sync bash ./tests/functional/distillation.sh
+time uv run --no-sync bash ./tests/functional/distillation_megatron.sh
+time uv run --no-sync bash ./tests/functional/dpo.sh
+time uv run --no-sync bash ./tests/functional/dpo_automodel_lora.sh
+time uv run --no-sync bash ./tests/functional/dpo_megatron.sh
+time uv run --no-sync bash ./tests/functional/eval.sh
+time uv run --no-sync bash ./tests/functional/eval_async.sh
 time uv run --no-sync bash ./tests/functional/grpo.sh
-time uv run --no-sync bash ./tests/functional/prorlv2.sh
 time uv run --no-sync bash ./tests/functional/grpo_async.sh
+time uv run --no-sync bash ./tests/functional/grpo_automodel_lora.sh
 time uv run --no-sync bash ./tests/functional/grpo_automodel_lora_async.sh
 time uv run --no-sync bash ./tests/functional/grpo_automodel_lora_non_colocated.sh
-time uv run --no-sync bash ./tests/functional/grpo_automodel_lora.sh
 time uv run --no-sync bash ./tests/functional/grpo_megatron.sh
 time uv run --no-sync bash ./tests/functional/grpo_megatron_generation.sh
+time uv run --no-sync bash ./tests/functional/grpo_multiple_datasets.sh
 time uv run --no-sync bash ./tests/functional/grpo_multiturn.sh
 time uv run --no-sync bash ./tests/functional/grpo_non_colocated.sh
+# Re-enable once grpo_rm_env.sh is fixed.
+# time uv run --no-sync bash ./tests/functional/grpo_rm_env.sh
+# Re-enable once SGLang build is fixed.
 # time uv run --no-sync bash ./tests/functional/grpo_sglang.sh
-time uv run --no-sync bash ./tests/functional/grpo_multiple_datasets.sh
-time uv run --no-sync bash ./tests/functional/dpo_automodel_lora.sh
-time uv run --no-sync bash ./tests/functional/dpo.sh
+time uv run --no-sync bash ./tests/functional/prorlv2.sh
 time uv run --no-sync bash ./tests/functional/rm.sh
-time uv run --no-sync bash ./tests/functional/eval.sh
-time uv run --no-sync bash ./tests/functional/eval_async.sh
-time uv run --no-sync bash ./tests/functional/test_mcore_extra_installed_correctly.sh
-time uv run --no-sync bash ./tests/functional/test_automodel_extra_installed_correctly.sh
-time uv run --no-sync bash ./tests/functional/vlm_grpo.sh
-time uv run --no-sync bash ./tests/functional/distillation.sh
-time uv run --no-sync bash ./tests/functional/distillation_megatron.sh
+time uv run --no-sync bash ./tests/functional/sft.sh
 time uv run --no-sync bash ./tests/functional/sft_automodel_lora.sh
+time uv run --no-sync bash ./tests/functional/sft_megatron.sh
 time uv run --no-sync bash ./tests/functional/sft_megatron_lora.sh
+time uv run --no-sync bash ./tests/functional/sft_resume_diamond.sh
+time uv run --no-sync bash ./tests/functional/test_automodel_extra_installed_correctly.sh
+# Re-enable once DTensor v2 converter is fixed.
+# time uv run --no-sync bash ./tests/functional/test_converters.sh
+time uv run --no-sync bash ./tests/functional/test_mcore_extra_installed_correctly.sh
+time uv run --no-sync bash ./tests/functional/vlm_grpo.sh
 
 # Research functional tests (self-discovery)
 for test_script in research/*/tests/functional/*.sh; do
diff --git a/tests/functional/dpo_megatron.sh b/tests/functional/dpo_megatron.sh
index 8c1524c2c5..11d8b7602a 100755
--- a/tests/functional/dpo_megatron.sh
+++ b/tests/functional/dpo_megatron.sh
@@ -22,24 +22,26 @@ mkdir -p $EXP_DIR $LOG_DIR
 
 cd $PROJECT_ROOT
 uv run $PROJECT_ROOT/examples/run_dpo.py \
-    --config $PROJECT_ROOT/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-megatron.yaml \
+    --config $PROJECT_ROOT/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-megatron.v2.yaml \
     policy.model_name=Qwen/Qwen3-0.6B \
-    cluster.gpus_per_node=2 \
     dpo.max_num_steps=3 \
     dpo.val_batches=1 \
     dpo.val_period=3 \
+    policy.train_global_batch_size=8 \
+    policy.megatron_cfg.tensor_model_parallel_size=1 \
+    policy.megatron_cfg.sequence_parallel=false \
     logger.tensorboard_enabled=true \
     logger.log_dir=$LOG_DIR \
     logger.wandb_enabled=false \
     logger.monitor_gpus=true \
     checkpointing.enabled=false \
-    policy.megatron_cfg.tensor_model_parallel_size=1 \
-    policy.train_global_batch_size=8 \
+    cluster.gpus_per_node=2 \
+    cluster.num_nodes=1 \
     $@ \
     2>&1 | tee $RUN_LOG
 
 uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS
 
 uv run tests/check_metrics.py $JSON_METRICS \
-    'data["train/loss"]["3"] < 5' \
+    'data["train/loss"]["3"] < 6' \
 
diff --git a/tests/functional/grpo_math_env.sh b/tests/functional/grpo_math_env.sh
deleted file mode 100755
index 1504c95c83..0000000000
--- a/tests/functional/grpo_math_env.sh
+++ /dev/null
@@ -1,42 +0,0 @@
-#!/bin/bash
-
-SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd)
-PROJECT_ROOT=$(realpath $SCRIPT_DIR/../..)
-# Mark the current repo as safe, since wandb fetches metadata about the repo
-git config --global --add safe.directory $PROJECT_ROOT
-
-set -eou pipefail
-
-EXP_NAME=$(basename $0 .sh)
-EXP_DIR=$SCRIPT_DIR/$EXP_NAME
-LOG_DIR=$EXP_DIR/logs
-JSON_METRICS=$EXP_DIR/metrics.json
-RUN_LOG=$EXP_DIR/run.log
-export PYTHONPATH=${PROJECT_ROOT}:${PYTHONPATH:-}
-
-rm -rf $EXP_DIR $LOG_DIR
-mkdir -p $EXP_DIR $LOG_DIR
-
-cd $PROJECT_ROOT
-uv run coverage run -a --data-file=$PROJECT_ROOT/tests/.coverage --source=$PROJECT_ROOT/nemo_rl \
-    $PROJECT_ROOT/examples/run_grpo.py \
-    policy.model_name=Qwen/Qwen3-0.6B \
-    grpo.num_prompts_per_step=2 \
-    grpo.num_generations_per_prompt=4 \
-    policy.train_global_batch_size=4 \
-    policy.train_micro_batch_size=1 \
-    cluster.gpus_per_node=2 \
-    grpo.max_num_steps=2 \
-    logger.tensorboard_enabled=true \
-    logger.log_dir=$LOG_DIR \
-    logger.wandb_enabled=false \
-    logger.monitor_gpus=true \
-    checkpointing.enabled=false \
-    $@ \
-    2>&1 | tee $RUN_LOG
-
-uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS
-
-uv run tests/check_metrics.py $JSON_METRICS \
-    'max(data["train/gen_kl_error"]) < 0.001'
-
diff --git a/tests/unit/test_recipes_and_test_suites.py b/tests/unit/test_recipes_and_test_suites.py
index 60e880fd12..1fb39f41d2 100644
--- a/tests/unit/test_recipes_and_test_suites.py
+++ b/tests/unit/test_recipes_and_test_suites.py
@@ -310,3 +310,37 @@ def test_all_recipes_start_with_algo_hyphen(all_recipe_yaml_rel_paths):
         assert algo in expected_algos, (
             f"Recipe {recipe_yaml} has unexpected algo {algo}"
         )
+
+
+def test_functional_tests_exist():
+    """Check that every functional test script is listed in the L1 driver.
+
+    A script counts as listed when its file name is the last path component of
+    a line in L1_Functional_Tests_GPU.sh that mentions ``./tests/functional``.
+    Commented-out entries count too, so temporarily disabled tests stay tracked.
+    """
+    functional_tests_dir = os.path.join(project_root, "tests", "functional")
+    l1_script = "L1_Functional_Tests_GPU.sh"
+
+    listed = set()
+    with open(os.path.join(functional_tests_dir, l1_script), "r") as f:
+        for line in f:
+            line = line.strip()
+            if line and "./tests/functional" in line:
+                listed.add(line.split(" ")[-1].split("/")[-1])
+
+    # Sorted so the failure message does not depend on os.listdir() ordering.
+    missing_list = sorted(
+        f"./tests/functional/{filename}"
+        for filename in os.listdir(functional_tests_dir)
+        if filename.endswith(".sh")
+        and filename != l1_script
+        and filename not in listed
+    )
+
+    # Join outside the f-string: a backslash inside a replacement field is a
+    # SyntaxError before Python 3.12.
+    missing_str = "\n".join(missing_list)
+    assert not missing_list, (
+        f"Missing functional test scripts in ./tests/functional/{l1_script}:\n{missing_str}"
+    )