diff --git a/examples/geo3k_vlm/README.md b/examples/geo3k_vlm/README.md index 273157439..9841e7641 100644 --- a/examples/geo3k_vlm/README.md +++ b/examples/geo3k_vlm/README.md @@ -2,6 +2,10 @@ Training VLMs with FSDP or Megatron on single-turn reasoning task using GRPO on the [GEO3K dataset](https://huggingface.co/datasets/hiyouga/geometry3k). We used processed version [here](https://huggingface.co/datasets/chenhegu/geo3k_imgurl). +Supported models: +* Qwen2.5-VL +* Qwen3-VL (Dense and MoE) + Note: Please make sure the cudnn version in the environment is 9.16.0.29 to prevent severe performance regression in conv3d in torch 2.9 mentioned in https://github.com/pytorch/pytorch/issues/168167. Otherwise, you can reinstall cudnn with: ```bash pip install nvidia-cudnn-cu12==9.16.0.29 diff --git a/examples/geo3k_vlm/run_geo3k_vlm.sh b/examples/geo3k_vlm/run_geo3k_vlm.sh index 5b36efd45..d4e0c8923 100644 --- a/examples/geo3k_vlm/run_geo3k_vlm.sh +++ b/examples/geo3k_vlm/run_geo3k_vlm.sh @@ -15,6 +15,10 @@ DATASET_LOCAL_NAME=$(basename "$DATASET_NAME") # Validate MODEL_NAME VALID_MODELS=" + Qwen2.5-VL-3B-Instruct + Qwen2.5-VL-7B-Instruct + Qwen2.5-VL-32B-Instruct + Qwen2.5-VL-72B-Instruct Qwen3-VL-2B-Instruct Qwen3-VL-4B-Instruct Qwen3-VL-8B-Instruct @@ -80,6 +84,8 @@ fi # Common args CKPT_ARGS=( --hf-checkpoint /root/models/${MODEL_NAME} + # Qwen3-VL models use a rotary base of 5000000; only pass this value when it applies to the selected model + --rotary-base 5000000 ) ROLLOUT_ARGS=( diff --git a/examples/geo3k_vlm/run_geo3k_vlm_sft.sh b/examples/geo3k_vlm/run_geo3k_vlm_sft.sh index 4df78b492..e2cf44d7c 100644 --- a/examples/geo3k_vlm/run_geo3k_vlm_sft.sh +++ b/examples/geo3k_vlm/run_geo3k_vlm_sft.sh @@ -6,6 +6,10 @@ DATASET_LOCAL_NAME=$(basename "$DATASET_NAME") # Validate MODEL_NAME VALID_MODELS=" + Qwen2.5-VL-3B-Instruct + Qwen2.5-VL-7B-Instruct + Qwen2.5-VL-32B-Instruct + Qwen2.5-VL-72B-Instruct Qwen3-VL-2B-Instruct Qwen3-VL-4B-Instruct Qwen3-VL-8B-Instruct