From 4a83f0a0ee9e2be25fa6bc319d013004f584991a Mon Sep 17 00:00:00 2001 From: geomin12 Date: Mon, 29 Jun 2026 12:18:27 -0700 Subject: [PATCH 1/4] Add MI455 (gfx125X) bringup to multi-arch CI Configure gfx125X-dcgpu for rocm-systems nightly multi-arch CI with: - Custom test runner: linux-mi455-gpu-rocm - Limited tests: hip-tests, rocrtst, sanity only - Uses TheRock branch with external_family_overrides support TEMPORARY: Points to users/geomin12/mi455-bringup branch in TheRock. TODO(geomin12): Revert to main once MI455 is in therock-ci-config. Co-Authored-By: Claude Opus 4.6 --- .github/workflows/therock-multi-arch-ci.yml | 44 ++++++++++++++++----- 1 file changed, 35 insertions(+), 9 deletions(-) diff --git a/.github/workflows/therock-multi-arch-ci.yml b/.github/workflows/therock-multi-arch-ci.yml index a03f153ec78..b71c69fedaa 100644 --- a/.github/workflows/therock-multi-arch-ci.yml +++ b/.github/workflows/therock-multi-arch-ci.yml @@ -14,6 +14,16 @@ # We are keeping the branch main until this gets to post-submit. 
Then, we will use commit hashes +# ============================================================================= +# TEMPORARY: MI455 (gfx125X) bringup configuration +# TODO(geomin12): Remove this block once MI455 is fully enabled in therock-ci-config +# +# The family_overrides JSON passed via external_repo configures MI455-specific test settings: +# - test-runs-on: Custom runner label for MI455 hardware +# - test_labels_for_family: Limit tests to hip-tests, rocrtst (sanity always runs) +# - fetch-gfx-targets: gfx target list for this family (gfx1250) +# ============================================================================= + name: TheRock Multi-Arch CI on: @@ -57,19 +67,35 @@ concurrency: jobs: setup: - uses: ROCm/TheRock/.github/workflows/setup_multi_arch.yml@main + uses: ROCm/TheRock/.github/workflows/setup_multi_arch.yml@users/geomin12/mi455-bringup with: build_variant: "release" # Limit GPU families for rocm-systems CI - linux_amdgpu_families: ${{ inputs.linux_amdgpu_families || 'gfx94X,gfx950' }} + # Note: gfx125X is added for MI455 bringup with limited tests (hip-tests, rocrtst, sanity) + linux_amdgpu_families: ${{ inputs.linux_amdgpu_families || 'gfx94X,gfx950,gfx125X' }} windows_amdgpu_families: ${{ inputs.windows_amdgpu_families || 'gfx1151' }} linux_test_labels: ${{ inputs.linux_test_labels || '' }} windows_test_labels: ${{ inputs.windows_test_labels || '' }} prebuilt_stages: ${{ inputs.prebuilt_stages || '' }} baseline_run_id: ${{ inputs.baseline_run_id || '' }} repository: ROCm/TheRock - ref: main - external_repo: '{"repository":"${{ github.repository }}","ref":"${{ github.sha }}"}' + ref: users/geomin12/mi455-bringup + # TEMPORARY: MI455 bringup - pass family_overrides to configure test runner and limited tests + # TODO(geomin12): Remove family_overrides once MI455 is in therock-ci-config + external_repo: >- + { + "repository": "${{ github.repository }}", + "ref": "${{ github.sha }}", + "family_overrides": { + "gfx125x": { + "linux": { + "test-runs-on": 
"linux-mi455-gpu-rocm", + "test_labels_for_family": ["test:hip-tests", "test:rocrtst"], + "fetch-gfx-targets": ["gfx1250"] + } + } + } + } linux_build_and_test: name: Linux::${{ fromJSON(needs.setup.outputs.linux_build_config || '{}').build_variant_label || 'skip' }} @@ -79,7 +105,7 @@ jobs: needs.setup.outputs.linux_build_config != '' && needs.setup.outputs.enable_build_jobs == 'true' }} - uses: ROCm/TheRock/.github/workflows/multi_arch_ci_linux.yml@main + uses: ROCm/TheRock/.github/workflows/multi_arch_ci_linux.yml@users/geomin12/mi455-bringup secrets: inherit with: build_config: ${{ needs.setup.outputs.linux_build_config }} @@ -88,7 +114,7 @@ jobs: test_type: ${{ needs.setup.outputs.test_type }} external_repo_config: ${{ needs.setup.outputs.external_repo_config }} repository: ROCm/TheRock - ref: main + ref: users/geomin12/mi455-bringup permissions: contents: read id-token: write @@ -101,7 +127,7 @@ jobs: needs.setup.outputs.windows_build_config != '' && needs.setup.outputs.enable_build_jobs == 'true' }} - uses: ROCm/TheRock/.github/workflows/multi_arch_ci_windows.yml@main + uses: ROCm/TheRock/.github/workflows/multi_arch_ci_windows.yml@users/geomin12/mi455-bringup secrets: inherit with: build_config: ${{ needs.setup.outputs.windows_build_config }} @@ -110,7 +136,7 @@ jobs: test_type: ${{ needs.setup.outputs.test_type }} external_repo_config: ${{ needs.setup.outputs.external_repo_config }} repository: ROCm/TheRock - ref: main + ref: users/geomin12/mi455-bringup permissions: contents: read id-token: write @@ -128,7 +154,7 @@ jobs: uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: repository: "ROCm/TheRock" - ref: main + ref: users/geomin12/mi455-bringup sparse-checkout: build_tools/github_actions sparse-checkout-cone-mode: true From 5da280dbfc145e251c80c4a6c3de334b695d49ed Mon Sep 17 00:00:00 2001 From: geomin12 Date: Mon, 29 Jun 2026 17:39:38 -0700 Subject: [PATCH 2/4] Update TheRock branch to mi455-bringup-2 (rebased on main) 
Co-Authored-By: Claude Opus 4.6 --- .github/workflows/therock-multi-arch-ci.yml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/therock-multi-arch-ci.yml b/.github/workflows/therock-multi-arch-ci.yml index b71c69fedaa..64f5db0abe8 100644 --- a/.github/workflows/therock-multi-arch-ci.yml +++ b/.github/workflows/therock-multi-arch-ci.yml @@ -67,7 +67,7 @@ concurrency: jobs: setup: - uses: ROCm/TheRock/.github/workflows/setup_multi_arch.yml@users/geomin12/mi455-bringup + uses: ROCm/TheRock/.github/workflows/setup_multi_arch.yml@users/geomin12/mi455-bringup-2 with: build_variant: "release" # Limit GPU families for rocm-systems CI @@ -79,7 +79,7 @@ jobs: prebuilt_stages: ${{ inputs.prebuilt_stages || '' }} baseline_run_id: ${{ inputs.baseline_run_id || '' }} repository: ROCm/TheRock - ref: users/geomin12/mi455-bringup + ref: users/geomin12/mi455-bringup-2 # TEMPORARY: MI455 bringup - pass family_overrides to configure test runner and limited tests # TODO(geomin12): Remove family_overrides once MI455 is in therock-ci-config external_repo: >- @@ -105,7 +105,7 @@ jobs: needs.setup.outputs.linux_build_config != '' && needs.setup.outputs.enable_build_jobs == 'true' }} - uses: ROCm/TheRock/.github/workflows/multi_arch_ci_linux.yml@users/geomin12/mi455-bringup + uses: ROCm/TheRock/.github/workflows/multi_arch_ci_linux.yml@users/geomin12/mi455-bringup-2 secrets: inherit with: build_config: ${{ needs.setup.outputs.linux_build_config }} @@ -114,7 +114,7 @@ jobs: test_type: ${{ needs.setup.outputs.test_type }} external_repo_config: ${{ needs.setup.outputs.external_repo_config }} repository: ROCm/TheRock - ref: users/geomin12/mi455-bringup + ref: users/geomin12/mi455-bringup-2 permissions: contents: read id-token: write @@ -127,7 +127,7 @@ jobs: needs.setup.outputs.windows_build_config != '' && needs.setup.outputs.enable_build_jobs == 'true' }} - uses: 
ROCm/TheRock/.github/workflows/multi_arch_ci_windows.yml@users/geomin12/mi455-bringup + uses: ROCm/TheRock/.github/workflows/multi_arch_ci_windows.yml@users/geomin12/mi455-bringup-2 secrets: inherit with: build_config: ${{ needs.setup.outputs.windows_build_config }} @@ -136,7 +136,7 @@ jobs: test_type: ${{ needs.setup.outputs.test_type }} external_repo_config: ${{ needs.setup.outputs.external_repo_config }} repository: ROCm/TheRock - ref: users/geomin12/mi455-bringup + ref: users/geomin12/mi455-bringup-2 permissions: contents: read id-token: write @@ -154,7 +154,7 @@ jobs: uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: repository: "ROCm/TheRock" - ref: users/geomin12/mi455-bringup + ref: users/geomin12/mi455-bringup-2 sparse-checkout: build_tools/github_actions sparse-checkout-cone-mode: true From 640fbc4ca34b12a3d59a98b6e5c5c0f3a5b2ac13 Mon Sep 17 00:00:00 2001 From: geomin12 Date: Mon, 29 Jun 2026 22:07:47 -0700 Subject: [PATCH 3/4] Update TheRock branch to mi455-bringup for testing --- .github/workflows/therock-multi-arch-ci.yml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/therock-multi-arch-ci.yml b/.github/workflows/therock-multi-arch-ci.yml index 64f5db0abe8..b71c69fedaa 100644 --- a/.github/workflows/therock-multi-arch-ci.yml +++ b/.github/workflows/therock-multi-arch-ci.yml @@ -67,7 +67,7 @@ concurrency: jobs: setup: - uses: ROCm/TheRock/.github/workflows/setup_multi_arch.yml@users/geomin12/mi455-bringup-2 + uses: ROCm/TheRock/.github/workflows/setup_multi_arch.yml@users/geomin12/mi455-bringup with: build_variant: "release" # Limit GPU families for rocm-systems CI @@ -79,7 +79,7 @@ jobs: prebuilt_stages: ${{ inputs.prebuilt_stages || '' }} baseline_run_id: ${{ inputs.baseline_run_id || '' }} repository: ROCm/TheRock - ref: users/geomin12/mi455-bringup-2 + ref: users/geomin12/mi455-bringup # TEMPORARY: MI455 bringup - pass family_overrides to configure test runner and 
limited tests # TODO(geomin12): Remove family_overrides once MI455 is in therock-ci-config external_repo: >- @@ -105,7 +105,7 @@ jobs: needs.setup.outputs.linux_build_config != '' && needs.setup.outputs.enable_build_jobs == 'true' }} - uses: ROCm/TheRock/.github/workflows/multi_arch_ci_linux.yml@users/geomin12/mi455-bringup-2 + uses: ROCm/TheRock/.github/workflows/multi_arch_ci_linux.yml@users/geomin12/mi455-bringup secrets: inherit with: build_config: ${{ needs.setup.outputs.linux_build_config }} @@ -114,7 +114,7 @@ jobs: test_type: ${{ needs.setup.outputs.test_type }} external_repo_config: ${{ needs.setup.outputs.external_repo_config }} repository: ROCm/TheRock - ref: users/geomin12/mi455-bringup-2 + ref: users/geomin12/mi455-bringup permissions: contents: read id-token: write @@ -127,7 +127,7 @@ jobs: needs.setup.outputs.windows_build_config != '' && needs.setup.outputs.enable_build_jobs == 'true' }} - uses: ROCm/TheRock/.github/workflows/multi_arch_ci_windows.yml@users/geomin12/mi455-bringup-2 + uses: ROCm/TheRock/.github/workflows/multi_arch_ci_windows.yml@users/geomin12/mi455-bringup secrets: inherit with: build_config: ${{ needs.setup.outputs.windows_build_config }} @@ -136,7 +136,7 @@ jobs: test_type: ${{ needs.setup.outputs.test_type }} external_repo_config: ${{ needs.setup.outputs.external_repo_config }} repository: ROCm/TheRock - ref: users/geomin12/mi455-bringup-2 + ref: users/geomin12/mi455-bringup permissions: contents: read id-token: write @@ -154,7 +154,7 @@ jobs: uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: repository: "ROCm/TheRock" - ref: users/geomin12/mi455-bringup-2 + ref: users/geomin12/mi455-bringup sparse-checkout: build_tools/github_actions sparse-checkout-cone-mode: true From b4daa2081ff4b5aa3ae713506cdb687905acf750 Mon Sep 17 00:00:00 2001 From: geomin12 Date: Mon, 29 Jun 2026 22:46:03 -0700 Subject: [PATCH 4/4] Update TheRock branch to main --- .github/workflows/therock-multi-arch-ci.yml | 14 
+++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/therock-multi-arch-ci.yml b/.github/workflows/therock-multi-arch-ci.yml index b71c69fedaa..1d38b5a00eb 100644 --- a/.github/workflows/therock-multi-arch-ci.yml +++ b/.github/workflows/therock-multi-arch-ci.yml @@ -67,7 +67,7 @@ concurrency: jobs: setup: - uses: ROCm/TheRock/.github/workflows/setup_multi_arch.yml@users/geomin12/mi455-bringup + uses: ROCm/TheRock/.github/workflows/setup_multi_arch.yml@main with: build_variant: "release" # Limit GPU families for rocm-systems CI @@ -79,7 +79,7 @@ jobs: prebuilt_stages: ${{ inputs.prebuilt_stages || '' }} baseline_run_id: ${{ inputs.baseline_run_id || '' }} repository: ROCm/TheRock - ref: users/geomin12/mi455-bringup + ref: main # TEMPORARY: MI455 bringup - pass family_overrides to configure test runner and limited tests # TODO(geomin12): Remove family_overrides once MI455 is in therock-ci-config external_repo: >- @@ -105,7 +105,7 @@ jobs: needs.setup.outputs.linux_build_config != '' && needs.setup.outputs.enable_build_jobs == 'true' }} - uses: ROCm/TheRock/.github/workflows/multi_arch_ci_linux.yml@users/geomin12/mi455-bringup + uses: ROCm/TheRock/.github/workflows/multi_arch_ci_linux.yml@main secrets: inherit with: build_config: ${{ needs.setup.outputs.linux_build_config }} @@ -114,7 +114,7 @@ jobs: test_type: ${{ needs.setup.outputs.test_type }} external_repo_config: ${{ needs.setup.outputs.external_repo_config }} repository: ROCm/TheRock - ref: users/geomin12/mi455-bringup + ref: main permissions: contents: read id-token: write @@ -127,7 +127,7 @@ jobs: needs.setup.outputs.windows_build_config != '' && needs.setup.outputs.enable_build_jobs == 'true' }} - uses: ROCm/TheRock/.github/workflows/multi_arch_ci_windows.yml@users/geomin12/mi455-bringup + uses: ROCm/TheRock/.github/workflows/multi_arch_ci_windows.yml@main secrets: inherit with: build_config: ${{ needs.setup.outputs.windows_build_config }} @@ -136,7 +136,7 @@ jobs: 
test_type: ${{ needs.setup.outputs.test_type }} external_repo_config: ${{ needs.setup.outputs.external_repo_config }} repository: ROCm/TheRock - ref: users/geomin12/mi455-bringup + ref: main permissions: contents: read id-token: write @@ -154,7 +154,7 @@ jobs: uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: repository: "ROCm/TheRock" - ref: users/geomin12/mi455-bringup + ref: main sparse-checkout: build_tools/github_actions sparse-checkout-cone-mode: true