Skip to content

Commit

Permalink
test sloz
Browse files Browse the repository at this point in the history
  • Loading branch information
IlyasMoutawwakil committed Feb 10, 2025
1 parent a0f11d0 commit e1b32f8
Show file tree
Hide file tree
Showing 2 changed files with 305 additions and 293 deletions.
248 changes: 127 additions & 121 deletions .github/workflows/slow_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,17 @@ name: Non-regression tests
on:
workflow_dispatch:
schedule:
- cron: '0 21 * * 0-5' # every Sunday to Friday at 11pm CET (10pm winter time)
- cron: '0 21 * * 6' # every Saturday at 1am CET (midnight winter time)
- cron: "0 21 * * 0-5" # every Sunday to Friday at 11pm CET (10pm winter time)
- cron: "0 21 * * 6" # every Saturday at 1am CET (midnight winter time)

pull_request:
branches: [main]
push:
branches: [main]

concurrency:
group: ${{ github.workflow }}
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true

jobs:
example-diff:
Expand All @@ -19,136 +25,136 @@ jobs:
uses: actions/checkout@v2
- name: Pull image
run: |
docker pull vault.habana.ai/gaudi-docker/1.19.0/ubuntu22.04/habanalabs/pytorch-installer-2.5.1:latest
docker pull vault.habana.ai/gaudi-docker/1.19.0/ubuntu22.04/habanalabs/pytorch-installer-2.5.1:latest
- name: Run tests
run: |
docker run \
-v $PWD:/root/workspace \
--workdir=/root/workspace \
--runtime=habana \
-e HABANA_VISIBLE_DEVICES=all \
-e OMPI_MCA_btl_vader_single_copy_mechanism=none \
--cap-add=sys_nice \
--net=host \
--ipc=host \
vault.habana.ai/gaudi-docker/1.19.0/ubuntu22.04/habanalabs/pytorch-installer-2.5.1:latest \
/bin/bash tests/ci/example_diff_tests.sh
docker run \
-v $PWD:/root/workspace \
--workdir=/root/workspace \
--runtime=habana \
-e HABANA_VISIBLE_DEVICES=all \
-e OMPI_MCA_btl_vader_single_copy_mechanism=none \
--cap-add=sys_nice \
--net=host \
--ipc=host \
vault.habana.ai/gaudi-docker/1.19.0/ubuntu22.04/habanalabs/pytorch-installer-2.5.1:latest \
/bin/bash tests/ci/example_diff_tests.sh
stable-diffusion:
name: Test Stable Diffusion
if: ${{ !cancelled() && (success() || failure()) }}
needs:
- example-diff # run the job when the previous test job is done
- example-diff # run the job when the previous test job is done
runs-on:
group: aws-dl1-24xlarge
steps:
- name: Checkout
uses: actions/checkout@v2
- name: Pull image
run: |
docker pull vault.habana.ai/gaudi-docker/1.19.0/ubuntu22.04/habanalabs/pytorch-installer-2.5.1:latest
docker pull vault.habana.ai/gaudi-docker/1.19.0/ubuntu22.04/habanalabs/pytorch-installer-2.5.1:latest
- name: Run tests
run: |
docker run \
-v $PWD:/root/workspace \
--workdir=/root/workspace \
--runtime=habana \
-e HABANA_VISIBLE_DEVICES=all \
-e OMPI_MCA_btl_vader_single_copy_mechanism=none \
--cap-add=sys_nice \
--net=host \
--ipc=host \
vault.habana.ai/gaudi-docker/1.19.0/ubuntu22.04/habanalabs/pytorch-installer-2.5.1:latest \
/bin/bash tests/ci/slow_tests_diffusers.sh
docker run \
-v $PWD:/root/workspace \
--workdir=/root/workspace \
--runtime=habana \
-e HABANA_VISIBLE_DEVICES=all \
-e OMPI_MCA_btl_vader_single_copy_mechanism=none \
--cap-add=sys_nice \
--net=host \
--ipc=host \
vault.habana.ai/gaudi-docker/1.19.0/ubuntu22.04/habanalabs/pytorch-installer-2.5.1:latest \
/bin/bash tests/ci/slow_tests_diffusers.sh
deepspeed:
name: Test DeepSpeed models
if: ${{ !cancelled() && (success() || failure()) }}
needs:
- example-diff
- stable-diffusion # run the job when the previous test job is done
- stable-diffusion # run the job when the previous test job is done
runs-on:
group: aws-dl1-24xlarge
steps:
- name: Checkout
uses: actions/checkout@v2
- name: Pull image
run: |
docker pull vault.habana.ai/gaudi-docker/1.19.0/ubuntu22.04/habanalabs/pytorch-installer-2.5.1:latest
docker pull vault.habana.ai/gaudi-docker/1.19.0/ubuntu22.04/habanalabs/pytorch-installer-2.5.1:latest
- name: Run tests
run: |
docker run \
-v $PWD:/root/workspace \
--workdir=/root/workspace \
--runtime=habana \
-e HABANA_VISIBLE_DEVICES=all \
-e OMPI_MCA_btl_vader_single_copy_mechanism=none \
--cap-add=sys_nice \
--net=host \
--ipc=host \
vault.habana.ai/gaudi-docker/1.19.0/ubuntu22.04/habanalabs/pytorch-installer-2.5.1:latest \
/bin/bash tests/ci/slow_tests_deepspeed.sh
docker run \
-v $PWD:/root/workspace \
--workdir=/root/workspace \
--runtime=habana \
-e HABANA_VISIBLE_DEVICES=all \
-e OMPI_MCA_btl_vader_single_copy_mechanism=none \
--cap-add=sys_nice \
--net=host \
--ipc=host \
vault.habana.ai/gaudi-docker/1.19.0/ubuntu22.04/habanalabs/pytorch-installer-2.5.1:latest \
/bin/bash tests/ci/slow_tests_deepspeed.sh
multi-card:
name: Test multi-card models
if: ${{ !cancelled() && (success() || failure()) }}
needs:
- example-diff
- deepspeed # run the job when the previous test job is done
- deepspeed # run the job when the previous test job is done
runs-on:
group: aws-dl1-24xlarge
steps:
- name: Checkout
uses: actions/checkout@v2
- name: Pull image
run: |
docker pull vault.habana.ai/gaudi-docker/1.19.0/ubuntu22.04/habanalabs/pytorch-installer-2.5.1:latest
docker pull vault.habana.ai/gaudi-docker/1.19.0/ubuntu22.04/habanalabs/pytorch-installer-2.5.1:latest
- name: Run tests
run: |
docker run \
-v $PWD:/root/workspace \
--workdir=/root/workspace \
--runtime=habana \
-e HABANA_VISIBLE_DEVICES=all \
-e OMPI_MCA_btl_vader_single_copy_mechanism=none \
--cap-add=sys_nice \
--net=host \
--ipc=host \
vault.habana.ai/gaudi-docker/1.19.0/ubuntu22.04/habanalabs/pytorch-installer-2.5.1:latest \
/bin/bash tests/ci/slow_tests_8x.sh
docker run \
-v $PWD:/root/workspace \
--workdir=/root/workspace \
--runtime=habana \
-e HABANA_VISIBLE_DEVICES=all \
-e OMPI_MCA_btl_vader_single_copy_mechanism=none \
--cap-add=sys_nice \
--net=host \
--ipc=host \
vault.habana.ai/gaudi-docker/1.19.0/ubuntu22.04/habanalabs/pytorch-installer-2.5.1:latest \
/bin/bash tests/ci/slow_tests_8x.sh
single-card:
name: Test single-card models
if: ${{ !cancelled() && (success() || failure()) }}
needs:
- example-diff
- deepspeed
- multi-card # run the job when the previous test jobs are done
- multi-card # run the job when the previous test jobs are done
runs-on:
group: aws-dl1-24xlarge
steps:
- name: Checkout
uses: actions/checkout@v2
- name: Pull image
run: |
docker pull vault.habana.ai/gaudi-docker/1.19.0/ubuntu22.04/habanalabs/pytorch-installer-2.5.1:latest
docker pull vault.habana.ai/gaudi-docker/1.19.0/ubuntu22.04/habanalabs/pytorch-installer-2.5.1:latest
- name: Run tests
run: |
docker run \
-v $PWD:/root/workspace \
--workdir=/root/workspace \
--runtime=habana \
-e HABANA_VISIBLE_DEVICES=all \
-e OMPI_MCA_btl_vader_single_copy_mechanism=none \
--cap-add=sys_nice \
--net=host \
--ipc=host \
vault.habana.ai/gaudi-docker/1.19.0/ubuntu22.04/habanalabs/pytorch-installer-2.5.1:latest \
/bin/bash tests/ci/slow_tests_1x.sh
docker run \
-v $PWD:/root/workspace \
--workdir=/root/workspace \
--runtime=habana \
-e HABANA_VISIBLE_DEVICES=all \
-e OMPI_MCA_btl_vader_single_copy_mechanism=none \
--cap-add=sys_nice \
--net=host \
--ipc=host \
vault.habana.ai/gaudi-docker/1.19.0/ubuntu22.04/habanalabs/pytorch-installer-2.5.1:latest \
/bin/bash tests/ci/slow_tests_1x.sh
albert-xxl-single-card:
name: Test single-card ALBERT XXL
if: ${{ !cancelled() && (success() || failure()) }}
needs:
- example-diff
- deepspeed
- multi-card
- single-card # run the job when the previous test jobs are done
- single-card # run the job when the previous test jobs are done
runs-on:
group: aws-dl1-24xlarge
steps:
Expand All @@ -158,21 +164,21 @@ jobs:
- name: Pull image
if: github.event.schedule == '0 21 * * 6'
run: |
docker pull vault.habana.ai/gaudi-docker/1.19.0/ubuntu22.04/habanalabs/pytorch-installer-2.5.1:latest
docker pull vault.habana.ai/gaudi-docker/1.19.0/ubuntu22.04/habanalabs/pytorch-installer-2.5.1:latest
- name: Run test
if: github.event.schedule == '0 21 * * 6'
run: |
docker run \
-v $PWD:/root/workspace \
--workdir=/root/workspace \
--runtime=habana \
-e HABANA_VISIBLE_DEVICES=all \
-e OMPI_MCA_btl_vader_single_copy_mechanism=none \
--cap-add=sys_nice \
--net=host \
--ipc=host \
vault.habana.ai/gaudi-docker/1.19.0/ubuntu22.04/habanalabs/pytorch-installer-2.5.1:latest \
/bin/bash tests/ci/albert_xxl_1x.sh
docker run \
-v $PWD:/root/workspace \
--workdir=/root/workspace \
--runtime=habana \
-e HABANA_VISIBLE_DEVICES=all \
-e OMPI_MCA_btl_vader_single_copy_mechanism=none \
--cap-add=sys_nice \
--net=host \
--ipc=host \
vault.habana.ai/gaudi-docker/1.19.0/ubuntu22.04/habanalabs/pytorch-installer-2.5.1:latest \
/bin/bash tests/ci/albert_xxl_1x.sh
- name: Warning
if: github.event.schedule != '0 21 * * 6'
run: echo "ALBERT XXL 1x is only tested on Saturdays."
Expand All @@ -184,28 +190,28 @@ jobs:
- deepspeed
- multi-card
- single-card
- albert-xxl-single-card # run the job when the previous test jobs are done
- albert-xxl-single-card # run the job when the previous test jobs are done
runs-on:
group: aws-dl1-24xlarge
steps:
- name: Checkout
uses: actions/checkout@v2
- name: Pull image
run: |
docker pull vault.habana.ai/gaudi-docker/1.19.0/ubuntu22.04/habanalabs/pytorch-installer-2.5.1:latest
docker pull vault.habana.ai/gaudi-docker/1.19.0/ubuntu22.04/habanalabs/pytorch-installer-2.5.1:latest
- name: Run tests
run: |
docker run \
-v $PWD:/root/workspace \
--workdir=/root/workspace \
--runtime=habana \
-e HABANA_VISIBLE_DEVICES=all \
-e OMPI_MCA_btl_vader_single_copy_mechanism=none \
--cap-add=sys_nice \
--net=host \
--ipc=host \
vault.habana.ai/gaudi-docker/1.19.0/ubuntu22.04/habanalabs/pytorch-installer-2.5.1:latest \
make slow_tests_text_generation_example TOKEN=${{ secrets.TEXT_GENERATION_CI_HUB_TOKEN }}
docker run \
-v $PWD:/root/workspace \
--workdir=/root/workspace \
--runtime=habana \
-e HABANA_VISIBLE_DEVICES=all \
-e OMPI_MCA_btl_vader_single_copy_mechanism=none \
--cap-add=sys_nice \
--net=host \
--ipc=host \
vault.habana.ai/gaudi-docker/1.19.0/ubuntu22.04/habanalabs/pytorch-installer-2.5.1:latest \
make slow_tests_text_generation_example TOKEN=${{ secrets.TEXT_GENERATION_CI_HUB_TOKEN }}
trl:
name: Test TRL integration
if: ${{ !cancelled() && (success() || failure()) }}
Expand All @@ -215,28 +221,28 @@ jobs:
- multi-card
- single-card
- albert-xxl-single-card
- text-generation # run the job when the previous test jobs are done
- text-generation # run the job when the previous test jobs are done
runs-on:
group: aws-dl1-24xlarge
steps:
- name: Checkout
uses: actions/checkout@v2
- name: Pull image
run: |
docker pull vault.habana.ai/gaudi-docker/1.19.0/ubuntu22.04/habanalabs/pytorch-installer-2.5.1:latest
docker pull vault.habana.ai/gaudi-docker/1.19.0/ubuntu22.04/habanalabs/pytorch-installer-2.5.1:latest
- name: Run tests
run: |
docker run \
-v $PWD:/root/workspace \
--workdir=/root/workspace \
--runtime=habana \
-e HABANA_VISIBLE_DEVICES=all \
-e OMPI_MCA_btl_vader_single_copy_mechanism=none \
--cap-add=sys_nice \
--net=host \
--ipc=host \
vault.habana.ai/gaudi-docker/1.19.0/ubuntu22.04/habanalabs/pytorch-installer-2.5.1:latest \
/bin/bash tests/ci/slow_tests_trl.sh
docker run \
-v $PWD:/root/workspace \
--workdir=/root/workspace \
--runtime=habana \
-e HABANA_VISIBLE_DEVICES=all \
-e OMPI_MCA_btl_vader_single_copy_mechanism=none \
--cap-add=sys_nice \
--net=host \
--ipc=host \
vault.habana.ai/gaudi-docker/1.19.0/ubuntu22.04/habanalabs/pytorch-installer-2.5.1:latest \
/bin/bash tests/ci/slow_tests_trl.sh
sentence-transformers:
name: Test Sentence Transformers integration
if: ${{ !cancelled() && (success() || failure()) }}
Expand All @@ -247,33 +253,33 @@ jobs:
- single-card
- albert-xxl-single-card
- text-generation
- trl # run the job when the previous test jobs are done
- trl # run the job when the previous test jobs are done
runs-on:
group: aws-dl1-24xlarge
steps:
- name: Checkout Optimum Habana
uses: actions/checkout@v2
with:
repository: 'huggingface/optimum-habana'
repository: "huggingface/optimum-habana"
path: optimum-habana
- name: Checkout Sentence Transformers
uses: actions/checkout@v2
with:
repository: 'UKPLab/sentence-transformers'
repository: "UKPLab/sentence-transformers"
path: sentence-transformers
- name: Pull image
run: |
docker pull vault.habana.ai/gaudi-docker/1.19.0/ubuntu22.04/habanalabs/pytorch-installer-2.5.1:latest
docker pull vault.habana.ai/gaudi-docker/1.19.0/ubuntu22.04/habanalabs/pytorch-installer-2.5.1:latest
- name: Run tests
run: |
docker run \
-v $PWD:/root/workspace \
--workdir=/root/workspace \
--runtime=habana \
-e HABANA_VISIBLE_DEVICES=all \
-e OMPI_MCA_btl_vader_single_copy_mechanism=none \
--cap-add=sys_nice \
--net=host \
--ipc=host \
vault.habana.ai/gaudi-docker/1.19.0/ubuntu22.04/habanalabs/pytorch-installer-2.5.1:latest \
/bin/bash optimum-habana/tests/ci/sentence_transformers.sh
docker run \
-v $PWD:/root/workspace \
--workdir=/root/workspace \
--runtime=habana \
-e HABANA_VISIBLE_DEVICES=all \
-e OMPI_MCA_btl_vader_single_copy_mechanism=none \
--cap-add=sys_nice \
--net=host \
--ipc=host \
vault.habana.ai/gaudi-docker/1.19.0/ubuntu22.04/habanalabs/pytorch-installer-2.5.1:latest \
/bin/bash optimum-habana/tests/ci/sentence_transformers.sh
Loading

0 comments on commit e1b32f8

Please sign in to comment.