From bcf09573a7c2d823a15e08e3b449f113f792e5c9 Mon Sep 17 00:00:00 2001 From: archana-ramalingam Date: Tue, 22 Oct 2024 12:11:34 -0500 Subject: [PATCH 1/4] Update default CI timeout from 6 to 10 hrs --- .github/workflows/ci_eval.yaml | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/.github/workflows/ci_eval.yaml b/.github/workflows/ci_eval.yaml index d3681d95a..9181d5b72 100644 --- a/.github/workflows/ci_eval.yaml +++ b/.github/workflows/ci_eval.yaml @@ -16,6 +16,7 @@ concurrency: jobs: test_perplexity: + timeout-minutes: 600 name: "Evaluation Tests - perplexity" strategy: matrix: @@ -59,9 +60,3 @@ jobs: - name: Run perplexity test run: pytest sharktank/tests/evaluate/perplexity_test.py --longrun - - - name: Update Perplexity baseline numbers - uses: actions/upload-artifact@v4 - with: - name: current_perplexity_scores_json - path: ${{ env.SHARK_PLATFORM_REPO_ROOT }}/sharktank/sharktank/evaluate/ From ed97ab82a31e123edc4fa781fe2c840427ec5d82 Mon Sep 17 00:00:00 2001 From: archana-ramalingam Date: Wed, 23 Oct 2024 12:21:15 -0500 Subject: [PATCH 2/4] Update from 10 hrs to 20 hrs --- .github/workflows/ci_eval.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci_eval.yaml b/.github/workflows/ci_eval.yaml index 9181d5b72..4609bffc6 100644 --- a/.github/workflows/ci_eval.yaml +++ b/.github/workflows/ci_eval.yaml @@ -16,7 +16,7 @@ concurrency: jobs: test_perplexity: - timeout-minutes: 600 + timeout-minutes: 1200 name: "Evaluation Tests - perplexity" strategy: matrix: From 6b5274aad186422ff06c4a523f8e86206a58399d Mon Sep 17 00:00:00 2001 From: archana-ramalingam Date: Wed, 23 Oct 2024 13:18:54 -0500 Subject: [PATCH 3/4] Test CI --- .github/workflows/ci_eval.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci_eval.yaml b/.github/workflows/ci_eval.yaml index 4609bffc6..c14b59b9e 100644 --- a/.github/workflows/ci_eval.yaml +++ b/.github/workflows/ci_eval.yaml @@ -1,6 +1,7 @@ name: Evaluation Tests on: + pull_request: workflow_dispatch: schedule: # Weekdays nightly at 07:00 UTC = 23:00 PST / 00:00 PDT. From 3bac7ebc07168b157d2bbd77d4047a07dcfcbc7c Mon Sep 17 00:00:00 2001 From: archana-ramalingam Date: Thu, 24 Oct 2024 22:11:15 -0500 Subject: [PATCH 4/4] Add flags for CI --- .github/workflows/ci_eval.yaml | 5 ++--- sharktank/sharktank/evaluate/perplexity.py | 1 + 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci_eval.yaml b/.github/workflows/ci_eval.yaml index c14b59b9e..7288ed8ac 100644 --- a/.github/workflows/ci_eval.yaml +++ b/.github/workflows/ci_eval.yaml @@ -1,7 +1,6 @@ name: Evaluation Tests on: - pull_request: workflow_dispatch: schedule: # Weekdays nightly at 07:00 UTC = 23:00 PST / 00:00 PDT. @@ -17,7 +16,7 @@ concurrency: jobs: test_perplexity: - timeout-minutes: 1200 + timeout-minutes: 1000 name: "Evaluation Tests - perplexity" strategy: matrix: @@ -60,4 +59,4 @@ jobs: pip install --no-compile -r requirements.txt -r sharktank/requirements-tests.txt -e sharktank/ - name: Run perplexity test - run: pytest sharktank/tests/evaluate/perplexity_test.py --longrun + run: pytest -n 4 -v -s sharktank/tests/evaluate/perplexity_test.py --longrun diff --git a/sharktank/sharktank/evaluate/perplexity.py b/sharktank/sharktank/evaluate/perplexity.py index 2c76a76ad..aa9d35dcc 100644 --- a/sharktank/sharktank/evaluate/perplexity.py +++ b/sharktank/sharktank/evaluate/perplexity.py @@ -177,6 +177,7 @@ def get_logits(self): start = 0 for i in tqdm( range(start, self.max_prompt_length - 1), + mininterval=300, desc="eval: Calculating logits", ): logger.debug(f"Iteration: {i}")