name: "W&B Evaluate + Comparison Report"
on:
repository_dispatch:
types: "evaluate-model"
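# Fired externally via the GitHub REST API: a POST to /repos/{owner}/{repo}/dispatches
# with "event_type": "evaluate-model". Any JSON sent as "client_payload" is available
# below as github.event.client_payload.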
jobs:
  test:
    name: Evaluate + Report
    runs-on: ubuntu-latest
    env:
      WANDB_API_KEY: ${{ secrets.WANDB_API_KEY }}
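    # WANDB_API_KEY above comes from a repository secret and is visible to every step in this job.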
    steps:
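      # Nanosecond-resolution timestamp written to GITHUB_ENV so later steps can read it as
      # ${{ env.TSTAMP }}; it is passed to both evaluation runs and to generate_report.py,
      # presumably to tie the candidate/production runs to the comparison report.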
      - name: Get current date
        id: date
        run: echo "TSTAMP=$(date +'%Y%m%d%H%M%S%N')" >> $GITHUB_ENV

      - uses: actions/checkout@v4

      - run: 'echo "payload: ${{ toJson(github.event.client_payload) }}"'

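      # requirements.txt is assumed to include wandb (needed by the login and launch steps)
      # plus whatever generate_report.py and inference_evaluation.py import; setup-python's
      # pip cache keeps installs fast across runs.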
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.12.5'
          cache: 'pip'

      - name: Install Dependencies
        run: pip install -r ./requirements.txt

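      # `wandb login` reads WANDB_API_KEY from the job environment, so no interactive prompt is needed.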
      - name: Login to W&B
        run: wandb login

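      # Both steps submit the same stored W&B Launch job to the ReviewCo-Lambda-A10 queue under
      # the reviewco entity; the --config override passes the shared TSTAMP plus a label
      # ("candidate" or "production") as positional args to evaluate_model.py, presumably so the
      # script knows which model to evaluate and how to tag its results.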
      - name: Launch Job - candidate
        run: wandb launch -e reviewco -j "reviewco/eval-llama-3.1-8b-fine-tune/job-source-eval-llama-3.1-8b-fine-tune-evaluate_model.py:latest" --config '{"overrides":{"args":["${{env.TSTAMP}}", "candidate"]}}' -q "ReviewCo-Lambda-A10"

      - name: Launch Job - production
        run: wandb launch -e reviewco -j "reviewco/eval-llama-3.1-8b-fine-tune/job-source-eval-llama-3.1-8b-fine-tune-evaluate_model.py:latest" --config '{"overrides":{"args":["${{env.TSTAMP}}", "production"]}}' -q "ReviewCo-Lambda-A10"

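      # generate_report.py presumably looks up the two runs identified by TSTAMP and assembles
      # the W&B comparison report from their results.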
      - name: Generate Evaluation Report
        run: python generate_report.py ${{env.TSTAMP}}

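      # inference_evaluation.py is invoked without arguments; what it deploys or evaluates is
      # not visible from this workflow.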
      - name: Inference Evaluation Deployment
        run: python inference_evaluation.py