-
Notifications
You must be signed in to change notification settings - Fork 17
91 lines (74 loc) · 2.8 KB
/
evaluate-agent.yml
File metadata and controls
91 lines (74 loc) · 2.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
name: Evaluate Trail Guide Agent

# Triggers: only manual runs (workflow_dispatch) are currently enabled.
on:
  # To also evaluate automatically on pull requests that touch the agent,
  # uncomment the block below:
  # pull_request:
  #   branches: [main]
  #   paths:
  #     - 'src/agents/trail_guide_agent/**'
  workflow_dispatch:

# Scopes granted to the workflow's GITHUB_TOKEN.
permissions:
  # Lets the job request an OIDC token; the Azure Login step passes no
  # client secret, so it depends on this.
  id-token: write
  # Read-only repository access for the checkout step.
  contents: read
  # Required by the step that posts evaluation results as a PR comment.
  pull-requests: write
jobs:
  # Single job: install the project, sign in to Azure, run the evaluation
  # script, and (on pull_request events) post the captured output as a comment.
  evaluate:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          # Quoted so YAML keeps the version a string rather than a float.
          python-version: '3.11'
          cache: 'pip'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt

      # Only client/tenant/subscription IDs are supplied (no client secret).
      - name: Azure Login
        uses: azure/login@v2
        with:
          client-id: ${{ secrets.AZURE_CLIENT_ID }}
          tenant-id: ${{ secrets.AZURE_TENANT_ID }}
          subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }}

      - name: Run evaluation
        id: run
        env:
          AZURE_AI_PROJECT_ENDPOINT: ${{ secrets.AZURE_AI_PROJECT_ENDPOINT }}
          # Falls back to 'gpt-4.1' when the MODEL_NAME variable is unset.
          MODEL_NAME: ${{ vars.MODEL_NAME || 'gpt-4.1' }}
          AZURE_CLIENT_ID: ${{ secrets.AZURE_CLIENT_ID }}
          AZURE_TENANT_ID: ${{ secrets.AZURE_TENANT_ID }}
          AZURE_SUBSCRIPTION_ID: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
        run: |
          # Record the exit status instead of discarding it, so a crash that
          # never prints the failure marker still fails the step. The log is
          # always printed first so the cause is visible in the job output.
          status=0
          python src/evaluators/evaluate_agent.py > evaluation_results.txt 2>&1 || status=$?
          cat evaluation_results.txt
          if [ "$status" -ne 0 ]; then
            echo "evaluate_agent.py exited with status $status" >&2
            exit "$status"
          fi
          # Fail the step if the script wrote an error marker
          if grep -q "Evaluation FAILED" evaluation_results.txt; then
            exit 1
          fi

      - name: Comment PR with results
        # `!cancelled()` keeps this step running after a failed evaluation, so
        # failing results are reported on the pull request too.
        if: ${{ !cancelled() && github.event_name == 'pull_request' }}
        uses: actions/github-script@v7
        env:
          # Passed through the environment rather than interpolated into the
          # script text, so the value can never be parsed as JavaScript.
          # NOTE(review): no command in this workflow writes `report_url` to
          # $GITHUB_OUTPUT; unless evaluate_agent.py does so itself this is
          # empty - confirm.
          REPORT_URL: ${{ steps.run.outputs.report_url }}
        with:
          script: |
            const fs = require('fs');

            // The file is absent when an earlier step failed before the
            // evaluation could run; report that instead of throwing.
            const resultsPath = 'evaluation_results.txt';
            const results = fs.existsSync(resultsPath)
              ? fs.readFileSync(resultsPath, 'utf8')
              : 'No evaluation output was produced (the workflow failed before the evaluation ran).';

            // GitHub rejects comment bodies longer than 65536 characters.
            // Keep the tail of the log, where the summary and errors appear.
            const maxChars = 60000;
            const shown = results.length > maxChars
              ? `[output truncated to the last ${maxChars} characters]\n${results.slice(-maxChars)}`
              : results;

            // Only render a markdown link when a report URL actually exists.
            const reportUrl = (process.env.REPORT_URL || '').trim();
            const reportLine = reportUrl
              ? `📊 [View full results in Azure AI Foundry Portal](${reportUrl})`
              : '📊 Full results in Azure AI Foundry Portal: Not available';

            // Built line by line so the markdown does not depend on how this
            // YAML block is indented; the blank entries are required for the
            // <details> block and code fence to render.
            const body = [
              '## 🎯 Agent Evaluation Results',
              '',
              'Automated evaluation completed for Trail Guide Agent changes.',
              '',
              '<details>',
              '<summary>View Detailed Results</summary>',
              '',
              '```',
              shown,
              '```',
              '',
              '</details>',
              '',
              reportLine,
              '',
              '**Evaluation Criteria:**',
              '- Intent Resolution (score ≥ 3)',
              '- Relevance (score ≥ 3)',
              '- Groundedness (score ≥ 3)',
            ].join('\n');

            // Awaited so an API error fails the step instead of surfacing as
            // an unhandled promise rejection.
            await github.rest.issues.createComment({
              issue_number: context.issue.number,
              owner: context.repo.owner,
              repo: context.repo.repo,
              body: body
            });