Test workflow #2
Workflow file for this run
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Evaluates the Trail Guide Agent and, on pull requests, posts the evaluator's
# output as a PR comment.
name: Evaluate Trail Guide Agent

on:
  # Run automatically for pull requests against main that touch the agent sources.
  pull_request:
    branches: [main]
    paths:
      - 'src/agents/trail_guide_agent/**'
  # Allow manual runs as well.
  workflow_dispatch:

permissions:
  contents: read
  pull-requests: write # used by the "Comment PR with results" step
  id-token: write # presumably for OIDC federation in azure/login (no client secret is passed)

jobs:
  evaluate:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'
          cache: 'pip'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt

      - name: Azure Login
        uses: azure/login@v2
        with:
          client-id: ${{ secrets.AZURE_CLIENT_ID }}
          tenant-id: ${{ secrets.AZURE_TENANT_ID }}
          subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }}

      - name: Run evaluation
        id: run
        env:
          AZURE_AI_PROJECT_ENDPOINT: ${{ secrets.AZURE_AI_PROJECT_ENDPOINT }}
          MODEL_NAME: ${{ vars.MODEL_NAME || 'gpt-4.1' }}
          AZURE_CLIENT_ID: ${{ secrets.AZURE_CLIENT_ID }}
          AZURE_TENANT_ID: ${{ secrets.AZURE_TENANT_ID }}
          AZURE_SUBSCRIPTION_ID: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
        run: |
          # Capture the exit status instead of discarding it, so the log is
          # always printed but a crashed evaluator cannot pass silently.
          status=0
          python src/evaluators/evaluate_agent.py > evaluation_results.txt 2>&1 || status=$?
          cat evaluation_results.txt

          if [ "$status" -ne 0 ]; then
            echo "::error::evaluate_agent.py exited with status $status"
            exit "$status"
          fi

          # Fail the step if the script wrote an error marker.
          if grep -q "Evaluation FAILED" evaluation_results.txt; then
            echo "::error::Evaluation output contains the 'Evaluation FAILED' marker"
            exit 1
          fi

      - name: Comment PR with results
        # Without a status-check function this step would be skipped whenever the
        # evaluation fails, which is exactly when reviewers need the results.
        # Skip only if the run was cancelled or the evaluation step never ran
        # (in which case evaluation_results.txt does not exist).
        if: ${{ !cancelled() && github.event_name == 'pull_request' && steps.run.outcome != 'skipped' }}
        uses: actions/github-script@v7
        env:
          # Passed through the environment rather than interpolated into the
          # script source, so the values cannot alter the JavaScript.
          # TODO(review): nothing in this workflow sets the report_url output;
          # confirm evaluate_agent.py writes it to $GITHUB_OUTPUT.
          REPORT_URL: ${{ steps.run.outputs.report_url }}
          RUN_OUTCOME: ${{ steps.run.outcome }}
        with:
          script: |
            const fs = require('fs');

            // Issue comment bodies are limited to 65536 characters; leave
            // headroom for the surrounding template.
            const MAX_RESULT_CHARS = 60000;

            let results = fs.readFileSync('evaluation_results.txt', 'utf8');
            if (results.length > MAX_RESULT_CHARS) {
              // Keep the tail of the log; the full text is in the workflow run log.
              results = `… (output truncated; see the workflow run log for the full text)\n${results.slice(-MAX_RESULT_CHARS)}`;
            }

            // Use a fence longer than any backtick run in the output so the
            // output cannot terminate the code block early.
            const longestRun = (results.match(/`+/g) ?? []).reduce(
              (max, run) => Math.max(max, run.length),
              0,
            );
            const fence = '`'.repeat(Math.max(3, longestRun + 1));

            const passed = process.env.RUN_OUTCOME === 'success';
            const reportUrl = process.env.REPORT_URL;
            const reportLine = reportUrl
              ? `📊 [View full results in Azure AI Foundry Portal](${reportUrl})`
              : '📊 Full results in Azure AI Foundry Portal: Not available';

            // Built line by line so YAML/JS indentation never leaks into the
            // Markdown (four leading spaces would render as a code block).
            const body = [
              '## 🎯 Agent Evaluation Results',
              '',
              'Automated evaluation completed for Trail Guide Agent changes.',
              '',
              `**Status:** ${passed ? '✅ Passed' : '❌ Failed'}`,
              '',
              '<details>',
              '<summary>View Detailed Results</summary>',
              '',
              fence,
              results,
              fence,
              '',
              '</details>',
              '',
              reportLine,
              '',
              '**Evaluation Criteria:**',
              '- Intent Resolution (score ≥ 3)',
              '- Relevance (score ≥ 3)',
              '- Groundedness (score ≥ 3)',
              '',
            ].join('\n');

            // Await the request so API errors fail the step instead of
            // becoming an unhandled rejection.
            await github.rest.issues.createComment({
              issue_number: context.issue.number,
              owner: context.repo.owner,
              repo: context.repo.repo,
              body,
            });