# Benchmark All Providers (workflow #62)
name: Benchmark All Providers

on:
  push:
    branches: [main]
  workflow_dispatch:
  schedule:
    # Run daily at midnight UTC
    - cron: '0 0 * * *'

env:
  CARGO_TERM_COLOR: always

jobs:
  benchmark:
    runs-on: ubuntu-latest
    permissions:
      # Needed so the job can push README updates back to the repo.
      contents: write
    steps:
      - uses: actions/checkout@v4
        with:
          token: ${{ secrets.GITHUB_TOKEN }}

      - name: Install Rust
        uses: dtolnay/rust-toolchain@stable

      - name: Build openenvvm
        run: |
          cargo build --release
          sudo cp target/release/openenvvm /usr/local/bin/

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Install sandbox-bench
        run: |
          pip install git+https://github.com/zkwentz/sandbox-bench.git@feature/openenv-providers
          pip install requests

      - name: Check KVM availability
        id: kvm
        run: |
          if [ -e /dev/kvm ]; then
            echo "available=true" >> "$GITHUB_OUTPUT"
            sudo chmod 666 /dev/kvm
            sudo usermod -aG kvm "$USER"
          else
            echo "available=false" >> "$GITHUB_OUTPUT"
          fi

      - name: Install Firecracker
        if: steps.kvm.outputs.available == 'true'
        run: |
          ARCH=$(uname -m)
          curl -L "https://github.com/firecracker-microvm/firecracker/releases/download/v1.6.0/firecracker-v1.6.0-${ARCH}.tgz" | tar xz
          sudo mv "release-v1.6.0-${ARCH}/firecracker-v1.6.0-${ARCH}" /usr/local/bin/firecracker
          firecracker --version

      - name: Checkout OpenEnv
        uses: actions/checkout@v4
        with:
          repository: meta-pytorch/OpenEnv
          path: OpenEnv

      - name: Build echo_env Docker image
        # NOTE: the heredoc body and its EOF terminator are deliberately NOT
        # indented inside the shell `if` block — a plain `<< 'EOF'` heredoc
        # only terminates on an unindented delimiter line.
        run: |
          cd OpenEnv/envs/echo_env
          if [ ! -f Dockerfile ]; then
          cat > Dockerfile << 'EOF'
          FROM python:3.11-slim
          WORKDIR /app
          COPY . /app/env
          RUN pip install fastapi uvicorn
          RUN if [ -f /app/env/server/requirements.txt ]; then pip install -r /app/env/server/requirements.txt; fi
          EXPOSE 8000
          CMD ["uvicorn", "env.server.app:app", "--host", "0.0.0.0", "--port", "8000"]
          EOF
          fi
          docker build -t echo_env:latest .

      - name: Convert echo_env to MicroVM
        if: steps.kvm.outputs.available == 'true'
        run: |
          openenvvm convert ./OpenEnv/envs/echo_env -o echo_env.microvm
          ls -la echo_env.microvm/

      - name: Run sandbox-bench - All providers
        id: benchmark
        # Benchmark failure should not abort the job; later steps guard on
        # the presence of benchmark-results.json instead.
        continue-on-error: true
        env:
          E2B_API_KEY: ${{ secrets.E2B_API_KEY }}
          DAYTONA_API_KEY: ${{ secrets.DAYTONA_API_KEY }}
          MODAL_TOKEN_ID: ${{ secrets.MODAL_TOKEN_ID }}
          MODAL_TOKEN_SECRET: ${{ secrets.MODAL_TOKEN_SECRET }}
          DOCKER_IMAGE: echo_env:latest
          MICROVM_COMMAND: "openenvvm run ./echo_env.microvm --port {port} --ip {ip}"
        run: |
          # Always benchmark the local Docker provider.
          PROVIDERS="--provider docker-image"
          if [ "${{ steps.kvm.outputs.available }}" == "true" ]; then
            PROVIDERS="$PROVIDERS --provider microvm"
          fi
          # Add cloud providers only when their credentials are configured.
          if [ -n "$E2B_API_KEY" ]; then
            PROVIDERS="$PROVIDERS --provider e2b"
          fi
          if [ -n "$DAYTONA_API_KEY" ]; then
            PROVIDERS="$PROVIDERS --provider daytona"
          fi
          if [ -n "$MODAL_TOKEN_ID" ]; then
            PROVIDERS="$PROVIDERS --provider modal"
          fi
          echo "Running: sandbox-bench run $PROVIDERS --runs 3"
          sandbox-bench run $PROVIDERS --runs 3 --output benchmark-results.json
          if [ -f benchmark-results.json ]; then
            cat benchmark-results.json
          fi

      - name: Update README with results
        id: results
        if: github.ref == 'refs/heads/main' && github.event_name != 'pull_request'
        # The benchmark step uses continue-on-error, so the results file may
        # not exist; skip gracefully rather than fail the job here.
        run: |
          if [ -f benchmark-results.json ]; then
            python3 scripts/update_readme.py benchmark-results.json
          else
            echo "No benchmark results to publish; skipping README update"
          fi

      - name: Commit and push README updates
        if: github.ref == 'refs/heads/main' && github.event_name != 'pull_request'
        run: |
          git config user.name "github-actions[bot]"
          git config user.email "github-actions[bot]@users.noreply.github.com"
          git add README.md
          if git diff --staged --quiet; then
            echo "No changes to commit"
          else
            git commit -m "Update benchmark results in README [skip ci]"
            git push
          fi

      - name: Upload benchmark results
        # Run even when the benchmark step failed, and tolerate a missing
        # results file instead of erroring the upload.
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: benchmark-results
          path: benchmark-results.json
          retention-days: 90
          if-no-files-found: warn

      - name: Generate GitHub Summary
        if: always()
        run: |
          echo "## Benchmark Results" >> "$GITHUB_STEP_SUMMARY"
          echo "" >> "$GITHUB_STEP_SUMMARY"
          if [ -f benchmark-results.json ]; then
            echo '```json' >> "$GITHUB_STEP_SUMMARY"
            cat benchmark-results.json >> "$GITHUB_STEP_SUMMARY"
            echo '```' >> "$GITHUB_STEP_SUMMARY"
          else
            echo "No benchmark results generated" >> "$GITHUB_STEP_SUMMARY"
          fi