From 1a841908c54d659b5d5a31c9e0d42cb7a2acb4a9 Mon Sep 17 00:00:00 2001 From: Algis Dumbris Date: Mon, 22 Jun 2026 09:52:19 +0300 Subject: [PATCH] fix(scanner): arm64 ramparts + trivy vuln-DB pre-cache (MCP-2150) ramparts (GLIBC_2.39): - Split CI matrix into per-arch native builds: ubuntu-latest for amd64, ubuntu-24.04-arm for arm64. Eliminates QEMU overhead that caused arm64 cargo-install to time out (MCP-2395), which left a stale arm64 manifest in GHCR linked against glibc 2.39 (trixie-era rust:1-slim). - Add merge-ramparts job: assembles the multi-arch :latest manifest from :latest-amd64 + :latest-arm64 using docker buildx imagetools create. - Dockerfile already has the bookworm pin (PR #665); no Dockerfile change needed. trivy-mcp (vuln-DB download timeout): - Add docker/scanners/trivy/Dockerfile: wraps ghcr.io/aquasecurity/trivy:latest and pre-caches the ~96 MiB vuln DB at build time via trivy image --download-db-only. TRIVY_CACHE_DIR=/trivy-cache baked in so runtime reads the same path without --cache-dir flags. - Update registry_bundled.go: DockerImage -> ghcr.io/smart-mcp-proxy/scanner-trivy, Timeout 300s -> 120s (no download needed for fs scans), update NetworkReq comment. - Add trivy to scanner-images.yml matrix (linux/amd64,linux/arm64). - Add weekly cron schedule (Monday 03:00 UTC) so the pre-cached DB stays current. Co-Authored-By: Paperclip --- .github/workflows/scanner-images.yml | 123 ++++++++++++++---- docker/scanners/trivy/Dockerfile | 30 +++++ internal/security/scanner/registry_bundled.go | 9 +- 3 files changed, 133 insertions(+), 29 deletions(-) create mode 100644 docker/scanners/trivy/Dockerfile diff --git a/.github/workflows/scanner-images.yml b/.github/workflows/scanner-images.yml index 72f7ecd4..fddc36e8 100644 --- a/.github/workflows/scanner-images.yml +++ b/.github/workflows/scanner-images.yml @@ -1,11 +1,13 @@ name: scanner-images # Build and publish the custom security scanner Docker images to -# ghcr.io/smart-mcp-proxy/scanner-*. 
Vendor images (trivy, semgrep, the -# AI scanner which lives in a separate repo) are NOT built here. +# ghcr.io/smart-mcp-proxy/scanner-*. Vendor images (semgrep, the AI scanner +# which lives in a separate repo) are NOT built here. trivy IS wrapped in our +# own image (docker/scanners/trivy) to pre-cache the vuln DB (MCP-2150). # # Triggers: # - push to main that touches docker/scanners/** or this workflow file +# - weekly schedule: rebuilds trivy to pick up a fresh vulnerability DB # - manual dispatch (for ad-hoc rebuilds) # - pull requests: builds without pushing (to catch Dockerfile drift) @@ -19,6 +21,9 @@ on: paths: - 'docker/scanners/**' - '.github/workflows/scanner-images.yml' + schedule: + # Weekly Monday 03:00 UTC — keeps the trivy vuln DB reasonably current. + - cron: '0 3 * * 1' workflow_dispatch: inputs: scanner: @@ -33,40 +38,62 @@ permissions: jobs: build: - runs-on: ubuntu-latest + # Most scanners use ubuntu-latest (amd64) with QEMU for multi-arch builds. + # ramparts uses per-arch native runners to avoid QEMU timeout on the Rust + # `cargo install` step: `matrix.runner` selects the runner per entry. + runs-on: ${{ matrix.runner }} strategy: fail-fast: false matrix: - # `platforms` defaults to multi-arch. ramparts overrides to linux/amd64 - # only: it is the lone Rust-from-source wrapper, and `cargo install - # ramparts` for an emulated linux/arm64 leg runs under QEMU (~5-10x - # slower). A cold build of both arches blows past the runner budget and - # the job gets cancelled mid-compile (MCP-2395). amd64-only finishes in - # time; the binary is consumed on the host arch by Docker isolation. + # `arch` is set only for ramparts per-arch entries. Entries without + # `arch` push a combined multi-arch :latest directly; per-arch entries + # push :${short_sha}-${arch} / :latest-${arch} tags and the merge-ramparts job + # assembles the final multi-arch :latest manifest. 
include: - id: snyk + runner: ubuntu-latest image: ghcr.io/smart-mcp-proxy/scanner-snyk context: docker/scanners/snyk platforms: linux/amd64,linux/arm64 - id: cisco + runner: ubuntu-latest image: ghcr.io/smart-mcp-proxy/scanner-cisco context: docker/scanners/cisco platforms: linux/amd64,linux/arm64 - - id: ramparts - image: ghcr.io/smart-mcp-proxy/scanner-ramparts - context: docker/scanners/ramparts - platforms: linux/amd64 + - id: trivy + runner: ubuntu-latest + image: ghcr.io/smart-mcp-proxy/scanner-trivy + context: docker/scanners/trivy + platforms: linux/amd64,linux/arm64 - id: proximity + runner: ubuntu-latest image: ghcr.io/smart-mcp-proxy/scanner-proximity context: docker/scanners/proximity platforms: linux/amd64,linux/arm64 + # ramparts: native per-arch builds (MCP-2150). + # Using QEMU to emulate arm64 on an amd64 runner blows past the job + # budget (~5-10x slower for `cargo install`; MCP-2395). Instead each + # arch builds on its native runner and the merge-ramparts job combines + # them into a multi-arch :latest manifest. 
+ - id: ramparts + arch: amd64 + runner: ubuntu-latest + image: ghcr.io/smart-mcp-proxy/scanner-ramparts + context: docker/scanners/ramparts + platforms: linux/amd64 + - id: ramparts + arch: arm64 + runner: ubuntu-24.04-arm + image: ghcr.io/smart-mcp-proxy/scanner-ramparts + context: docker/scanners/ramparts + platforms: linux/arm64 steps: - name: Checkout uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 - name: Skip if not selected if: ${{ github.event_name == 'workflow_dispatch' && inputs.scanner != '' && inputs.scanner != matrix.id }} - run: echo "Skipping ${{ matrix.id }} (workflow_dispatch selected ${{ inputs.scanner }})" && exit 0 + run: echo "Skipping ${{ matrix.id }}${{ matrix.arch && format('-{0}', matrix.arch) || '' }} (workflow_dispatch selected ${{ inputs.scanner }})" && exit 0 - name: Set up QEMU uses: docker/setup-qemu-action@ce360397dd3f832beb865e1373c09c0e9f86d70a # v4.0.0 @@ -88,10 +115,22 @@ jobs: set -euo pipefail short_sha="${GITHUB_SHA:0:7}" echo "short_sha=$short_sha" >> "$GITHUB_OUTPUT" - if [ "${GITHUB_REF}" = "refs/heads/main" ]; then - echo "tags=${{ matrix.image }}:latest,${{ matrix.image }}:$short_sha" >> "$GITHUB_OUTPUT" + arch="${{ matrix.arch }}" + if [ -n "$arch" ]; then + # Per-arch build (ramparts): push arch-suffixed tags only. + # The merge-ramparts job assembles :latest and :${short_sha} from these. + if [ "${GITHUB_REF}" = "refs/heads/main" ]; then + echo "tags=${{ matrix.image }}:latest-${arch},${{ matrix.image }}:${short_sha}-${arch}" >> "$GITHUB_OUTPUT" + else + echo "tags=${{ matrix.image }}:pr-${{ github.event.pull_request.number || 'dev' }}-${arch}" >> "$GITHUB_OUTPUT" + fi else - echo "tags=${{ matrix.image }}:pr-${{ github.event.pull_request.number || 'dev' }}" >> "$GITHUB_OUTPUT" + # Multi-arch build: push :latest and :${short_sha} directly. 
+ if [ "${GITHUB_REF}" = "refs/heads/main" ]; then + echo "tags=${{ matrix.image }}:latest,${{ matrix.image }}:$short_sha" >> "$GITHUB_OUTPUT" + else + echo "tags=${{ matrix.image }}:pr-${{ github.event.pull_request.number || 'dev' }}" >> "$GITHUB_OUTPUT" + fi fi - name: Build and push @@ -101,8 +140,8 @@ jobs: platforms: ${{ matrix.platforms }} push: ${{ github.event_name != 'pull_request' }} tags: ${{ steps.tags.outputs.tags }} - cache-from: type=gha,scope=scanner-${{ matrix.id }} - cache-to: type=gha,scope=scanner-${{ matrix.id }},mode=max + cache-from: type=gha,scope=scanner-${{ matrix.id }}${{ matrix.arch && format('-{0}', matrix.arch) || '' }} + cache-to: type=gha,scope=scanner-${{ matrix.id }}${{ matrix.arch && format('-{0}', matrix.arch) || '' }},mode=max labels: | org.opencontainers.image.source=https://github.com/smart-mcp-proxy/mcpproxy-go org.opencontainers.image.revision=${{ github.sha }} @@ -111,12 +150,12 @@ jobs: # multi-platform builds (linux/amd64,linux/arm64) cannot be loaded to the runner with # `load: true`, so there is no local image to scan on PRs. On push the image is # already in GHCR and Trivy pulls it directly. - # exit-code: '0' → report-only, never fails the build. Scanner base images routinely - # carry unfixable CVEs (upstream OS packages); blocking builds on those would create - # constant noise with no actionable remediation. Visibility via the Security tab and - # the workflow log is the goal. + # Skipped for per-arch ramparts builds (matrix.arch set): those entries push + # arch-suffixed tags that get merged by merge-ramparts; the merged :latest is + # what should be scanned, not the intermediate arch-specific tags. + # exit-code: '0' → report-only, never fails the build. 
+ - name: Scan image with Trivy - if: github.event_name != 'pull_request' + if: github.event_name != 'pull_request' && !matrix.arch uses: aquasecurity/trivy-action@ed142fd0673e97e23eac54620cfb913e5ce36c25 # v0.36.0 with: image-ref: ${{ matrix.image }}:${{ steps.tags.outputs.short_sha }} @@ -127,7 +166,7 @@ jobs: format: table - name: Upload Trivy SARIF to Security tab - if: github.event_name != 'pull_request' + if: github.event_name != 'pull_request' && !matrix.arch uses: aquasecurity/trivy-action@ed142fd0673e97e23eac54620cfb913e5ce36c25 # v0.36.0 with: image-ref: ${{ matrix.image }}:${{ steps.tags.outputs.short_sha }} @@ -139,8 +178,40 @@ jobs: output: trivy-results-${{ matrix.id }}.sarif - name: Upload SARIF - if: github.event_name != 'pull_request' + if: github.event_name != 'pull_request' && !matrix.arch uses: github/codeql-action/upload-sarif@8aad20d150bbac5944a9f9d289da16a4b0d87c1e # v4.36.2 with: sarif_file: trivy-results-${{ matrix.id }}.sarif category: trivy-${{ matrix.id }} + + # Assemble the multi-arch ramparts manifest from the two per-arch images + # pushed by the build job. Runs on push to main, the weekly schedule, and + # workflow_dispatch (not pull_request: per-arch images aren't pushed on PRs). 
+ merge-ramparts: + needs: build + if: github.event_name != 'pull_request' + runs-on: ubuntu-latest + steps: + - name: Log in to GHCR + uses: docker/login-action@650006c6eb7dba73a995cc03b0b2d7f5ca915bee # v4.2.0 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Set up Buildx + uses: docker/setup-buildx-action@d7f5e7f509e45cec5c76c4d5afdd7de93d0b3df5 # v4.1.0 + + - name: Create multi-arch :latest manifest + run: | + docker buildx imagetools create \ + -t ghcr.io/smart-mcp-proxy/scanner-ramparts:latest \ + ghcr.io/smart-mcp-proxy/scanner-ramparts:latest-amd64 \ + ghcr.io/smart-mcp-proxy/scanner-ramparts:latest-arm64 + + - name: Create multi-arch SHA-tagged manifest + run: | + docker buildx imagetools create \ + -t ghcr.io/smart-mcp-proxy/scanner-ramparts:${GITHUB_SHA:0:7} \ + ghcr.io/smart-mcp-proxy/scanner-ramparts:${GITHUB_SHA:0:7}-amd64 \ + ghcr.io/smart-mcp-proxy/scanner-ramparts:${GITHUB_SHA:0:7}-arm64 diff --git a/docker/scanners/trivy/Dockerfile b/docker/scanners/trivy/Dockerfile new file mode 100644 index 00000000..ceb08097 --- /dev/null +++ b/docker/scanners/trivy/Dockerfile @@ -0,0 +1,30 @@ +# Trivy MCP Scanner — wraps the upstream Aqua image with a pre-cached +# vulnerability database so scans don't race the download timeout on first run. +# +# Upstream: https://github.com/aquasecurity/trivy +# Base: ghcr.io/aquasecurity/trivy:latest +# +# Published as: ghcr.io/smart-mcp-proxy/scanner-trivy:latest +# Rebuilt weekly via scanner-images.yml schedule to keep the DB current. +# +# Why a custom wrapper (MCP-2150): +# The upstream image downloads its ~96 MiB vuln DB on first scan. On slow +# connections this races the per-scanner timeout and causes exit 1 with no +# output. Baking the DB into the image eliminates the download entirely for +# filesystem scans (trivy fs) and avoids the race for image scans too. 
+# +# Runtime notes: +# TRIVY_CACHE_DIR is set here so the trivy binary picks up the baked-in DB +# at the same path without needing --cache-dir flags in the scan command. +# NetworkReq is still required for `trivy image` scanning (pulling remote +# container images from registries) but NOT for `trivy fs` (vuln DB is local). +FROM ghcr.io/aquasecurity/trivy:latest +LABEL org.opencontainers.image.source="https://github.com/smart-mcp-proxy/mcpproxy-go" +LABEL org.opencontainers.image.description="Trivy vulnerability scanner with pre-cached vuln DB for MCPProxy" +LABEL org.opencontainers.image.licenses="Apache-2.0" + +# Pre-cache the vulnerability database at image build time. +# --download-db-only: fetch the DB and exit, no scan performed (trivy 0.34+). +# --no-progress: suppress the progress bar so build logs are clean. +ENV TRIVY_CACHE_DIR=/trivy-cache +RUN trivy --cache-dir /trivy-cache image --download-db-only --no-progress diff --git a/internal/security/scanner/registry_bundled.go b/internal/security/scanner/registry_bundled.go index 52da5900..1bf7505c 100644 --- a/internal/security/scanner/registry_bundled.go +++ b/internal/security/scanner/registry_bundled.go @@ -152,7 +152,10 @@ var bundledScanners = []*ScannerPlugin{ Description: "Comprehensive vulnerability scanner for filesystem, dependencies, and container images. Detects known CVEs and misconfigurations.", License: "Apache-2.0", Homepage: "https://trivy.dev", - DockerImage: "ghcr.io/aquasecurity/trivy:latest", + // Our wrapper pre-caches the vuln DB at image build time (MCP-2150), + // eliminating the ~96 MiB first-run download that raced the timeout. + // Rebuilt weekly via scanner-images.yml to keep the DB current. 
+ DockerImage: "ghcr.io/smart-mcp-proxy/scanner-trivy:latest", Inputs: []string{"source", "container_image"}, Outputs: []string{"sarif"}, RequiredEnv: nil, @@ -163,7 +166,7 @@ var bundledScanners = []*ScannerPlugin{ // daemon/containerd/podman and falls back to pulling from the remote // registry (network is enabled), so no docker socket mount is required. ImageCommand: []string{"image", "--format", "sarif", "{{IMAGE}}"}, - Timeout: "300s", // First run downloads vuln DB (~90MB) - NetworkReq: true, // Needs to download vulnerability database + Timeout: "120s", // DB is pre-cached; network only needed for image pulls + NetworkReq: true, // Required for `trivy image` pulling remote container images }, }