Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
123 changes: 97 additions & 26 deletions .github/workflows/scanner-images.yml
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
name: scanner-images

# Build and publish the custom security scanner Docker images to
# ghcr.io/smart-mcp-proxy/scanner-*. Vendor images (trivy, semgrep, the
# AI scanner which lives in a separate repo) are NOT built here.
# ghcr.io/smart-mcp-proxy/scanner-*. Vendor images (semgrep, the AI scanner
# which lives in a separate repo) are NOT built here. trivy IS wrapped in our
# own image (docker/scanners/trivy) to pre-cache the vuln DB (MCP-2150).
#
# Triggers:
# - push to main that touches docker/scanners/** or this workflow file
# - weekly schedule: rebuilds trivy to pick up a fresh vulnerability DB
# - manual dispatch (for ad-hoc rebuilds)
# - pull requests: builds without pushing (to catch Dockerfile drift)

Expand All @@ -19,6 +21,9 @@ on:
paths:
- 'docker/scanners/**'
- '.github/workflows/scanner-images.yml'
schedule:
# Weekly Monday 03:00 UTC — keeps the trivy vuln DB reasonably current.
- cron: '0 3 * * 1'
workflow_dispatch:
inputs:
scanner:
Expand All @@ -33,40 +38,62 @@ permissions:

jobs:
build:
runs-on: ubuntu-latest
# Most scanners use ubuntu-latest (amd64) with QEMU for multi-arch builds.
# ramparts uses per-arch native runners to avoid QEMU timeout on the Rust
# `cargo install` step: `matrix.runner` selects the runner per entry.
runs-on: ${{ matrix.runner }}
strategy:
fail-fast: false
matrix:
# `platforms` defaults to multi-arch. ramparts overrides to linux/amd64
# only: it is the lone Rust-from-source wrapper, and `cargo install
# ramparts` for an emulated linux/arm64 leg runs under QEMU (~5-10x
# slower). A cold build of both arches blows past the runner budget and
# the job gets cancelled mid-compile (MCP-2395). amd64-only finishes in
# time; the binary is consumed on the host arch by Docker isolation.
# `arch` is set only for ramparts per-arch entries. Entries without
# `arch` push a combined multi-arch :latest directly; per-arch entries
# push :<sha>-<arch> / :latest-<arch> tags and the merge-ramparts job
# assembles the final multi-arch :latest manifest.
include:
- id: snyk
runner: ubuntu-latest
image: ghcr.io/smart-mcp-proxy/scanner-snyk
context: docker/scanners/snyk
platforms: linux/amd64,linux/arm64
- id: cisco
runner: ubuntu-latest
image: ghcr.io/smart-mcp-proxy/scanner-cisco
context: docker/scanners/cisco
platforms: linux/amd64,linux/arm64
- id: ramparts
image: ghcr.io/smart-mcp-proxy/scanner-ramparts
context: docker/scanners/ramparts
platforms: linux/amd64
- id: trivy
runner: ubuntu-latest
image: ghcr.io/smart-mcp-proxy/scanner-trivy
context: docker/scanners/trivy
platforms: linux/amd64,linux/arm64
- id: proximity
runner: ubuntu-latest
image: ghcr.io/smart-mcp-proxy/scanner-proximity
context: docker/scanners/proximity
platforms: linux/amd64,linux/arm64
# ramparts: native per-arch builds (MCP-2150).
# Using QEMU to emulate arm64 on an amd64 runner blows past the job
# budget (~5-10x slower for `cargo install`; MCP-2395). Instead each
# arch builds on its native runner and the merge-ramparts job combines
# them into a multi-arch :latest manifest.
- id: ramparts
arch: amd64
runner: ubuntu-latest
image: ghcr.io/smart-mcp-proxy/scanner-ramparts
context: docker/scanners/ramparts
platforms: linux/amd64
- id: ramparts
arch: arm64
runner: ubuntu-24.04-arm
image: ghcr.io/smart-mcp-proxy/scanner-ramparts
context: docker/scanners/ramparts
platforms: linux/arm64
steps:
- name: Checkout
uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3

- name: Skip if not selected
if: ${{ github.event_name == 'workflow_dispatch' && inputs.scanner != '' && inputs.scanner != matrix.id }}
run: echo "Skipping ${{ matrix.id }} (workflow_dispatch selected ${{ inputs.scanner }})" && exit 0
run: echo "Skipping ${{ matrix.id }}${{ matrix.arch && format('-{0}', matrix.arch) || '' }} (workflow_dispatch selected ${{ inputs.scanner }})" && exit 0

- name: Set up QEMU
uses: docker/setup-qemu-action@ce360397dd3f832beb865e1373c09c0e9f86d70a # v4.0.0
Expand All @@ -88,10 +115,22 @@ jobs:
set -euo pipefail
short_sha="${GITHUB_SHA:0:7}"
echo "short_sha=$short_sha" >> "$GITHUB_OUTPUT"
if [ "${GITHUB_REF}" = "refs/heads/main" ]; then
echo "tags=${{ matrix.image }}:latest,${{ matrix.image }}:$short_sha" >> "$GITHUB_OUTPUT"
arch="${{ matrix.arch }}"
if [ -n "$arch" ]; then
# Per-arch build (ramparts): push arch-suffixed tags only.
# The merge-ramparts job assembles :latest and :<sha> from these.
if [ "${GITHUB_REF}" = "refs/heads/main" ]; then
echo "tags=${{ matrix.image }}:latest-${arch},${{ matrix.image }}:${short_sha}-${arch}" >> "$GITHUB_OUTPUT"
else
echo "tags=${{ matrix.image }}:pr-${{ github.event.pull_request.number || 'dev' }}-${arch}" >> "$GITHUB_OUTPUT"
fi
else
echo "tags=${{ matrix.image }}:pr-${{ github.event.pull_request.number || 'dev' }}" >> "$GITHUB_OUTPUT"
# Multi-arch build: push :latest and :<sha> directly.
if [ "${GITHUB_REF}" = "refs/heads/main" ]; then
echo "tags=${{ matrix.image }}:latest,${{ matrix.image }}:$short_sha" >> "$GITHUB_OUTPUT"
else
echo "tags=${{ matrix.image }}:pr-${{ github.event.pull_request.number || 'dev' }}" >> "$GITHUB_OUTPUT"
fi
fi

- name: Build and push
Expand All @@ -101,8 +140,8 @@ jobs:
platforms: ${{ matrix.platforms }}
push: ${{ github.event_name != 'pull_request' }}
tags: ${{ steps.tags.outputs.tags }}
cache-from: type=gha,scope=scanner-${{ matrix.id }}
cache-to: type=gha,scope=scanner-${{ matrix.id }},mode=max
cache-from: type=gha,scope=scanner-${{ matrix.id }}${{ matrix.arch && format('-{0}', matrix.arch) || '' }}
cache-to: type=gha,scope=scanner-${{ matrix.id }}${{ matrix.arch && format('-{0}', matrix.arch) || '' }},mode=max
labels: |
org.opencontainers.image.source=https://github.com/smart-mcp-proxy/mcpproxy-go
org.opencontainers.image.revision=${{ github.sha }}
Expand All @@ -111,12 +150,12 @@ jobs:
# multi-platform builds (linux/amd64,linux/arm64) cannot be loaded to the runner with
# `load: true`, so there is no local image to scan on PRs. On push the image is
# already in GHCR and Trivy pulls it directly.
# exit-code: '0' → report-only, never fails the build. Scanner base images routinely
# carry unfixable CVEs (upstream OS packages); blocking builds on those would create
# constant noise with no actionable remediation. Visibility via the Security tab and
# the workflow log is the goal.
# Skipped for per-arch ramparts builds (matrix.arch set): those entries push
# arch-suffixed tags that get merged by merge-ramparts; the merged :latest is
# what should be scanned, not the intermediate arch-specific tags.
# exit-code: '0' → report-only, never fails the build.
- name: Scan image with Trivy
if: github.event_name != 'pull_request'
if: github.event_name != 'pull_request' && !matrix.arch
uses: aquasecurity/trivy-action@ed142fd0673e97e23eac54620cfb913e5ce36c25 # v0.36.0
with:
image-ref: ${{ matrix.image }}:${{ steps.tags.outputs.short_sha }}
Expand All @@ -127,7 +166,7 @@ jobs:
format: table

- name: Upload Trivy SARIF to Security tab
if: github.event_name != 'pull_request'
if: github.event_name != 'pull_request' && !matrix.arch
uses: aquasecurity/trivy-action@ed142fd0673e97e23eac54620cfb913e5ce36c25 # v0.36.0
with:
image-ref: ${{ matrix.image }}:${{ steps.tags.outputs.short_sha }}
Expand All @@ -139,8 +178,40 @@ jobs:
output: trivy-results-${{ matrix.id }}.sarif

- name: Upload SARIF
if: github.event_name != 'pull_request'
if: github.event_name != 'pull_request' && !matrix.arch
uses: github/codeql-action/upload-sarif@8aad20d150bbac5944a9f9d289da16a4b0d87c1e # v4.36.2
with:
sarif_file: trivy-results-${{ matrix.id }}.sarif
category: trivy-${{ matrix.id }}

# Assemble the multi-arch ramparts manifest from the two per-arch images
# pushed by the build job. Runs only on push to main and workflow_dispatch
# (not pull_request, since per-arch images aren't pushed on PRs).
merge-ramparts:
needs: build
if: github.event_name != 'pull_request'
runs-on: ubuntu-latest
steps:
- name: Log in to GHCR
uses: docker/login-action@650006c6eb7dba73a995cc03b0b2d7f5ca915bee # v4.2.0
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}

- name: Set up Buildx
uses: docker/setup-buildx-action@d7f5e7f509e45cec5c76c4d5afdd7de93d0b3df5 # v4.1.0

- name: Create multi-arch :latest manifest
run: |
docker buildx imagetools create \
-t ghcr.io/smart-mcp-proxy/scanner-ramparts:latest \
ghcr.io/smart-mcp-proxy/scanner-ramparts:latest-amd64 \
ghcr.io/smart-mcp-proxy/scanner-ramparts:latest-arm64

- name: Create multi-arch SHA-tagged manifest
run: |
docker buildx imagetools create \
-t ghcr.io/smart-mcp-proxy/scanner-ramparts:${GITHUB_SHA:0:7} \
ghcr.io/smart-mcp-proxy/scanner-ramparts:${GITHUB_SHA:0:7}-amd64 \
ghcr.io/smart-mcp-proxy/scanner-ramparts:${GITHUB_SHA:0:7}-arm64
30 changes: 30 additions & 0 deletions docker/scanners/trivy/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Trivy MCP Scanner — wraps the upstream Aqua image with a pre-cached
# vulnerability database so scans don't race the download timeout on first run.
#
# Upstream: https://github.com/aquasecurity/trivy
# Base: ghcr.io/aquasecurity/trivy:latest
#
# Published as: ghcr.io/smart-mcp-proxy/scanner-trivy:latest
# Rebuilt weekly via scanner-images.yml schedule to keep the DB current.
#
# Why a custom wrapper (MCP-2150):
# The upstream image downloads its ~96 MiB vuln DB on first scan. On slow
# connections this races the per-scanner timeout and causes exit 1 with no
# output. Baking the DB into the image eliminates the download entirely for
# filesystem scans (trivy fs) and avoids the race for image scans too.
#
# Runtime notes:
# TRIVY_CACHE_DIR is set here so the trivy binary picks up the baked-in DB
# at the same path without needing --cache-dir flags in the scan command.
# NetworkReq is still required for `trivy image` scanning (pulling remote
# container images from registries) but NOT for `trivy fs` (vuln DB is local).
FROM ghcr.io/aquasecurity/trivy:latest
LABEL org.opencontainers.image.source="https://github.com/smart-mcp-proxy/mcpproxy-go"
LABEL org.opencontainers.image.description="Trivy vulnerability scanner with pre-cached vuln DB for MCPProxy"
LABEL org.opencontainers.image.licenses="Apache-2.0"

# Pre-cache the vulnerability database at image build time.
# --download-db-only: fetch the DB and exit, no scan performed (trivy 0.34+).
# --no-progress: suppress the progress bar so build logs are clean.
ENV TRIVY_CACHE_DIR=/trivy-cache
RUN trivy --cache-dir /trivy-cache image --download-db-only --no-progress
9 changes: 6 additions & 3 deletions internal/security/scanner/registry_bundled.go
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,10 @@ var bundledScanners = []*ScannerPlugin{
Description: "Comprehensive vulnerability scanner for filesystem, dependencies, and container images. Detects known CVEs and misconfigurations.",
License: "Apache-2.0",
Homepage: "https://trivy.dev",
DockerImage: "ghcr.io/aquasecurity/trivy:latest",
// Our wrapper pre-caches the vuln DB at image build time (MCP-2150),
// eliminating the ~96 MiB first-run download that raced the timeout.
// Rebuilt weekly via scanner-images.yml to keep the DB current.
DockerImage: "ghcr.io/smart-mcp-proxy/scanner-trivy:latest",
Inputs: []string{"source", "container_image"},
Outputs: []string{"sarif"},
RequiredEnv: nil,
Expand All @@ -163,7 +166,7 @@ var bundledScanners = []*ScannerPlugin{
// daemon/containerd/podman and falls back to pulling from the remote
// registry (network is enabled), so no docker socket mount is required.
ImageCommand: []string{"image", "--format", "sarif", "{{IMAGE}}"},
Timeout: "300s", // First run downloads vuln DB (~90MB)
NetworkReq: true, // Needs to download vulnerability database
Timeout: "120s", // DB is pre-cached; network only needed for image pulls
NetworkReq: true, // Required for `trivy image` pulling remote container images
},
}
Loading