Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/features/security-scanner-plugins.md
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,7 @@ Package-runner servers (`npx`, `uvx`) are the **primary** quarantine/scan target

**The source is fetched but never executed.** A scanner must not run the untrusted code it is scanning. The fetch only ever downloads and unpacks archives:

- **Registry name required.** Only a server whose configured spec is a **bare registry name** (PEP 503 for PyPI, npm package name — optionally version-pinned) is fetched. Local-path, `file:`, URL, and VCS (`git+…`, `[email protected]:…`) specs are **rejected** and fall back to tool definitions only. This is mandatory: for a non-registry spec, `pip download` / `uv pip download` still invokes the package's `setup.py` / PEP 517 build backend to resolve metadata **even with `--only-binary=:all:`** — executing untrusted code on the (static) scan path. `--only-binary=:all:` alone protects only bare registry names. Validation covers the **whole spec including the `@`-tail**: a PEP 508 / npm direct reference such as `pkg@./local`, `pkg@/abs/path`, or `pkg@git+https://…` is rejected (the `name@<ref>` tail must be a bare version specifier, not a path/URL/VCS); for PyPI, any `@` is treated as a direct reference and refused.
- **npm (`npx`)** — `npm pack <pkg>@<version> --ignore-scripts` downloads the published tarball without running any lifecycle scripts (`install`/`postinstall`), then it is extracted (`source_method=npm_pack`).
- **PyPI (`uvx`)** — `uv pip download <pkg>==<version> --no-deps --only-binary=:all:` (falling back to `pip download`) fetches **only a prebuilt wheel**, which is unpacked without building or running `setup.py` (`source_method=pip_download`). `--only-binary=:all:` is mandatory: downloading an sdist would invoke the package's PEP 517 build backend (`setup.py egg_info`) to resolve metadata, executing the untrusted code. A package that ships **no wheel** therefore fails the fetch and falls back to tool definitions only — sdists are never built or extracted.

Expand Down
84 changes: 84 additions & 0 deletions internal/security/scanner/package_fetch.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"os"
"os/exec"
"path/filepath"
"regexp"
"strings"

"go.uber.org/zap"
Expand Down Expand Up @@ -104,6 +105,83 @@ func parsePackageSpec(spec string) (name, version string) {
return spec, ""
}

var (
// npmNameRe matches a bare npm registry package name: an optional
// "@scope/" prefix followed by the package name. It deliberately excludes
// any character ('/', ':', etc. beyond the single scope separator) that
// would appear in a path, URL, or VCS spec.
npmNameRe = regexp.MustCompile(`^(@[a-z0-9][a-z0-9._-]*/)?[a-z0-9][a-z0-9._-]*$`)
// pep503NameRe matches a PEP 503 / PEP 508 distribution name (case
// insensitive). No path/URL/VCS characters are permitted.
pep503NameRe = regexp.MustCompile(`^[A-Za-z0-9]([A-Za-z0-9._-]*[A-Za-z0-9])?$`)
// bareVersionRe matches a plain version pin, range, or npm dist-tag. It must
// START with an alphanumeric (so it rejects "./x", "/abs", "~/x", "../x") and
// contains NO '/' or ':' (so it rejects local paths and direct-reference
// URLs). This validates the 'name@<tail>' / 'name==<tail>' tail so a PEP 508 /
// npm direct reference (e.g. "pkg@./local", "pkg@/abs") cannot smuggle a
// path/URL/VCS past the name check and on to pip/npm (MCP-2442 re-review).
bareVersionRe = regexp.MustCompile(`^[A-Za-z0-9][A-Za-z0-9.+_~^*<>=!,-]*$`)
)

// validatePackageSpec rejects any package spec that is not a bare registry name
// (optionally version-pinned). This is a SECURITY gate, not a convenience check:
// for a local-path, file:, URL, or VCS (git+/hg+/ssh) spec, `pip download` /
// `uv pip download` STILL invoke the package's setup.py / PEP 517 build backend
// to resolve metadata — EVEN with --only-binary=:all: — which executes untrusted
// code from the server config on the (meant-to-be-static) scan path (MCP-2442).
// `npm pack` of a non-registry spec is similarly unsafe. Only a bare PEP 503
// (python) / npm registry name is guaranteed to download-without-build. On
// rejection the caller falls back to tool_definitions_only (no fetch, no exec).
func validatePackageSpec(ecosystem, spec string) error {
if spec == "" {
return fmt.Errorf("empty package spec")
}
// Disqualify URLs (any scheme), file:, VCS (git+/hg+/...), ssh, drive
// letters, and Windows paths up front. None of ':' or '\' ever appear in a
// bare registry name or a PEP 508 version pin, so their mere presence means
// the spec is not a registry name.
if strings.ContainsAny(spec, ":\\") {
return fmt.Errorf("package spec %q is not a bare registry name (contains path/URL/VCS markers)", spec)
}
// Disqualify filesystem paths (absolute, relative, home-relative) and any
// path traversal. A scoped npm name legitimately starts with '@', which is
// not matched here.
if strings.HasPrefix(spec, ".") || strings.HasPrefix(spec, "/") || strings.HasPrefix(spec, "~") {
return fmt.Errorf("package spec %q looks like a filesystem path", spec)
}
if strings.Contains(spec, "..") {
return fmt.Errorf("package spec %q contains a path traversal", spec)
}
name, version := parsePackageSpec(spec)
switch ecosystem {
case "npm":
if !npmNameRe.MatchString(name) {
return fmt.Errorf("%q is not a valid npm package name", name)
}
case "python":
// Python version pins use "==" / ">=" etc., never "@". The ONLY use of
// "@" in a PEP 508 spec is a direct reference ("name @ url"), which is
// always a path/URL/VCS — never a registry fetch. Reject any "@" outright.
if strings.Contains(spec, "@") {
return fmt.Errorf("python package spec %q is a PEP 508 direct reference (@), not a registry name", spec)
}
if !pep503NameRe.MatchString(name) {
return fmt.Errorf("%q is not a valid PEP 503 package name", name)
}
default:
return fmt.Errorf("unknown ecosystem %q", ecosystem)
}
// Validate the version / tail too. parsePackageSpec splits "name@tail" on the
// last '@'; without this check a direct-reference tail (e.g. "./local",
// "/abs", "~/x") would pass because only the NAME was validated, and the full
// untrusted spec would still reach pip/npm and execute setup.py (MCP-2442
// re-review). The tail must be a bare version specifier / dist-tag.
if version != "" && !bareVersionRe.MatchString(version) {
return fmt.Errorf("package spec %q has a non-version reference tail %q (path/URL/VCS not allowed)", spec, version)
}
return nil
}

// firstPackageArg returns the package spec from a runner command's args. It
// honors `--from <pkg>` (uvx) and otherwise returns the first non-flag arg.
func firstPackageArg(args []string) string {
Expand Down Expand Up @@ -392,8 +470,14 @@ func (r *SourceResolver) resolveFromPackageFetch(ctx context.Context, info Serve

switch cmdBase {
case "npx", "bunx", "pnpm":
if err := validatePackageSpec("npm", spec); err != nil {
return nil, fmt.Errorf("refusing to fetch untrusted spec for %s: %w", info.Name, err)
}
return r.fetchNpmPackage(ctx, info, spec)
case "uvx", "pipx":
if err := validatePackageSpec("python", spec); err != nil {
return nil, fmt.Errorf("refusing to fetch untrusted spec for %s: %w", info.Name, err)
}
return r.fetchPythonPackage(ctx, info, spec)
}
return nil, fmt.Errorf("package fetch unsupported for command %q", cmdBase)
Expand Down
118 changes: 118 additions & 0 deletions internal/security/scanner/package_fetch_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,13 @@ import (
"archive/zip"
"bytes"
"compress/gzip"
"context"
"os"
"path/filepath"
"strings"
"testing"

"go.uber.org/zap"
)

func TestParsePackageSpec(t *testing.T) {
Expand Down Expand Up @@ -465,3 +468,118 @@ func TestFindDownloadedArchive(t *testing.T) {
t.Errorf("wheel should be preferred: got (%q,%v)", got2, kind2)
}
}

// MCP-2442: a `pip download` / `uv pip download` of a NON-REGISTRY spec
// (local path, git+, URL, file:, VCS, ssh) STILL executes the package's
// setup.py / PEP 517 build backend even with --only-binary=:all: — arbitrary
// code execution from untrusted server config on the (static) scan path. The
// fetch must validate the spec is a bare PEP 503 / npm registry name and REFUSE
// anything else, falling back to tool-definitions-only.
func TestValidatePackageSpec(t *testing.T) {
cases := []struct {
ecosystem string
spec string
wantOK bool
}{
// Registry names (allowed) — must not regress.
{"python", "flights", true},
{"python", "flights==0.2.4", true},
{"python", "google-flights-mcp-server==0.2.4", true},
{"python", "Flask", true},
{"python", "some.dotted.name", true},
{"npm", "google-flights-mcp-server", true},
{"npm", "google-flights-mcp-server@0.2.4", true},
{"npm", "@modelcontextprotocol/server-everything", true},
{"npm", "@modelcontextprotocol/server-everything@1.0.0", true},
// Non-registry specs (must be REJECTED → fall back to tool-defs).
{"python", "./local-pkg", false},
{"python", "../evil", false},
{"python", "/abs/path/pkg", false},
{"python", "~/pkg", false},
{"python", "file:./pkg", false},
{"python", "git+https://github.com/user/repo.git", false},
{"python", "git+ssh://[email protected]/user/repo.git", false},
{"python", "https://example.com/pkg.tar.gz", false},
{"python", "[email protected]:user/repo.git", false},
{"python", "hg+https://example.com/repo", false},
{"python", `C:\Users\evil\pkg`, false},
{"python", "", false},
{"npm", "./local-pkg", false},
{"npm", "/abs/path", false},
{"npm", "git+https://github.com/user/repo.git", false},
{"npm", "file:../evil", false},
{"npm", "https://example.com/pkg.tgz", false},
{"npm", "", false},
// MCP-2442 re-review: PEP 508 / npm direct-reference '@'-tail bypass — the
// NAME parses as a bare registry name but the tail is a path/URL/VCS, which
// reaches pip/npm verbatim and STILL executes setup.py. The WHOLE spec
// (including the tail after '@') must be validated.
{"npm", "pkg@./local", false},
{"npm", "pkg@/abs/path", false},
{"npm", "pkg@~/evil", false},
{"npm", "pkg@git+https://github.com/user/repo.git", false},
{"npm", "pkg@file:./evil", false},
{"npm", "pkg@https://example.com/pkg.tgz", false},
{"npm", "@scope/pkg@./local", false},
{"python", "pkg@./local", false},
{"python", "pkg@/abs/path", false},
{"python", "pkg@git+https://github.com/user/repo.git", false},
{"python", "pkg@~/evil", false},
{"python", "flights @ git+https://example.com/repo.git", false},
// Bare version pins / dist-tags after '@' stay valid (no regression).
{"npm", "pkg@1.2.3", true},
{"npm", "pkg@1.2.3-beta.1", true},
{"npm", "pkg@latest", true},
{"npm", "@scope/pkg@2.0.0", true},
}
for _, c := range cases {
err := validatePackageSpec(c.ecosystem, c.spec)
if c.wantOK && err != nil {
t.Errorf("validatePackageSpec(%q, %q) = %v, want OK", c.ecosystem, c.spec, err)
}
if !c.wantOK && err == nil {
t.Errorf("validatePackageSpec(%q, %q) = nil, want rejection", c.ecosystem, c.spec)
}
}
}

// TestResolveFromPackageFetch_RejectsNonRegistrySpec proves the resolver refuses
// a non-registry uvx spec BEFORE invoking any download command — so the untrusted
// package's setup.py is never executed. We point the spec at a local directory
// containing a setup.py that drops an execution marker; the resolver must return
// an error (caller falls back to tool_definitions_only) and the marker must be
// absent.
func TestResolveFromPackageFetch_RejectsNonRegistrySpec(t *testing.T) {
pkgDir := t.TempDir()
marker := filepath.Join(t.TempDir(), "EXECUTED.marker")
setupPy := "import pathlib\npathlib.Path(" + pyQuote(marker) + ").write_text('pwned')\n"
if err := os.WriteFile(filepath.Join(pkgDir, "setup.py"), []byte(setupPy), 0o644); err != nil {
t.Fatal(err)
}

r := NewSourceResolver(zap.NewNop())
cases := []ServerInfo{
{Name: "evil-path", Protocol: "stdio", Command: "uvx", Args: []string{"--from", pkgDir, "evil"}},
{Name: "evil-git", Protocol: "stdio", Command: "uvx", Args: []string{"--from", "git+https://example.com/repo.git", "evil"}},
{Name: "evil-npm-path", Protocol: "stdio", Command: "npx", Args: []string{"-y", "./local-evil"}},
// MCP-2442 re-review: PEP 508 / npm direct-reference '@'-tail bypass.
{Name: "evil-uvx-directref", Protocol: "stdio", Command: "uvx", Args: []string{"--from", "evilpkg@" + pkgDir, "evil"}},
{Name: "evil-npm-directref", Protocol: "stdio", Command: "npx", Args: []string{"-y", "evilpkg@./local-evil"}},
}
for _, info := range cases {
res, err := r.resolveFromPackageFetch(context.Background(), info)
if err == nil {
if res != nil && res.Cleanup != nil {
res.Cleanup()
}
t.Errorf("%s: expected error (fallback to tool-defs), got nil", info.Name)
}
}
if _, err := os.Stat(marker); err == nil {
t.Fatalf("setup.py was EXECUTED — marker present at %s", marker)
}
}

func pyQuote(s string) string {
return "r'" + s + "'"
}
Loading