diff --git a/docs/features/security-scanner-plugins.md b/docs/features/security-scanner-plugins.md index 7bd130fbc..1876902f8 100644 --- a/docs/features/security-scanner-plugins.md +++ b/docs/features/security-scanner-plugins.md @@ -259,6 +259,7 @@ Package-runner servers (`npx`, `uvx`) are the **primary** quarantine/scan target **The source is fetched but never executed.** A scanner must not run the untrusted code it is scanning. The fetch only ever downloads and unpacks archives: +- **Registry name required.** Only a server whose configured spec is a **bare registry name** (PEP 503 for PyPI, npm package name — optionally version-pinned) is fetched. Local-path, `file:`, URL, and VCS (`git+…`, `[email protected]:…`) specs are **rejected** and fall back to tool definitions only. This is mandatory: for a non-registry spec, `pip download` / `uv pip download` still invokes the package's `setup.py` / PEP 517 build backend to resolve metadata **even with `--only-binary=:all:`** — executing untrusted code on the (static) scan path. `--only-binary=:all:` alone protects only bare registry names. Validation covers the **whole spec including the `@`-tail**: a PEP 508 / npm direct reference such as `pkg@./local`, `pkg@/abs/path`, or `pkg@git+https://…` is rejected (the `name@` tail must be a bare version specifier, not a path/URL/VCS); for PyPI, any `@` is treated as a direct reference and refused. - **npm (`npx`)** — `npm pack @ --ignore-scripts` downloads the published tarball without running any lifecycle scripts (`install`/`postinstall`), then it is extracted (`source_method=npm_pack`). - **PyPI (`uvx`)** — `uv pip download == --no-deps --only-binary=:all:` (falling back to `pip download`) fetches **only a prebuilt wheel**, which is unpacked without building or running `setup.py` (`source_method=pip_download`). `--only-binary=:all:` is mandatory: downloading an sdist would invoke the package's PEP 517 build backend (`setup.py egg_info`) to resolve metadata, executing the untrusted code. A package that ships **no wheel** therefore fails the fetch and falls back to tool definitions only — sdists are never built or extracted. diff --git a/internal/security/scanner/package_fetch.go b/internal/security/scanner/package_fetch.go index 5daad507f..eb8eb5aee 100644 --- a/internal/security/scanner/package_fetch.go +++ b/internal/security/scanner/package_fetch.go @@ -11,6 +11,7 @@ import ( "os" "os/exec" "path/filepath" + "regexp" "strings" "go.uber.org/zap" @@ -104,6 +105,83 @@ func parsePackageSpec(spec string) (name, version string) { return spec, "" } +var ( + // npmNameRe matches a bare npm registry package name: an optional + // "@scope/" prefix followed by the package name. It deliberately excludes + // any character ('/', ':', etc. beyond the single scope separator) that + // would appear in a path, URL, or VCS spec. + npmNameRe = regexp.MustCompile(`^(@[a-z0-9][a-z0-9._-]*/)?[a-z0-9][a-z0-9._-]*$`) + // pep503NameRe matches a PEP 503 / PEP 508 distribution name (case + // insensitive). No path/URL/VCS characters are permitted. + pep503NameRe = regexp.MustCompile(`^[A-Za-z0-9]([A-Za-z0-9._-]*[A-Za-z0-9])?$`) + // bareVersionRe matches a plain version pin, range, or npm dist-tag. It must + // START with an alphanumeric (so it rejects "./x", "/abs", "~/x", "../x") and + // contains NO '/' or ':' (so it rejects local paths and direct-reference + // URLs). This validates the 'name@' / 'name==' tail so a PEP 508 / + // npm direct reference (e.g. "pkg@./local", "pkg@/abs") cannot smuggle a + // path/URL/VCS past the name check and on to pip/npm (MCP-2442 re-review). + bareVersionRe = regexp.MustCompile(`^[A-Za-z0-9][A-Za-z0-9.+_~^*<>=!,-]*$`) +) + +// validatePackageSpec rejects any package spec that is not a bare registry name +// (optionally version-pinned). This is a SECURITY gate, not a convenience check: +// for a local-path, file:, URL, or VCS (git+/hg+/ssh) spec, `pip download` / +// `uv pip download` STILL invoke the package's setup.py / PEP 517 build backend +// to resolve metadata — EVEN with --only-binary=:all: — which executes untrusted +// code from the server config on the (meant-to-be-static) scan path (MCP-2442). +// `npm pack` of a non-registry spec is similarly unsafe. Only a bare PEP 503 +// (python) / npm registry name is guaranteed to download-without-build. On +// rejection the caller falls back to tool_definitions_only (no fetch, no exec). +func validatePackageSpec(ecosystem, spec string) error { + if spec == "" { + return fmt.Errorf("empty package spec") + } + // Disqualify URLs (any scheme), file:, VCS (git+/hg+/...), ssh, drive + // letters, and Windows paths up front. None of ':' or '\' ever appear in a + // bare registry name or a PEP 508 version pin, so their mere presence means + // the spec is not a registry name. + if strings.ContainsAny(spec, ":\\") { + return fmt.Errorf("package spec %q is not a bare registry name (contains path/URL/VCS markers)", spec) + } + // Disqualify filesystem paths (absolute, relative, home-relative) and any + // path traversal. A scoped npm name legitimately starts with '@', which is + // not matched here. + if strings.HasPrefix(spec, ".") || strings.HasPrefix(spec, "/") || strings.HasPrefix(spec, "~") { + return fmt.Errorf("package spec %q looks like a filesystem path", spec) + } + if strings.Contains(spec, "..") { + return fmt.Errorf("package spec %q contains a path traversal", spec) + } + name, version := parsePackageSpec(spec) + switch ecosystem { + case "npm": + if !npmNameRe.MatchString(name) { + return fmt.Errorf("%q is not a valid npm package name", name) + } + case "python": + // Python version pins use "==" / ">=" etc., never "@". The ONLY use of + // "@" in a PEP 508 spec is a direct reference ("name @ url"), which is + // always a path/URL/VCS — never a registry fetch. Reject any "@" outright. + if strings.Contains(spec, "@") { + return fmt.Errorf("python package spec %q is a PEP 508 direct reference (@), not a registry name", spec) + } + if !pep503NameRe.MatchString(name) { + return fmt.Errorf("%q is not a valid PEP 503 package name", name) + } + default: + return fmt.Errorf("unknown ecosystem %q", ecosystem) + } + // Validate the version / tail too. parsePackageSpec splits "name@tail" on the + // last '@'; without this check a direct-reference tail (e.g. "./local", + // "/abs", "~/x") would pass because only the NAME was validated, and the full + // untrusted spec would still reach pip/npm and execute setup.py (MCP-2442 + // re-review). The tail must be a bare version specifier / dist-tag. + if version != "" && !bareVersionRe.MatchString(version) { + return fmt.Errorf("package spec %q has a non-version reference tail %q (path/URL/VCS not allowed)", spec, version) + } + return nil +} + // firstPackageArg returns the package spec from a runner command's args. It // honors `--from ` (uvx) and otherwise returns the first non-flag arg. func firstPackageArg(args []string) string { @@ -392,8 +470,14 @@ func (r *SourceResolver) resolveFromPackageFetch(ctx context.Context, info Serve switch cmdBase { case "npx", "bunx", "pnpm": + if err := validatePackageSpec("npm", spec); err != nil { + return nil, fmt.Errorf("refusing to fetch untrusted spec for %s: %w", info.Name, err) + } return r.fetchNpmPackage(ctx, info, spec) case "uvx", "pipx": + if err := validatePackageSpec("python", spec); err != nil { + return nil, fmt.Errorf("refusing to fetch untrusted spec for %s: %w", info.Name, err) + } return r.fetchPythonPackage(ctx, info, spec) } return nil, fmt.Errorf("package fetch unsupported for command %q", cmdBase) diff --git a/internal/security/scanner/package_fetch_test.go b/internal/security/scanner/package_fetch_test.go index 7a5a5a717..a82eacb7b 100644 --- a/internal/security/scanner/package_fetch_test.go +++ b/internal/security/scanner/package_fetch_test.go @@ -5,10 +5,13 @@ import ( "archive/zip" "bytes" "compress/gzip" + "context" "os" "path/filepath" "strings" "testing" + + "go.uber.org/zap" ) func TestParsePackageSpec(t *testing.T) { @@ -465,3 +468,118 @@ func TestFindDownloadedArchive(t *testing.T) { t.Errorf("wheel should be preferred: got (%q,%v)", got2, kind2) } } + +// MCP-2442: a `pip download` / `uv pip download` of a NON-REGISTRY spec +// (local path, git+, URL, file:, VCS, ssh) STILL executes the package's +// setup.py / PEP 517 build backend even with --only-binary=:all: — arbitrary +// code execution from untrusted server config on the (static) scan path. The +// fetch must validate the spec is a bare PEP 503 / npm registry name and REFUSE +// anything else, falling back to tool-definitions-only. +func TestValidatePackageSpec(t *testing.T) { + cases := []struct { + ecosystem string + spec string + wantOK bool + }{ + // Registry names (allowed) — must not regress. + {"python", "flights", true}, + {"python", "flights==0.2.4", true}, + {"python", "google-flights-mcp-server==0.2.4", true}, + {"python", "Flask", true}, + {"python", "some.dotted.name", true}, + {"npm", "google-flights-mcp-server", true}, + {"npm", "google-flights-mcp-server@0.2.4", true}, + {"npm", "@modelcontextprotocol/server-everything", true}, + {"npm", "@modelcontextprotocol/server-everything@1.0.0", true}, + // Non-registry specs (must be REJECTED → fall back to tool-defs). + {"python", "./local-pkg", false}, + {"python", "../evil", false}, + {"python", "/abs/path/pkg", false}, + {"python", "~/pkg", false}, + {"python", "file:./pkg", false}, + {"python", "git+https://github.com/user/repo.git", false}, + {"python", "git+ssh://[email protected]/user/repo.git", false}, + {"python", "https://example.com/pkg.tar.gz", false}, + {"python", "[email protected]:user/repo.git", false}, + {"python", "hg+https://example.com/repo", false}, + {"python", `C:\Users\evil\pkg`, false}, + {"python", "", false}, + {"npm", "./local-pkg", false}, + {"npm", "/abs/path", false}, + {"npm", "git+https://github.com/user/repo.git", false}, + {"npm", "file:../evil", false}, + {"npm", "https://example.com/pkg.tgz", false}, + {"npm", "", false}, + // MCP-2442 re-review: PEP 508 / npm direct-reference '@'-tail bypass — the + // NAME parses as a bare registry name but the tail is a path/URL/VCS, which + // reaches pip/npm verbatim and STILL executes setup.py. The WHOLE spec + // (including the tail after '@') must be validated. + {"npm", "pkg@./local", false}, + {"npm", "pkg@/abs/path", false}, + {"npm", "pkg@~/evil", false}, + {"npm", "pkg@git+https://github.com/user/repo.git", false}, + {"npm", "pkg@file:./evil", false}, + {"npm", "pkg@https://example.com/pkg.tgz", false}, + {"npm", "@scope/pkg@./local", false}, + {"python", "pkg@./local", false}, + {"python", "pkg@/abs/path", false}, + {"python", "pkg@git+https://github.com/user/repo.git", false}, + {"python", "pkg@~/evil", false}, + {"python", "flights @ git+https://example.com/repo.git", false}, + // Bare version pins / dist-tags after '@' stay valid (no regression). + {"npm", "pkg@1.2.3", true}, + {"npm", "pkg@1.2.3-beta.1", true}, + {"npm", "pkg@latest", true}, + {"npm", "@scope/pkg@2.0.0", true}, + } + for _, c := range cases { + err := validatePackageSpec(c.ecosystem, c.spec) + if c.wantOK && err != nil { + t.Errorf("validatePackageSpec(%q, %q) = %v, want OK", c.ecosystem, c.spec, err) + } + if !c.wantOK && err == nil { + t.Errorf("validatePackageSpec(%q, %q) = nil, want rejection", c.ecosystem, c.spec) + } + } +} + +// TestResolveFromPackageFetch_RejectsNonRegistrySpec proves the resolver refuses +// a non-registry uvx spec BEFORE invoking any download command — so the untrusted +// package's setup.py is never executed. We point the spec at a local directory +// containing a setup.py that drops an execution marker; the resolver must return +// an error (caller falls back to tool_definitions_only) and the marker must be +// absent. +func TestResolveFromPackageFetch_RejectsNonRegistrySpec(t *testing.T) { + pkgDir := t.TempDir() + marker := filepath.Join(t.TempDir(), "EXECUTED.marker") + setupPy := "import pathlib\npathlib.Path(" + pyQuote(marker) + ").write_text('pwned')\n" + if err := os.WriteFile(filepath.Join(pkgDir, "setup.py"), []byte(setupPy), 0o644); err != nil { + t.Fatal(err) + } + + r := NewSourceResolver(zap.NewNop()) + cases := []ServerInfo{ + {Name: "evil-path", Protocol: "stdio", Command: "uvx", Args: []string{"--from", pkgDir, "evil"}}, + {Name: "evil-git", Protocol: "stdio", Command: "uvx", Args: []string{"--from", "git+https://example.com/repo.git", "evil"}}, + {Name: "evil-npm-path", Protocol: "stdio", Command: "npx", Args: []string{"-y", "./local-evil"}}, + // MCP-2442 re-review: PEP 508 / npm direct-reference '@'-tail bypass. + {Name: "evil-uvx-directref", Protocol: "stdio", Command: "uvx", Args: []string{"--from", "evilpkg@" + pkgDir, "evil"}}, + {Name: "evil-npm-directref", Protocol: "stdio", Command: "npx", Args: []string{"-y", "evilpkg@./local-evil"}}, + } + for _, info := range cases { + res, err := r.resolveFromPackageFetch(context.Background(), info) + if err == nil { + if res != nil && res.Cleanup != nil { + res.Cleanup() + } + t.Errorf("%s: expected error (fallback to tool-defs), got nil", info.Name) + } + } + if _, err := os.Stat(marker); err == nil { + t.Fatalf("setup.py was EXECUTED — marker present at %s", marker) + } +} + +func pyQuote(s string) string { + return "r'" + s + "'" +}