diff --git a/docs/errors/MCPX_DOCKER_EXEC_NOT_FOUND.md b/docs/errors/MCPX_DOCKER_EXEC_NOT_FOUND.md new file mode 100644 index 000000000..bd3498161 --- /dev/null +++ b/docs/errors/MCPX_DOCKER_EXEC_NOT_FOUND.md @@ -0,0 +1,81 @@ +--- +id: MCPX_DOCKER_EXEC_NOT_FOUND +title: MCPX_DOCKER_EXEC_NOT_FOUND +sidebar_label: EXEC_NOT_FOUND +description: The Docker isolation image is missing the interpreter the server needs (e.g. no uvx/node). +--- + +# `MCPX_DOCKER_EXEC_NOT_FOUND` + +**Severity:** error +**Domain:** Docker + +## What happened + +The container started, but its entrypoint interpreter was not found *inside* the +image. The failure is reported by the OCI runtime (`runc`) at exec time, e.g.: + +``` +exec: "uvx": executable file not found in $PATH +``` + +This means Docker isolation worked — the image just doesn't contain the tool the +server's command needs. + +## Common cause: a per-server image override that lacks the interpreter + +The usual culprit is a per-server `isolation.image` override pointing at a stock +image that doesn't bundle the runtime. The classic example is a `uvx` server +pinned to `python:3.11`: + +```jsonc +{ + "name": "elevenlabs", + "command": "uvx", + "args": ["elevenlabs-mcp"], + "isolation": { "image": "python:3.11" } // ❌ stock python has no uvx +} +``` + +`uvx` ships with [Astral's `uv`](https://github.com/astral-sh/uv), which is a +separate tool — `python:3.11` does not include it. So +`docker run python:3.11 uvx …` fails at exec time. + +When mcpproxy detects this, the diagnostic names the **detected runtime**, the +**recommended image** for it, and flags the per-server override as the likely +culprit. + +## How to fix + +### Remove the per-server override to inherit the runtime default (recommended) + +Each runtime has a default image that includes the right interpreter +(`default_images` in `docker_isolation`). 
For `uvx`/`pip`/`pipx`/`python` the +default is `ghcr.io/astral-sh/uv:python3.13-bookworm-slim`; for +`npx`/`npm`/`node`/`yarn` it is `node:22`. Drop the override: + +```jsonc +{ "name": "elevenlabs", "command": "uvx", "args": ["elevenlabs-mcp"] } +``` + +### Or pick an image that includes the interpreter + +If you must pin a specific image, choose one that bundles the tool: + +```jsonc +{ "isolation": { "image": "ghcr.io/astral-sh/uv:python3.11-bookworm-slim" } } +``` + +### Verify the image has the tool + +```bash +docker run --rm <image> which uvx # or: node, npx, python3 … +``` + +## Related + +- [Docker Isolation](../features/docker-isolation.md) +- [`MCPX_DOCKER_IMAGE_PULL_FAILED`](MCPX_DOCKER_IMAGE_PULL_FAILED.md) +- [`MCPX_DOCKER_DAEMON_DOWN`](MCPX_DOCKER_DAEMON_DOWN.md) + + diff --git a/docs/errors/README.md b/docs/errors/README.md index dd14e7106..10ac5f06d 100644 --- a/docs/errors/README.md +++ b/docs/errors/README.md @@ -68,6 +68,7 @@ Run `mcpproxy doctor list-codes` for the machine-readable list. - [`MCPX_DOCKER_DAEMON_DOWN`](MCPX_DOCKER_DAEMON_DOWN.md) — daemon unreachable - [`MCPX_DOCKER_IMAGE_PULL_FAILED`](MCPX_DOCKER_IMAGE_PULL_FAILED.md) — pull failed +- [`MCPX_DOCKER_EXEC_NOT_FOUND`](MCPX_DOCKER_EXEC_NOT_FOUND.md) — image missing the runtime interpreter (e.g. 
no `uvx`) - [`MCPX_DOCKER_NO_PERMISSION`](MCPX_DOCKER_NO_PERMISSION.md) — socket permission denied - [`MCPX_DOCKER_SNAP_APPARMOR`](MCPX_DOCKER_SNAP_APPARMOR.md) — snap Docker AppArmor block diff --git a/internal/diagnostics/classifier_domains_test.go b/internal/diagnostics/classifier_domains_test.go index eeded868e..1c53ccc30 100644 --- a/internal/diagnostics/classifier_domains_test.go +++ b/internal/diagnostics/classifier_domains_test.go @@ -2,6 +2,7 @@ package diagnostics import ( "errors" + "strings" "testing" ) @@ -209,3 +210,73 @@ func TestClassify_Quarantine_ToolChanged(t *testing.T) { t.Errorf("Classify(tool_changed) = %q, want %q", got, QuarantineToolChanged) } } + +// --- RUNTIME-AWARE REMEDIATION (MCP-2909) ----------------------------------- + +// TestRuntimeAwareRemediation_DockerExecNotFound covers the field-report case +// (ElevenLabs / uvx / per-server image override): a `uvx` server pinned to a +// stock `python:3.11` image fails at exec time because that image has no `uvx`. +// The enriched DockerExecNotFound remediation must name the detected runtime, +// the recommended runtime-default image, and flag the per-server override as the +// likely culprit when it differs from the default. +func TestRuntimeAwareRemediation_DockerExecNotFound(t *testing.T) { + const uvImage = "ghcr.io/astral-sh/uv:python3.13-bookworm-slim" + defaults := map[string]string{ + "uvx": uvImage, + "pipx": uvImage, + "npx": "node:22", + } + + t.Run("uvx_on_bare_python_override", func(t *testing.T) { + msg := RuntimeAwareRemediation(DockerExecNotFound, ClassifierHints{ + DockerCommand: "uvx", + DockerImageOverride: "python:3.11", + DockerDefaultImages: defaults, + }) + // Must name the detected runtime. + if !strings.Contains(msg, "uvx") { + t.Errorf("message must name the runtime 'uvx'; got: %q", msg) + } + // Must name the recommended image. 
+ if !strings.Contains(msg, uvImage) { + t.Errorf("message must name recommended image %q; got: %q", uvImage, msg) + } + // Must flag the per-server override as the culprit. + if !strings.Contains(msg, "python:3.11") { + t.Errorf("message must name the failing override image 'python:3.11'; got: %q", msg) + } + if !strings.Contains(strings.ToLower(msg), "override") { + t.Errorf("message must flag the per-server override; got: %q", msg) + } + }) + + t.Run("npx_no_override_still_names_runtime_and_image", func(t *testing.T) { + msg := RuntimeAwareRemediation(DockerExecNotFound, ClassifierHints{ + DockerCommand: "npx", + DockerDefaultImages: defaults, + }) + if !strings.Contains(msg, "npx") { + t.Errorf("message must name the runtime 'npx'; got: %q", msg) + } + if !strings.Contains(msg, "node:22") { + t.Errorf("message must name recommended image 'node:22'; got: %q", msg) + } + }) + + t.Run("no_enrichment_without_command", func(t *testing.T) { + if msg := RuntimeAwareRemediation(DockerExecNotFound, ClassifierHints{ + DockerDefaultImages: defaults, + }); msg != "" { + t.Errorf("no docker command → empty enrichment (fall back to static catalog); got: %q", msg) + } + }) + + t.Run("no_enrichment_for_other_codes", func(t *testing.T) { + if msg := RuntimeAwareRemediation(DockerCLINotFound, ClassifierHints{ + DockerCommand: "uvx", + DockerDefaultImages: defaults, + }); msg != "" { + t.Errorf("only DockerExecNotFound is enriched; got: %q", msg) + } + }) +} diff --git a/internal/diagnostics/remediation.go b/internal/diagnostics/remediation.go new file mode 100644 index 000000000..6b550a4b2 --- /dev/null +++ b/internal/diagnostics/remediation.go @@ -0,0 +1,108 @@ +package diagnostics + +import ( + "fmt" + "path/filepath" + "strings" +) + +// RuntimeAwareRemediation returns an enriched, context-specific remediation +// message for codes that support per-server enrichment, or "" to fall back to +// the static CatalogEntry.UserMessage. 
+// +// Today it enriches only DockerExecNotFound (MCP-2909): the in-container +// interpreter was missing because the chosen Docker image lacks it. The field +// report that motivated this — a `uvx` server pinned via a per-server +// `isolation.image: "python:3.11"` override — failed at exec time because stock +// `python:3.11` has no `uvx` (uv is a separate Astral tool). The static catalog +// message is too generic to self-resolve, so we name (a) the detected runtime, +// (b) the recommended runtime-default image, and (c) when a per-server image +// override is the likely culprit. +// +// This is diagnostics-only: it never changes classification or image selection. +func RuntimeAwareRemediation(code Code, hints ClassifierHints) string { + if code != DockerExecNotFound || hints.DockerCommand == "" { + return "" + } + + runtimeType := detectDockerRuntimeType(hints.DockerCommand) + recommended := hints.DockerDefaultImages[runtimeType] + override := strings.TrimSpace(hints.DockerImageOverride) + + var b strings.Builder + fmt.Fprintf(&b, "This `%s` server's Docker image has no `%s` interpreter, so the container could not start it.", runtimeType, runtimeType) + + if override != "" { + fmt.Fprintf(&b, " The per-server `isolation.image` override `%s` is the likely culprit", override) + if recommended != "" && override != recommended { + fmt.Fprintf(&b, " — it differs from the recommended image for `%s`", runtimeType) + } + b.WriteString(".") + } + + switch { + case recommended != "" && override != "": + fmt.Fprintf(&b, " The recommended image for `%s` is `%s`. Remove the per-server `isolation.image` override to inherit it, or pick an image that includes `%s`.", runtimeType, recommended, runtimeType) + case recommended != "": + fmt.Fprintf(&b, " The recommended image for `%s` is `%s`. 
Pick an image that includes `%s`.", runtimeType, recommended, runtimeType) + default: + fmt.Fprintf(&b, " Pick an image that includes `%s`.", runtimeType) + } + + return b.String() +} + +// detectDockerRuntimeType maps a server's configured command to its runtime +// type key (the same keys used by config.DockerIsolationConfig.DefaultImages). +// +// It is a deliberately small, side-effect-free mirror of +// core.IsolationManager.DetectRuntimeType (internal/upstream/core/isolation.go) +// — the diagnostics package must not import upstream/core, and (like +// supervisor.usesDockerIsolation mirrors ShouldIsolate) faithfulness for the +// display path matters more than sharing the implementation. Unknown commands +// fall back to the base command name so the message still names something +// concrete rather than a generic "interpreter". +func detectDockerRuntimeType(command string) string { + cmdName := filepath.Base(command) + switch cmdName { + case "python", "python3", "python3.11", "python3.12", "python3.13": + return "python" + case "uvx": + return "uvx" + case "pip", "pip3": + return "pip" + case "pipx": + return "pipx" + case "node": + return "node" + case "npm": + return "npm" + case "npx": + return "npx" + case "yarn": + return "yarn" + case "go": + return "go" + case "cargo": + return "cargo" + case "rustc": + return "rustc" + case "ruby": + return "ruby" + case "gem": + return "gem" + case "php": + return "php" + case "composer": + return "composer" + default: + lower := strings.ToLower(cmdName) + if strings.Contains(lower, "python") { + return "python" + } + if strings.Contains(lower, "node") { + return "node" + } + return cmdName + } +} diff --git a/internal/diagnostics/types.go b/internal/diagnostics/types.go index 9090e5e73..0e2b2c1af 100644 --- a/internal/diagnostics/types.go +++ b/internal/diagnostics/types.go @@ -54,12 +54,16 @@ type CatalogEntry struct { // DiagnosticError is the runtime record attached to a server's stateview snapshot // while the 
server has an active failure. type DiagnosticError struct { - Code Code `json:"code"` - Severity Severity `json:"severity"` - Cause string `json:"cause,omitempty"` - CauseType string `json:"cause_type,omitempty"` - ServerID string `json:"server_id"` - DetectedAt time.Time `json:"detected_at"` + Code Code `json:"code"` + Severity Severity `json:"severity"` + Cause string `json:"cause,omitempty"` + // Remediation is an optional runtime-aware, context-specific user message + // that overrides the static CatalogEntry.UserMessage when present (MCP-2909). + // Empty when the generic catalog message is sufficient. + Remediation string `json:"remediation,omitempty"` + CauseType string `json:"cause_type,omitempty"` + ServerID string `json:"server_id"` + DetectedAt time.Time `json:"detected_at"` } // ClassifierHints lets callers nudge the classifier when context is known @@ -73,6 +77,25 @@ type ClassifierHints struct { // per #696, in-container interpreter missing) instead of a generic // MCPX_STDIO_SPAWN_ENOENT. See classifyDockerIsolatedSpawn. DockerIsolated bool + + // The fields below enrich the DockerExecNotFound remediation with + // per-server context (MCP-2909). They are diagnostics-only — they never + // change classification, only the user-facing message produced by + // RuntimeAwareRemediation. + + // DockerCommand is the configured stdio command for a Docker-isolated + // server (e.g. "uvx", "npx"). The detected runtime type is derived from it. + // Empty when unknown or for non-isolated servers. + DockerCommand string + + // DockerImageOverride is the per-server isolation.image override, if any. + // When set, the DockerExecNotFound remediation flags it as the likely + // culprit (a stock image that lacks the runtime interpreter). + DockerImageOverride string + + // DockerDefaultImages is the global default_images map (runtime → image). + // Used to name the recommended image for the detected runtime. 
+ DockerDefaultImages map[string]string } // FixRequest is the input to a registered fixer. diff --git a/internal/runtime/runtime.go b/internal/runtime/runtime.go index f635b7be0..66fd5be60 100644 --- a/internal/runtime/runtime.go +++ b/internal/runtime/runtime.go @@ -1934,7 +1934,14 @@ func (r *Runtime) GetAllServers() ([]map[string]interface{}, error) { "detected_at": d.DetectedAt, } if entry, ok := diagnostics.Get(d.Code); ok { - diagMap["user_message"] = entry.UserMessage + // MCP-2909: prefer the runtime-aware remediation when present so + // the user sees the detected runtime + recommended image instead + // of the generic catalog message. + if d.Remediation != "" { + diagMap["user_message"] = d.Remediation + } else { + diagMap["user_message"] = entry.UserMessage + } diagMap["fix_steps"] = entry.FixSteps diagMap["docs_url"] = entry.DocsURL } diff --git a/internal/runtime/supervisor/supervisor.go b/internal/runtime/supervisor/supervisor.go index e61ab817f..23a179d33 100644 --- a/internal/runtime/supervisor/supervisor.go +++ b/internal/runtime/supervisor/supervisor.go @@ -23,16 +23,13 @@ import ( // classifyAndAttach converts a raw connection error into a DiagnosticError and // stores it on the server status. Called from the supervisor's reconcile and // event paths. Spec 044. -func classifyAndAttach(status *stateview.ServerStatus, err error, transport string, dockerIsolated bool) { +func classifyAndAttach(status *stateview.ServerStatus, err error, hints diagnostics.ClassifierHints) { if err == nil { status.Diagnostic = nil return } - code := diagnostics.Classify(err, diagnostics.ClassifierHints{ - Transport: transport, - ServerID: status.Name, - DockerIsolated: dockerIsolated, - }) + hints.ServerID = status.Name + code := diagnostics.Classify(err, hints) if code == "" { // Classify always returns at least UnknownUnclassified for non-nil err, // so reaching here means a logic regression. 
Defensive fallback keeps @@ -46,12 +43,40 @@ func classifyAndAttach(status *stateview.ServerStatus, err error, transport stri msg = msg[:maxCause] + "..." } status.Diagnostic = &diagnostics.DiagnosticError{ - Code: code, - Severity: entry.Severity, - Cause: msg, - ServerID: status.Name, - DetectedAt: time.Now(), + Code: code, + Severity: entry.Severity, + Cause: msg, + // MCP-2909: runtime-aware override of the static catalog message when + // context (detected runtime + recommended image + override culprit) is + // available; empty otherwise so the generic UserMessage is used. + Remediation: diagnostics.RuntimeAwareRemediation(code, hints), + ServerID: status.Name, + DetectedAt: time.Now(), + } +} + +// classifierHints builds the diagnostics.ClassifierHints for a server's failure, +// including the Docker-isolation enrichment context (MCP-2909) the +// DockerExecNotFound remediation needs: the configured command (→ detected +// runtime), the per-server isolation.image override (likely culprit), and the +// global default_images map (→ recommended image). The enrichment fields are +// only populated for Docker-isolated servers; they are inert for every other +// code. 
+func (s *Supervisor) classifierHints(srv *config.ServerConfig, transport string) diagnostics.ClassifierHints { + hints := diagnostics.ClassifierHints{ + Transport: transport, + DockerIsolated: s.usesDockerIsolation(srv), + } + if hints.DockerIsolated && srv != nil { + hints.DockerCommand = srv.Command + if srv.Isolation != nil { + hints.DockerImageOverride = srv.Isolation.Image + } + if snap := s.configSvc.Current(); snap != nil && snap.Config != nil && snap.Config.DockerIsolation != nil { + hints.DockerDefaultImages = snap.Config.DockerIsolation.DefaultImages + } } + return hints } // usesDockerIsolation reports whether the given server would be launched @@ -710,7 +735,7 @@ func (s *Supervisor) updateStateView(name string, state *ServerState) { if state.Config != nil { transport = transportpkg.DetermineTransportType(state.Config) } - classifyAndAttach(status, state.ConnectionInfo.LastError, transport, s.usesDockerIsolation(state.Config)) + classifyAndAttach(status, state.ConnectionInfo.LastError, s.classifierHints(state.Config, transport)) // Spec 044 Phase H: notify telemetry counter store. if status.Diagnostic != nil { s.callbackMu.RLock() @@ -1008,7 +1033,7 @@ func (s *Supervisor) updateSnapshotFromEvent(event Event) { if status.Config != nil { transport = transportpkg.DetermineTransportType(status.Config) } - classifyAndAttach(status, connInfo.LastError, transport, s.usesDockerIsolation(status.Config)) + classifyAndAttach(status, connInfo.LastError, s.classifierHints(status.Config, transport)) // Spec 044 Phase H: notify telemetry counter store. 
if status.Diagnostic != nil { s.callbackMu.RLock() diff --git a/internal/server/server.go b/internal/server/server.go index ef21415c4..012be4e0c 100644 --- a/internal/server/server.go +++ b/internal/server/server.go @@ -902,7 +902,14 @@ func (s *Server) GetAllServers() ([]map[string]interface{}, error) { "detected_at": d.DetectedAt, } if entry, ok := diagnostics.Get(d.Code); ok { - diagMap["user_message"] = entry.UserMessage + // MCP-2909: prefer the runtime-aware remediation when present so + // the user sees the detected runtime + recommended image instead + // of the generic catalog message. + if d.Remediation != "" { + diagMap["user_message"] = d.Remediation + } else { + diagMap["user_message"] = entry.UserMessage + } diagMap["fix_steps"] = entry.FixSteps diagMap["docs_url"] = entry.DocsURL }