Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions cmd/thv-operator/api/v1alpha1/mcpserver_types.go
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
package v1alpha1

import (
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
)

// Condition types for MCPServer
Expand Down Expand Up @@ -85,8 +85,11 @@ type MCPServerSpec struct {
// This allows for customizing the pod configuration beyond what is provided by the other fields.
// Note that to modify the specific container the MCP server runs in, you must specify
// the `mcp` container name in the PodTemplateSpec.
// This field accepts a PodTemplateSpec object as JSON/YAML.
// +optional
PodTemplateSpec *corev1.PodTemplateSpec `json:"podTemplateSpec,omitempty"`
// +kubebuilder:pruning:PreserveUnknownFields
// +kubebuilder:validation:Type=object
PodTemplateSpec *runtime.RawExtension `json:"podTemplateSpec,omitempty"`

// ResourceOverrides allows overriding annotations and labels for resources created by the operator
// +optional
Expand Down
4 changes: 2 additions & 2 deletions cmd/thv-operator/api/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

21 changes: 21 additions & 0 deletions cmd/thv-operator/controllers/helpers_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
package controllers

import (
"encoding/json"
"testing"

"github.com/stretchr/testify/require"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/runtime"
)

// podTemplateSpecToRawExtension is a test helper to convert PodTemplateSpec to RawExtension
func podTemplateSpecToRawExtension(t *testing.T, pts *corev1.PodTemplateSpec) *runtime.RawExtension {
t.Helper()
if pts == nil {
return nil
}
raw, err := json.Marshal(pts)
require.NoError(t, err, "Failed to marshal PodTemplateSpec")
return &runtime.RawExtension{Raw: raw}
}
170 changes: 132 additions & 38 deletions cmd/thv-operator/controllers/mcpserver_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/intstr"
"k8s.io/client-go/rest"
"k8s.io/client-go/tools/record"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
Expand All @@ -40,6 +41,7 @@ import (
type MCPServerReconciler struct {
client.Client
Scheme *runtime.Scheme
Recorder record.EventRecorder
platformDetector kubernetes.PlatformDetector
detectedPlatform kubernetes.Platform
platformOnce sync.Once
Expand Down Expand Up @@ -186,6 +188,15 @@ func (r *MCPServerReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
return ctrl.Result{Requeue: true}, nil
}

// Validate PodTemplateSpec early - before other validations
// This ensures we fail fast if the spec is invalid
if !r.validateAndUpdatePodTemplateStatus(ctx, mcpServer) {
// Invalid PodTemplateSpec - return without error to avoid infinite retries
// The user must fix the spec and the next reconciliation will retry
ctxLogger.Info("Skipping reconciliation due to invalid PodTemplateSpec")
return ctrl.Result{}, nil
}

// Check if MCPToolConfig is referenced and handle it
if err := r.handleToolConfig(ctx, mcpServer); err != nil {
ctxLogger.Error(err, "Failed to handle MCPToolConfig")
Expand All @@ -206,6 +217,10 @@ func (r *MCPServerReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
setImageValidationCondition(mcpServer, metav1.ConditionTrue,
mcpv1alpha1.ConditionReasonImageValidationSkipped,
"Image validation was not performed (no enforcement configured)")
// Update status to persist the condition
if statusErr := r.Status().Update(ctx, mcpServer); statusErr != nil {
ctxLogger.Error(statusErr, "Failed to update MCPServer status after image validation")
}
} else if goerr.Is(err, validation.ErrImageInvalid) {
ctxLogger.Error(err, "MCPServer image validation failed", "image", mcpServer.Spec.Image)
// Update status to reflect validation failure
Expand Down Expand Up @@ -236,6 +251,10 @@ func (r *MCPServerReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
setImageValidationCondition(mcpServer, metav1.ConditionTrue,
mcpv1alpha1.ConditionReasonImageValidationSuccess,
"Image validation passed - image found in enforced registries")
// Update status to persist the condition
if statusErr := r.Status().Update(ctx, mcpServer); statusErr != nil {
ctxLogger.Error(statusErr, "Failed to update MCPServer status after image validation")
}
}

// Check if the MCPServer instance is marked to be deleted
Expand Down Expand Up @@ -406,6 +425,64 @@ func setImageValidationCondition(mcpServer *mcpv1alpha1.MCPServer, status metav1
})
}

// validateAndUpdatePodTemplateStatus validates the PodTemplateSpec and updates the MCPServer status
// with appropriate conditions and events
func (r *MCPServerReconciler) validateAndUpdatePodTemplateStatus(ctx context.Context, mcpServer *mcpv1alpha1.MCPServer) bool {
ctxLogger := log.FromContext(ctx)

// Only validate if PodTemplateSpec is provided
if mcpServer.Spec.PodTemplateSpec == nil || mcpServer.Spec.PodTemplateSpec.Raw == nil {
// No PodTemplateSpec provided, validation passes
return true
}

_, err := NewMCPServerPodTemplateSpecBuilder(mcpServer.Spec.PodTemplateSpec)
if err != nil {
// Record event for invalid PodTemplateSpec
r.Recorder.Eventf(mcpServer, corev1.EventTypeWarning, "InvalidPodTemplateSpec",
"Failed to parse PodTemplateSpec: %v. Deployment blocked until PodTemplateSpec is fixed.", err)

// Set phase and message
mcpServer.Status.Phase = mcpv1alpha1.MCPServerPhaseFailed
mcpServer.Status.Message = fmt.Sprintf("Invalid PodTemplateSpec: %v", err)

// Set condition for invalid PodTemplateSpec
meta.SetStatusCondition(&mcpServer.Status.Conditions, metav1.Condition{
Type: "PodTemplateValid",
Status: metav1.ConditionFalse,
ObservedGeneration: mcpServer.Generation,
Reason: "InvalidPodTemplateSpec",
Message: fmt.Sprintf("Failed to parse PodTemplateSpec: %v. Deployment blocked until fixed.", err),
})

// Update status with the condition
if statusErr := r.Status().Update(ctx, mcpServer); statusErr != nil {
ctxLogger.Error(statusErr, "Failed to update MCPServer status with PodTemplateSpec validation")
return false
}

ctxLogger.Error(err, "PodTemplateSpec validation failed")
return false
}

// Set condition for valid PodTemplateSpec
meta.SetStatusCondition(&mcpServer.Status.Conditions, metav1.Condition{
Type: "PodTemplateValid",
Status: metav1.ConditionTrue,
ObservedGeneration: mcpServer.Generation,
Reason: "ValidPodTemplateSpec",
Message: "PodTemplateSpec is valid",
})

// Update status with the condition
if statusErr := r.Status().Update(ctx, mcpServer); statusErr != nil {
ctxLogger.Error(statusErr, "Failed to update MCPServer status with PodTemplateSpec validation")
}

ctxLogger.V(1).Info("PodTemplateSpec validation completed successfully")
return true
}

// handleRestartAnnotation checks if the restart annotation has been updated and triggers a restart if needed
// Returns true if a restart was triggered and the reconciliation should be requeued
func (r *MCPServerReconciler) handleRestartAnnotation(ctx context.Context, mcpServer *mcpv1alpha1.MCPServer) (bool, error) {
Expand Down Expand Up @@ -755,17 +832,22 @@ func (r *MCPServerReconciler) deploymentForMCPServer(ctx context.Context, m *mcp
useConfigMap := os.Getenv("TOOLHIVE_USE_CONFIGMAP") == trueValue
if useConfigMap {
// Also add pod template patch for secrets (same as regular flags approach)
finalPodTemplateSpec := NewMCPServerPodTemplateSpecBuilder(m.Spec.PodTemplateSpec).
WithSecrets(m.Spec.Secrets).
Build()
// Add pod template patch if we have one
if finalPodTemplateSpec != nil {
podTemplatePatch, err := json.Marshal(finalPodTemplateSpec)
if err != nil {
ctxLogger := log.FromContext(ctx)
ctxLogger.Error(err, "Failed to marshal pod template spec")
} else {
args = append(args, fmt.Sprintf("--k8s-pod-patch=%s", string(podTemplatePatch)))
builder, err := NewMCPServerPodTemplateSpecBuilder(m.Spec.PodTemplateSpec)
if err != nil {
ctxLogger := log.FromContext(ctx)
ctxLogger.Error(err, "Invalid PodTemplateSpec in MCPServer spec, continuing without customizations")
// Continue without pod template patch - the deployment will still work
} else {
finalPodTemplateSpec := builder.WithSecrets(m.Spec.Secrets).Build()
// Add pod template patch if we have one
if finalPodTemplateSpec != nil {
podTemplatePatch, err := json.Marshal(finalPodTemplateSpec)
if err != nil {
ctxLogger := log.FromContext(ctx)
ctxLogger.Error(err, "Failed to marshal pod template spec")
} else {
args = append(args, fmt.Sprintf("--k8s-pod-patch=%s", string(podTemplatePatch)))
}
}
}

Expand Down Expand Up @@ -812,18 +894,26 @@ func (r *MCPServerReconciler) deploymentForMCPServer(ctx context.Context, m *mcp
defaultSA := mcpServerServiceAccountName(m.Name)
serviceAccount = &defaultSA
}
finalPodTemplateSpec := NewMCPServerPodTemplateSpecBuilder(m.Spec.PodTemplateSpec).
WithServiceAccount(serviceAccount).
WithSecrets(m.Spec.Secrets).
Build()
// Add pod template patch if we have one
if finalPodTemplateSpec != nil {
podTemplatePatch, err := json.Marshal(finalPodTemplateSpec)
if err != nil {
ctxLogger := log.FromContext(ctx)
ctxLogger.Error(err, "Failed to marshal pod template spec")
} else {
args = append(args, fmt.Sprintf("--k8s-pod-patch=%s", string(podTemplatePatch)))

builder, err := NewMCPServerPodTemplateSpecBuilder(m.Spec.PodTemplateSpec)
if err != nil {
ctxLogger := log.FromContext(ctx)
ctxLogger.Error(err, "Invalid PodTemplateSpec in MCPServer spec, continuing without customizations")
// Continue without pod template patch - the deployment will still work
} else {
finalPodTemplateSpec := builder.
WithServiceAccount(serviceAccount).
WithSecrets(m.Spec.Secrets).
Build()
// Add pod template patch if we have one
if finalPodTemplateSpec != nil {
podTemplatePatch, err := json.Marshal(finalPodTemplateSpec)
if err != nil {
ctxLogger := log.FromContext(ctx)
ctxLogger.Error(err, "Failed to marshal pod template spec")
} else {
args = append(args, fmt.Sprintf("--k8s-pod-patch=%s", string(podTemplatePatch)))
}
}
}

Expand Down Expand Up @@ -1393,21 +1483,19 @@ func (r *MCPServerReconciler) deploymentNeedsUpdate(
// TODO: Add more comprehensive checks for PodTemplateSpec changes beyond just the args
// This would involve comparing the actual pod template spec fields with what would be
// generated by the operator, rather than just checking the command-line arguments.
if mcpServer.Spec.PodTemplateSpec != nil {
podTemplatePatch, err := json.Marshal(mcpServer.Spec.PodTemplateSpec)
if err == nil {
podTemplatePatchArg := fmt.Sprintf("--k8s-pod-patch=%s", string(podTemplatePatch))
found := false
for _, arg := range container.Args {
if arg == podTemplatePatchArg {
found = true
break
}
}
if !found {
return true
if mcpServer.Spec.PodTemplateSpec != nil && mcpServer.Spec.PodTemplateSpec.Raw != nil {
// Use the raw bytes directly since PodTemplateSpec is now a RawExtension
podTemplatePatchArg := fmt.Sprintf("--k8s-pod-patch=%s", string(mcpServer.Spec.PodTemplateSpec.Raw))
found := false
for _, arg := range container.Args {
if arg == podTemplatePatchArg {
found = true
break
}
}
if !found {
return true
}
}

// Check if the container port has changed
Expand Down Expand Up @@ -1461,7 +1549,14 @@ func (r *MCPServerReconciler) deploymentNeedsUpdate(
defaultSA := mcpServerServiceAccountName(mcpServer.Name)
serviceAccount = &defaultSA
}
expectedPodTemplateSpec := NewMCPServerPodTemplateSpecBuilder(mcpServer.Spec.PodTemplateSpec).

builder, err := NewMCPServerPodTemplateSpecBuilder(mcpServer.Spec.PodTemplateSpec)
if err != nil {
// If we can't parse the PodTemplateSpec, consider it as needing update
return true
}

expectedPodTemplateSpec := builder.
WithServiceAccount(serviceAccount).
WithSecrets(mcpServer.Spec.Secrets).
Build()
Expand Down Expand Up @@ -1527,7 +1622,6 @@ func (r *MCPServerReconciler) deploymentNeedsUpdate(
if !equalOpenTelemetryArgs(otelConfig, container.Args) {
return true
}

}

// Check if the service account name has changed
Expand Down
Loading
Loading