diff --git a/charts/cluster-api-runtime-extensions-nutanix/addons/cni/cilium/values-template.yaml b/charts/cluster-api-runtime-extensions-nutanix/addons/cni/cilium/values-template.yaml
index d47da5cc3..ea0f46571 100644
--- a/charts/cluster-api-runtime-extensions-nutanix/addons/cni/cilium/values-template.yaml
+++ b/charts/cluster-api-runtime-extensions-nutanix/addons/cni/cilium/values-template.yaml
@@ -33,11 +33,7 @@ socketLB:
 envoy:
   image:
     useDigest: false
-{{- with .ControlPlane }}
-{{- range $key, $val := .metadata.annotations }}
-{{- if eq $key "controlplane.cluster.x-k8s.io/skip-kube-proxy" }}
 k8sServiceHost: auto
-kubeProxyReplacement: true{{ break }}
-{{- end }}
-{{- end }}
+{{- if .EnableKubeProxyReplacement }}
+kubeProxyReplacement: true
 {{- end }}
diff --git a/hack/addons/update-cilium-manifests.sh b/hack/addons/update-cilium-manifests.sh
index b2ff2d6e8..9805bca58 100755
--- a/hack/addons/update-cilium-manifests.sh
+++ b/hack/addons/update-cilium-manifests.sh
@@ -24,7 +24,7 @@ mkdir -p "${ASSETS_DIR}/cilium"
 envsubst -no-unset <"${KUSTOMIZE_BASE_DIR}/kustomization.yaml.tmpl" >"${ASSETS_DIR}/kustomization.yaml"
 cat <<EOF >"${ASSETS_DIR}/gomplate-context.yaml"
-ControlPlane: {}
+EnableKubeProxyReplacement: false
 EOF
 gomplate -f "${GIT_REPO_ROOT}/charts/cluster-api-runtime-extensions-nutanix/addons/cni/cilium/values-template.yaml" \
   --context .="${ASSETS_DIR}/gomplate-context.yaml" \
diff --git a/hack/tools/fetch-images/main.go b/hack/tools/fetch-images/main.go
index 5fdb25aea..e2339707b 100644
--- a/hack/tools/fetch-images/main.go
+++ b/hack/tools/fetch-images/main.go
@@ -266,16 +266,10 @@ func getValuesFileForChartIfNeeded(chartName, carenChartDirectory string) (strin
     }
     type input struct {
-        ControlPlane map[string]interface{}
+        EnableKubeProxyReplacement bool
     }
     templateInput := input{
-        ControlPlane: map[string]interface{}{
-            "metadata": map[string]interface{}{
-                "annotations": map[string]interface{}{
-                    "controlplane.cluster.x-k8s.io/skip-kube-proxy": "",
-                },
-            },
-        },
+        EnableKubeProxyReplacement: true,
     }
     err = template.Must(template.New(defaultHelmAddonFilename).ParseFiles(f)).Execute(tempFile, &templateInput)
diff --git a/pkg/handlers/generic/lifecycle/cni/cilium/handler.go b/pkg/handlers/generic/lifecycle/cni/cilium/handler.go
index 50473c2f5..4c751eb86 100644
--- a/pkg/handlers/generic/lifecycle/cni/cilium/handler.go
+++ b/pkg/handlers/generic/lifecycle/cni/cilium/handler.go
@@ -12,6 +12,7 @@ import (
     "github.com/spf13/pflag"
     appsv1 "k8s.io/api/apps/v1"
     corev1 "k8s.io/api/core/v1"
+    apierrors "k8s.io/apimachinery/pkg/api/errors"
     metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
     clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
     "sigs.k8s.io/cluster-api/controllers/remote"
@@ -228,6 +229,7 @@ func (c *CiliumCNI) apply(
             c.client,
             helmChart,
         ).
+            WithValueTemplater(templateValues).
             WithDefaultWaiter()
     case "":
         resp.SetStatus(runtimehooksv1.ResponseStatusFailure)
@@ -259,23 +261,58 @@ func runApply(
         return err
     }
+    // It is possible to disable kube-proxy and migrate to Cilium's kube-proxy replacement feature in a running cluster.
+    // In this case, we need to wait for Cilium to be restarted with the new configuration and then clean up kube-proxy.
+
     // If skip kube-proxy is not set, return early.
-    // Otherwise, wait for Cilium to be rolled out and then cleanup kube-proxy if installed.
     if !capiutils.ShouldSkipKubeProxy(cluster) {
         return nil
     }
+    remoteClient, err := remote.NewClusterClient(
+        ctx,
+        "",
+        client,
+        ctrlclient.ObjectKeyFromObject(cluster),
+    )
+    if err != nil {
+        return fmt.Errorf("error creating remote cluster client: %w", err)
+    }
+
+    // If kube-proxy is not installed,
+    // assume that the one-time migration of kube-proxy is complete and return early.
+    isKubeProxyInstalled, err := isKubeProxyInstalled(ctx, remoteClient)
+    if err != nil {
+        return fmt.Errorf("failed to check if kube-proxy is installed: %w", err)
+    }
+    if !isKubeProxyInstalled {
+        return nil
+    }
+
+    log.Info(
+        fmt.Sprintf(
+            "Waiting for Cilium ConfigMap to be updated with the new configuration for cluster %s",
+            ctrlclient.ObjectKeyFromObject(cluster),
+        ),
+    )
+    if err := waitForCiliumConfigMapToBeUpdatedWithKubeProxyReplacement(ctx, remoteClient); err != nil {
+        return fmt.Errorf("failed to wait for Cilium ConfigMap to be updated: %w", err)
+    }
+
     log.Info(
-        fmt.Sprintf("Waiting for Cilium to be ready for cluster %s", ctrlclient.ObjectKeyFromObject(cluster)),
+        fmt.Sprintf(
+            "Triggering a rollout of Cilium DaemonSet Pods for cluster %s",
+            ctrlclient.ObjectKeyFromObject(cluster),
+        ),
     )
-    if err := waitForCiliumToBeReady(ctx, client, cluster); err != nil {
-        return fmt.Errorf("failed to wait for Cilium to be ready: %w", err)
+    if err := forceCiliumRollout(ctx, remoteClient); err != nil {
+        return fmt.Errorf("failed to force a rollout of Cilium DaemonSet Pods: %w", err)
     }
     log.Info(
         fmt.Sprintf("Cleaning up kube-proxy for cluster %s", ctrlclient.ObjectKeyFromObject(cluster)),
     )
-    if err := cleanupKubeProxy(ctx, client, cluster); err != nil {
+    if err := cleanupKubeProxy(ctx, remoteClient); err != nil {
         return fmt.Errorf("failed to cleanup kube-proxy: %w", err)
     }
@@ -283,41 +320,86 @@ func runApply(
 }
 
 const (
+    kubeProxyReplacementConfigKey = "kube-proxy-replacement"
+    ciliumConfigMapName           = "cilium-config"
+
+    restartedAtAnnotation = "caren.nutanix.com/restartedAt"
+
     kubeProxyName      = "kube-proxy"
     kubeProxyNamespace = "kube-system"
 )
 
-func waitForCiliumToBeReady(
-    ctx context.Context,
-    c ctrlclient.Client,
-    cluster *clusterv1.Cluster,
-) error {
-    remoteClient, err := remote.NewClusterClient(
+// Use vars to override in integration tests.
+var (
+    waitInterval = 1 * time.Second
+    waitTimeout  = 30 * time.Second
+)
+
+func waitForCiliumConfigMapToBeUpdatedWithKubeProxyReplacement(ctx context.Context, c ctrlclient.Client) error {
+    cm := &corev1.ConfigMap{
+        ObjectMeta: metav1.ObjectMeta{
+            Name:      ciliumConfigMapName,
+            Namespace: defaultCiliumNamespace,
+        },
+    }
+    if err := wait.ForObject(
         ctx,
-        "",
-        c,
-        ctrlclient.ObjectKeyFromObject(cluster),
-    )
-    if err != nil {
-        return fmt.Errorf("error creating remote cluster client: %w", err)
+        wait.ForObjectInput[*corev1.ConfigMap]{
+            Reader: c,
+            Target: cm.DeepCopy(),
+            Check: func(_ context.Context, obj *corev1.ConfigMap) (bool, error) {
+                return obj.Data[kubeProxyReplacementConfigKey] == "true", nil
+            },
+            Interval: waitInterval,
+            Timeout:  waitTimeout,
+        },
+    ); err != nil {
+        return fmt.Errorf("failed to wait for ConfigMap %s to be updated: %w", ctrlclient.ObjectKeyFromObject(cm), err)
     }
+    return nil
+}
 
+func forceCiliumRollout(ctx context.Context, c ctrlclient.Client) error {
     ds := &appsv1.DaemonSet{
         ObjectMeta: metav1.ObjectMeta{
             Name:      defaultCiliumReleaseName,
             Namespace: defaultCiliumNamespace,
         },
     }
+    if err := c.Get(ctx, ctrlclient.ObjectKeyFromObject(ds), ds); err != nil {
+        return fmt.Errorf("failed to get cilium daemon set: %w", err)
+    }
+
+    // Update the DaemonSet to force a rollout.
+    annotations := ds.Spec.Template.Annotations
+    if annotations == nil {
+        annotations = make(map[string]string, 1)
+    }
+    if _, ok := annotations[restartedAtAnnotation]; !ok {
+        // Only set the annotation once to avoid a race condition where rollouts are triggered repeatedly.
+        annotations[restartedAtAnnotation] = time.Now().UTC().Format(time.RFC3339)
+    }
+    ds.Spec.Template.Annotations = annotations
+    if err := c.Update(ctx, ds); err != nil {
+        return fmt.Errorf("failed to update cilium daemon set: %w", err)
+    }
+
     if err := wait.ForObject(
         ctx,
         wait.ForObjectInput[*appsv1.DaemonSet]{
-            Reader: remoteClient,
+            Reader: c,
             Target: ds.DeepCopy(),
             Check: func(_ context.Context, obj *appsv1.DaemonSet) (bool, error) {
-                return obj.Status.NumberAvailable == obj.Status.DesiredNumberScheduled && obj.Status.NumberUnavailable == 0, nil
+                if obj.Generation != obj.Status.ObservedGeneration {
+                    return false, nil
+                }
+                isUpdated := obj.Status.NumberAvailable == obj.Status.DesiredNumberScheduled &&
+                    // We're forcing a rollout, so we expect UpdatedNumberScheduled to always be set.
+                    obj.Status.UpdatedNumberScheduled == obj.Status.DesiredNumberScheduled
+                return isUpdated, nil
             },
-            Interval: 1 * time.Second,
-            Timeout:  30 * time.Second,
+            Interval: waitInterval,
+            Timeout:  waitTimeout,
         },
     ); err != nil {
         return fmt.Errorf(
@@ -331,17 +413,7 @@ func waitForCiliumToBeReady(
 }
 
 // cleanupKubeProxy cleans up kube-proxy DaemonSet and ConfigMap on the remote cluster when kube-proxy is disabled.
-func cleanupKubeProxy(ctx context.Context, c ctrlclient.Client, cluster *clusterv1.Cluster) error {
-    remoteClient, err := remote.NewClusterClient(
-        ctx,
-        "",
-        c,
-        ctrlclient.ObjectKeyFromObject(cluster),
-    )
-    if err != nil {
-        return fmt.Errorf("error creating remote cluster client: %w", err)
-    }
-
+func cleanupKubeProxy(ctx context.Context, c ctrlclient.Client) error {
     objs := []ctrlclient.Object{
         &appsv1.DaemonSet{
             ObjectMeta: metav1.ObjectMeta{
@@ -357,10 +429,27 @@ func cleanupKubeProxy(ctx context.Context, c ctrlclient.Client, cluster *cluster
         },
     }
     for _, obj := range objs {
-        if err := ctrlclient.IgnoreNotFound(remoteClient.Delete(ctx, obj)); err != nil {
+        if err := ctrlclient.IgnoreNotFound(c.Delete(ctx, obj)); err != nil {
             return fmt.Errorf("failed to delete %s/%s: %w", obj.GetNamespace(), obj.GetName(), err)
         }
     }
     return nil
 }
+
+func isKubeProxyInstalled(ctx context.Context, c ctrlclient.Client) (bool, error) {
+    ds := &appsv1.DaemonSet{
+        ObjectMeta: metav1.ObjectMeta{
+            Name:      kubeProxyName,
+            Namespace: kubeProxyNamespace,
+        },
+    }
+    err := c.Get(ctx, ctrlclient.ObjectKeyFromObject(ds), ds)
+    if err != nil {
+        if apierrors.IsNotFound(err) {
+            return false, nil
+        }
+        return false, err
+    }
+    return true, nil
+}
diff --git a/pkg/handlers/generic/lifecycle/cni/cilium/handler_integration_test.go b/pkg/handlers/generic/lifecycle/cni/cilium/handler_integration_test.go
index 75ff16bf2..43408f396 100644
--- a/pkg/handlers/generic/lifecycle/cni/cilium/handler_integration_test.go
+++ b/pkg/handlers/generic/lifecycle/cni/cilium/handler_integration_test.go
@@ -5,6 +5,7 @@ package cilium
 import (
     "fmt"
+    "time"
 
     "github.com/go-logr/logr"
     . "github.com/onsi/ginkgo/v2"
@@ -51,7 +52,7 @@ var _ = Describe("Test runApply", func() {
         Expect(err).To(BeNil())
         Expect(configMap).ToNot(BeNil())
 
-        By("Should not delete when the addon is not applied")
+        By("Should not delete kube-proxy when the addon is not applied")
         err = runApply(
             ctx,
             c,
@@ -70,14 +71,65 @@ var _ = Describe("Test runApply", func() {
         Expect(err).To(BeNil())
         Expect(configMap).ToNot(BeNil())
 
-        By("Should delete kube-proxy when skip kube-proxy is set")
+        By("Should not delete kube-proxy when Cilium DaemonSet is not updated")
         cluster.Spec.Topology.ControlPlane.Metadata.Annotations = map[string]string{
             controlplanev1.SkipKubeProxyAnnotation: "",
         }
+        // Speed up the test.
+        waitTimeout = 1 * time.Second
+        err = runApply(ctx, c, cluster, strategy, cluster.Namespace, logr.Discard())
+        Expect(err).ToNot(BeNil())
+
+        // Verify that the kube-proxy DaemonSet and ConfigMap are not deleted when the Cilium DaemonSet is not updated.
+        err = remoteClient.Get(ctx, ctrlclient.ObjectKey{Name: kubeProxyName, Namespace: kubeProxyNamespace}, daemonSet)
+        Expect(err).To(BeNil())
+        Expect(daemonSet).ToNot(BeNil())
+        err = remoteClient.Get(ctx, ctrlclient.ObjectKey{Name: kubeProxyName, Namespace: kubeProxyNamespace}, configMap)
+        Expect(err).To(BeNil())
+        Expect(configMap).ToNot(BeNil())
+
+        By("Should delete kube-proxy when skip kube-proxy is set")
+        // Update the status of the Cilium DaemonSet to simulate a rollout.
+        ciliumDaemonSet := &appsv1.DaemonSet{
+            ObjectMeta: metav1.ObjectMeta{
+                Name:      defaultCiliumReleaseName,
+                Namespace: defaultCiliumNamespace,
+            },
+        }
+        err = remoteClient.Get(
+            ctx,
+            ctrlclient.ObjectKey{Name: defaultCiliumReleaseName, Namespace: defaultCiliumNamespace},
+            ciliumDaemonSet,
+        )
+        Expect(err).To(BeNil())
+        ciliumDaemonSet.Status = appsv1.DaemonSetStatus{
+            ObservedGeneration:     2,
+            NumberAvailable:        2,
+            DesiredNumberScheduled: 2,
+            UpdatedNumberScheduled: 2,
+            NumberUnavailable:      0,
+        }
+        Expect(remoteClient.Status().Update(ctx, ciliumDaemonSet)).To(Succeed())
         err = runApply(ctx, c, cluster, strategy, cluster.Namespace, logr.Discard())
         Expect(err).To(BeNil())
+        // Verify that the Cilium DaemonSet was updated.
+        ciliumDaemonSet = &appsv1.DaemonSet{
+            ObjectMeta: metav1.ObjectMeta{
+                Name:      defaultCiliumReleaseName,
+                Namespace: defaultCiliumNamespace,
+            },
+        }
+        err = remoteClient.Get(
+            ctx,
+            ctrlclient.ObjectKeyFromObject(ciliumDaemonSet),
+            ciliumDaemonSet,
+        )
+        Expect(err).To(BeNil())
+        Expect(ciliumDaemonSet).ToNot(BeNil())
+        Expect(ciliumDaemonSet.Spec.Template.Annotations).To(HaveKey(restartedAtAnnotation))
+
         // Verify that the kube-proxy DaemonSet and ConfigMap are deleted.
         err = remoteClient.Get(ctx, ctrlclient.ObjectKey{Name: kubeProxyName, Namespace: kubeProxyNamespace}, daemonSet)
         Expect(err).ToNot(BeNil())
@@ -158,6 +210,7 @@ func setupTestCluster(
     }
     Expect(remoteClient.Create(ctx, configMap)).To(Succeed())
 
+    // Cilium DaemonSet, Pods and ConfigMap
     ciliumDaemonSet := &appsv1.DaemonSet{
         ObjectMeta: metav1.ObjectMeta{
             Name: defaultCiliumReleaseName,
@@ -165,6 +218,7 @@ func setupTestCluster(
             Labels: map[string]string{
                 "app": defaultCiliumReleaseName,
             },
+            Generation: 1,
         },
         Spec: appsv1.DaemonSetSpec{
             Selector: &metav1.LabelSelector{
@@ -192,6 +246,21 @@ func setupTestCluster(
         },
     }
     Expect(remoteClient.Create(ctx, ciliumDaemonSet)).To(Succeed())
+    ciliumDaemonSet.Status = appsv1.DaemonSetStatus{
+        ObservedGeneration: 1,
+    }
+    Expect(remoteClient.Status().Update(ctx, ciliumDaemonSet)).To(Succeed())
+
+    configMap = &corev1.ConfigMap{
+        ObjectMeta: metav1.ObjectMeta{
+            Name:      ciliumConfigMapName,
+            Namespace: defaultCiliumNamespace,
+        },
+        Data: map[string]string{
+            kubeProxyReplacementConfigKey: "true",
+        },
+    }
+    Expect(remoteClient.Create(ctx, configMap)).To(Succeed())
 
     return cluster, remoteClient
 }
diff --git a/pkg/handlers/generic/lifecycle/cni/cilium/template.go b/pkg/handlers/generic/lifecycle/cni/cilium/template.go
new file mode 100644
index 000000000..110184203
--- /dev/null
+++ b/pkg/handlers/generic/lifecycle/cni/cilium/template.go
@@ -0,0 +1,42 @@
+// Copyright 2025 Nutanix. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+package cilium
+
+import (
+    "bytes"
+    "fmt"
+    "text/template"
+
+    clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
+
+    capiutils "github.com/nutanix-cloud-native/cluster-api-runtime-extensions-nutanix/common/pkg/capi/utils"
+)
+
+// templateValues enables kube-proxy replacement when kube-proxy is disabled.
+func templateValues(cluster *clusterv1.Cluster, text string) (string, error) {
+    ciliumTemplate, err := template.New("").Parse(text)
+    if err != nil {
+        return "", fmt.Errorf("failed to parse template: %w", err)
+    }
+
+    type input struct {
+        EnableKubeProxyReplacement bool
+    }
+
+    // Assume when kube-proxy is skipped, we should enable Cilium's kube-proxy replacement feature.
+    templateInput := input{
+        EnableKubeProxyReplacement: capiutils.ShouldSkipKubeProxy(cluster),
+    }
+
+    var b bytes.Buffer
+    err = ciliumTemplate.Execute(&b, templateInput)
+    if err != nil {
+        return "", fmt.Errorf(
+            "failed templating Cilium values: %w",
+            err,
+        )
+    }
+
+    return b.String(), nil
+}
diff --git a/test/e2e/serviceloadbalancer_helpers.go b/test/e2e/serviceloadbalancer_helpers.go
index 6939026d0..d094dae47 100644
--- a/test/e2e/serviceloadbalancer_helpers.go
+++ b/test/e2e/serviceloadbalancer_helpers.go
@@ -20,6 +20,7 @@ import (
     corev1 "k8s.io/api/core/v1"
     metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
     "k8s.io/apimachinery/pkg/util/intstr"
+    "k8s.io/klog/v2"
     "k8s.io/utils/ptr"
     clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
     "sigs.k8s.io/cluster-api/test/framework"
@@ -168,8 +169,10 @@ func EnsureLoadBalancerService(
         Host: getLoadBalancerAddress(svc),
         Path: "/clientip",
     }
+    klog.Infof("Testing the LoadBalancer Service on: %q", getClientIPURL.String())
     output := testServiceLoadBalancer(ctx, getClientIPURL, input.ServiceIntervals)
     Expect(output).ToNot(BeEmpty())
+    klog.Infof("Got output from Kubernetes LoadBalancer Service: %q", output)
 }
 
 func createTestService(
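For reference, below is a minimal, standalone sketch (not part of the patch above) of how the simplified Cilium values template renders for both settings of EnableKubeProxyReplacement. The valuesTemplate constant only mirrors the tail of values-template.yaml as changed at the top of this diff, and the boolean is assumed to come from the controlplane.cluster.x-k8s.io/skip-kube-proxy annotation via capiutils.ShouldSkipKubeProxy, which is how templateValues in template.go derives it.

package main

import (
    "bytes"
    "fmt"
    "text/template"
)

// valuesTemplate mirrors only the tail of the chart's values-template.yaml
// shown above; the real template contains many more keys.
const valuesTemplate = `k8sServiceHost: auto
{{- if .EnableKubeProxyReplacement }}
kubeProxyReplacement: true
{{- end }}
`

func main() {
    tmpl := template.Must(template.New("cilium-values").Parse(valuesTemplate))
    for _, enabled := range []bool{false, true} {
        // In the handler this flag would come from capiutils.ShouldSkipKubeProxy(cluster),
        // i.e. from the controlplane.cluster.x-k8s.io/skip-kube-proxy annotation.
        var b bytes.Buffer
        if err := tmpl.Execute(&b, struct{ EnableKubeProxyReplacement bool }{enabled}); err != nil {
            panic(err)
        }
        fmt.Printf("EnableKubeProxyReplacement=%t:\n%s\n", enabled, b.String())
    }
}

Rendering both branches makes the intent of the chart change visible: when the flag is false only k8sServiceHost: auto is emitted, and when it is true the single extra line kubeProxyReplacement: true is added, which is the delta the handler injects for kube-proxy-less clusters.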