diff --git a/cmd/aws-cloud-controller-manager/main.go b/cmd/aws-cloud-controller-manager/main.go index 16c3174184..dee89a61c7 100644 --- a/cmd/aws-cloud-controller-manager/main.go +++ b/cmd/aws-cloud-controller-manager/main.go @@ -31,6 +31,7 @@ import ( "k8s.io/apimachinery/pkg/util/wait" cloudprovider "k8s.io/cloud-provider" + "k8s.io/cloud-provider-aws/pkg/controllers/nodeipam" "k8s.io/cloud-provider-aws/pkg/controllers/tagging" awsv1 "k8s.io/cloud-provider-aws/pkg/providers/v1" "k8s.io/cloud-provider/app" @@ -78,6 +79,19 @@ func main() { controllerAliases := names.CCMControllerAliases() controllerAliases[tagging.TaggingControllerKey] = tagging.TaggingControllerKey + nodeIpamControllerWrapper := nodeipam.ControllerWrapper{} + nodeIpamControllerWrapper.Options.AddFlags(fss.FlagSet("nodeipam controller")) + + nodeIpamControllerConstructor := app.ControllerInitFuncConstructor{ + InitContext: app.ControllerInitContext{ + ClientName: nodeipam.NodeIpamControllerClientName, + }, + Constructor: nodeIpamControllerWrapper.StartNodeIpamControllerWrapper, + } + + controllerInitializers[nodeipam.NodeIpamControllerKey] = nodeIpamControllerConstructor + app.ControllersDisabledByDefault.Insert(nodeipam.NodeIpamControllerKey) + command := app.NewCloudControllerManagerCommand(opts, cloudInitializer, controllerInitializers, controllerAliases, fss, wait.NeverStop) if err := command.Execute(); err != nil { diff --git a/examples/existing-cluster/overlays/superset-role/cluster-role-patch.yaml b/examples/existing-cluster/overlays/superset-role/cluster-role-patch.yaml index 53dfc26f86..9a3404ce01 100644 --- a/examples/existing-cluster/overlays/superset-role/cluster-role-patch.yaml +++ b/examples/existing-cluster/overlays/superset-role/cluster-role-patch.yaml @@ -73,6 +73,7 @@ rules: - apiGroups: - "" resourceNames: + - nodeipam-controller - node-controller - service-controller - route-controller diff --git a/go.mod b/go.mod index 969a843361..546c767654 100644 --- a/go.mod +++ b/go.mod @@ -4,6 +4,7 @@ go 1.22.1 require ( github.com/aws/aws-sdk-go v1.51.1 + github.com/evanphx/json-patch v5.6.0+incompatible github.com/golang/mock v1.6.0 github.com/spf13/cobra v1.8.0 github.com/spf13/pflag v1.0.5 @@ -16,6 +17,7 @@ require ( k8s.io/cloud-provider v0.30.0-rc.0 k8s.io/code-generator v0.30.0-rc.0 k8s.io/component-base v0.30.0-rc.0 + k8s.io/component-helpers v0.30.0-rc.0 k8s.io/controller-manager v0.30.0-rc.0 k8s.io/csi-translation-lib v0.30.0-rc.0 k8s.io/klog/v2 v2.120.1 @@ -36,7 +38,6 @@ require ( github.com/coreos/go-systemd/v22 v22.5.0 // indirect github.com/davecgh/go-spew v1.1.1 // indirect github.com/emicklei/go-restful/v3 v3.11.0 // indirect - github.com/evanphx/json-patch v5.6.0+incompatible // indirect github.com/felixge/httpsnoop v1.0.4 // indirect github.com/fsnotify/fsnotify v1.7.0 // indirect github.com/go-logr/logr v1.4.1 // indirect @@ -109,7 +110,6 @@ require ( gopkg.in/yaml.v2 v2.4.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect k8s.io/apiserver v0.30.0-rc.0 // indirect - k8s.io/component-helpers v0.30.0-rc.0 // indirect k8s.io/gengo/v2 v2.0.0-20240228010128-51d4e06bde70 // indirect k8s.io/kms v0.30.0-rc.0 // indirect k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 // indirect diff --git a/hack/e2e/overlays/cluster-role-create-nodeipam-controller.yaml b/hack/e2e/overlays/cluster-role-create-nodeipam-controller.yaml new file mode 100644 index 0000000000..6af0df29fa --- /dev/null +++ b/hack/e2e/overlays/cluster-role-create-nodeipam-controller.yaml @@ -0,0 +1,25 @@ +apiVersion: 
rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: provider-aws:nodeipam-controller-clusterrole + annotations: + resources.gardener.cloud/keep-object: "true" +rules: +- apiGroups: [""] + resources: ["nodes"] + verbs: ["patch","list"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: nodeipam-clusterrolebinding + annotations: + resources.gardener.cloud/keep-object: "true" +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: provider-aws:nodeipam-controller-clusterrole +subjects: +- kind: ServiceAccount + name: nodeipam-controller + namespace: kube-system \ No newline at end of file diff --git a/hack/e2e/overlays/cluster-role-patch-cloud-controller.yaml b/hack/e2e/overlays/cluster-role-patch-cloud-controller.yaml new file mode 100644 index 0000000000..5f596f29e0 --- /dev/null +++ b/hack/e2e/overlays/cluster-role-patch-cloud-controller.yaml @@ -0,0 +1,101 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + addon.kops.k8s.io/name: aws-cloud-controller.addons.k8s.io + app.kubernetes.io/managed-by: kops + k8s-addon: aws-cloud-controller.addons.k8s.io + name: system:cloud-controller-manager +rules: +- apiGroups: + - "" + resources: + - events + verbs: + - create + - patch + - update +- apiGroups: + - "" + resources: + - nodes + verbs: + - '*' +- apiGroups: + - "" + resources: + - nodes/status + verbs: + - patch +- apiGroups: + - "" + resources: + - services + verbs: + - list + - patch + - update + - watch +- apiGroups: + - "" + resources: + - services/status + verbs: + - list + - patch + - update + - watch +- apiGroups: + - "" + resources: + - serviceaccounts + verbs: + - create + - get +- apiGroups: + - "" + resources: + - persistentvolumes + verbs: + - get + - list + - update + - watch +- apiGroups: + - "" + resources: + - endpoints + verbs: + - create + - get + - list + - watch + - update +- apiGroups: + - coordination.k8s.io + resources: + - leases + verbs: + - create + - get + - list + - watch + - update +- apiGroups: + - "" + resources: + - secrets + verbs: + - list + - watch +- apiGroups: + - "" + resourceNames: + - nodeipam-controller + - node-controller + - service-controller + - route-controller + resources: + - serviceaccounts/token + verbs: + - create diff --git a/hack/e2e/run.sh b/hack/e2e/run.sh index a2ea7c4c6a..3639da9e17 100755 --- a/hack/e2e/run.sh +++ b/hack/e2e/run.sh @@ -55,11 +55,12 @@ ZONES="${AWS_AVAILABILITY_ZONES:-us-west-2a,us-west-2b,us-west-2c}" AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text) IMAGE_NAME=${IMAGE_NAME:-${AWS_ACCOUNT_ID}.dkr.ecr.${REGION}.amazonaws.com/provider-aws/cloud-controller-manager} IMAGE_TAG=${IMAGE_TAG:-${BUILD_VERSION}-${test_run_id}} - +DNS_ZONE="${DNS_ZONE:-example.com}" # Test args GINKGO_FOCUS=${GINKGO_FOCUS:-"\[cloud-provider-aws-e2e\]"} GINKGO_SKIP=${GINKGO_SKIP:-"\[Disruptive\]"} GINKGO_NODES=${GINKGO_NODES:-4} +GINKGO_LABEL_FILTER="loadbalancer" EXPANDED_TEST_EXTRA_FLAGS="${EXPANDED_TEST_EXTRA_FLAGS:-}" @@ -94,12 +95,13 @@ fi export PATH="${INSTALL_PATH}:${PATH}" -echo "Starting test run ---" +echo "Starting test run for loadbalancer ---" echo " + Region: ${REGION} (${ZONES})" echo " + Cluster name: ${CLUSTER_NAME}" echo " + Kubernetes version: ${KUBERNETES_VERSION}" echo " + Focus: ${GINKGO_FOCUS}" echo " + Skip: ${GINKGO_SKIP}" +echo " + Label Filter: ${GINKGO_LABEL_FILTER}" echo " + kOps state store: ${KOPS_STATE_STORE}" echo " + SSH public key path: ${SSH_PUBLIC_KEY_PATH}" echo " + Test run 
ID: ${test_run_id}" @@ -143,10 +145,62 @@ fi set -x pushd ./tests/e2e -ginkgo . -v -p --nodes="${GINKGO_NODES}" --focus="${GINKGO_FOCUS}" --skip="${GINKGO_SKIP}" --report-dir="${ARTIFACTS}" +ginkgo --focus="${GINKGO_FOCUS}" --skip="${GINKGO_SKIP}" --label-filter="${GINKGO_LABEL_FILTER}" . -v -p --nodes="${GINKGO_NODES}" --report-dir="${ARTIFACTS}" popd if [[ "${DOWN}" = "yes" ]]; then # This should be changed to ${test_run}/kops once https://github.com/kubernetes/kops/pull/13217 is merged. ${test_run}/${test_run_id}/kops delete cluster --name "${CLUSTER_NAME}" --yes fi + +GINKGO_LABEL_FILTER="ipv6 prefix" + +echo "Starting test run for nodeipam controller ---" +echo " + Region: ${REGION} (${ZONES})" +echo " + Cluster name: ${CLUSTER_NAME}.${DNS_ZONE}" +echo " + Kubernetes version: ${KUBERNETES_VERSION}" +echo " + Focus: ${GINKGO_FOCUS}" +echo " + Skip: ${GINKGO_SKIP}" +echo " + Label Filter: ${GINKGO_LABEL_FILTER}" +echo " + kOps state store: ${KOPS_STATE_STORE}" +echo " + SSH public key path: ${SSH_PUBLIC_KEY_PATH}" +echo " + Test run ID: ${test_run_id}" +echo " + Kubetest run dir: ${test_run}" +echo " + Image: ${IMAGE_NAME}:${IMAGE_TAG}" +echo " + Create cluster: ${UP}" +echo " + Delete cluster: ${DOWN}" + +if [[ "${UP}" = "yes" ]]; then + kubetest2 kops \ + -v 2 \ + --up \ + --run-id="${test_run_id}" \ + --cloud-provider=aws \ + --cluster-name="${CLUSTER_NAME}.${DNS_ZONE}" \ + --create-args="--dns-zone=${DNS_ZONE} --ipv6 --zones=${ZONES} --node-size=m5.large --master-size=m5.large --set cluster.spec.cloudControllerManager.cloudProvider=aws --set cluster.spec.cloudControllerManager.clusterCIDR=10.0.0.0/16 --set cluster.spec.cloudControllerManager.configureCloudRoutes=false --set cluster.spec.cloudControllerManager.controllers=cloud-node --set cluster.spec.cloudControllerManager.controllers=cloud-node-lifecycle --set cluster.spec.cloudControllerManager.controllers=nodeipam --set cluster.spec.cloudControllerManager.controllers=service --set cluster.spec.cloudControllerManager.controllers=route --set cluster.spec.cloudControllerManager.image=${IMAGE_NAME}:${IMAGE_TAG} --set cluster.spec.kubeControllerManager.configureCloudRoutes=false" \ + --admin-access="0.0.0.0/0" \ + --kubernetes-version="${KUBERNETES_VERSION}" \ + --kops-version-marker=https://storage.googleapis.com/kops-ci/bin/latest-ci-updown-green.txt \ + + # Use the kops tester once we have a way of consuming an arbitrary e2e.test binary. + #--test=kops \ + #-- \ + #--use-built-binaries=true \ + #--focus-regex="${GINKGO_FOCUS}" \ + #--parallel 25 +fi + +set -x +pushd ./hack/e2e/overlays +kubectl patch clusterrole system:cloud-controller-manager --type strategic --patch-file cluster-role-patch-cloud-controller.yaml +kubectl create -f cluster-role-create-nodeipam-controller.yaml +popd + +pushd ./tests/e2e +ginkgo --focus="${GINKGO_FOCUS}" --skip="${GINKGO_SKIP}" --label-filter="${GINKGO_LABEL_FILTER}" . -v -p --nodes="${GINKGO_NODES}" --report-dir="${ARTIFACTS}" +popd + +if [[ "${DOWN}" = "yes" ]]; then + # This should be changed to ${test_run}/kops once https://github.com/kubernetes/kops/pull/13217 is merged. 
+ ${test_run}/${test_run_id}/kops delete cluster --name "${CLUSTER_NAME}.${DNS_ZONE}" --yes +fi diff --git a/hack/install-e2e-tools.sh b/hack/install-e2e-tools.sh index 8504d7980d..5060ed4c72 100755 --- a/hack/install-e2e-tools.sh +++ b/hack/install-e2e-tools.sh @@ -18,7 +18,7 @@ set -o errexit set -o nounset set -o pipefail -GINKGO_VERSION="${GINKGO_VERSION:-v1.14.0}" +GINKGO_VERSION="${GINKGO_VERSION:-v2.13.2}" KOPS_ROOT="${KOPS_ROOT:-}" export GO111MODULE=on @@ -31,8 +31,8 @@ cd "$(mktemp -d)" > /dev/null echo " + Installing kubetest2" go install "sigs.k8s.io/kubetest2@latest" -echo " + Installing ginkgo" -go install "github.com/onsi/ginkgo/ginkgo@${GINKGO_VERSION}" +echo " + Installing ginkgo v2" +go install "github.com/onsi/ginkgo/v2/ginkgo@${GINKGO_VERSION}" if [[ -z "${KOPS_ROOT}" ]]; then git clone https://github.com/kubernetes/kops.git diff --git a/pkg/controllers/nodeipam/config/types.go b/pkg/controllers/nodeipam/config/types.go new file mode 100644 index 0000000000..2e1c7e90f4 --- /dev/null +++ b/pkg/controllers/nodeipam/config/types.go @@ -0,0 +1,30 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package config + +import "net" + +// NodeIPAMControllerConfiguration contains elements describing NodeIPAMController. +type NodeIPAMControllerConfiguration struct { + RateLimit float64 + BurstLimit int + DualStack bool + ClusterCIDRs []*net.IPNet + // NodeCIDRMaskSize is the mask size for node cidr in single-stack cluster. + // This can be used only with single stack clusters and is incompatible with dual stack clusters. + NodeCIDRMaskSize int32 +} diff --git a/pkg/controllers/nodeipam/ipam/cidr_allocator.go b/pkg/controllers/nodeipam/ipam/cidr_allocator.go new file mode 100644 index 0000000000..22f55ee739 --- /dev/null +++ b/pkg/controllers/nodeipam/ipam/cidr_allocator.go @@ -0,0 +1,480 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package ipam + +import ( + "context" + "fmt" + "net" + "sync" + "time" + + "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/service/ec2" + "k8s.io/klog/v2" + + v1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/fields" + "k8s.io/apimachinery/pkg/labels" + utilruntime "k8s.io/apimachinery/pkg/util/runtime" + "k8s.io/apimachinery/pkg/util/sets" + "k8s.io/apimachinery/pkg/util/wait" + informers "k8s.io/client-go/informers/core/v1" + clientset "k8s.io/client-go/kubernetes" + "k8s.io/client-go/kubernetes/scheme" + v1core "k8s.io/client-go/kubernetes/typed/core/v1" + corelisters "k8s.io/client-go/listers/core/v1" + "k8s.io/client-go/tools/cache" + "k8s.io/client-go/tools/record" + cidrset "k8s.io/cloud-provider-aws/pkg/controllers/nodeipam/ipam/cidrset" + awsv1 "k8s.io/cloud-provider-aws/pkg/providers/v1" + nodeutil "k8s.io/cloud-provider-aws/pkg/util" +) + +// cidrs are reserved, then node resource is patched with them +// this type holds the reservation info for a node + +// NodeReservedCIDRs holds the allocated CIDRs +type NodeReservedCIDRs struct { + allocatedCIDRs []*net.IPNet + nodeName string +} + +// TODO: figure out the good setting for those constants. +const ( + // The amount of time the nodecontroller polls on the list nodes endpoint. + apiserverStartupGracePeriod = 10 * time.Minute + + // The no. of NodeSpec updates NC can process concurrently. + cidrUpdateWorkers = 30 + + // The max no. of NodeSpec updates that can be enqueued. + CidrUpdateQueueSize = 5000 + + // cidrUpdateRetries is the no. of times a NodeSpec update will be retried before dropping it. + cidrUpdateRetries = 3 + + // updateRetryTimeout is the time to wait before requeing a failed node for retry + updateRetryTimeout = 250 * time.Millisecond + + // maxUpdateRetryTimeout is the maximum amount of time between timeouts. + maxUpdateRetryTimeout = 5 * time.Second + + // updateMaxRetries is the max retries for a failed node + updateMaxRetries = 10 +) + +// nodePollInterval is used in listing node +// This is a variable instead of a const to enable testing. +var nodePollInterval = 10 * time.Second + +// CIDRAllocator is an interface implemented by things that know how +// to allocate/occupy/recycle CIDR for nodes. +type CIDRAllocator interface { + // AllocateOrOccupyCIDR looks at the given node, assigns it a valid + // CIDR if it doesn't currently have one or mark the CIDR as used if + // the node already have one. + AllocateOrOccupyCIDR(node *v1.Node) error + // ReleaseCIDR releases the CIDR of the removed node + ReleaseCIDR(node *v1.Node) error + // Run starts all the working logic of the allocator. + Run(stopCh <-chan struct{}) +} + +// CIDRAllocatorParams is parameters that's required for creating new +// cidr range allocator. +type CIDRAllocatorParams struct { + // ClusterCIDRs is list of cluster cidrs + ClusterCIDRs []*net.IPNet + // NodeCIDRMaskSizes is list of node cidr mask sizes + NodeCIDRMaskSizes []int +} + +func listNodes(kubeClient clientset.Interface) (*v1.NodeList, error) { + var nodeList *v1.NodeList + // We must poll because apiserver might not be up. This error causes + // controller manager to restart. 
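CIDRAllocatorParams pairs its slices by position: entry i of NodeCIDRMaskSizes is the per-node mask carved out of entry i of ClusterCIDRs, so the two slices are indexed in step. A minimal standalone sketch of that pairing (the params struct and CIDR values below are illustrative only, mirroring CIDRAllocatorParams rather than importing it):

package main

import (
	"fmt"
	"net"
)

// params mirrors the shape of CIDRAllocatorParams for illustration only.
type params struct {
	ClusterCIDRs      []*net.IPNet
	NodeCIDRMaskSizes []int
}

func main() {
	_, v4, _ := net.ParseCIDR("10.10.0.0/16")
	_, v6, _ := net.ParseCIDR("2001:db8::/56")

	p := params{
		ClusterCIDRs:      []*net.IPNet{v4, v6},
		NodeCIDRMaskSizes: []int{24, 64}, // a /24 per node from the v4 range, a /64 from the v6 range
	}
	for i, c := range p.ClusterCIDRs {
		fmt.Printf("cluster CIDR %v -> node mask /%d\n", c, p.NodeCIDRMaskSizes[i])
	}
}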
+ if pollErr := wait.Poll(nodePollInterval, apiserverStartupGracePeriod, func() (bool, error) { + var err error + nodeList, err = kubeClient.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{ + FieldSelector: fields.Everything().String(), + LabelSelector: labels.Everything().String(), + }) + if err != nil { + klog.Errorf("Failed to list all nodes: %v", err) + return false, nil + } + return true, nil + }); pollErr != nil { + return nil, fmt.Errorf("failed to list all nodes in %v, cannot proceed without updating CIDR map", + apiserverStartupGracePeriod) + } + return nodeList, nil +} + +type rangeAllocator struct { + client clientset.Interface + // cluster cidrs as passed in during controller creation + clusterCIDRs []*net.IPNet + // for each entry in clusterCIDRs we maintain a list of what is used and what is not + cidrSets []*cidrset.CidrSet + // nodeLister is able to list/get nodes and is populated by the shared informer passed to controller + nodeLister corelisters.NodeLister + // nodesSynced returns true if the node shared informer has been synced at least once. + nodesSynced cache.InformerSynced + // Channel that is used to pass updating Nodes and their reserved CIDRs to the background + // This increases a throughput of CIDR assignment by not blocking on long operations. + nodeCIDRUpdateChannel chan NodeReservedCIDRs + recorder record.EventRecorder + // Keep a set of nodes that are currently being processed to avoid races in CIDR allocation + lock sync.Mutex + nodesInProcessing sets.String + cloud *awsv1.Cloud +} + +// NewCIDRRangeAllocator returns a CIDRAllocator to allocate CIDRs for node (one from each of clusterCIDRs) +// Caller must ensure subNetMaskSize is not less than cluster CIDR mask size. +// Caller must always pass in a list of existing nodes so the new allocator. +// can initialize its CIDR map. NodeList is only nil in testing. +func NewCIDRRangeAllocator(client clientset.Interface, nodeInformer informers.NodeInformer, awsCloud *awsv1.Cloud, allocatorParams CIDRAllocatorParams, nodeList *v1.NodeList) (CIDRAllocator, error) { + if client == nil { + klog.Fatalf("kubeClient is nil when starting NodeController") + } + + eventBroadcaster := record.NewBroadcaster() + recorder := eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "cidrAllocator"}) + eventBroadcaster.StartStructuredLogging(0) + klog.V(0).Infof("Sending events to api server.") + eventBroadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: client.CoreV1().Events("")}) + + // create a cidrSet for each CIDR we operate on. 
+ // cidrSet are mapped to clusterCIDR by index + cidrSets := make([]*cidrset.CidrSet, len(allocatorParams.ClusterCIDRs)) + for idx, cidr := range allocatorParams.ClusterCIDRs { + cidrSet, err := cidrset.NewCIDRSet(cidr, allocatorParams.NodeCIDRMaskSizes[idx]) + if err != nil { + return nil, err + } + cidrSets[idx] = cidrSet + } + + ra := &rangeAllocator{ + client: client, + clusterCIDRs: allocatorParams.ClusterCIDRs, + cloud: awsCloud, + cidrSets: cidrSets, + nodeLister: nodeInformer.Lister(), + nodesSynced: nodeInformer.Informer().HasSynced, + nodeCIDRUpdateChannel: make(chan NodeReservedCIDRs, CidrUpdateQueueSize), + recorder: recorder, + nodesInProcessing: sets.NewString(), + } + + if nodeList != nil { + for _, node := range nodeList.Items { + if len(node.Spec.PodCIDRs) == 0 { + klog.V(4).Infof("Node %v has no CIDR, ignoring", node.Name) + continue + } + klog.V(4).Infof("Node %v has CIDR %s, occupying it in CIDR map", node.Name, node.Spec.PodCIDR) + if err := ra.occupyCIDRs(&node); err != nil { + // This will happen if: + // 1. We find garbage in the podCIDRs field. Retrying is useless. + // 2. CIDR out of range: This means a node CIDR has changed. + // This error will keep crashing controller-manager. + return nil, err + } + } + } + + nodeInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ + AddFunc: nodeutil.CreateAddNodeHandler(ra.AllocateOrOccupyCIDR), + UpdateFunc: nodeutil.CreateUpdateNodeHandler(func(_, newNode *v1.Node) error { + // If the PodCIDRs list is not empty we either: + // - already processed a Node that already had CIDRs after NC restarted + // (cidr is marked as used), + // - already processed a Node successfully and allocated CIDRs for it + // (cidr is marked as used), + // - already processed a Node but we did saw a "timeout" response and + // request eventually got through in this case we haven't released + // the allocated CIDRs (cidr is still marked as used). + // There's a possible error here: + // - NC sees a new Node and assigns CIDRs X,Y.. to it, + // - Update Node call fails with a timeout, + // - Node is updated by some other component, NC sees an update and + // assigns CIDRs A,B.. to the Node, + // - Both CIDR X,Y.. and CIDR A,B.. are marked as used in the local cache, + // even though Node sees only CIDR A,B.. + // The problem here is that in in-memory cache we see CIDR X,Y.. as marked, + // which prevents it from being assigned to any new node. The cluster + // state is correct. + // Restart of NC fixes the issue. + if len(newNode.Spec.PodCIDRs) == 0 { + return ra.AllocateOrOccupyCIDR(newNode) + } + return nil + }), + DeleteFunc: nodeutil.CreateDeleteNodeHandler(ra.ReleaseCIDR), + }) + + return ra, nil +} + +func (r *rangeAllocator) Run(stopCh <-chan struct{}) { + defer utilruntime.HandleCrash() + + klog.Infof("Starting range CIDR allocator") + defer klog.Infof("Shutting down range CIDR allocator") + + if !cache.WaitForNamedCacheSync("cidrallocator", stopCh, r.nodesSynced) { + return + } + + for i := 0; i < cidrUpdateWorkers; i++ { + go r.worker(stopCh) + } + + <-stopCh +} + +func (r *rangeAllocator) worker(stopChan <-chan struct{}) { + for { + select { + case workItem, ok := <-r.nodeCIDRUpdateChannel: + if !ok { + klog.Warning("Channel nodeCIDRUpdateChannel was unexpectedly closed") + return + } + if err := r.updateCIDRsAllocation(workItem); err != nil { + // Requeue the failed node for update again. 
+ r.nodeCIDRUpdateChannel <- workItem + } + case <-stopChan: + return + } + } +} + +func (r *rangeAllocator) insertNodeToProcessing(nodeName string) bool { + r.lock.Lock() + defer r.lock.Unlock() + if r.nodesInProcessing.Has(nodeName) { + return false + } + r.nodesInProcessing.Insert(nodeName) + return true +} + +func (r *rangeAllocator) removeNodeFromProcessing(nodeName string) { + r.lock.Lock() + defer r.lock.Unlock() + r.nodesInProcessing.Delete(nodeName) +} + +// marks node.PodCIDRs[...] as used in allocator's tracked cidrSet +func (r *rangeAllocator) occupyCIDRs(node *v1.Node) error { + defer r.removeNodeFromProcessing(node.Name) + if len(node.Spec.PodCIDRs) == 0 { + return nil + } + for idx, cidr := range node.Spec.PodCIDRs { + _, podCIDR, err := net.ParseCIDR(cidr) + if err != nil { + return fmt.Errorf("failed to parse node %s, CIDR %s", node.Name, node.Spec.PodCIDR) + } + // If node has a pre allocate cidr that does not exist in our cidrs. + // This will happen if cluster went from dualstack(multi cidrs) to non-dualstack + // then we have now way of locking it + if idx >= len(r.cidrSets) { + return fmt.Errorf("node:%s has an allocated cidr: %v at index:%v that does not exist in cluster cidrs configuration", node.Name, cidr, idx) + } + + if err := r.cidrSets[idx].Occupy(podCIDR); err != nil { + return fmt.Errorf("failed to mark cidr[%v] at idx [%v] as occupied for node: %v: %v", podCIDR, idx, node.Name, err) + } + } + return nil +} + +// WARNING: If you're adding any return calls or defer any more work from this +// function you have to make sure to update nodesInProcessing properly with the +// disposition of the node when the work is done. +func (r *rangeAllocator) AllocateOrOccupyCIDR(node *v1.Node) error { + if node == nil { + return nil + } + if !r.insertNodeToProcessing(node.Name) { + klog.V(2).Infof("Node %v is already in a process of CIDR assignment.", node.Name) + return nil + } + + if len(node.Spec.PodCIDRs) > 0 { + return r.occupyCIDRs(node) + } + // allocate and queue the assignment + allocated := NodeReservedCIDRs{ + nodeName: node.Name, + allocatedCIDRs: make([]*net.IPNet, len(r.cidrSets)), + } + + for idx := range r.cidrSets { + podCIDR, err := r.cidrSets[idx].AllocateNext() + if err != nil { + r.removeNodeFromProcessing(node.Name) + nodeutil.RecordNodeStatusChange(r.recorder, node, "CIDRNotAvailable") + return fmt.Errorf("failed to allocate cidr from cluster cidr at idx:%v: %v", idx, err) + } + allocated.allocatedCIDRs[idx] = podCIDR + } + + //queue the assignment + klog.V(4).Infof("Putting node %s with CIDR %v into the work queue", node.Name, allocated.allocatedCIDRs) + r.nodeCIDRUpdateChannel <- allocated + return nil +} + +// ReleaseCIDR marks node.podCIDRs[...] as unused in our tracked cidrSets +func (r *rangeAllocator) ReleaseCIDR(node *v1.Node) error { + if node == nil || len(node.Spec.PodCIDRs) == 0 { + return nil + } + + for idx, cidr := range node.Spec.PodCIDRs { + _, podCIDR, err := net.ParseCIDR(cidr) + if err != nil { + return fmt.Errorf("failed to parse CIDR %s on Node %v: %v", cidr, node.Name, err) + } + + // If node has a pre allocate cidr that does not exist in our cidrs. 
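insertNodeToProcessing and removeNodeFromProcessing guard a plain string set with a mutex so that CIDR assignment for a given node is only ever handled by one worker at a time. A standalone sketch of the same claim/release pattern (the type and names below are illustrative, not part of this package):

package main

import (
	"fmt"
	"sync"
)

// inFlight is a mutex-guarded set: a claim succeeds only if the name
// is not already being processed, and release makes it claimable again.
type inFlight struct {
	mu    sync.Mutex
	names map[string]struct{}
}

func (f *inFlight) tryClaim(name string) bool {
	f.mu.Lock()
	defer f.mu.Unlock()
	if _, busy := f.names[name]; busy {
		return false
	}
	f.names[name] = struct{}{}
	return true
}

func (f *inFlight) release(name string) {
	f.mu.Lock()
	defer f.mu.Unlock()
	delete(f.names, name)
}

func main() {
	f := &inFlight{names: map[string]struct{}{}}
	fmt.Println(f.tryClaim("node0")) // true: first claim wins
	fmt.Println(f.tryClaim("node0")) // false: already being processed
	f.release("node0")
	fmt.Println(f.tryClaim("node0")) // true again after release
}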
+ // This will happen if cluster went from dualstack(multi cidrs) to non-dualstack + // then we have now way of locking it + if idx >= len(r.cidrSets) { + return fmt.Errorf("node:%s has an allocated cidr: %v at index:%v that does not exist in cluster cidrs configuration", node.Name, cidr, idx) + } + + klog.V(4).Infof("release CIDR %s for node:%v", cidr, node.Name) + if err = r.cidrSets[idx].Release(podCIDR); err != nil { + return fmt.Errorf("error when releasing CIDR %v: %v", cidr, err) + } + } + return nil +} + +// updateCIDRsAllocation assigns CIDR to Node and sends an update to the API server. +func (r *rangeAllocator) updateCIDRsAllocation(data NodeReservedCIDRs) error { + var err error + var node *v1.Node + defer r.removeNodeFromProcessing(data.nodeName) + cidrsString := cidrsAsString(data.allocatedCIDRs) + node, err = r.nodeLister.Get(data.nodeName) + if err != nil { + klog.Errorf("Failed while getting node %v for updating Node.Spec.PodCIDRs: %v", data.nodeName, err) + return err + } + + // if cidr list matches the proposed. + // then we possibly updated this node + // and just failed to ack the success. + if len(node.Spec.PodCIDRs) == len(data.allocatedCIDRs) { + match := true + for idx, cidr := range cidrsString { + if node.Spec.PodCIDRs[idx] != cidr { + match = false + break + } + } + if match { + klog.V(4).Infof("Node %v already has allocated CIDR %v. It matches the proposed one.", node.Name, data.allocatedCIDRs) + return nil + } + } + + // node has cidrs, release the reserved + if len(node.Spec.PodCIDRs) != 0 { + klog.Errorf("Node %v already has a CIDR allocated %v. Releasing the new one.", node.Name, node.Spec.PodCIDRs) + for idx, cidr := range data.allocatedCIDRs { + if releaseErr := r.cidrSets[idx].Release(cidr); releaseErr != nil { + klog.Errorf("Error when releasing CIDR idx:%v value: %v err:%v", idx, cidr, releaseErr) + } + } + return nil + } + + // fetch ipv6 cidr address + if node.Spec.ProviderID == "" { + klog.Infof("Node %q has empty provider ID", node.Name) + return nil + } + + // aws:///eu-central-1a/i-07577a7bcf3e576f2 + instanceID, _ := awsv1.KubernetesInstanceID(node.Spec.ProviderID).MapToAWSInstanceID() + eni, err := r.cloud.DescribeNetworkInterfaces( + &ec2.DescribeNetworkInterfacesInput{ + Filters: []*ec2.Filter{ + { + Name: ptrTo("attachment.instance-id"), + Values: []*string{ + ptrTo(string(instanceID)), + }, + }, + }, + }) + if err != nil { + return err + } + + if len(eni.Ipv6Prefixes) != 1 { + return fmt.Errorf("unexpected amount of ipv6 prefixes on interface %q: %v", *eni.NetworkInterfaceId, len(eni.Ipv6Prefixes)) + } + + ipv6Address := aws.StringValue(eni.Ipv6Prefixes[0].Ipv6Prefix) + cidrsString = append(cidrsString, ipv6Address) + + // If we reached here, it means that the node has no CIDR currently assigned. So we set it. + for i := 0; i < cidrUpdateRetries; i++ { + if err = PatchNodePodCIDRs(r.client, node, cidrsString); err == nil { + klog.Infof("Set node %v PodCIDR to %v", node.Name, cidrsString) + return nil + } + } + // failed release back to the pool + klog.Errorf("Failed to update node %v PodCIDR to %v after multiple attempts: %v", node.Name, cidrsString, err) + nodeutil.RecordNodeStatusChange(r.recorder, node, "CIDRAssignmentFailed") + // We accept the fact that we may leak CIDRs here. This is safer than releasing + // them in case when we don't know if request went through. + // NodeController restart will return all falsely allocated CIDRs to the pool. 
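updateCIDRsAllocation derives the EC2 instance ID from the node's providerID (for example aws:///eu-central-1a/i-07577a7bcf3e576f2) via KubernetesInstanceID.MapToAWSInstanceID, then asks EC2 for the attached interface's single IPv6 prefix and appends it to the pod CIDRs before patching the node. A rough standalone sketch of just the providerID-to-instance-ID step, as a simplified illustration rather than the provider's actual parser:

package main

import (
	"fmt"
	"strings"
)

// instanceIDFromProviderID pulls the trailing EC2 instance ID out of a
// providerID of the form aws:///<zone>/<instance-id>. The real code path
// (KubernetesInstanceID.MapToAWSInstanceID) handles more edge cases.
func instanceIDFromProviderID(providerID string) (string, error) {
	parts := strings.Split(strings.TrimPrefix(providerID, "aws://"), "/")
	id := parts[len(parts)-1]
	if !strings.HasPrefix(id, "i-") {
		return "", fmt.Errorf("unexpected providerID %q", providerID)
	}
	return id, nil
}

func main() {
	id, err := instanceIDFromProviderID("aws:///eu-central-1a/i-07577a7bcf3e576f2")
	fmt.Println(id, err) // i-07577a7bcf3e576f2 <nil>
}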
+ if !apierrors.IsServerTimeout(err) { + klog.Errorf("CIDR assignment for node %v failed: %v. Releasing allocated CIDR", node.Name, err) + for idx, cidr := range data.allocatedCIDRs { + if releaseErr := r.cidrSets[idx].Release(cidr); releaseErr != nil { + klog.Errorf("Error releasing allocated CIDR for node %v: %v", node.Name, releaseErr) + } + } + } + return err +} + +// converts a slice of cidrs into ,, +func cidrsAsString(inCIDRs []*net.IPNet) []string { + outCIDRs := make([]string, len(inCIDRs)) + for idx, inCIDR := range inCIDRs { + outCIDRs[idx] = inCIDR.String() + } + return outCIDRs +} diff --git a/pkg/controllers/nodeipam/ipam/cidr_allocator_test.go b/pkg/controllers/nodeipam/ipam/cidr_allocator_test.go new file mode 100644 index 0000000000..f5a9fbb70a --- /dev/null +++ b/pkg/controllers/nodeipam/ipam/cidr_allocator_test.go @@ -0,0 +1,622 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package ipam + +import ( + "context" + "net" + "testing" + "time" + + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/client-go/informers" + coreinformers "k8s.io/client-go/informers/core/v1" + "k8s.io/client-go/kubernetes/fake" + "k8s.io/cloud-provider-aws/pkg/controllers/nodeipam/testutil" + awsv1 "k8s.io/cloud-provider-aws/pkg/providers/v1" +) + +const testNodePollInterval = 10 * time.Millisecond + +var alwaysReady = func() bool { return true } + +func waitForUpdatedNodeWithTimeout(nodeHandler *testutil.FakeNodeHandler, number int, timeout time.Duration) error { + return wait.Poll(nodePollInterval, timeout, func() (bool, error) { + if len(nodeHandler.GetUpdatedNodesCopy()) >= number { + return true, nil + } + return false, nil + }) +} + +// Creates a fakeNodeInformer using the provided fakeNodeHandler. +func getFakeNodeInformer(fakeNodeHandler *testutil.FakeNodeHandler) coreinformers.NodeInformer { + fakeClient := &fake.Clientset{} + fakeInformerFactory := informers.NewSharedInformerFactory(fakeClient, 0*time.Second) + fakeNodeInformer := fakeInformerFactory.Core().V1().Nodes() + + for _, node := range fakeNodeHandler.Existing { + fakeNodeInformer.Informer().GetStore().Add(node) + } + + return fakeNodeInformer +} + +type testCase struct { + description string + fakeNodeHandler *testutil.FakeNodeHandler + allocatorParams CIDRAllocatorParams + // key is index of the cidr allocated + expectedAllocatedCIDR map[int]string + allocatedCIDRs map[int][]string + // should controller creation fail? 
+ ctrlCreateFail bool + rateLimitEnabled bool +} + +func TestOccupyPreExistingCIDR(t *testing.T) { + // all tests operate on a single node + testCases := []testCase{ + { + description: "success, single stack no node allocation", + fakeNodeHandler: &testutil.FakeNodeHandler{ + Existing: []*v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + }, + }, + }, + Clientset: fake.NewSimpleClientset(), + }, + allocatorParams: CIDRAllocatorParams{ + ClusterCIDRs: func() []*net.IPNet { + _, clusterCIDRv4, _ := net.ParseCIDR("10.10.0.0/16") + return []*net.IPNet{clusterCIDRv4} + }(), + NodeCIDRMaskSizes: []int{24}, + }, + allocatedCIDRs: nil, + expectedAllocatedCIDR: nil, + ctrlCreateFail: false, + }, + { + description: "success, single stack correct node allocation", + fakeNodeHandler: &testutil.FakeNodeHandler{ + Existing: []*v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + }, + Spec: v1.NodeSpec{ + PodCIDRs: []string{"10.10.0.1/24"}, + }, + }, + }, + Clientset: fake.NewSimpleClientset(), + }, + allocatorParams: CIDRAllocatorParams{ + ClusterCIDRs: func() []*net.IPNet { + _, clusterCIDRv4, _ := net.ParseCIDR("10.10.0.0/16") + return []*net.IPNet{clusterCIDRv4} + }(), + NodeCIDRMaskSizes: []int{24}, + }, + allocatedCIDRs: nil, + expectedAllocatedCIDR: nil, + ctrlCreateFail: false, + }, + { + description: "fail, single stack incorrect node allocation", + fakeNodeHandler: &testutil.FakeNodeHandler{ + Existing: []*v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + }, + Spec: v1.NodeSpec{ + PodCIDRs: []string{"172.10.0.1/24"}, + }, + }, + }, + Clientset: fake.NewSimpleClientset(), + }, + allocatorParams: CIDRAllocatorParams{ + ClusterCIDRs: func() []*net.IPNet { + _, clusterCIDRv4, _ := net.ParseCIDR("10.10.0.0/16") + return []*net.IPNet{clusterCIDRv4} + }(), + NodeCIDRMaskSizes: []int{24}, + }, + allocatedCIDRs: nil, + expectedAllocatedCIDR: nil, + ctrlCreateFail: true, + }, + } + + // test function + for _, tc := range testCases { + t.Run(tc.description, func(t *testing.T) { + // Initialize the range allocator. 
+ fakeNodeInformer := getFakeNodeInformer(tc.fakeNodeHandler) + nodeList, _ := tc.fakeNodeHandler.List(context.TODO(), metav1.ListOptions{}) + _, err := NewCIDRRangeAllocator(tc.fakeNodeHandler, fakeNodeInformer, nil, tc.allocatorParams, nodeList) + if err == nil && tc.ctrlCreateFail { + t.Fatalf("creating range allocator was expected to fail, but it did not") + } + if err != nil && !tc.ctrlCreateFail { + t.Fatalf("creating range allocator was expected to succeed, but it did not") + } + }) + } +} + +func TestAllocateOrOccupyCIDRSuccess(t *testing.T) { + // Non-parallel test (overrides global var) + oldNodePollInterval := nodePollInterval + nodePollInterval = testNodePollInterval + defer func() { + nodePollInterval = oldNodePollInterval + }() + + // all tests operate on a single node + testCases := []testCase{ + { + description: "When there's no ServiceCIDR return first CIDR in range", + fakeNodeHandler: &testutil.FakeNodeHandler{ + Existing: []*v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + }, + Spec: v1.NodeSpec{ + ProviderID: "aws:///eu-west-1a/i-123456789", + }, + }, + }, + Clientset: fake.NewSimpleClientset(), + }, + allocatorParams: CIDRAllocatorParams{ + ClusterCIDRs: func() []*net.IPNet { + _, clusterCIDR, _ := net.ParseCIDR("127.123.234.0/24") + return []*net.IPNet{clusterCIDR} + }(), + NodeCIDRMaskSizes: []int{30}, + }, + expectedAllocatedCIDR: map[int]string{ + 0: "127.123.234.0/30", + }, + }, + { + description: "Correctly ignore already allocated CIDRs", + fakeNodeHandler: &testutil.FakeNodeHandler{ + Existing: []*v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + }, + Spec: v1.NodeSpec{ + ProviderID: "aws:///eu-west-1a/i-123456789", + }, + }, + }, + Clientset: fake.NewSimpleClientset(), + }, + allocatorParams: CIDRAllocatorParams{ + ClusterCIDRs: func() []*net.IPNet { + _, clusterCIDR, _ := net.ParseCIDR("127.123.234.0/24") + return []*net.IPNet{clusterCIDR} + }(), + NodeCIDRMaskSizes: []int{30}, + }, + allocatedCIDRs: map[int][]string{ + 0: {"127.123.234.0/30", "127.123.234.4/30", "127.123.234.8/30", "127.123.234.12/30", "127.123.234.16/30"}, + }, + expectedAllocatedCIDR: map[int]string{ + 0: "127.123.234.20/30", + }, + }, + { + description: "no double counting", + fakeNodeHandler: &testutil.FakeNodeHandler{ + Existing: []*v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + }, + Spec: v1.NodeSpec{ + PodCIDRs: []string{"10.10.0.0/24"}, + ProviderID: "aws:///eu-west-1a/i-123456789", + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node1", + }, + Spec: v1.NodeSpec{ + PodCIDRs: []string{"10.10.2.0/24"}, + ProviderID: "aws:///eu-west-1a/i-123456789", + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node2", + }, + Spec: v1.NodeSpec{ + ProviderID: "aws:///eu-west-1a/i-123456789", + }, + }, + }, + Clientset: fake.NewSimpleClientset(), + }, + allocatorParams: CIDRAllocatorParams{ + ClusterCIDRs: func() []*net.IPNet { + _, clusterCIDR, _ := net.ParseCIDR("10.10.0.0/22") + return []*net.IPNet{clusterCIDR} + }(), + NodeCIDRMaskSizes: []int{24}, + }, + expectedAllocatedCIDR: map[int]string{ + 0: "10.10.1.0/24", + }, + }, + } + + // test function + testFunc := func(tc testCase) { + fakeNodeInformer := getFakeNodeInformer(tc.fakeNodeHandler) + nodeList, _ := tc.fakeNodeHandler.List(context.TODO(), metav1.ListOptions{}) + // Initialize the range allocator. 
+ allocator, err := NewCIDRRangeAllocator(tc.fakeNodeHandler, fakeNodeInformer, nil, tc.allocatorParams, nodeList) + if err != nil { + t.Errorf("%v: failed to create CIDRRangeAllocator with error %v", tc.description, err) + return + } + rangeAllocator, ok := allocator.(*rangeAllocator) + if !ok { + t.Logf("%v: found non-default implementation of CIDRAllocator, skipping white-box test...", tc.description) + return + } + rangeAllocator.nodesSynced = alwaysReady + rangeAllocator.recorder = testutil.NewFakeRecorder() + awsServices := awsv1.NewFakeAWSServices("clusterid.test") + rangeAllocator.cloud, _ = awsv1.NewAWSCloud(awsv1.CloudConfig{}, awsServices) + go allocator.Run(wait.NeverStop) + + // this is a bit of white box testing + // pre allocate the cidrs as per the test + for idx, allocatedList := range tc.allocatedCIDRs { + for _, allocated := range allocatedList { + _, cidr, err := net.ParseCIDR(allocated) + if err != nil { + t.Fatalf("%v: unexpected error when parsing CIDR %v: %v", tc.description, allocated, err) + } + if err = rangeAllocator.cidrSets[idx].Occupy(cidr); err != nil { + t.Fatalf("%v: unexpected error when occupying CIDR %v: %v", tc.description, allocated, err) + } + } + } + + updateCount := 0 + for _, node := range tc.fakeNodeHandler.Existing { + if node.Spec.PodCIDRs == nil { + updateCount++ + } + if err := allocator.AllocateOrOccupyCIDR(node); err != nil { + t.Errorf("%v: unexpected error in AllocateOrOccupyCIDR: %v", tc.description, err) + } + } + if updateCount != 1 { + t.Fatalf("test error: all tests must update exactly one node") + } + if err := waitForUpdatedNodeWithTimeout(tc.fakeNodeHandler, updateCount, wait.ForeverTestTimeout); err != nil { + t.Fatalf("%v: timeout while waiting for Node update: %v", tc.description, err) + } + + if len(tc.expectedAllocatedCIDR) == 0 { + // nothing further expected + return + } + for _, updatedNode := range tc.fakeNodeHandler.GetUpdatedNodesCopy() { + if len(updatedNode.Spec.PodCIDRs) == 0 { + continue // not assigned yet + } + //match + for podCIDRIdx, expectedPodCIDR := range tc.expectedAllocatedCIDR { + if updatedNode.Spec.PodCIDRs[podCIDRIdx] != expectedPodCIDR { + t.Errorf("%v: Unable to find allocated CIDR %v, found updated Nodes with CIDRs: %v", tc.description, expectedPodCIDR, updatedNode.Spec.PodCIDRs) + break + } + } + } + } + + // run the test cases + for _, tc := range testCases { + testFunc(tc) + } +} + +func TestAllocateOrOccupyCIDRFailure(t *testing.T) { + testCases := []testCase{ + { + description: "When there's no ServiceCIDR return first CIDR in range", + fakeNodeHandler: &testutil.FakeNodeHandler{ + Existing: []*v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + }, + Spec: v1.NodeSpec{ + ProviderID: "aws:///eu-west-1a/i-123456789", + }, + }, + }, + Clientset: fake.NewSimpleClientset(), + }, + allocatorParams: CIDRAllocatorParams{ + ClusterCIDRs: func() []*net.IPNet { + _, clusterCIDR, _ := net.ParseCIDR("127.123.234.0/28") + return []*net.IPNet{clusterCIDR} + }(), + NodeCIDRMaskSizes: []int{30}, + }, + allocatedCIDRs: map[int][]string{ + 0: {"127.123.234.0/30", "127.123.234.4/30", "127.123.234.8/30", "127.123.234.12/30"}, + }, + }, + } + + testFunc := func(tc testCase) { + // Initialize the range allocator. 
+ allocator, err := NewCIDRRangeAllocator(tc.fakeNodeHandler, getFakeNodeInformer(tc.fakeNodeHandler), nil, tc.allocatorParams, nil) + if err != nil { + t.Logf("%v: failed to create CIDRRangeAllocator with error %v", tc.description, err) + } + rangeAllocator, ok := allocator.(*rangeAllocator) + if !ok { + t.Logf("%v: found non-default implementation of CIDRAllocator, skipping white-box test...", tc.description) + return + } + rangeAllocator.nodesSynced = alwaysReady + rangeAllocator.recorder = testutil.NewFakeRecorder() + go allocator.Run(wait.NeverStop) + + // this is a bit of white box testing + for setIdx, allocatedList := range tc.allocatedCIDRs { + for _, allocated := range allocatedList { + _, cidr, err := net.ParseCIDR(allocated) + if err != nil { + t.Fatalf("%v: unexpected error when parsing CIDR %v: %v", tc.description, cidr, err) + } + err = rangeAllocator.cidrSets[setIdx].Occupy(cidr) + if err != nil { + t.Fatalf("%v: unexpected error when occupying CIDR %v: %v", tc.description, cidr, err) + } + } + } + if err := allocator.AllocateOrOccupyCIDR(tc.fakeNodeHandler.Existing[0]); err == nil { + t.Errorf("%v: unexpected success in AllocateOrOccupyCIDR: %v", tc.description, err) + } + // We don't expect any updates, so just sleep for some time + time.Sleep(time.Second) + if len(tc.fakeNodeHandler.GetUpdatedNodesCopy()) != 0 { + t.Fatalf("%v: unexpected update of nodes: %v", tc.description, tc.fakeNodeHandler.GetUpdatedNodesCopy()) + } + if len(tc.expectedAllocatedCIDR) == 0 { + // nothing further expected + return + } + for _, updatedNode := range tc.fakeNodeHandler.GetUpdatedNodesCopy() { + if len(updatedNode.Spec.PodCIDRs) == 0 { + continue // not assigned yet + } + //match + for podCIDRIdx, expectedPodCIDR := range tc.expectedAllocatedCIDR { + if updatedNode.Spec.PodCIDRs[podCIDRIdx] == expectedPodCIDR { + t.Errorf("%v: found cidr %v that should not be allocated on node with CIDRs:%v", tc.description, expectedPodCIDR, updatedNode.Spec.PodCIDRs) + break + } + } + } + } + for _, tc := range testCases { + testFunc(tc) + } +} + +type releaseTestCase struct { + description string + fakeNodeHandler *testutil.FakeNodeHandler + allocatorParams CIDRAllocatorParams + expectedAllocatedCIDRFirstRound map[int]string + expectedAllocatedCIDRSecondRound map[int]string + allocatedCIDRs map[int][]string + cidrsToRelease [][]string +} + +func TestReleaseCIDRSuccess(t *testing.T) { + // Non-parallel test (overrides global var) + oldNodePollInterval := nodePollInterval + nodePollInterval = testNodePollInterval + defer func() { + nodePollInterval = oldNodePollInterval + }() + + testCases := []releaseTestCase{ + { + description: "Correctly release preallocated CIDR", + fakeNodeHandler: &testutil.FakeNodeHandler{ + Existing: []*v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + }, + Spec: v1.NodeSpec{ + ProviderID: "aws:///eu-west-1a/i-123456789", + }, + }, + }, + Clientset: fake.NewSimpleClientset(), + }, + allocatorParams: CIDRAllocatorParams{ + ClusterCIDRs: func() []*net.IPNet { + _, clusterCIDR, _ := net.ParseCIDR("127.123.234.0/28") + return []*net.IPNet{clusterCIDR} + }(), + NodeCIDRMaskSizes: []int{30}, + }, + allocatedCIDRs: map[int][]string{ + 0: {"127.123.234.0/30", "127.123.234.4/30", "127.123.234.8/30", "127.123.234.12/30"}, + }, + expectedAllocatedCIDRFirstRound: nil, + cidrsToRelease: [][]string{ + {"127.123.234.4/30"}, + }, + expectedAllocatedCIDRSecondRound: map[int]string{ + 0: "127.123.234.4/30", + }, + }, + { + description: "Correctly recycle CIDR", + fakeNodeHandler: 
&testutil.FakeNodeHandler{ + Existing: []*v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + }, + Spec: v1.NodeSpec{ + ProviderID: "aws:///eu-west-1a/i-123456789", + }, + }, + }, + Clientset: fake.NewSimpleClientset(), + }, + allocatorParams: CIDRAllocatorParams{ + ClusterCIDRs: func() []*net.IPNet { + _, clusterCIDR, _ := net.ParseCIDR("127.123.234.0/28") + return []*net.IPNet{clusterCIDR} + }(), + NodeCIDRMaskSizes: []int{30}, + }, + allocatedCIDRs: map[int][]string{ + 0: {"127.123.234.4/30", "127.123.234.8/30", "127.123.234.12/30"}, + }, + expectedAllocatedCIDRFirstRound: map[int]string{ + 0: "127.123.234.0/30", + }, + cidrsToRelease: [][]string{ + {"127.123.234.0/30"}, + }, + expectedAllocatedCIDRSecondRound: map[int]string{ + 0: "127.123.234.0/30", + }, + }, + } + + testFunc := func(tc releaseTestCase) { + // Initialize the range allocator. + allocator, _ := NewCIDRRangeAllocator(tc.fakeNodeHandler, getFakeNodeInformer(tc.fakeNodeHandler), nil, tc.allocatorParams, nil) + rangeAllocator, ok := allocator.(*rangeAllocator) + if !ok { + t.Logf("%v: found non-default implementation of CIDRAllocator, skipping white-box test...", tc.description) + return + } + rangeAllocator.nodesSynced = alwaysReady + rangeAllocator.recorder = testutil.NewFakeRecorder() + awsServices := awsv1.NewFakeAWSServices("clusterid.test") + rangeAllocator.cloud, _ = awsv1.NewAWSCloud(awsv1.CloudConfig{}, awsServices) + go allocator.Run(wait.NeverStop) + + // this is a bit of white box testing + for setIdx, allocatedList := range tc.allocatedCIDRs { + for _, allocated := range allocatedList { + _, cidr, err := net.ParseCIDR(allocated) + if err != nil { + t.Fatalf("%v: unexpected error when parsing CIDR %v: %v", tc.description, allocated, err) + } + err = rangeAllocator.cidrSets[setIdx].Occupy(cidr) + if err != nil { + t.Fatalf("%v: unexpected error when occupying CIDR %v: %v", tc.description, allocated, err) + } + } + } + + err := allocator.AllocateOrOccupyCIDR(tc.fakeNodeHandler.Existing[0]) + if len(tc.expectedAllocatedCIDRFirstRound) != 0 { + if err != nil { + t.Fatalf("%v: unexpected error in AllocateOrOccupyCIDR: %v", tc.description, err) + } + if err := waitForUpdatedNodeWithTimeout(tc.fakeNodeHandler, 1, wait.ForeverTestTimeout); err != nil { + t.Fatalf("%v: timeout while waiting for Node update: %v", tc.description, err) + } + } else { + if err == nil { + t.Fatalf("%v: unexpected success in AllocateOrOccupyCIDR: %v", tc.description, err) + } + // We don't expect any updates here + time.Sleep(time.Second) + if len(tc.fakeNodeHandler.GetUpdatedNodesCopy()) != 0 { + t.Fatalf("%v: unexpected update of nodes: %v", tc.description, tc.fakeNodeHandler.GetUpdatedNodesCopy()) + } + } + for _, cidrToRelease := range tc.cidrsToRelease { + nodeToRelease := v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + }, + Spec: v1.NodeSpec{ + ProviderID: "aws:///eu-west-1a/i-123456789", + }, + } + nodeToRelease.Spec.PodCIDRs = cidrToRelease + err = allocator.ReleaseCIDR(&nodeToRelease) + if err != nil { + t.Fatalf("%v: unexpected error in ReleaseCIDR: %v", tc.description, err) + } + } + if err = allocator.AllocateOrOccupyCIDR(tc.fakeNodeHandler.Existing[0]); err != nil { + t.Fatalf("%v: unexpected error in AllocateOrOccupyCIDR: %v", tc.description, err) + } + if err := waitForUpdatedNodeWithTimeout(tc.fakeNodeHandler, 1, wait.ForeverTestTimeout); err != nil { + t.Fatalf("%v: timeout while waiting for Node update: %v", tc.description, err) + } + + if len(tc.expectedAllocatedCIDRSecondRound) == 0 { + // nothing 
further expected + return + } + for _, updatedNode := range tc.fakeNodeHandler.GetUpdatedNodesCopy() { + if len(updatedNode.Spec.PodCIDRs) == 0 { + continue // not assigned yet + } + //match + for podCIDRIdx, expectedPodCIDR := range tc.expectedAllocatedCIDRSecondRound { + if updatedNode.Spec.PodCIDRs[podCIDRIdx] != expectedPodCIDR { + t.Errorf("%v: found cidr %v that should not be allocated on node with CIDRs:%v", tc.description, expectedPodCIDR, updatedNode.Spec.PodCIDRs) + break + } + } + } + } + + for _, tc := range testCases { + testFunc(tc) + } +} diff --git a/pkg/controllers/nodeipam/ipam/cidrset/cidrset.go b/pkg/controllers/nodeipam/ipam/cidrset/cidrset.go new file mode 100644 index 0000000000..b31ef6b78c --- /dev/null +++ b/pkg/controllers/nodeipam/ipam/cidrset/cidrset.go @@ -0,0 +1,295 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package cidrset + +import ( + "encoding/binary" + "errors" + "fmt" + "math/big" + "math/bits" + "net" + "sync" +) + +// CidrSet manages a set of CIDR ranges from which blocks of IPs can +// be allocated from. +type CidrSet struct { + sync.Mutex + // clusterCIDR is the CIDR assigned to the cluster + clusterCIDR *net.IPNet + // clusterMaskSize is the mask size, in bits, assigned to the cluster + // caches the mask size to avoid the penalty of calling clusterCIDR.Mask.Size() + clusterMaskSize int + // nodeMask is the network mask assigned to the nodes + nodeMask net.IPMask + // nodeMaskSize is the mask size, in bits,assigned to the nodes + // caches the mask size to avoid the penalty of calling nodeMask.Size() + nodeMaskSize int + // maxCIDRs is the maximum number of CIDRs that can be allocated + maxCIDRs int + // allocatedCIDRs counts the number of CIDRs allocated + allocatedCIDRs int + // nextCandidate points to the next CIDR that should be free + nextCandidate int + // used is a bitmap used to track the CIDRs allocated + used big.Int + // label is used to identify the metrics + label string +} + +const ( + // The subnet mask size cannot be greater than 16 more than the cluster mask size + // TODO: https://github.com/kubernetes/kubernetes/issues/44918 + // clusterSubnetMaxDiff limited to 16 due to the uncompressed bitmap + // Due to this limitation the subnet mask for IPv6 cluster cidr needs to be >= 48 + // as default mask size for IPv6 is 64. + clusterSubnetMaxDiff = 16 + // halfIPv6Len is the half of the IPv6 length + halfIPv6Len = net.IPv6len / 2 +) + +var ( + // ErrCIDRRangeNoCIDRsRemaining occurs when there is no more space + // to allocate CIDR ranges. + ErrCIDRRangeNoCIDRsRemaining = errors.New( + "CIDR allocation failed; there are no remaining CIDRs left to allocate in the accepted range") + // ErrCIDRSetSubNetTooBig occurs when the subnet mask size is too + // big compared to the CIDR mask size. + ErrCIDRSetSubNetTooBig = errors.New( + "New CIDR set failed; the node CIDR size is too big") +) + +// NewCIDRSet creates a new CidrSet. 
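CidrSet does its bookkeeping with a big.Int used as a bitmap: bit i is 1 while the i-th node CIDR is allocated, and allocatedCIDRs tracks how many bits are set. A tiny standalone sketch of that mechanism:

package main

import (
	"fmt"
	"math/big"
)

// A big.Int as an allocation bitmap: SetBit marks or clears an index,
// Bit reports whether that index is currently occupied.
func main() {
	var used big.Int

	used.SetBit(&used, 5, 1) // mark CIDR index 5 as allocated
	fmt.Println(used.Bit(5)) // 1: occupied
	fmt.Println(used.Bit(6)) // 0: still free

	used.SetBit(&used, 5, 0) // release it again
	fmt.Println(used.Bit(5)) // 0
}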
+func NewCIDRSet(clusterCIDR *net.IPNet, subNetMaskSize int) (*CidrSet, error) { + clusterMask := clusterCIDR.Mask + clusterMaskSize, bits := clusterMask.Size() + + var maxCIDRs int + if (clusterCIDR.IP.To4() == nil) && (subNetMaskSize-clusterMaskSize > clusterSubnetMaxDiff) { + return nil, ErrCIDRSetSubNetTooBig + } + + // register CidrSet metrics + registerCidrsetMetrics() + + maxCIDRs = 1 << uint32(subNetMaskSize-clusterMaskSize) + return &CidrSet{ + clusterCIDR: clusterCIDR, + nodeMask: net.CIDRMask(subNetMaskSize, bits), + clusterMaskSize: clusterMaskSize, + maxCIDRs: maxCIDRs, + nodeMaskSize: subNetMaskSize, + label: clusterCIDR.String(), + }, nil +} + +func (s *CidrSet) indexToCIDRBlock(index int) *net.IPNet { + var ip []byte + switch /*v4 or v6*/ { + case s.clusterCIDR.IP.To4() != nil: + { + j := uint32(index) << uint32(32-s.nodeMaskSize) + ipInt := (binary.BigEndian.Uint32(s.clusterCIDR.IP)) | j + ip = make([]byte, net.IPv4len) + binary.BigEndian.PutUint32(ip, ipInt) + } + case s.clusterCIDR.IP.To16() != nil: + { + // leftClusterIP | rightClusterIP + // 2001:0DB8:1234:0000:0000:0000:0000:0000 + const v6NBits = 128 + const halfV6NBits = v6NBits / 2 + leftClusterIP := binary.BigEndian.Uint64(s.clusterCIDR.IP[:halfIPv6Len]) + rightClusterIP := binary.BigEndian.Uint64(s.clusterCIDR.IP[halfIPv6Len:]) + + ip = make([]byte, net.IPv6len) + + if s.nodeMaskSize <= halfV6NBits { + // We only care about left side IP + leftClusterIP |= uint64(index) << uint(halfV6NBits-s.nodeMaskSize) + } else { + if s.clusterMaskSize < halfV6NBits { + // see how many bits are needed to reach the left side + btl := uint(s.nodeMaskSize - halfV6NBits) + indexMaxBit := uint(64 - bits.LeadingZeros64(uint64(index))) + if indexMaxBit > btl { + leftClusterIP |= uint64(index) >> btl + } + } + // the right side will be calculated the same way either the + // subNetMaskSize affects both left and right sides + rightClusterIP |= uint64(index) << uint(v6NBits-s.nodeMaskSize) + } + binary.BigEndian.PutUint64(ip[:halfIPv6Len], leftClusterIP) + binary.BigEndian.PutUint64(ip[halfIPv6Len:], rightClusterIP) + } + } + return &net.IPNet{ + IP: ip, + Mask: s.nodeMask, + } +} + +// AllocateNext allocates the next free CIDR range. This will set the range +// as occupied and return the allocated range. 
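NewCIDRSet sizes the bitmap as 2^(nodeMaskSize - clusterMaskSize) entries, and indexToCIDRBlock turns a bitmap index back into a subnet by shifting the index into the bits between the two masks. An IPv4-only standalone sketch of both calculations (the 127.123.0.0/16 values follow the shape of the IPv4 test cases further down, not any cluster-specific configuration):

package main

import (
	"encoding/binary"
	"fmt"
	"net"
)

func main() {
	_, clusterCIDR, _ := net.ParseCIDR("127.123.0.0/16")
	nodeMaskSize := 24
	clusterMaskSize, _ := clusterCIDR.Mask.Size()

	// Capacity: how many /24 node CIDRs fit inside the /16 cluster CIDR.
	maxCIDRs := 1 << uint(nodeMaskSize-clusterMaskSize)
	fmt.Println(maxCIDRs) // 256

	// Index to block: shift the index into the bits between the two masks
	// and OR it onto the cluster network address.
	index := 15
	base := binary.BigEndian.Uint32(clusterCIDR.IP.To4())
	ip := make(net.IP, net.IPv4len)
	binary.BigEndian.PutUint32(ip, base|uint32(index)<<uint(32-nodeMaskSize))
	fmt.Println(&net.IPNet{IP: ip, Mask: net.CIDRMask(nodeMaskSize, 32)}) // 127.123.15.0/24
}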
+func (s *CidrSet) AllocateNext() (*net.IPNet, error) { + s.Lock() + defer s.Unlock() + + if s.allocatedCIDRs == s.maxCIDRs { + return nil, ErrCIDRRangeNoCIDRsRemaining + } + candidate := s.nextCandidate + var i int + for i = 0; i < s.maxCIDRs; i++ { + if s.used.Bit(candidate) == 0 { + break + } + candidate = (candidate + 1) % s.maxCIDRs + } + + s.nextCandidate = (candidate + 1) % s.maxCIDRs + s.used.SetBit(&s.used, candidate, 1) + s.allocatedCIDRs++ + // Update metrics + cidrSetAllocations.WithLabelValues(s.label).Inc() + cidrSetAllocationTriesPerRequest.WithLabelValues(s.label).Observe(float64(i)) + cidrSetUsage.WithLabelValues(s.label).Set(float64(s.allocatedCIDRs) / float64(s.maxCIDRs)) + + return s.indexToCIDRBlock(candidate), nil +} + +func (s *CidrSet) getBeginingAndEndIndices(cidr *net.IPNet) (begin, end int, err error) { + if cidr == nil { + return -1, -1, fmt.Errorf("error getting indices for cluster cidr %v, cidr is nil", s.clusterCIDR) + } + begin, end = 0, s.maxCIDRs-1 + cidrMask := cidr.Mask + maskSize, _ := cidrMask.Size() + var ipSize int + + if !s.clusterCIDR.Contains(cidr.IP.Mask(s.clusterCIDR.Mask)) && !cidr.Contains(s.clusterCIDR.IP.Mask(cidr.Mask)) { + return -1, -1, fmt.Errorf("cidr %v is out the range of cluster cidr %v", cidr, s.clusterCIDR) + } + + if s.clusterMaskSize < maskSize { + + ipSize = net.IPv4len + if cidr.IP.To4() == nil { + ipSize = net.IPv6len + } + begin, err = s.getIndexForCIDR(&net.IPNet{ + IP: cidr.IP.Mask(s.nodeMask), + Mask: s.nodeMask, + }) + if err != nil { + return -1, -1, err + } + ip := make([]byte, ipSize) + if cidr.IP.To4() != nil { + ipInt := binary.BigEndian.Uint32(cidr.IP) | (^binary.BigEndian.Uint32(cidr.Mask)) + binary.BigEndian.PutUint32(ip, ipInt) + } else { + // ipIntLeft | ipIntRight + // 2001:0DB8:1234:0000:0000:0000:0000:0000 + ipIntLeft := binary.BigEndian.Uint64(cidr.IP[:net.IPv6len/2]) | (^binary.BigEndian.Uint64(cidr.Mask[:net.IPv6len/2])) + ipIntRight := binary.BigEndian.Uint64(cidr.IP[net.IPv6len/2:]) | (^binary.BigEndian.Uint64(cidr.Mask[net.IPv6len/2:])) + binary.BigEndian.PutUint64(ip[:net.IPv6len/2], ipIntLeft) + binary.BigEndian.PutUint64(ip[net.IPv6len/2:], ipIntRight) + } + end, err = s.getIndexForCIDR(&net.IPNet{ + IP: net.IP(ip).Mask(s.nodeMask), + Mask: s.nodeMask, + }) + if err != nil { + return -1, -1, err + } + } + return begin, end, nil +} + +// Release releases the given CIDR range. +func (s *CidrSet) Release(cidr *net.IPNet) error { + begin, end, err := s.getBeginingAndEndIndices(cidr) + if err != nil { + return err + } + s.Lock() + defer s.Unlock() + for i := begin; i <= end; i++ { + // Only change the counters if we change the bit to prevent + // double counting. + if s.used.Bit(i) != 0 { + s.used.SetBit(&s.used, i, 0) + s.allocatedCIDRs-- + cidrSetReleases.WithLabelValues(s.label).Inc() + } + } + + cidrSetUsage.WithLabelValues(s.label).Set(float64(s.allocatedCIDRs) / float64(s.maxCIDRs)) + return nil +} + +// Occupy marks the given CIDR range as used. Occupy succeeds even if the CIDR +// range was previously used. +func (s *CidrSet) Occupy(cidr *net.IPNet) (err error) { + begin, end, err := s.getBeginingAndEndIndices(cidr) + if err != nil { + return err + } + s.Lock() + defer s.Unlock() + for i := begin; i <= end; i++ { + // Only change the counters if we change the bit to prevent + // double counting. 
+ if s.used.Bit(i) == 0 { + s.used.SetBit(&s.used, i, 1) + s.allocatedCIDRs++ + cidrSetAllocations.WithLabelValues(s.label).Inc() + } + } + + cidrSetUsage.WithLabelValues(s.label).Set(float64(s.allocatedCIDRs) / float64(s.maxCIDRs)) + return nil +} + +func (s *CidrSet) getIndexForCIDR(cidr *net.IPNet) (int, error) { + return s.getIndexForIP(cidr.IP) +} + +func (s *CidrSet) getIndexForIP(ip net.IP) (int, error) { + if ip.To4() != nil { + cidrIndex := (binary.BigEndian.Uint32(s.clusterCIDR.IP) ^ binary.BigEndian.Uint32(ip.To4())) >> uint32(32-s.nodeMaskSize) + if cidrIndex >= uint32(s.maxCIDRs) { + return 0, fmt.Errorf("CIDR: %v/%v is out of the range of CIDR allocator", ip, s.nodeMaskSize) + } + return int(cidrIndex), nil + } + if ip.To16() != nil { + bigIP := big.NewInt(0).SetBytes(s.clusterCIDR.IP) + bigIP = bigIP.Xor(bigIP, big.NewInt(0).SetBytes(ip)) + cidrIndexBig := bigIP.Rsh(bigIP, uint(net.IPv6len*8-s.nodeMaskSize)) + cidrIndex := cidrIndexBig.Uint64() + if cidrIndex >= uint64(s.maxCIDRs) { + return 0, fmt.Errorf("CIDR: %v/%v is out of the range of CIDR allocator", ip, s.nodeMaskSize) + } + return int(cidrIndex), nil + } + + return 0, fmt.Errorf("invalid IP: %v", ip) +} diff --git a/pkg/controllers/nodeipam/ipam/cidrset/cidrset_test.go b/pkg/controllers/nodeipam/ipam/cidrset/cidrset_test.go new file mode 100644 index 0000000000..b28f9fcf38 --- /dev/null +++ b/pkg/controllers/nodeipam/ipam/cidrset/cidrset_test.go @@ -0,0 +1,1038 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package cidrset + +import ( + "math/big" + "net" + "reflect" + "testing" + + "k8s.io/component-base/metrics/testutil" + "k8s.io/klog/v2" +) + +func TestCIDRSetFullyAllocated(t *testing.T) { + cases := []struct { + clusterCIDRStr string + subNetMaskSize int + expectedCIDR string + description string + }{ + { + clusterCIDRStr: "127.123.234.0/30", + subNetMaskSize: 30, + expectedCIDR: "127.123.234.0/30", + description: "Fully allocated CIDR with IPv4", + }, + { + clusterCIDRStr: "beef:1234::/30", + subNetMaskSize: 30, + expectedCIDR: "beef:1234::/30", + description: "Fully allocated CIDR with IPv6", + }, + } + for _, tc := range cases { + _, clusterCIDR, _ := net.ParseCIDR(tc.clusterCIDRStr) + a, err := NewCIDRSet(clusterCIDR, tc.subNetMaskSize) + if err != nil { + t.Fatalf("unexpected error: %v for %v", err, tc.description) + } + p, err := a.AllocateNext() + if err != nil { + t.Fatalf("unexpected error: %v for %v", err, tc.description) + } + if p.String() != tc.expectedCIDR { + t.Fatalf("unexpected allocated cidr: %v, expecting %v for %v", + p.String(), tc.expectedCIDR, tc.description) + } + + _, err = a.AllocateNext() + if err == nil { + t.Fatalf("expected error because of fully-allocated range for %v", tc.description) + } + + a.Release(p) + + p, err = a.AllocateNext() + if err != nil { + t.Fatalf("unexpected error: %v for %v", err, tc.description) + } + if p.String() != tc.expectedCIDR { + t.Fatalf("unexpected allocated cidr: %v, expecting %v for %v", + p.String(), tc.expectedCIDR, tc.description) + } + _, err = a.AllocateNext() + if err == nil { + t.Fatalf("expected error because of fully-allocated range for %v", tc.description) + } + } +} + +func TestIndexToCIDRBlock(t *testing.T) { + cases := []struct { + clusterCIDRStr string + subnetMaskSize int + index int + CIDRBlock string + description string + }{ + { + clusterCIDRStr: "127.123.3.0/16", + subnetMaskSize: 24, + index: 0, + CIDRBlock: "127.123.0.0/24", + description: "1st IP address indexed with IPv4", + }, + { + clusterCIDRStr: "127.123.0.0/16", + subnetMaskSize: 24, + index: 15, + CIDRBlock: "127.123.15.0/24", + description: "16th IP address indexed with IPv4", + }, + { + clusterCIDRStr: "192.168.5.219/28", + subnetMaskSize: 32, + index: 5, + CIDRBlock: "192.168.5.213/32", + description: "5th IP address indexed with IPv4", + }, + { + clusterCIDRStr: "2001:0db8:1234:3::/48", + subnetMaskSize: 64, + index: 0, + CIDRBlock: "2001:db8:1234::/64", + description: "1st IP address indexed with IPv6 /64", + }, + { + clusterCIDRStr: "2001:0db8:1234::/48", + subnetMaskSize: 64, + index: 15, + CIDRBlock: "2001:db8:1234:f::/64", + description: "16th IP address indexed with IPv6 /64", + }, + { + clusterCIDRStr: "2001:0db8:85a3::8a2e:0370:7334/50", + subnetMaskSize: 63, + index: 6425, + CIDRBlock: "2001:db8:85a3:3232::/63", + description: "6426th IP address indexed with IPv6 /63", + }, + { + clusterCIDRStr: "2001:0db8::/32", + subnetMaskSize: 48, + index: 0, + CIDRBlock: "2001:db8::/48", + description: "1st IP address indexed with IPv6 /48", + }, + { + clusterCIDRStr: "2001:0db8::/32", + subnetMaskSize: 48, + index: 15, + CIDRBlock: "2001:db8:f::/48", + description: "16th IP address indexed with IPv6 /48", + }, + { + clusterCIDRStr: "2001:0db8:85a3::8a2e:0370:7334/32", + subnetMaskSize: 48, + index: 6425, + CIDRBlock: "2001:db8:1919::/48", + description: "6426th IP address indexed with IPv6 /48", + }, + { + clusterCIDRStr: "2001:0db8:1234:ff00::/56", + subnetMaskSize: 72, + index: 0, + CIDRBlock: "2001:db8:1234:ff00::/72", + description: 
"1st IP address indexed with IPv6 /72", + }, + { + clusterCIDRStr: "2001:0db8:1234:ff00::/56", + subnetMaskSize: 72, + index: 15, + CIDRBlock: "2001:db8:1234:ff00:f00::/72", + description: "16th IP address indexed with IPv6 /72", + }, + { + clusterCIDRStr: "2001:0db8:1234:ff00::0370:7334/56", + subnetMaskSize: 72, + index: 6425, + CIDRBlock: "2001:db8:1234:ff19:1900::/72", + description: "6426th IP address indexed with IPv6 /72", + }, + { + clusterCIDRStr: "2001:0db8:1234:0:1234::/80", + subnetMaskSize: 96, + index: 0, + CIDRBlock: "2001:db8:1234:0:1234::/96", + description: "1st IP address indexed with IPv6 /96", + }, + { + clusterCIDRStr: "2001:0db8:1234:0:1234::/80", + subnetMaskSize: 96, + index: 15, + CIDRBlock: "2001:db8:1234:0:1234:f::/96", + description: "16th IP address indexed with IPv6 /96", + }, + { + clusterCIDRStr: "2001:0db8:1234:ff00::0370:7334/80", + subnetMaskSize: 96, + index: 6425, + CIDRBlock: "2001:db8:1234:ff00:0:1919::/96", + description: "6426th IP address indexed with IPv6 /96", + }, + } + for _, tc := range cases { + _, clusterCIDR, _ := net.ParseCIDR(tc.clusterCIDRStr) + a, err := NewCIDRSet(clusterCIDR, tc.subnetMaskSize) + if err != nil { + t.Fatalf("error for %v ", tc.description) + } + cidr := a.indexToCIDRBlock(tc.index) + if cidr.String() != tc.CIDRBlock { + t.Fatalf("error for %v index %d %s", tc.description, tc.index, cidr.String()) + } + } +} + +func TestCIDRSet_RandomishAllocation(t *testing.T) { + cases := []struct { + clusterCIDRStr string + description string + }{ + { + clusterCIDRStr: "127.123.234.0/16", + description: "RandomishAllocation with IPv4", + }, + { + clusterCIDRStr: "beef:1234::/16", + description: "RandomishAllocation with IPv6", + }, + } + for _, tc := range cases { + _, clusterCIDR, _ := net.ParseCIDR(tc.clusterCIDRStr) + a, err := NewCIDRSet(clusterCIDR, 24) + if err != nil { + t.Fatalf("Error allocating CIDRSet for %v", tc.description) + } + // allocate all the CIDRs + var cidrs []*net.IPNet + + for i := 0; i < 256; i++ { + if c, err := a.AllocateNext(); err == nil { + cidrs = append(cidrs, c) + } else { + t.Fatalf("unexpected error: %v for %v", err, tc.description) + } + } + + //var err error + _, err = a.AllocateNext() + if err == nil { + t.Fatalf("expected error because of fully-allocated range for %v", tc.description) + } + // release them all + for i := 0; i < len(cidrs); i++ { + a.Release(cidrs[i]) + } + + // allocate the CIDRs again + var rcidrs []*net.IPNet + for i := 0; i < 256; i++ { + if c, err := a.AllocateNext(); err == nil { + rcidrs = append(rcidrs, c) + } else { + t.Fatalf("unexpected error: %d, %v for %v", i, err, tc.description) + } + } + _, err = a.AllocateNext() + if err == nil { + t.Fatalf("expected error because of fully-allocated range for %v", tc.description) + } + + if !reflect.DeepEqual(cidrs, rcidrs) { + t.Fatalf("expected re-allocated cidrs are the same collection for %v", tc.description) + } + } +} + +func TestCIDRSet_AllocationOccupied(t *testing.T) { + cases := []struct { + clusterCIDRStr string + description string + }{ + { + clusterCIDRStr: "127.123.234.0/16", + description: "AllocationOccupied with IPv4", + }, + { + clusterCIDRStr: "beef:1234::/16", + description: "AllocationOccupied with IPv6", + }, + } + for _, tc := range cases { + _, clusterCIDR, _ := net.ParseCIDR(tc.clusterCIDRStr) + a, err := NewCIDRSet(clusterCIDR, 24) + if err != nil { + t.Fatalf("Error allocating CIDRSet for %v", tc.description) + } + // allocate all the CIDRs + var cidrs []*net.IPNet + var numCIDRs = 256 + + for i := 0; 
i < numCIDRs; i++ { + if c, err := a.AllocateNext(); err == nil { + cidrs = append(cidrs, c) + } else { + t.Fatalf("unexpected error: %v for %v", err, tc.description) + } + } + + //var err error + _, err = a.AllocateNext() + if err == nil { + t.Fatalf("expected error because of fully-allocated range for %v", tc.description) + } + // release them all + for i := 0; i < len(cidrs); i++ { + a.Release(cidrs[i]) + } + // occupy the last 128 CIDRs + for i := numCIDRs / 2; i < numCIDRs; i++ { + a.Occupy(cidrs[i]) + } + // occupy the first of the last 128 again + a.Occupy(cidrs[numCIDRs/2]) + + // allocate the first 128 CIDRs again + var rcidrs []*net.IPNet + for i := 0; i < numCIDRs/2; i++ { + if c, err := a.AllocateNext(); err == nil { + rcidrs = append(rcidrs, c) + } else { + t.Fatalf("unexpected error: %d, %v for %v", i, err, tc.description) + } + } + _, err = a.AllocateNext() + if err == nil { + t.Fatalf("expected error because of fully-allocated range for %v", tc.description) + } + + // check Occupy() work properly + for i := numCIDRs / 2; i < numCIDRs; i++ { + rcidrs = append(rcidrs, cidrs[i]) + } + if !reflect.DeepEqual(cidrs, rcidrs) { + t.Fatalf("expected re-allocated cidrs are the same collection for %v", tc.description) + } + } +} + +func TestDoubleOccupyRelease(t *testing.T) { + // Run a sequence of operations and check the number of occupied CIDRs + // after each one. + clusterCIDRStr := "10.42.0.0/16" + operations := []struct { + cidrStr string + operation string + numOccupied int + }{ + // Occupy 1 element: +1 + { + cidrStr: "10.42.5.0/24", + operation: "occupy", + numOccupied: 1, + }, + // Occupy 1 more element: +1 + { + cidrStr: "10.42.9.0/24", + operation: "occupy", + numOccupied: 2, + }, + // Occupy 4 elements overlapping with one from the above: +3 + { + cidrStr: "10.42.8.0/22", + operation: "occupy", + numOccupied: 5, + }, + // Occupy an already-coccupied element: no change + { + cidrStr: "10.42.9.0/24", + operation: "occupy", + numOccupied: 5, + }, + // Release an coccupied element: -1 + { + cidrStr: "10.42.9.0/24", + operation: "release", + numOccupied: 4, + }, + // Release an unoccupied element: no change + { + cidrStr: "10.42.9.0/24", + operation: "release", + numOccupied: 4, + }, + // Release 4 elements, only one of which is occupied: -1 + { + cidrStr: "10.42.4.0/22", + operation: "release", + numOccupied: 3, + }, + } + // Check that there are exactly that many allocatable CIDRs after all + // operations have been executed. + numAllocatable24s := (1 << 8) - 3 + + _, clusterCIDR, _ := net.ParseCIDR(clusterCIDRStr) + a, err := NewCIDRSet(clusterCIDR, 24) + if err != nil { + t.Fatalf("Error allocating CIDRSet") + } + + // Execute the operations + for _, op := range operations { + _, cidr, _ := net.ParseCIDR(op.cidrStr) + switch op.operation { + case "occupy": + a.Occupy(cidr) + case "release": + a.Release(cidr) + default: + t.Fatalf("test error: unknown operation %v", op.operation) + } + if a.allocatedCIDRs != op.numOccupied { + t.Fatalf("Expected %d occupied CIDRS, got %d", op.numOccupied, a.allocatedCIDRs) + } + } + + // Make sure that we can allocate exactly `numAllocatable24s` elements. 
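+	// After the sequence above, three /24s remain occupied (10.42.8.0/24,
+	// 10.42.10.0/24 and 10.42.11.0/24), so 256 - 3 = 253 blocks are still free.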
+ for i := 0; i < numAllocatable24s; i++ { + _, err := a.AllocateNext() + if err != nil { + t.Fatalf("Expected to be able to allocate %d CIDRS, failed after %d", numAllocatable24s, i) + } + } + + _, err = a.AllocateNext() + if err == nil { + t.Fatalf("Expected to be able to allocate exactly %d CIDRS, got one more", numAllocatable24s) + } +} + +func TestGetBitforCIDR(t *testing.T) { + cases := []struct { + clusterCIDRStr string + subNetMaskSize int + subNetCIDRStr string + expectedBit int + expectErr bool + description string + }{ + { + clusterCIDRStr: "127.0.0.0/8", + subNetMaskSize: 16, + subNetCIDRStr: "127.0.0.0/16", + expectedBit: 0, + expectErr: false, + description: "Get 0 Bit with IPv4", + }, + { + clusterCIDRStr: "be00::/8", + subNetMaskSize: 16, + subNetCIDRStr: "be00::/16", + expectedBit: 0, + expectErr: false, + description: "Get 0 Bit with IPv6", + }, + { + clusterCIDRStr: "127.0.0.0/8", + subNetMaskSize: 16, + subNetCIDRStr: "127.123.0.0/16", + expectedBit: 123, + expectErr: false, + description: "Get 123rd Bit with IPv4", + }, + { + clusterCIDRStr: "be00::/8", + subNetMaskSize: 16, + subNetCIDRStr: "beef::/16", + expectedBit: 0xef, + expectErr: false, + description: "Get xef Bit with IPv6", + }, + { + clusterCIDRStr: "127.0.0.0/8", + subNetMaskSize: 16, + subNetCIDRStr: "127.168.0.0/16", + expectedBit: 168, + expectErr: false, + description: "Get 168th Bit with IPv4", + }, + { + clusterCIDRStr: "be00::/8", + subNetMaskSize: 16, + subNetCIDRStr: "be68::/16", + expectedBit: 0x68, + expectErr: false, + description: "Get x68th Bit with IPv6", + }, + { + clusterCIDRStr: "127.0.0.0/8", + subNetMaskSize: 16, + subNetCIDRStr: "127.224.0.0/16", + expectedBit: 224, + expectErr: false, + description: "Get 224th Bit with IPv4", + }, + { + clusterCIDRStr: "be00::/8", + subNetMaskSize: 16, + subNetCIDRStr: "be24::/16", + expectedBit: 0x24, + expectErr: false, + description: "Get x24th Bit with IPv6", + }, + { + clusterCIDRStr: "192.168.0.0/16", + subNetMaskSize: 24, + subNetCIDRStr: "192.168.12.0/24", + expectedBit: 12, + expectErr: false, + description: "Get 12th Bit with IPv4", + }, + { + clusterCIDRStr: "beef::/16", + subNetMaskSize: 24, + subNetCIDRStr: "beef:1200::/24", + expectedBit: 0x12, + expectErr: false, + description: "Get x12th Bit with IPv6", + }, + { + clusterCIDRStr: "192.168.0.0/16", + subNetMaskSize: 24, + subNetCIDRStr: "192.168.151.0/24", + expectedBit: 151, + expectErr: false, + description: "Get 151st Bit with IPv4", + }, + { + clusterCIDRStr: "beef::/16", + subNetMaskSize: 24, + subNetCIDRStr: "beef:9700::/24", + expectedBit: 0x97, + expectErr: false, + description: "Get x97st Bit with IPv6", + }, + { + clusterCIDRStr: "192.168.0.0/16", + subNetMaskSize: 24, + subNetCIDRStr: "127.168.224.0/24", + expectErr: true, + description: "Get error with IPv4", + }, + { + clusterCIDRStr: "beef::/16", + subNetMaskSize: 24, + subNetCIDRStr: "2001:db00::/24", + expectErr: true, + description: "Get error with IPv6", + }, + } + + for _, tc := range cases { + _, clusterCIDR, err := net.ParseCIDR(tc.clusterCIDRStr) + if err != nil { + t.Fatalf("unexpected error: %v for %v", err, tc.description) + } + + cs, err := NewCIDRSet(clusterCIDR, tc.subNetMaskSize) + if err != nil { + t.Fatalf("Error allocating CIDRSet for %v", tc.description) + } + _, subnetCIDR, err := net.ParseCIDR(tc.subNetCIDRStr) + if err != nil { + t.Fatalf("unexpected error: %v for %v", err, tc.description) + } + + got, err := cs.getIndexForCIDR(subnetCIDR) + if err == nil && tc.expectErr { + klog.Errorf("expected error 
but got null for %v", tc.description) + continue + } + + if err != nil && !tc.expectErr { + klog.Errorf("unexpected error: %v for %v", err, tc.description) + continue + } + + if got != tc.expectedBit { + klog.Errorf("expected %v, but got %v for %v", tc.expectedBit, got, tc.description) + } + } +} + +func TestOccupy(t *testing.T) { + cases := []struct { + clusterCIDRStr string + subNetMaskSize int + subNetCIDRStr string + expectedUsedBegin int + expectedUsedEnd int + expectErr bool + description string + }{ + { + clusterCIDRStr: "127.0.0.0/8", + subNetMaskSize: 16, + subNetCIDRStr: "127.0.0.0/8", + expectedUsedBegin: 0, + expectedUsedEnd: 255, + expectErr: false, + description: "Occupy all Bits with IPv4", + }, + { + clusterCIDRStr: "2001:beef:1200::/40", + subNetMaskSize: 48, + subNetCIDRStr: "2001:beef:1200::/40", + expectedUsedBegin: 0, + expectedUsedEnd: 255, + expectErr: false, + description: "Occupy all Bits with IPv6", + }, + { + clusterCIDRStr: "127.0.0.0/8", + subNetMaskSize: 16, + subNetCIDRStr: "127.0.0.0/2", + expectedUsedBegin: 0, + expectedUsedEnd: 255, + expectErr: false, + description: "Occupy every Bit with IPv4", + }, + { + clusterCIDRStr: "2001:beef:1200::/40", + subNetMaskSize: 48, + subNetCIDRStr: "2001:beef:1234::/34", + expectedUsedBegin: 0, + expectedUsedEnd: 255, + expectErr: false, + description: "Occupy every Bit with IPv6", + }, + { + clusterCIDRStr: "127.0.0.0/8", + subNetMaskSize: 16, + subNetCIDRStr: "127.0.0.0/16", + expectedUsedBegin: 0, + expectedUsedEnd: 0, + expectErr: false, + description: "Occupy 1st Bit with IPv4", + }, + { + clusterCIDRStr: "2001:beef:1200::/40", + subNetMaskSize: 48, + subNetCIDRStr: "2001:beef:1200::/48", + expectedUsedBegin: 0, + expectedUsedEnd: 0, + expectErr: false, + description: "Occupy 1st Bit with IPv6", + }, + { + clusterCIDRStr: "127.0.0.0/8", + subNetMaskSize: 32, + subNetCIDRStr: "127.0.0.0/16", + expectedUsedBegin: 0, + expectedUsedEnd: 65535, + expectErr: false, + description: "Occupy 65535 Bits with IPv4", + }, + { + clusterCIDRStr: "2001:beef:1200::/48", + subNetMaskSize: 64, + subNetCIDRStr: "2001:beef:1200::/48", + expectedUsedBegin: 0, + expectedUsedEnd: 65535, + expectErr: false, + description: "Occupy 65535 Bits with IPv6", + }, + { + clusterCIDRStr: "127.0.0.0/7", + subNetMaskSize: 16, + subNetCIDRStr: "127.0.0.0/15", + expectedUsedBegin: 256, + expectedUsedEnd: 257, + expectErr: false, + description: "Occupy 257th Bit with IPv4", + }, + { + clusterCIDRStr: "2001:beef:7f00::/39", + subNetMaskSize: 48, + subNetCIDRStr: "2001:beef:7f00::/47", + expectedUsedBegin: 256, + expectedUsedEnd: 257, + expectErr: false, + description: "Occupy 257th Bit with IPv6", + }, + { + clusterCIDRStr: "127.0.0.0/7", + subNetMaskSize: 15, + subNetCIDRStr: "127.0.0.0/15", + expectedUsedBegin: 128, + expectedUsedEnd: 128, + expectErr: false, + description: "Occupy 128th Bit with IPv4", + }, + { + clusterCIDRStr: "2001:beef:7f00::/39", + subNetMaskSize: 47, + subNetCIDRStr: "2001:beef:7f00::/47", + expectedUsedBegin: 128, + expectedUsedEnd: 128, + expectErr: false, + description: "Occupy 128th Bit with IPv6", + }, + { + clusterCIDRStr: "127.0.0.0/7", + subNetMaskSize: 18, + subNetCIDRStr: "127.0.0.0/15", + expectedUsedBegin: 1024, + expectedUsedEnd: 1031, + expectErr: false, + description: "Occupy 1031st Bit with IPv4", + }, + { + clusterCIDRStr: "2001:beef:7f00::/39", + subNetMaskSize: 50, + subNetCIDRStr: "2001:beef:7f00::/47", + expectedUsedBegin: 1024, + expectedUsedEnd: 1031, + expectErr: false, + description: "Occupy 1031st Bit 
with IPv6", + }, + } + + for _, tc := range cases { + _, clusterCIDR, err := net.ParseCIDR(tc.clusterCIDRStr) + if err != nil { + t.Fatalf("unexpected error: %v for %v", err, tc.description) + } + + cs, err := NewCIDRSet(clusterCIDR, tc.subNetMaskSize) + if err != nil { + t.Fatalf("Error allocating CIDRSet for %v", tc.description) + } + + _, subnetCIDR, err := net.ParseCIDR(tc.subNetCIDRStr) + if err != nil { + t.Fatalf("unexpected error: %v for %v", err, tc.description) + } + + err = cs.Occupy(subnetCIDR) + if err == nil && tc.expectErr { + t.Errorf("expected error but got none for %v", tc.description) + continue + } + if err != nil && !tc.expectErr { + t.Errorf("unexpected error: %v for %v", err, tc.description) + continue + } + + expectedUsed := big.Int{} + for i := tc.expectedUsedBegin; i <= tc.expectedUsedEnd; i++ { + expectedUsed.SetBit(&expectedUsed, i, 1) + } + if expectedUsed.Cmp(&cs.used) != 0 { + t.Errorf("error for %v", tc.description) + } + } +} + +func TestCIDRSetv6(t *testing.T) { + cases := []struct { + clusterCIDRStr string + subNetMaskSize int + expectedCIDR string + expectedCIDR2 string + expectErr bool + description string + }{ + { + clusterCIDRStr: "127.0.0.0/8", + subNetMaskSize: 32, + expectErr: false, + expectedCIDR: "127.0.0.0/32", + expectedCIDR2: "127.0.0.1/32", + description: "Max cluster subnet size with IPv4", + }, + { + clusterCIDRStr: "beef:1234::/32", + subNetMaskSize: 49, + expectErr: true, + description: "Max cluster subnet size with IPv6", + }, + { + clusterCIDRStr: "2001:beef:1234:369b::/60", + subNetMaskSize: 64, + expectedCIDR: "2001:beef:1234:3690::/64", + expectedCIDR2: "2001:beef:1234:3691::/64", + expectErr: false, + description: "Allocate a few IPv6", + }, + } + for _, tc := range cases { + t.Run(tc.description, func(t *testing.T) { + _, clusterCIDR, _ := net.ParseCIDR(tc.clusterCIDRStr) + a, err := NewCIDRSet(clusterCIDR, tc.subNetMaskSize) + if gotErr := err != nil; gotErr != tc.expectErr { + t.Fatalf("NewCIDRSet(%v, %v) = %v, %v; gotErr = %t, want %t", clusterCIDR, tc.subNetMaskSize, a, err, gotErr, tc.expectErr) + } + if a == nil { + return + } + p, err := a.AllocateNext() + if err == nil && tc.expectErr { + t.Errorf("a.AllocateNext() = nil, want error") + } + if err != nil && !tc.expectErr { + t.Errorf("a.AllocateNext() = %+v, want no error", err) + } + if !tc.expectErr { + if p != nil && p.String() != tc.expectedCIDR { + t.Fatalf("a.AllocateNext() got %+v, want %+v", p.String(), tc.expectedCIDR) + } + } + p2, err := a.AllocateNext() + if err == nil && tc.expectErr { + t.Errorf("a.AllocateNext() = nil, want error") + } + if err != nil && !tc.expectErr { + t.Errorf("a.AllocateNext() = %+v, want no error", err) + } + if !tc.expectErr { + if p2 != nil && p2.String() != tc.expectedCIDR2 { + t.Fatalf("a.AllocateNext() got %+v, want %+v", p2.String(), tc.expectedCIDR) + } + } + }) + } +} + +func TestCidrSetMetrics(t *testing.T) { + cidr := "10.0.0.0/16" + _, clusterCIDR, _ := net.ParseCIDR(cidr) + // We have 256 free cidrs + a, err := NewCIDRSet(clusterCIDR, 24) + if err != nil { + t.Fatalf("unexpected error creating CidrSet: %v", err) + } + clearMetrics(map[string]string{"clusterCIDR": cidr}) + + // Allocate next all + for i := 1; i <= 256; i++ { + _, err := a.AllocateNext() + if err != nil { + t.Fatalf("unexpected error allocating a new CIDR: %v", err) + } + em := testMetrics{ + usage: float64(i) / float64(256), + allocs: float64(i), + releases: 0, + allocTries: 0, + } + expectMetrics(t, cidr, em) + } + // Release all + a.Release(clusterCIDR) + 
em := testMetrics{ + usage: 0, + allocs: 256, + releases: 256, + allocTries: 0, + } + expectMetrics(t, cidr, em) + + // Allocate all + a.Occupy(clusterCIDR) + em = testMetrics{ + usage: 1, + allocs: 512, + releases: 256, + allocTries: 0, + } + expectMetrics(t, cidr, em) + +} + +func TestCidrSetMetricsHistogram(t *testing.T) { + cidr := "10.0.0.0/16" + _, clusterCIDR, _ := net.ParseCIDR(cidr) + // We have 256 free cidrs + a, err := NewCIDRSet(clusterCIDR, 24) + if err != nil { + t.Fatalf("unexpected error creating CidrSet: %v", err) + } + clearMetrics(map[string]string{"clusterCIDR": cidr}) + + // Allocate half of the range + // Occupy does not update the nextCandidate + _, halfClusterCIDR, _ := net.ParseCIDR("10.0.0.0/17") + a.Occupy(halfClusterCIDR) + em := testMetrics{ + usage: 0.5, + allocs: 128, + releases: 0, + allocTries: 0, + } + expectMetrics(t, cidr, em) + // Allocate next should iterate until the next free cidr + // that is exactly the same number we allocated previously + _, err = a.AllocateNext() + if err != nil { + t.Fatalf("unexpected error allocating a new CIDR: %v", err) + } + em = testMetrics{ + usage: float64(129) / float64(256), + allocs: 129, + releases: 0, + allocTries: 128, + } + expectMetrics(t, cidr, em) +} + +func TestCidrSetMetricsDual(t *testing.T) { + // create IPv4 cidrSet + cidrIPv4 := "10.0.0.0/16" + _, clusterCIDRv4, _ := net.ParseCIDR(cidrIPv4) + a, err := NewCIDRSet(clusterCIDRv4, 24) + if err != nil { + t.Fatalf("unexpected error creating CidrSet: %v", err) + } + clearMetrics(map[string]string{"clusterCIDR": cidrIPv4}) + // create IPv6 cidrSet + cidrIPv6 := "2001:db8::/48" + _, clusterCIDRv6, _ := net.ParseCIDR(cidrIPv6) + b, err := NewCIDRSet(clusterCIDRv6, 64) + if err != nil { + t.Fatalf("unexpected error creating CidrSet: %v", err) + } + clearMetrics(map[string]string{"clusterCIDR": cidrIPv6}) + // Allocate all + a.Occupy(clusterCIDRv4) + em := testMetrics{ + usage: 1, + allocs: 256, + releases: 0, + allocTries: 0, + } + expectMetrics(t, cidrIPv4, em) + + b.Occupy(clusterCIDRv6) + em = testMetrics{ + usage: 1, + allocs: 65536, + releases: 0, + allocTries: 0, + } + expectMetrics(t, cidrIPv6, em) + + // Release all + a.Release(clusterCIDRv4) + em = testMetrics{ + usage: 0, + allocs: 256, + releases: 256, + allocTries: 0, + } + expectMetrics(t, cidrIPv4, em) + b.Release(clusterCIDRv6) + em = testMetrics{ + usage: 0, + allocs: 65536, + releases: 65536, + allocTries: 0, + } + expectMetrics(t, cidrIPv6, em) + +} + +// Metrics helpers +func clearMetrics(labels map[string]string) { + cidrSetAllocations.Delete(labels) + cidrSetReleases.Delete(labels) + cidrSetUsage.Delete(labels) + cidrSetAllocationTriesPerRequest.Delete(labels) +} + +type testMetrics struct { + usage float64 + allocs float64 + releases float64 + allocTries float64 +} + +func expectMetrics(t *testing.T, label string, em testMetrics) { + var m testMetrics + var err error + m.usage, err = testutil.GetGaugeMetricValue(cidrSetUsage.WithLabelValues(label)) + if err != nil { + t.Errorf("failed to get %s value, err: %v", cidrSetUsage.Name, err) + } + m.allocs, err = testutil.GetCounterMetricValue(cidrSetAllocations.WithLabelValues(label)) + if err != nil { + t.Errorf("failed to get %s value, err: %v", cidrSetAllocations.Name, err) + } + m.releases, err = testutil.GetCounterMetricValue(cidrSetReleases.WithLabelValues(label)) + if err != nil { + t.Errorf("failed to get %s value, err: %v", cidrSetReleases.Name, err) + } + m.allocTries, err = 
testutil.GetHistogramMetricValue(cidrSetAllocationTriesPerRequest.WithLabelValues(label)) + if err != nil { + t.Errorf("failed to get %s value, err: %v", cidrSetAllocationTriesPerRequest.Name, err) + } + + if m != em { + t.Fatalf("metrics error: expected %v, received %v", em, m) + } +} + +// Benchmarks +func benchmarkAllocateAllIPv6(cidr string, subnetMaskSize int, b *testing.B) { + _, clusterCIDR, _ := net.ParseCIDR(cidr) + a, _ := NewCIDRSet(clusterCIDR, subnetMaskSize) + for n := 0; n < b.N; n++ { + // Allocate the whole range + 1 + for i := 0; i <= a.maxCIDRs; i++ { + a.AllocateNext() + } + // Release all + a.Release(clusterCIDR) + } +} + +func BenchmarkAllocateAll_48_52(b *testing.B) { benchmarkAllocateAllIPv6("2001:db8::/48", 52, b) } +func BenchmarkAllocateAll_48_56(b *testing.B) { benchmarkAllocateAllIPv6("2001:db8::/48", 56, b) } + +func BenchmarkAllocateAll_48_60(b *testing.B) { benchmarkAllocateAllIPv6("2001:db8::/48", 60, b) } +func BenchmarkAllocateAll_48_64(b *testing.B) { benchmarkAllocateAllIPv6("2001:db8::/48", 64, b) } + +func BenchmarkAllocateAll_64_68(b *testing.B) { benchmarkAllocateAllIPv6("2001:db8::/64", 68, b) } + +func BenchmarkAllocateAll_64_72(b *testing.B) { benchmarkAllocateAllIPv6("2001:db8::/64", 72, b) } +func BenchmarkAllocateAll_64_76(b *testing.B) { benchmarkAllocateAllIPv6("2001:db8::/64", 76, b) } + +func BenchmarkAllocateAll_64_80(b *testing.B) { benchmarkAllocateAllIPv6("2001:db8::/64", 80, b) } diff --git a/pkg/controllers/nodeipam/ipam/cidrset/metrics.go b/pkg/controllers/nodeipam/ipam/cidrset/metrics.go new file mode 100644 index 0000000000..9bc2b2c17b --- /dev/null +++ b/pkg/controllers/nodeipam/ipam/cidrset/metrics.go @@ -0,0 +1,78 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package cidrset + +import ( + "sync" + + "k8s.io/component-base/metrics" + "k8s.io/component-base/metrics/legacyregistry" +) + +const nodeIpamSubsystem = "node_ipam_controller" + +var ( + cidrSetAllocations = metrics.NewCounterVec( + &metrics.CounterOpts{ + Subsystem: nodeIpamSubsystem, + Name: "cidrset_cidrs_allocations_total", + Help: "Counter measuring total number of CIDR allocations.", + StabilityLevel: metrics.ALPHA, + }, + []string{"clusterCIDR"}, + ) + cidrSetReleases = metrics.NewCounterVec( + &metrics.CounterOpts{ + Subsystem: nodeIpamSubsystem, + Name: "cidrset_cidrs_releases_total", + Help: "Counter measuring total number of CIDR releases.", + StabilityLevel: metrics.ALPHA, + }, + []string{"clusterCIDR"}, + ) + cidrSetUsage = metrics.NewGaugeVec( + &metrics.GaugeOpts{ + Subsystem: nodeIpamSubsystem, + Name: "cidrset_usage_cidrs", + Help: "Gauge measuring percentage of allocated CIDRs.", + StabilityLevel: metrics.ALPHA, + }, + []string{"clusterCIDR"}, + ) + cidrSetAllocationTriesPerRequest = metrics.NewHistogramVec( + &metrics.HistogramOpts{ + Subsystem: nodeIpamSubsystem, + Name: "cidrset_allocation_tries_per_request", + Help: "Number of endpoints added on each Service sync", + StabilityLevel: metrics.ALPHA, + Buckets: metrics.ExponentialBuckets(1, 5, 5), + }, + []string{"clusterCIDR"}, + ) +) + +var registerMetrics sync.Once + +// registerCidrsetMetrics the metrics that are to be monitored. +func registerCidrsetMetrics() { + registerMetrics.Do(func() { + legacyregistry.MustRegister(cidrSetAllocations) + legacyregistry.MustRegister(cidrSetReleases) + legacyregistry.MustRegister(cidrSetUsage) + legacyregistry.MustRegister(cidrSetAllocationTriesPerRequest) + }) +} diff --git a/pkg/controllers/nodeipam/ipam/ipv6_allocator.go b/pkg/controllers/nodeipam/ipam/ipv6_allocator.go new file mode 100644 index 0000000000..24fefc07e7 --- /dev/null +++ b/pkg/controllers/nodeipam/ipam/ipv6_allocator.go @@ -0,0 +1,284 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package ipam + +import ( + "fmt" + "time" + + "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/service/ec2" + v1 "k8s.io/api/core/v1" + utilruntime "k8s.io/apimachinery/pkg/util/runtime" + "k8s.io/apimachinery/pkg/util/wait" + coreinformers "k8s.io/client-go/informers/core/v1" + informers "k8s.io/client-go/informers/core/v1" + clientset "k8s.io/client-go/kubernetes" + "k8s.io/client-go/tools/cache" + "k8s.io/client-go/util/workqueue" + awsv1 "k8s.io/cloud-provider-aws/pkg/providers/v1" + "k8s.io/klog/v2" +) + +const ( + maxRequeuingCount = 9 + + // The label for depicting total number of errors a work item encounter and succeed + totalErrorsWorkItemErrorMetric = "total_errors" + + // The label for depicting total time when work item gets queued to processed + workItemProcessingTimeWorkItemMetric = "work_item_processing_time" + + // The label for depicting total time when work item gets queued to dequeued + workItemDequeuingTimeWorkItemMetric = "work_item_dequeuing_time" + + // The label for depicting total number of errors a work item encounter and fail + errorsAfterRetriesExhaustedWorkItemErrorMetric = "errors_after_retries_exhausted" +) + +// IPv6CIDRAllocator is an interface implemented by things that know how +// to allocate IPv6 CIDRs. +type IPv6CIDRAllocator interface { + Run(stopCh <-chan struct{}) +} + +// IPv6RangeAllocator allocates IPv6 CIDRs +type IPv6RangeAllocator struct { + nodeInformer coreinformers.NodeInformer + kubeClient clientset.Interface + cloud *awsv1.Cloud + workqueue workqueue.RateLimitingInterface + nodesSynced cache.InformerSynced + + // Value controlling Controller monitoring period, i.e. how often does Controller + // check node list. This value should be lower than nodeMonitorGracePeriod + // set in controller-manager + nodeMonitorPeriod time.Duration + + rateLimitEnabled bool +} + +// workItem contains the node and an action for that node +type workItem struct { + node *v1.Node + action func(node *v1.Node) error + requeuingCount int + enqueueTime time.Time +} + +func (w workItem) String() string { + return fmt.Sprintf("[Node: %s, RequeuingCount: %d, EnqueueTime: %s]", w.node.GetName(), w.requeuingCount, w.enqueueTime) +} + +// NewIPv6RangeAllocator returns an IPv6CIDRAllocator +func NewIPv6RangeAllocator(kubeClient clientset.Interface, nodeInformer informers.NodeInformer, awsCloud *awsv1.Cloud, rateLimiter workqueue.RateLimiter, rateLimitEnabled bool, nodeMonitorPeriod time.Duration) (IPv6CIDRAllocator, error) { + ra6 := &IPv6RangeAllocator{ + nodeInformer: nodeInformer, + kubeClient: kubeClient, + cloud: awsCloud, + workqueue: workqueue.NewNamedRateLimitingQueue(rateLimiter, "NodeIpam"), + nodesSynced: nodeInformer.Informer().HasSynced, + nodeMonitorPeriod: nodeMonitorPeriod, + rateLimitEnabled: rateLimitEnabled, + } + // Use shared informer to listen to add/update/delete of nodes. Note that any nodes + // that exist before nodeipam controller starts will show up in the update method + ra6.nodeInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ + AddFunc: func(obj interface{}) { + node := obj.(*v1.Node) + ra6.enqueueNode(node, ra6.prefixNodeResource) + }, + UpdateFunc: func(oldObj, newObj interface{}) { + node := newObj.(*v1.Node) + // Check if nodeipam is required by inspecting the labels. This check here prevents us from putting a tagged node into the + // work queue. 
We check this again before nodeipam the node to make sure that between when a node was put in the work queue + // and when it gets prefixed, there might be another event which put the same item in the work queue + // (since the node won't have the labels yet) and hence prevents us from making an unnecessary EC2 call. + if !ra6.isPrefixNodeRequired(node) { + klog.Infof("Skip putting node %s in work queue since it was already prefixed earlier.", node.GetName()) + return + } + + ra6.enqueueNode(node, ra6.prefixNodeResource) + }, + }) + + return ra6, nil +} + +// Run will start the controller and write the prefix CIDR from the network interface to the node +func (ra6 *IPv6RangeAllocator) Run(stopCh <-chan struct{}) { + defer utilruntime.HandleCrash() + defer ra6.workqueue.ShutDown() + + // Wait for the caches to be synced before starting workers + klog.Info("Waiting for informer caches to sync") + if ok := cache.WaitForCacheSync(stopCh, ra6.nodesSynced); !ok { + klog.Errorf("failed to wait for caches to sync") + return + } + + klog.Infof("Starting the nodeipam controller") + go wait.Until(ra6.work, ra6.nodeMonitorPeriod, stopCh) + + <-stopCh +} + +// work is a long-running function that continuously +// call process() for each message on the workqueue +func (ra6 *IPv6RangeAllocator) work() { + for ra6.process() { + } +} + +// process reads each message in the queue and performs either +// add prefix to kubernetes node object +func (ra6 *IPv6RangeAllocator) process() bool { + obj, shutdown := ra6.workqueue.Get() + if shutdown { + return false + } + + klog.Infof("Starting to process %s", obj) + + err := func(obj interface{}) error { + defer ra6.workqueue.Done(obj) + + workItem, ok := obj.(*workItem) + if !ok { + ra6.workqueue.Forget(obj) + err := fmt.Errorf("expected workItem in workqueue but got %s", obj) + utilruntime.HandleError(err) + return nil + } + + timeTaken := time.Since(workItem.enqueueTime).Seconds() + recordWorkItemLatencyMetrics(workItemDequeuingTimeWorkItemMetric, timeTaken) + klog.Infof("Dequeuing latency %f", timeTaken) + + instanceID, err := awsv1.KubernetesInstanceID(workItem.node.Spec.ProviderID).MapToAWSInstanceID() + if err != nil { + err = fmt.Errorf("Error in getting instanceID for node %s, error: %v", workItem.node.GetName(), err) + utilruntime.HandleError(err) + return nil + } + klog.Infof("Instance ID of work item %s is %s", workItem, instanceID) + + if awsv1.IsFargateNode(string(instanceID)) { + klog.Infof("Skip processing the node %s since it is a Fargate node", instanceID) + ra6.workqueue.Forget(obj) + return nil + } + + err = workItem.action(workItem.node) + + if err != nil { + if workItem.requeuingCount < maxRequeuingCount { + // Put the item back on the workqueue to handle any transient errors. 
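+				// AddRateLimited re-queues the item after the rate limiter's delay
+				// (per-item exponential backoff with the limiters used here), so
+				// transient failures are retried with increasing delay up to
+				// maxRequeuingCount attempts.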
+ workItem.requeuingCount++ + ra6.workqueue.AddRateLimited(workItem) + + recordWorkItemErrorMetrics(totalErrorsWorkItemErrorMetric, string(instanceID)) + return fmt.Errorf("error processing work item '%v': %s, requeuing count %d", workItem, err.Error(), workItem.requeuingCount) + } + + klog.Errorf("error processing work item %s: %s, requeuing count exceeded", workItem, err.Error()) + recordWorkItemErrorMetrics(errorsAfterRetriesExhaustedWorkItemErrorMetric, string(instanceID)) + } else { + klog.Infof("Finished processing %s", workItem) + timeTaken = time.Since(workItem.enqueueTime).Seconds() + recordWorkItemLatencyMetrics(workItemProcessingTimeWorkItemMetric, timeTaken) + klog.Infof("Processing latency %f", timeTaken) + } + + ra6.workqueue.Forget(obj) + return nil + }(obj) + + if err != nil { + klog.Errorf("Error occurred while processing %s", obj) + utilruntime.HandleError(err) + } + + return true +} + +func (ra6 *IPv6RangeAllocator) prefixNodeResource(node *v1.Node) error { + if node.Spec.ProviderID == "" { + klog.Infof("Node %q has empty provider ID", node.Name) + return nil + } + + // aws:///eu-central-1a/i-07577a7bcf3e576f2 + instanceID, _ := awsv1.KubernetesInstanceID(node.Spec.ProviderID).MapToAWSInstanceID() + eni, err := ra6.cloud.DescribeNetworkInterfaces( + &ec2.DescribeNetworkInterfacesInput{ + Filters: []*ec2.Filter{ + { + Name: ptrTo("attachment.instance-id"), + Values: []*string{ + ptrTo(string(instanceID)), + }, + }, + }, + }) + if err != nil { + return err + } + + if len(eni.Ipv6Prefixes) != 1 { + return fmt.Errorf("unexpected amount of ipv6 prefixes on interface %q: %v", *eni.NetworkInterfaceId, len(eni.Ipv6Prefixes)) + } + + ipv6Address := aws.StringValue(eni.Ipv6Prefixes[0].Ipv6Prefix) + if err := PatchNodePodCIDRs(ra6.kubeClient, node, []string{ipv6Address}); err != nil { + return err + } + klog.Infof("Successfully prefixed node %s with %v.", node.GetName(), ipv6Address) + return nil +} + +// enqueueNode takes in the object and an +// action for the object for a workitem and enqueue to the workqueue +func (ra6 *IPv6RangeAllocator) enqueueNode(node *v1.Node, action func(node *v1.Node) error) { + item := &workItem{ + node: node, + action: action, + requeuingCount: 0, + enqueueTime: time.Now(), + } + + if ra6.rateLimitEnabled { + ra6.workqueue.AddRateLimited(item) + klog.Infof("Added %s to the workqueue (rate-limited)", item) + } else { + ra6.workqueue.Add(item) + klog.Infof("Added %s to the workqueue (without any rate-limit)", item) + } +} + +func (ra6 *IPv6RangeAllocator) isPrefixNodeRequired(node *v1.Node) bool { + if node.Spec.PodCIDR == "" && node.Spec.PodCIDRs == nil { + return true + } + return false +} + +// ptrTo returns a pointer to a copy of any value. +func ptrTo[T any](v T) *T { + return &v +} diff --git a/pkg/controllers/nodeipam/ipam/ipv6_allocator_test.go b/pkg/controllers/nodeipam/ipam/ipv6_allocator_test.go new file mode 100644 index 0000000000..6584022b86 --- /dev/null +++ b/pkg/controllers/nodeipam/ipam/ipv6_allocator_test.go @@ -0,0 +1,119 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/ + +package ipam + +import ( + "bytes" + "flag" + "os" + "strings" + "testing" + "time" + + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/client-go/kubernetes/fake" + "k8s.io/client-go/util/workqueue" + "k8s.io/cloud-provider-aws/pkg/controllers/nodeipam/testutil" + awsv1 "k8s.io/cloud-provider-aws/pkg/providers/v1" + "k8s.io/klog/v2" +) + +func TestIPv6CIDRAllocator(t *testing.T) { + klog.InitFlags(nil) + flag.CommandLine.Parse([]string{"--logtostderr=false"}) + // all tests operate on a single node + testCases := []testCase{ + { + rateLimitEnabled: true, + description: "success, correct node allocation", + fakeNodeHandler: &testutil.FakeNodeHandler{ + Existing: []*v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "node0", + }, + Spec: v1.NodeSpec{ + ProviderID: "aws:///eu-west-1a/i-123456789", + }, + }, + }, + Clientset: fake.NewSimpleClientset(), + }, + allocatedCIDRs: nil, + expectedAllocatedCIDR: map[int]string{ + 0: "2001:0db8:85a3:0000:0000:8a2e:0000:0000/80", + }, + ctrlCreateFail: false, + }, + } + // test function + for _, tc := range testCases { + t.Run(tc.description, func(t *testing.T) { + + var logBuf bytes.Buffer + klog.SetOutput(&logBuf) + defer func() { + klog.SetOutput(os.Stderr) + }() + + fakeNodeInformer := getFakeNodeInformer(tc.fakeNodeHandler) + rateLimiter := workqueue.DefaultControllerRateLimiter() + rateLimitEnabled := false + nodeMonitorPeriod := 1 * time.Second + + // Initialize the IPv6 range allocator. + ra6, err := NewIPv6RangeAllocator(tc.fakeNodeHandler, fakeNodeInformer, nil, rateLimiter, rateLimitEnabled, nodeMonitorPeriod) + if err == nil && tc.ctrlCreateFail { + t.Fatalf("failed to create IPv6 range allocator") + } + rangeAllocatorIPv6, ok := ra6.(*IPv6RangeAllocator) + if !ok { + t.Logf("%v: found non-default implementation of IPv6RangeAllocator, skipping white-box test...", tc.description) + return + } + + rangeAllocatorIPv6.nodesSynced = alwaysReady + awsServices := awsv1.NewFakeAWSServices("clusterid.test") + rangeAllocatorIPv6.cloud, _ = awsv1.NewAWSCloud(awsv1.CloudConfig{}, awsServices) + go rangeAllocatorIPv6.Run(wait.NeverStop) + + rangeAllocatorIPv6.enqueueNode(tc.fakeNodeHandler.Existing[0], rangeAllocatorIPv6.prefixNodeResource) + + if tc.rateLimitEnabled { + // If rate limit is enabled, sleep for 10 ms to wait for the item to be added to the queue since the base delay is 5 ms. + time.Sleep(10 * time.Millisecond) + } + + for rangeAllocatorIPv6.workqueue.Len() > 0 { + rangeAllocatorIPv6.process() + + // sleep briefly because of exponential backoff when requeueing failed workitem + // resulting in workqueue to be empty if checked immediately + time.Sleep(1500 * time.Millisecond) + } + + for _, node := range tc.fakeNodeHandler.Existing { + if !strings.Contains(logBuf.String(), tc.expectedAllocatedCIDR[0]) { + t.Errorf("\nDid not successfully prefix node %s.\n%v\n", node.Name, logBuf.String()) + } + } + + }) + } +} diff --git a/pkg/controllers/nodeipam/ipam/metrics.go b/pkg/controllers/nodeipam/ipam/metrics.go new file mode 100644 index 0000000000..9684283762 --- /dev/null +++ b/pkg/controllers/nodeipam/ipam/metrics.go @@ -0,0 +1,61 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package ipam + +import ( + "sync" + + "k8s.io/component-base/metrics" + "k8s.io/component-base/metrics/legacyregistry" +) + +var register sync.Once + +var ( + workItemDuration = metrics.NewHistogramVec( + &metrics.HistogramOpts{ + Name: "cloudprovider_aws_nodeipam_controller_work_item_duration_seconds", + Help: "workitem latency of workitem being in the queue and time it takes to process", + StabilityLevel: metrics.ALPHA, + Buckets: metrics.ExponentialBuckets(0.5, 1.5, 20), + }, + []string{"latency_type"}) + + workItemError = metrics.NewCounterVec( + &metrics.CounterOpts{ + Name: "cloudprovider_aws_nodeipam_controller_work_item_errors_total", + Help: "any error in dequeueing the work queue and processing workItem", + StabilityLevel: metrics.ALPHA, + }, + []string{"error_type", "instance_id"}) +) + +// RegisterMetrics registers nodeipam-controller metrics. +func RegisterMetrics() { + register.Do(func() { + legacyregistry.MustRegister(workItemDuration) + legacyregistry.MustRegister(workItemError) + }) +} + +func recordWorkItemLatencyMetrics(latencyType string, timeTaken float64) { + workItemDuration.With(metrics.Labels{"latency_type": latencyType}).Observe(timeTaken) +} + +func recordWorkItemErrorMetrics(errorType string, instanceID string) { + workItemError.With(metrics.Labels{"error_type": errorType, "instance_id": instanceID}).Inc() +} diff --git a/pkg/controllers/nodeipam/ipam/util.go b/pkg/controllers/nodeipam/ipam/util.go new file mode 100644 index 0000000000..3fc7f90e6f --- /dev/null +++ b/pkg/controllers/nodeipam/ipam/util.go @@ -0,0 +1,71 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package ipam + +import ( + "context" + "encoding/json" + "fmt" + + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + clientset "k8s.io/client-go/kubernetes" + "k8s.io/klog/v2" +) + +// NodePatch holds the fields to patch +type NodePatch struct { + Spec *NodePatchSpec `json:"spec,omitempty"` + Metadata *NodePatchMetadata `json:"metadata,omitempty"` +} + +// NodePatchSpec holds the spec for the node patch operation +type NodePatchSpec struct { + PodCIDR string `json:"podCIDR,omitempty"` + PodCIDRs []string `json:"podCIDRs,omitempty"` +} + +// NodePatchMetadata holds the metadata for the node patch operation +type NodePatchMetadata struct { + Labels map[string]*string `json:"labels,omitempty"` +} + +// PatchNodePodCIDRs patches the node podCIDR to the specified value. 
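+//
+// For a single IPv6 prefix it sends a strategic merge patch of the form
+// (illustrative value):
+//
+//	{"spec":{"podCIDR":"2001:db8:85a3::/80","podCIDRs":["2001:db8:85a3::/80"]}}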
+func PatchNodePodCIDRs(kubeClient clientset.Interface, node *v1.Node, cidr []string) error { + klog.Infof("assigning cidr %q to node %q", cidr, node.ObjectMeta.Name) + nodePatchSpec := &NodePatchSpec{ + PodCIDR: cidr[0], + PodCIDRs: cidr, + } + nodePatch := &NodePatch{ + Spec: nodePatchSpec, + } + nodePatchJSON, err := json.Marshal(nodePatch) + if err != nil { + return fmt.Errorf("error building node patch: %v", err) + } + + klog.V(2).Infof("sending patch for node %q: %q", node.Name, string(nodePatchJSON)) + + _, err = kubeClient.CoreV1().Nodes().Patch(context.TODO(), node.Name, types.StrategicMergePatchType, nodePatchJSON, metav1.PatchOptions{}) + if err != nil { + return fmt.Errorf("error applying patch to node: %v", err) + } + + return nil +} diff --git a/pkg/controllers/nodeipam/nodeipam_controller.go b/pkg/controllers/nodeipam/nodeipam_controller.go new file mode 100644 index 0000000000..67c0a22d27 --- /dev/null +++ b/pkg/controllers/nodeipam/nodeipam_controller.go @@ -0,0 +1,218 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package nodeipam + +import ( + "context" + "fmt" + "net" + "sync" + "time" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/fields" + "k8s.io/apimachinery/pkg/labels" + utilruntime "k8s.io/apimachinery/pkg/util/runtime" + "k8s.io/apimachinery/pkg/util/wait" + + "golang.org/x/time/rate" + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/util/sets" + coreinformers "k8s.io/client-go/informers/core/v1" + clientset "k8s.io/client-go/kubernetes" + "k8s.io/client-go/kubernetes/scheme" + v1core "k8s.io/client-go/kubernetes/typed/core/v1" + corelisters "k8s.io/client-go/listers/core/v1" + "k8s.io/client-go/tools/cache" + "k8s.io/client-go/tools/record" + "k8s.io/client-go/util/workqueue" + cloudprovider "k8s.io/cloud-provider" + "k8s.io/cloud-provider-aws/pkg/controllers/nodeipam/config" + "k8s.io/cloud-provider-aws/pkg/controllers/nodeipam/ipam" + cidrset "k8s.io/cloud-provider-aws/pkg/controllers/nodeipam/ipam/cidrset" + awsv1 "k8s.io/cloud-provider-aws/pkg/providers/v1" + controllersmetrics "k8s.io/component-base/metrics/prometheus/controllers" + "k8s.io/klog/v2" +) + +const ( + // The amount of time the nodecontroller polls on the list nodes endpoint. + apiserverStartupGracePeriod = 10 * time.Minute +) + +// nodePollInterval is used in listing node +// This is a variable instead of a const to enable testing. +var nodePollInterval = 10 * time.Second + +// Controller is the controller that manages node ipam state. +type Controller struct { + nodeInformer coreinformers.NodeInformer + kubeClient clientset.Interface + cloud *awsv1.Cloud + workqueue workqueue.RateLimitingInterface + nodesSynced cache.InformerSynced + + // Value controlling Controller monitoring period, i.e. how often does Controller + // check node list. 
This value should be lower than nodeMonitorGracePeriod + // set in controller-manager + nodeMonitorPeriod time.Duration + + rateLimitEnabled bool + + // nodeLister is able to list/get nodes and is populated by the shared informer passed to controller + nodeLister corelisters.NodeLister + // cluster cidrs as passed in during controller creation + clusterCIDRs []*net.IPNet + // for each entry in clusterCIDRs we maintain a list of what is used and what is not + cidrSets []*cidrset.CidrSet + // Channel that is used to pass updating Nodes and their reserved CIDRs to the background + // This increases a throughput of CIDR assignment by not blocking on long operations. + nodeCIDRUpdateChannel chan ipam.NodeReservedCIDRs + recorder record.EventRecorder + // Keep a set of nodes that are currently being processed to avoid races in CIDR allocation + lock sync.Mutex + nodesInProcessing sets.String + cidrAllocator ipam.CIDRAllocator + ipv6CIDRAllocator ipam.IPv6CIDRAllocator +} + +// NewNodeIpamController creates a NewNodeIpamController object +func NewNodeIpamController( + nodeInformer coreinformers.NodeInformer, + kubeClient clientset.Interface, + cloud cloudprovider.Interface, + nodeMonitorPeriod time.Duration, + nodeIpamConfig config.NodeIPAMControllerConfiguration, +) (*Controller, error) { + var err error + awsCloud, ok := cloud.(*awsv1.Cloud) + if !ok { + err = fmt.Errorf("nodeipam controller does not support %v provider", cloud.ProviderName()) + return nil, err + } + + var rateLimiter workqueue.RateLimiter + var rateLimitEnabled bool + if nodeIpamConfig.RateLimit > 0.0 && nodeIpamConfig.BurstLimit > 0 { + klog.Infof("Rate limit enabled on controller with rate %f and burst %d.", nodeIpamConfig.RateLimit, nodeIpamConfig.BurstLimit) + // This is the workqueue.DefaultControllerRateLimiter() but in case where throttling is enabled on the controller, + // the rate and burst values are set to the provided values. 
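+		// NewMaxOfRateLimiter takes the larger of the two delays: per-item
+		// exponential backoff (5ms base, capped at 1000s) and an overall
+		// token-bucket limit built from the configured rate and burst.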
+ rateLimiter = workqueue.NewMaxOfRateLimiter( + workqueue.NewItemExponentialFailureRateLimiter(5*time.Millisecond, 1000*time.Second), + &workqueue.BucketRateLimiter{Limiter: rate.NewLimiter(rate.Limit(nodeIpamConfig.RateLimit), nodeIpamConfig.BurstLimit)}, + ) + rateLimitEnabled = true + } else { + klog.Infof("Rate limit disabled on controller.") + rateLimiter = workqueue.DefaultControllerRateLimiter() + rateLimitEnabled = false + } + + nc := &Controller{ + nodeInformer: nodeInformer, + kubeClient: kubeClient, + cloud: awsCloud, + workqueue: workqueue.NewNamedRateLimitingQueue(rateLimiter, "NodeIpam"), + nodesSynced: nodeInformer.Informer().HasSynced, + nodeMonitorPeriod: nodeMonitorPeriod, + rateLimitEnabled: rateLimitEnabled, + } + + // for IPv6 only + if !nodeIpamConfig.DualStack { + ipam.RegisterMetrics() + nc.ipv6CIDRAllocator, err = ipam.NewIPv6RangeAllocator(kubeClient, nodeInformer, awsCloud, rateLimiter, rateLimitEnabled, nodeMonitorPeriod) + if err != nil { + return nil, err + } + } else { + eventBroadcaster := record.NewBroadcaster() + recorder := eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "cidrAllocator"}) + eventBroadcaster.StartStructuredLogging(0) + klog.V(0).Infof("Sending events to api server.") + eventBroadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: kubeClient.CoreV1().Events("")}) + allocatorParams := ipam.CIDRAllocatorParams{ + ClusterCIDRs: nodeIpamConfig.ClusterCIDRs, + NodeCIDRMaskSizes: []int{int(nodeIpamConfig.NodeCIDRMaskSize)}, + } + nc = &Controller{ + kubeClient: kubeClient, + clusterCIDRs: allocatorParams.ClusterCIDRs, + cloud: awsCloud, + cidrSets: []*cidrset.CidrSet{}, + nodeLister: nodeInformer.Lister(), + nodesSynced: nodeInformer.Informer().HasSynced, + nodeCIDRUpdateChannel: make(chan ipam.NodeReservedCIDRs, ipam.CidrUpdateQueueSize), + recorder: recorder, + nodesInProcessing: sets.NewString(), + } + nodeList, err := listNodes(kubeClient) + if err != nil { + return nil, err + } + nc.cidrAllocator, err = ipam.NewCIDRRangeAllocator(kubeClient, nodeInformer, awsCloud, allocatorParams, nodeList) + if err != nil { + return nil, err + } + + } + return nc, nil +} + +// Run starts an asynchronous loop that monitors the status of cluster nodes. +func (nc *Controller) Run(stopCh <-chan struct{}, controllerManagerMetrics *controllersmetrics.ControllerManagerMetrics, dualStack bool) { + defer utilruntime.HandleCrash() + + klog.Infof("Starting ipam controller") + defer klog.Infof("Shutting down ipam controller") + controllerManagerMetrics.ControllerStarted("nodeipam") + defer controllerManagerMetrics.ControllerStopped("nodeipam") + + if !cache.WaitForNamedCacheSync("node", stopCh, nc.nodesSynced) { + return + } + + if !dualStack { + go nc.ipv6CIDRAllocator.Run(stopCh) + } else { + go nc.cidrAllocator.Run(stopCh) + } + + <-stopCh +} + +func listNodes(kubeClient clientset.Interface) (*v1.NodeList, error) { + var nodeList *v1.NodeList + // We must poll because apiserver might not be up. This error causes + // controller manager to restart. 
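+	// Poll every nodePollInterval (10s) for up to apiserverStartupGracePeriod
+	// (10 minutes) before giving up.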
+ if pollErr := wait.Poll(nodePollInterval, apiserverStartupGracePeriod, func() (bool, error) { + var err error + nodeList, err = kubeClient.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{ + FieldSelector: fields.Everything().String(), + LabelSelector: labels.Everything().String(), + }) + if err != nil { + klog.Errorf("Failed to list all nodes: %v", err) + return false, nil + } + return true, nil + }); pollErr != nil { + return nil, fmt.Errorf("failed to list all nodes in %v, cannot proceed without updating CIDR map", + apiserverStartupGracePeriod) + } + return nodeList, nil +} diff --git a/pkg/controllers/nodeipam/nodeipam_controller_wrapper.go b/pkg/controllers/nodeipam/nodeipam_controller_wrapper.go new file mode 100644 index 0000000000..91d3347306 --- /dev/null +++ b/pkg/controllers/nodeipam/nodeipam_controller_wrapper.go @@ -0,0 +1,114 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package nodeipam + +import ( + "context" + "fmt" + "net" + "strings" + + cloudprovider "k8s.io/cloud-provider" + "k8s.io/cloud-provider/app" + cloudcontrollerconfig "k8s.io/cloud-provider/app/config" + genericcontrollermanager "k8s.io/controller-manager/app" + "k8s.io/controller-manager/controller" + "k8s.io/klog/v2" + netutils "k8s.io/utils/net" + + "k8s.io/cloud-provider-aws/pkg/controllers/nodeipam/config" + "k8s.io/cloud-provider-aws/pkg/controllers/options" +) + +const ( + // NodeIpamControllerClientName is the name of the nodeipam controller + NodeIpamControllerClientName = "nodeipam-controller" + + // NodeIpamControllerKey is the key used to register this controller + NodeIpamControllerKey = "nodeipam" +) + +// ControllerWrapper is the wrapper for the nodeipam controller +type ControllerWrapper struct { + Options options.NodeIpamControllerOptions + Config config.NodeIPAMControllerConfiguration +} + +// StartNodeIpamControllerWrapper is used to take cloud config as input and start the nodeipam controller +func (nc *ControllerWrapper) StartNodeIpamControllerWrapper(initContext app.ControllerInitContext, completedConfig *cloudcontrollerconfig.CompletedConfig, cloud cloudprovider.Interface) app.InitFunc { + return func(ctx context.Context, controllerContext genericcontrollermanager.ControllerContext) (controller.Interface, bool, error) { + return nc.startNodeIpamController(ctx, initContext, completedConfig, controllerContext, cloud) + } +} + +func (nc *ControllerWrapper) startNodeIpamController(ctx context.Context, initContext app.ControllerInitContext, completedConfig *cloudcontrollerconfig.CompletedConfig, controllerContext genericcontrollermanager.ControllerContext, cloud cloudprovider.Interface) (controller.Interface, bool, error) { + err := nc.Options.Validate() + if err != nil { + klog.Fatalf("NodeIpam controller inputs are not properly set: %v", err) + } + + nc.Config.ClusterCIDRs, _, err = processCIDRs(completedConfig.ComponentConfig.KubeCloudShared.ClusterCIDR) + if err != nil { + return nil, false, err + } + nc.Options.ApplyTo(&nc.Config) + + 
klog.Infof("Cluster CIDR: %s", nc.Config.ClusterCIDRs[0].String()) + klog.Infof("Running in dualstack mode: %t", nc.Config.DualStack) + klog.Infof("Node CIDR mask size: %v", nc.Config.NodeCIDRMaskSize) + + // failure: more than cidrs is not allowed even with dual stack + if len(nc.Config.ClusterCIDRs) > 1 { + return nil, false, fmt.Errorf("len of clusters is:%v > more than 1 is not allowed for the nodeipam controller", len(nc.Config.ClusterCIDRs)) + } + + // Start the Controller + nodeIpamController, err := NewNodeIpamController( + completedConfig.SharedInformers.Core().V1().Nodes(), + completedConfig.ClientBuilder.ClientOrDie(initContext.ClientName), + cloud, + completedConfig.ComponentConfig.KubeCloudShared.NodeMonitorPeriod.Duration, + nc.Config) + + if err != nil { + klog.Warningf("failed to start nodeipam controller: %s", err) + return nil, false, nil + } + + go nodeIpamController.Run(controllerContext.Stop, controllerContext.ControllerManagerMetrics, nc.Config.DualStack) + + return nil, true, nil +} + +// processCIDRs is a helper function that works on a comma separated cidrs and returns +// a list of typed cidrs +// a flag if cidrs represents a dual stack +// error if failed to parse any of the cidrs +func processCIDRs(cidrsList string) ([]*net.IPNet, bool, error) { + cidrsSplit := strings.Split(strings.TrimSpace(cidrsList), ",") + + cidrs, err := netutils.ParseCIDRs(cidrsSplit) + if err != nil { + return nil, false, err + } + + // if cidrs has an error then the previous call will fail + // safe to ignore error checking on next call + dualstack, _ := netutils.IsDualStackCIDRs(cidrs) + + return cidrs, dualstack, nil +} diff --git a/pkg/controllers/nodeipam/testutil/testutil.go b/pkg/controllers/nodeipam/testutil/testutil.go new file mode 100644 index 0000000000..0255930ea2 --- /dev/null +++ b/pkg/controllers/nodeipam/testutil/testutil.go @@ -0,0 +1,544 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package testutil + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "reflect" + "sync" + "testing" + "time" + + v1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/sets" + "k8s.io/apimachinery/pkg/util/strategicpatch" + "k8s.io/apimachinery/pkg/watch" + v1apply "k8s.io/client-go/applyconfigurations/core/v1" + "k8s.io/client-go/kubernetes/fake" + "k8s.io/client-go/kubernetes/scheme" + v1core "k8s.io/client-go/kubernetes/typed/core/v1" + "k8s.io/client-go/tools/cache" + ref "k8s.io/client-go/tools/reference" + utilnode "k8s.io/component-helpers/node/topology" + "k8s.io/klog/v2" + "k8s.io/utils/clock" + clocktesting "k8s.io/utils/clock/testing" + + jsonpatch "github.com/evanphx/json-patch" +) + +var ( + keyFunc = cache.DeletionHandlingMetaNamespaceKeyFunc +) + +// FakeNodeHandler is a fake implementation of NodesInterface and NodeInterface. 
It +// allows test cases to have fine-grained control over mock behaviors. We also need +// PodsInterface and PodInterface to test list & delete pods, which is implemented in +// the embedded client.Fake field. +type FakeNodeHandler struct { + *fake.Clientset + + // Input: Hooks determine if request is valid or not + CreateHook func(*FakeNodeHandler, *v1.Node) bool + Existing []*v1.Node + + // Output + CreatedNodes []*v1.Node + DeletedNodes []*v1.Node + UpdatedNodes []*v1.Node + UpdatedNodeStatuses []*v1.Node + RequestCount int + + // Synchronization + lock sync.Mutex + DeleteWaitChan chan struct{} + PatchWaitChan chan struct{} +} + +// FakeLegacyHandler is a fake implementation of CoreV1Interface. +type FakeLegacyHandler struct { + v1core.CoreV1Interface + n *FakeNodeHandler +} + +// GetUpdatedNodesCopy returns a slice of Nodes with updates applied. +func (m *FakeNodeHandler) GetUpdatedNodesCopy() []*v1.Node { + m.lock.Lock() + defer m.lock.Unlock() + updatedNodesCopy := make([]*v1.Node, len(m.UpdatedNodes), len(m.UpdatedNodes)) + for i, ptr := range m.UpdatedNodes { + updatedNodesCopy[i] = ptr + } + return updatedNodesCopy +} + +// Core returns fake CoreInterface. +func (m *FakeNodeHandler) Core() v1core.CoreV1Interface { + return &FakeLegacyHandler{m.Clientset.CoreV1(), m} +} + +// CoreV1 returns fake CoreV1Interface +func (m *FakeNodeHandler) CoreV1() v1core.CoreV1Interface { + return &FakeLegacyHandler{m.Clientset.CoreV1(), m} +} + +// Nodes return fake NodeInterfaces. +func (m *FakeLegacyHandler) Nodes() v1core.NodeInterface { + return m.n +} + +// Create adds a new Node to the fake store. +func (m *FakeNodeHandler) Create(_ context.Context, node *v1.Node, _ metav1.CreateOptions) (*v1.Node, error) { + m.lock.Lock() + defer func() { + m.RequestCount++ + m.lock.Unlock() + }() + for _, n := range m.Existing { + if n.Name == node.Name { + return nil, apierrors.NewAlreadyExists(v1.Resource("nodes"), node.Name) + } + } + if m.CreateHook == nil || m.CreateHook(m, node) { + nodeCopy := *node + m.CreatedNodes = append(m.CreatedNodes, &nodeCopy) + return node, nil + } + return nil, errors.New("create error") +} + +// Get returns a Node from the fake store. +func (m *FakeNodeHandler) Get(_ context.Context, name string, opts metav1.GetOptions) (*v1.Node, error) { + m.lock.Lock() + defer func() { + m.RequestCount++ + m.lock.Unlock() + }() + for i := range m.UpdatedNodes { + if m.UpdatedNodes[i].Name == name { + nodeCopy := *m.UpdatedNodes[i] + return &nodeCopy, nil + } + } + for i := range m.Existing { + if m.Existing[i].Name == name { + nodeCopy := *m.Existing[i] + return &nodeCopy, nil + } + } + return nil, nil +} + +// List returns a list of Nodes from the fake store. 
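+// The result merges UpdatedNodes, Existing and CreatedNodes, skipping duplicates and
+// any node that also appears in DeletedNodes, so tests observe the handler's latest
+// view of the cluster.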
+func (m *FakeNodeHandler) List(_ context.Context, opts metav1.ListOptions) (*v1.NodeList, error) { + m.lock.Lock() + defer func() { + m.RequestCount++ + m.lock.Unlock() + }() + var nodes []*v1.Node + for i := 0; i < len(m.UpdatedNodes); i++ { + if !contains(m.UpdatedNodes[i], m.DeletedNodes) { + nodes = append(nodes, m.UpdatedNodes[i]) + } + } + for i := 0; i < len(m.Existing); i++ { + if !contains(m.Existing[i], m.DeletedNodes) && !contains(m.Existing[i], nodes) { + nodes = append(nodes, m.Existing[i]) + } + } + for i := 0; i < len(m.CreatedNodes); i++ { + if !contains(m.CreatedNodes[i], m.DeletedNodes) && !contains(m.CreatedNodes[i], nodes) { + nodes = append(nodes, m.CreatedNodes[i]) + } + } + nodeList := &v1.NodeList{} + for _, node := range nodes { + nodeList.Items = append(nodeList.Items, *node) + } + return nodeList, nil +} + +// Delete deletes a Node from the fake store. +func (m *FakeNodeHandler) Delete(_ context.Context, id string, opt metav1.DeleteOptions) error { + m.lock.Lock() + defer func() { + m.RequestCount++ + if m.DeleteWaitChan != nil { + m.DeleteWaitChan <- struct{}{} + } + m.lock.Unlock() + }() + m.DeletedNodes = append(m.DeletedNodes, NewNode(id)) + return nil +} + +// DeleteCollection deletes a collection of Nodes from the fake store. +func (m *FakeNodeHandler) DeleteCollection(_ context.Context, opt metav1.DeleteOptions, listOpts metav1.ListOptions) error { + return nil +} + +// Update updates a Node in the fake store. +func (m *FakeNodeHandler) Update(_ context.Context, node *v1.Node, _ metav1.UpdateOptions) (*v1.Node, error) { + m.lock.Lock() + defer func() { + m.RequestCount++ + m.lock.Unlock() + }() + + nodeCopy := *node + for i, updateNode := range m.UpdatedNodes { + if updateNode.Name == nodeCopy.Name { + m.UpdatedNodes[i] = &nodeCopy + return node, nil + } + } + m.UpdatedNodes = append(m.UpdatedNodes, &nodeCopy) + return node, nil +} + +// UpdateStatus updates a status of a Node in the fake store. +func (m *FakeNodeHandler) UpdateStatus(_ context.Context, node *v1.Node, _ metav1.UpdateOptions) (*v1.Node, error) { + m.lock.Lock() + defer func() { + m.RequestCount++ + m.lock.Unlock() + }() + + var origNodeCopy v1.Node + found := false + for i := range m.Existing { + if m.Existing[i].Name == node.Name { + origNodeCopy = *m.Existing[i] + found = true + break + } + } + updatedNodeIndex := -1 + for i := range m.UpdatedNodes { + if m.UpdatedNodes[i].Name == node.Name { + origNodeCopy = *m.UpdatedNodes[i] + updatedNodeIndex = i + found = true + break + } + } + + if !found { + return nil, fmt.Errorf("not found node %v", node) + } + + origNodeCopy.Status = node.Status + if updatedNodeIndex < 0 { + m.UpdatedNodes = append(m.UpdatedNodes, &origNodeCopy) + } else { + m.UpdatedNodes[updatedNodeIndex] = &origNodeCopy + } + + nodeCopy := *node + m.UpdatedNodeStatuses = append(m.UpdatedNodeStatuses, &nodeCopy) + return node, nil +} + +// PatchStatus patches a status of a Node in the fake store. +func (m *FakeNodeHandler) PatchStatus(ctx context.Context, nodeName string, data []byte) (*v1.Node, error) { + m.RequestCount++ + return m.Patch(ctx, nodeName, types.StrategicMergePatchType, data, metav1.PatchOptions{}, "status") +} + +// Watch watches Nodes in a fake store. +func (m *FakeNodeHandler) Watch(_ context.Context, opts metav1.ListOptions) (watch.Interface, error) { + return watch.NewFake(), nil +} + +// Patch patches a Node in the fake store. 
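+// JSON, merge and strategic-merge patch types are handled; any other patch type is
+// logged as an error and a nil node is returned.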
+func (m *FakeNodeHandler) Patch(_ context.Context, name string, pt types.PatchType, data []byte, _ metav1.PatchOptions, subresources ...string) (*v1.Node, error) { + m.lock.Lock() + defer func() { + m.RequestCount++ + if m.PatchWaitChan != nil { + m.PatchWaitChan <- struct{}{} + } + m.lock.Unlock() + }() + var nodeCopy v1.Node + for i := range m.Existing { + if m.Existing[i].Name == name { + nodeCopy = *m.Existing[i] + } + } + updatedNodeIndex := -1 + for i := range m.UpdatedNodes { + if m.UpdatedNodes[i].Name == name { + nodeCopy = *m.UpdatedNodes[i] + updatedNodeIndex = i + } + } + + originalObjJS, err := json.Marshal(nodeCopy) + if err != nil { + klog.Errorf("Failed to marshal %v", nodeCopy) + return nil, nil + } + var originalNode v1.Node + if err = json.Unmarshal(originalObjJS, &originalNode); err != nil { + klog.Errorf("Failed to unmarshal original object: %v", err) + return nil, nil + } + + var patchedObjJS []byte + switch pt { + case types.JSONPatchType: + patchObj, err := jsonpatch.DecodePatch(data) + if err != nil { + klog.Error(err.Error()) + return nil, nil + } + if patchedObjJS, err = patchObj.Apply(originalObjJS); err != nil { + klog.Error(err.Error()) + return nil, nil + } + case types.MergePatchType: + if patchedObjJS, err = jsonpatch.MergePatch(originalObjJS, data); err != nil { + klog.Error(err.Error()) + return nil, nil + } + case types.StrategicMergePatchType: + if patchedObjJS, err = strategicpatch.StrategicMergePatch(originalObjJS, data, originalNode); err != nil { + klog.Error(err.Error()) + return nil, nil + } + default: + klog.Errorf("unknown Content-Type header for patch: %v", pt) + return nil, nil + } + + var updatedNode v1.Node + if err = json.Unmarshal(patchedObjJS, &updatedNode); err != nil { + klog.Errorf("Failed to unmarshal patched object: %v", err) + return nil, nil + } + + if updatedNodeIndex < 0 { + m.UpdatedNodes = append(m.UpdatedNodes, &updatedNode) + } else { + m.UpdatedNodes[updatedNodeIndex] = &updatedNode + } + + return &updatedNode, nil +} + +// Apply applies a NodeApplyConfiguration to a Node in the fake store. +func (m *FakeNodeHandler) Apply(ctx context.Context, node *v1apply.NodeApplyConfiguration, opts metav1.ApplyOptions) (*v1.Node, error) { + patchOpts := opts.ToPatchOptions() + data, err := json.Marshal(node) + if err != nil { + return nil, err + } + name := node.Name + if name == nil { + return nil, fmt.Errorf("deployment.Name must be provided to Apply") + } + + return m.Patch(ctx, *name, types.ApplyPatchType, data, patchOpts) +} + +// ApplyStatus applies a status of a Node in the fake store. +func (m *FakeNodeHandler) ApplyStatus(ctx context.Context, node *v1apply.NodeApplyConfiguration, opts metav1.ApplyOptions) (*v1.Node, error) { + patchOpts := opts.ToPatchOptions() + data, err := json.Marshal(node) + if err != nil { + return nil, err + } + name := node.Name + if name == nil { + return nil, fmt.Errorf("deployment.Name must be provided to Apply") + } + + return m.Patch(ctx, *name, types.ApplyPatchType, data, patchOpts, "status") +} + +// FakeRecorder is used as a fake during testing. 
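+// Events emitted through it are kept in the in-memory Events slice so tests can
+// assert on what would have been sent to the API server.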
+type FakeRecorder struct { + sync.Mutex + source v1.EventSource + Events []*v1.Event + clock clock.Clock +} + +// Event emits a fake event to the fake recorder +func (f *FakeRecorder) Event(obj runtime.Object, eventtype, reason, message string) { + f.generateEvent(obj, metav1.Now(), eventtype, reason, message) +} + +// Eventf emits a fake formatted event to the fake recorder +func (f *FakeRecorder) Eventf(obj runtime.Object, eventtype, reason, messageFmt string, args ...interface{}) { + f.Event(obj, eventtype, reason, fmt.Sprintf(messageFmt, args...)) +} + +// AnnotatedEventf emits a fake formatted event to the fake recorder +func (f *FakeRecorder) AnnotatedEventf(obj runtime.Object, annotations map[string]string, eventtype, reason, messageFmt string, args ...interface{}) { + f.Eventf(obj, eventtype, reason, messageFmt, args...) +} + +func (f *FakeRecorder) generateEvent(obj runtime.Object, timestamp metav1.Time, eventtype, reason, message string) { + f.Lock() + defer f.Unlock() + ref, err := ref.GetReference(scheme.Scheme, obj) + if err != nil { + klog.Errorf("Encountered error while getting reference: %v", err) + return + } + event := f.makeEvent(ref, eventtype, reason, message) + event.Source = f.source + if f.Events != nil { + f.Events = append(f.Events, event) + } +} + +func (f *FakeRecorder) makeEvent(ref *v1.ObjectReference, eventtype, reason, message string) *v1.Event { + t := metav1.Time{Time: f.clock.Now()} + namespace := ref.Namespace + if namespace == "" { + namespace = metav1.NamespaceDefault + } + + clientref := v1.ObjectReference{ + Kind: ref.Kind, + Namespace: ref.Namespace, + Name: ref.Name, + UID: ref.UID, + APIVersion: ref.APIVersion, + ResourceVersion: ref.ResourceVersion, + FieldPath: ref.FieldPath, + } + + return &v1.Event{ + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf("%v.%x", ref.Name, t.UnixNano()), + Namespace: namespace, + }, + InvolvedObject: clientref, + Reason: reason, + Message: message, + FirstTimestamp: t, + LastTimestamp: t, + Count: 1, + Type: eventtype, + } +} + +// NewFakeRecorder returns a pointer to a newly constructed FakeRecorder. +func NewFakeRecorder() *FakeRecorder { + return &FakeRecorder{ + source: v1.EventSource{Component: "nodeControllerTest"}, + Events: []*v1.Event{}, + clock: clocktesting.NewFakeClock(time.Now()), + } +} + +// NewNode is a helper function for creating Nodes for testing. +func NewNode(name string) *v1.Node { + return &v1.Node{ + ObjectMeta: metav1.ObjectMeta{Name: name}, + Status: v1.NodeStatus{ + Capacity: v1.ResourceList{ + v1.ResourceName(v1.ResourceCPU): resource.MustParse("10"), + v1.ResourceName(v1.ResourceMemory): resource.MustParse("10G"), + }, + }, + } +} + +// NewPod is a helper function for creating Pods for testing. 
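+// The returned pod lives in the "default" namespace, is bound to the given host
+// and reports a Ready condition.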
+func NewPod(name, host string) *v1.Pod { + pod := &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: "default", + Name: name, + }, + Spec: v1.PodSpec{ + NodeName: host, + }, + Status: v1.PodStatus{ + Conditions: []v1.PodCondition{ + { + Type: v1.PodReady, + Status: v1.ConditionTrue, + }, + }, + }, + } + + return pod +} + +func contains(node *v1.Node, nodes []*v1.Node) bool { + for i := 0; i < len(nodes); i++ { + if node.Name == nodes[i].Name { + return true + } + } + return false +} + +// GetZones returns list of zones for all Nodes stored in FakeNodeHandler +func GetZones(nodeHandler *FakeNodeHandler) []string { + nodes, _ := nodeHandler.List(context.TODO(), metav1.ListOptions{}) + zones := sets.NewString() + for _, node := range nodes.Items { + zones.Insert(utilnode.GetZoneKey(&node)) + } + return zones.List() +} + +// CreateZoneID returns a single zoneID for a given region and zone. +func CreateZoneID(region, zone string) string { + return region + ":\x00:" + zone +} + +// GetKey is a helper function used by controllers unit tests to get the +// key for a given kubernetes resource. +func GetKey(obj interface{}, t *testing.T) string { + tombstone, ok := obj.(cache.DeletedFinalStateUnknown) + if ok { + // if tombstone , try getting the value from tombstone.Obj + obj = tombstone.Obj + } + val := reflect.ValueOf(obj).Elem() + name := val.FieldByName("Name").String() + kind := val.FieldByName("Kind").String() + // Note kind is not always set in the tests, so ignoring that for now + if len(name) == 0 || len(kind) == 0 { + t.Errorf("Unexpected object %v", obj) + } + + key, err := keyFunc(obj) + if err != nil { + t.Errorf("Unexpected error getting key for %v %v: %v", kind, name, err) + return "" + } + return key +} diff --git a/pkg/controllers/options/nodeipam_controller.go b/pkg/controllers/options/nodeipam_controller.go new file mode 100644 index 0000000000..3025bcc35a --- /dev/null +++ b/pkg/controllers/options/nodeipam_controller.go @@ -0,0 +1,81 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package options + +import ( + "fmt" + + "github.com/spf13/pflag" + "k8s.io/cloud-provider-aws/pkg/controllers/nodeipam/config" +) + +const ( + + // DefaultNodeMaskCIDR is default mask size for IPv4 node cidr + DefaultNodeMaskCIDR = int32(24) +) + +// NodeIpamControllerOptions contains the inputs that can +// be used in the nodeipam controller +type NodeIpamControllerOptions struct { + RateLimit float64 + BurstLimit int + DualStack bool + // NodeCIDRMaskSize is the mask size for node cidr in single-stack cluster. + // This can be used only with single stack clusters and is incompatible with dual stack clusters. + NodeCIDRMaskSize int32 +} + +// AddFlags add the additional flags for the controller +func (o *NodeIpamControllerOptions) AddFlags(fs *pflag.FlagSet) { + fs.Float64Var(&o.RateLimit, "nodeipam-controller-rate-limit", o.RateLimit, + "Steady-state rate limit (per sec) at which the controller processes items in its queue. 
A value of zero (default) disables rate limiting.") + fs.IntVar(&o.BurstLimit, "nodeipam-controller-burst-limit", o.BurstLimit, + "Burst limit at which the controller processes items in its queue. A value of zero (default) disables rate limiting.") + fs.BoolVar(&o.DualStack, "dualstack", o.DualStack, "IP mode in which the controller runs. Can be either dualstack or IPv6. A value of false (default) enables IPv6 only mode. Experimental feature: may not work, enable at your own risk.") + fs.Int32Var(&o.NodeCIDRMaskSize, "node-cidr-mask-size", o.NodeCIDRMaskSize, "Mask size for node cidr in cluster. Default is 24 for IPv4") +} + +// Validate checks for errors from user input +func (o *NodeIpamControllerOptions) Validate() error { + + if o.RateLimit < 0.0 { + return fmt.Errorf("--nodeipam-controller-rate-limit should not be less than zero") + } + + if o.BurstLimit < 0 { + return fmt.Errorf("--nodeipam-controller-burst-limit should not be less than zero") + } + + return nil +} + +// ApplyTo fills up NodeIpamController config with options. +func (o *NodeIpamControllerOptions) ApplyTo(cfg *config.NodeIPAMControllerConfiguration) error { + if o == nil { + return nil + } + + cfg.DualStack = o.DualStack + if o.NodeCIDRMaskSize == 0 { + cfg.NodeCIDRMaskSize = DefaultNodeMaskCIDR + } else { + cfg.NodeCIDRMaskSize = o.NodeCIDRMaskSize + + } + return nil +} diff --git a/pkg/controllers/options/tagging_controller.go b/pkg/controllers/options/tagging_controller.go index c606a8589a..7ce892ddc6 100644 --- a/pkg/controllers/options/tagging_controller.go +++ b/pkg/controllers/options/tagging_controller.go @@ -15,6 +15,7 @@ package options import ( "fmt" + "github.com/spf13/pflag" ) diff --git a/pkg/providers/v1/aws.go b/pkg/providers/v1/aws.go index 6cc8505f66..3d4b7d565b 100644 --- a/pkg/providers/v1/aws.go +++ b/pkg/providers/v1/aws.go @@ -5283,3 +5283,20 @@ func getRegionFromMetadata(cfg CloudConfig, metadata EC2Metadata) (string, error return cfg.GetRegion(metadata) } + +// DescribeNetworkInterfaces returns network interface information for the given input +func (c *Cloud) DescribeNetworkInterfaces(input *ec2.DescribeNetworkInterfacesInput) (*ec2.NetworkInterface, error) { + + eni, err := c.ec2.DescribeNetworkInterfaces(input) + if err != nil { + return nil, err + } + if len(eni.NetworkInterfaces) == 0 { + return nil, nil + } + if len(eni.NetworkInterfaces) != 1 { + // This should not be possible - ids should be unique + return nil, fmt.Errorf("multiple interfaces found with same id %q", eni.NetworkInterfaces) + } + return eni.NetworkInterfaces[0], nil +} diff --git a/pkg/providers/v1/aws_fakes.go b/pkg/providers/v1/aws_fakes.go index 3de44b6448..898d326d45 100644 --- a/pkg/providers/v1/aws_fakes.go +++ b/pkg/providers/v1/aws_fakes.go @@ -805,6 +805,20 @@ func (ec2i *FakeEC2Impl) DescribeNetworkInterfaces(input *ec2.DescribeNetworkInt }, } } + + if *filter.Values[0] == "i-123456789" { + networkInterface[0].Ipv6Addresses = []*ec2.NetworkInterfaceIpv6Address{ + { + Ipv6Address: aws.String("2001:db8:3333:4444:5555:6666:7777:8888"), + }, + } + networkInterface[0].Ipv6Prefixes = []*ec2.Ipv6PrefixSpecification{ + { + Ipv6Prefix: aws.String("2001:0db8:85a3:0000:0000:8a2e:0000:0000/80"), + }, + } + } + } return &ec2.DescribeNetworkInterfacesOutput{ diff --git a/pkg/util/controller_utils.go b/pkg/util/controller_utils.go new file mode 100644 index 0000000000..4890c76fe8 --- /dev/null +++ b/pkg/util/controller_utils.go @@ -0,0 +1,103 @@ +/* +Copyright 2023 The Kubernetes Authors. 
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package node
+
+import (
+	"fmt"
+
+	v1 "k8s.io/api/core/v1"
+	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
+	"k8s.io/client-go/tools/cache"
+	"k8s.io/client-go/tools/record"
+	"k8s.io/klog/v2"
+)
+
+// RecordNodeStatusChange records an event related to a node status change. (Common to lifecycle and ipam)
+func RecordNodeStatusChange(recorder record.EventRecorder, node *v1.Node, newStatus string) {
+	ref := &v1.ObjectReference{
+		APIVersion: "v1",
+		Kind:       "Node",
+		Name:       node.Name,
+		UID:        node.UID,
+		Namespace:  "",
+	}
+	klog.V(2).Infof("Recording status change %s event message for node %s", newStatus, node.Name)
+	// TODO: This requires a transaction, either both node status is updated
+	// and event is recorded or neither should happen, see issue #6055.
+	recorder.Eventf(ref, v1.EventTypeNormal, newStatus, "Node %s status is now: %s", node.Name, newStatus)
+}
+
+// CreateAddNodeHandler creates an add node handler.
+func CreateAddNodeHandler(f func(node *v1.Node) error) func(obj interface{}) {
+	return func(originalObj interface{}) {
+		node := originalObj.(*v1.Node).DeepCopy()
+		if err := f(node); err != nil {
+			utilruntime.HandleError(fmt.Errorf("Error while processing Node Add: %v", err))
+		}
+	}
+}
+
+// CreateUpdateNodeHandler creates a node update handler. (Common to lifecycle and ipam)
+func CreateUpdateNodeHandler(f func(oldNode, newNode *v1.Node) error) func(oldObj, newObj interface{}) {
+	return func(origOldObj, origNewObj interface{}) {
+		node := origNewObj.(*v1.Node).DeepCopy()
+		prevNode := origOldObj.(*v1.Node).DeepCopy()
+
+		if err := f(prevNode, node); err != nil {
+			utilruntime.HandleError(fmt.Errorf("Error while processing Node Update: %v", err))
+		}
+	}
+}
+
+// CreateDeleteNodeHandler creates a delete node handler. (Common to lifecycle and ipam)
+func CreateDeleteNodeHandler(f func(node *v1.Node) error) func(obj interface{}) {
+	return func(originalObj interface{}) {
+		originalNode, isNode := originalObj.(*v1.Node)
+		// We can get DeletedFinalStateUnknown instead of *v1.Node here and
+		// we need to handle that correctly. #34692
+		if !isNode {
+			deletedState, ok := originalObj.(cache.DeletedFinalStateUnknown)
+			if !ok {
+				klog.Errorf("Received unexpected object: %v", originalObj)
+				return
+			}
+			originalNode, ok = deletedState.Obj.(*v1.Node)
+			if !ok {
+				klog.Errorf("DeletedFinalStateUnknown contained non-Node object: %v", deletedState.Obj)
+				return
+			}
+		}
+		node := originalNode.DeepCopy()
+		if err := f(node); err != nil {
+			utilruntime.HandleError(fmt.Errorf("Error while processing Node Delete: %v", err))
+		}
+	}
+}
+
+// GetNodeCondition extracts the provided condition from the given status.
+// It returns -1 and nil if the condition is not present; otherwise it returns
+// the index of the located condition and the condition itself.
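+// Typical usage (illustrative only):
+//   _, readyCond := GetNodeCondition(&node.Status, v1.NodeReady)
+//   if readyCond != nil && readyCond.Status == v1.ConditionTrue { /* node is Ready */ }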
+func GetNodeCondition(status *v1.NodeStatus, conditionType v1.NodeConditionType) (int, *v1.NodeCondition) { + if status == nil { + return -1, nil + } + for i := range status.Conditions { + if status.Conditions[i].Type == conditionType { + return i, &status.Conditions[i] + } + } + return -1, nil +} diff --git a/tests/e2e/go.mod b/tests/e2e/go.mod index a016c8191f..e32d17c086 100644 --- a/tests/e2e/go.mod +++ b/tests/e2e/go.mod @@ -3,6 +3,7 @@ module k8s.io/cloud-provider-aws/tests/e2e go 1.18 require ( + github.com/aws/aws-sdk-go v1.44.116 github.com/onsi/ginkgo/v2 v2.9.4 github.com/onsi/gomega v1.27.6 k8s.io/api v0.26.0 @@ -39,6 +40,7 @@ require ( github.com/grpc-ecosystem/grpc-gateway/v2 v2.7.0 // indirect github.com/imdario/mergo v0.3.11 // indirect github.com/inconshreveable/mousetrap v1.0.1 // indirect + github.com/jmespath/go-jmespath v0.4.0 // indirect github.com/josharian/intern v1.0.0 // indirect github.com/json-iterator/go v1.1.12 // indirect github.com/mailru/easyjson v0.7.6 // indirect diff --git a/tests/e2e/go.sum b/tests/e2e/go.sum index 18745fcd1a..d5df6fb1aa 100644 --- a/tests/e2e/go.sum +++ b/tests/e2e/go.sum @@ -41,6 +41,8 @@ github.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4/go.mod h1:ybxpYRF github.com/alecthomas/units v0.0.0-20190924025748-f65c72e2690d/go.mod h1:rBZYJk541a8SKzHPHnH3zbiI+7dagKZ0cgpgrD7Fyho= github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY= github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio= +github.com/aws/aws-sdk-go v1.44.116 h1:NpLIhcvLWXJZAEwvPj3TDHeqp7DleK6ZUVYyW01WNHY= +github.com/aws/aws-sdk-go v1.44.116/go.mod h1:y4AeaBuwd2Lk+GepC1E9v0qOiTws0MIWAX4oIKwKHZo= github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= @@ -202,6 +204,10 @@ github.com/imdario/mergo v0.3.11 h1:3tnifQM4i+fbajXKBHXWEH+KvNHqojZ778UH75j3bGA= github.com/imdario/mergo v0.3.11/go.mod h1:jmQim1M+e3UYxmgPu/WyfjB3N3VflVyUjjjwH0dnCYA= github.com/inconshreveable/mousetrap v1.0.1 h1:U3uMjPSQEBMNp1lFxmllqCPM6P5u/Xq7Pgzkat/bFNc= github.com/inconshreveable/mousetrap v1.0.1/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= +github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9YPoQUg= +github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo= +github.com/jmespath/go-jmespath/internal/testify v1.5.1 h1:shLQSRRSCCPj3f2gpwzGwWFoC7ycTf1rcQZHOlsJ6N8= +github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U= github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4= diff --git a/tests/e2e/ipv6prefix.go b/tests/e2e/ipv6prefix.go new file mode 100644 index 0000000000..8f1f3483f4 --- /dev/null +++ b/tests/e2e/ipv6prefix.go @@ -0,0 +1,81 @@ +/* +Copyright 2023 The Kubernetes Authors. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package e2e
+
+import (
+	"context"
+
+	"github.com/aws/aws-sdk-go/aws"
+	"github.com/aws/aws-sdk-go/aws/session"
+	"github.com/aws/aws-sdk-go/service/ec2"
+	. "github.com/onsi/ginkgo/v2"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	clientset "k8s.io/client-go/kubernetes"
+	"k8s.io/kubernetes/test/e2e/framework"
+	admissionapi "k8s.io/pod-security-admission/api"
+)
+
+var _ = Describe("[cloud-provider-aws-e2e]", Label("ipv6 prefix"), func() {
+	f := framework.NewDefaultFramework("cloud-provider-aws")
+	f.NamespacePodSecurityEnforceLevel = admissionapi.LevelPrivileged
+
+	var (
+		cs clientset.Interface
+	)
+
+	BeforeEach(func() {
+		cs = f.ClientSet
+	})
+
+	AfterEach(func() {
+		// After each test
+	})
+
+	It("should check if the nodes have the correct ipv6 prefix from the NIC assigned", func() {
+
+		sess, err := session.NewSession(&aws.Config{
+			Region: aws.String("eu-north-1")},
+		)
+		framework.ExpectNoError(err)
+
+		svc := ec2.New(sess)
+
+		// get the nodes
+		nodes, err := cs.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{})
+		framework.ExpectNoError(err)
+
+		for _, node := range nodes.Items {
+			input := &ec2.DescribeInstancesInput{
+				InstanceIds: []*string{
+					&node.Name,
+				},
+			}
+			result, err := svc.DescribeInstances(input)
+			framework.ExpectNoError(err)
+			for _, reservation := range result.Reservations {
+				for _, instance := range reservation.Instances {
+					for _, networkInterface := range instance.NetworkInterfaces {
+						for _, ipv6Prefix := range networkInterface.Ipv6Prefixes {
+							if node.Spec.PodCIDR != *ipv6Prefix.Ipv6Prefix {
+								framework.Failf("Name: %s, PodCIDR: %s does not match IPv6Prefix: %s", node.Name, node.Spec.PodCIDR, *ipv6Prefix.Ipv6Prefix)
+							}
+						}
+					}
+				}
+			}
+		}
+	})
+})
diff --git a/tests/e2e/loadbalancer.go b/tests/e2e/loadbalancer.go
index 79b0e2625e..c5c855872f 100644
--- a/tests/e2e/loadbalancer.go
+++ b/tests/e2e/loadbalancer.go
@@ -23,7 +23,7 @@ import (
 	admissionapi "k8s.io/pod-security-admission/api"
 )
 
-var _ = Describe("[cloud-provider-aws-e2e] loadbalancer", func() {
+var _ = Describe("[cloud-provider-aws-e2e]", Label("loadbalancer"), func() {
 	f := framework.NewDefaultFramework("cloud-provider-aws")
 	f.NamespacePodSecurityEnforceLevel = admissionapi.LevelPrivileged