@@ -19,47 +19,53 @@ package machinedeployment
1919import (
2020 "context"
2121 "fmt"
22+ "sort"
2223
23- "github.com/pkg/errors"
24- metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2524 "k8s.io/klog/v2"
2625 "k8s.io/utils/ptr"
2726 ctrl "sigs.k8s.io/controller-runtime"
28- "sigs.k8s.io/controller-runtime/pkg/client"
2927
3028 clusterv1 "sigs.k8s.io/cluster-api/api/core/v1beta2"
3129 "sigs.k8s.io/cluster-api/internal/controllers/machinedeployment/mdutil"
3230 "sigs.k8s.io/cluster-api/util/patch"
3331)
3432
35- // rolloutOnDelete implements the logic for the OnDelete rollout strategy.
33+ // rolloutOnDelete reconcile machine sets controlled by a MachineDeployment that is using the OnDelete strategy.
3634func (r * Reconciler ) rolloutOnDelete (ctx context.Context , md * clusterv1.MachineDeployment , msList []* clusterv1.MachineSet , templateExists bool ) error {
35+ // TODO(in-place): move create newMS into rolloutPlanner
3736 newMS , oldMSs , err := r .getAllMachineSetsAndSyncRevision (ctx , md , msList , true , templateExists )
3837 if err != nil {
3938 return err
4039 }
4140
42- // newMS can be nil in case there is already a MachineSet associated with this deployment,
43- // but there are only either changes in annotations or MinReadySeconds. Or in other words,
44- // this can be nil if there are changes, but no replacement of existing machines is needed.
45- if newMS == nil {
46- return nil
41+ planner := newRolloutPlanner ()
42+ planner .md = md
43+ planner .newMS = newMS
44+ planner .oldMSs = oldMSs
45+
46+ if err := planner .planOnDelete (ctx ); err != nil {
47+ return err
4748 }
4849
4950 allMSs := append (oldMSs , newMS )
5051
51- // Scale up, if we can.
52- if err := r .reconcileNewMachineSetOnDelete (ctx , md , oldMSs , newMS ); err != nil {
52+ // TODO(in-place): also apply/remove labels to MS should go into rolloutPlanner
53+ if err := r .cleanupDisableMachineCreateAnnotation (ctx , newMS ); err != nil {
5354 return err
5455 }
55-
56- if err := r .syncDeploymentStatus (allMSs , newMS , md ); err != nil {
56+ if err := r .addDisableMachineCreateAnnotation (ctx , oldMSs ); err != nil {
5757 return err
5858 }
5959
60- // Scale down, if we can.
61- if err := r .reconcileOldMachineSetsOnDelete (ctx , oldMSs , allMSs , md ); err != nil {
62- return err
60+ // TODO(in-place): this should be changed as soon as rolloutPlanner support MS creation and adding/removing labels from MS
61+ for _ , ms := range allMSs {
62+ scaleIntent := ptr .Deref (ms .Spec .Replicas , 0 )
63+ if v , ok := planner .scaleIntents [ms .Name ]; ok {
64+ scaleIntent = v
65+ }
66+ if err := r .scaleMachineSet (ctx , ms , scaleIntent , md ); err != nil {
67+ return err
68+ }
6369 }
6470
6571 if err := r .syncDeploymentStatus (allMSs , newMS , md ); err != nil {
@@ -75,115 +81,109 @@ func (r *Reconciler) rolloutOnDelete(ctx context.Context, md *clusterv1.MachineD
7581 return nil
7682}
7783
84+ // planOnDelete determine how to proceed with the rollout when using the OnDelete strategy if we are not yet at the desired state.
85+ func (p * rolloutPlanner ) planOnDelete (ctx context.Context ) error {
86+ // Scale up, if we can.
87+ if err := p .reconcileNewMachineSet (ctx ); err != nil {
88+ return err
89+ }
90+
91+ // Scale down, if we can.
92+ p .reconcileOldMachineSetsOnDelete (ctx )
93+ return nil
94+ }
95+
7896// reconcileOldMachineSetsOnDelete handles reconciliation of Old MachineSets associated with the MachineDeployment in the OnDelete rollout strategy.
79- func (r * Reconciler ) reconcileOldMachineSetsOnDelete (ctx context.Context , oldMSs [] * clusterv1. MachineSet , allMSs [] * clusterv1. MachineSet , deployment * clusterv1. MachineDeployment ) error {
97+ func (p * rolloutPlanner ) reconcileOldMachineSetsOnDelete (ctx context.Context ) {
8098 log := ctrl .LoggerFrom (ctx )
81- if deployment .Spec .Replicas == nil {
82- return errors .Errorf ("spec replicas for MachineDeployment %q/%q is nil, this is unexpected" ,
83- deployment .Namespace , deployment .Name )
99+ oldMachinesCount := mdutil .GetReplicaCountForMachineSets (p .oldMSs )
100+ if oldMachinesCount == 0 {
101+ // Can't scale down further
102+ return
84103 }
85- log .V (4 ).Info ("Checking to see if machines have been deleted or are in the process of deleting for old machine sets" )
86- totalReplicas := mdutil .GetReplicaCountForMachineSets (allMSs )
87- scaleDownAmount := totalReplicas - * deployment .Spec .Replicas
88- for _ , oldMS := range oldMSs {
89- log := log .WithValues ("MachineSet" , klog .KObj (oldMS ))
90- if oldMS .Spec .Replicas == nil || * oldMS .Spec .Replicas <= 0 {
91- log .V (4 ).Info ("fully scaled down" )
104+
105+ // Determine if there are more Machines than MD.spec.replicas, e.g. due to a scale down in MD.
106+ newMSReplicas := ptr .Deref (p .newMS .Spec .Replicas , 0 )
107+ if v , ok := p .scaleIntents [p .newMS .Name ]; ok {
108+ newMSReplicas = v
109+ }
110+ totReplicas := oldMachinesCount + newMSReplicas
111+ totalScaleDownCount := max (totReplicas - ptr .Deref (p .md .Spec .Replicas , 0 ), 0 )
112+
113+ // Sort oldMSs so the system will start deleting from the oldest MS first.
114+ sort .Sort (mdutil .MachineSetsByCreationTimestamp (p .oldMSs ))
115+
116+ // Start scaling down old machine sets to acknowledge spec.replicas without corresponding status.replicas.
117+ // Note: spec.replicas without corresponding status.replicas exists
118+ // - after a user manually deletes a replica
119+ // - when a newMS not yet fully provisioned suddenly becomes an oldMS.
120+ // In both cases spec.replicas without corresponding status.replicas should be dropped, no matter
121+ // if there are replicas to be scaled down due to a scale down in MD or not.
122+ // However, just in case there are replicas to be scaled down due to a scale down in MD, deleted replicas should
123+ // be deducted from the totalScaleDownCount.
124+ for _ , oldMS := range p .oldMSs {
125+ // No op if this MS has been already scaled down to zero.
126+ if ptr .Deref (oldMS .Spec .Replicas , 0 ) <= 0 {
92127 continue
93128 }
94- if oldMS .Annotations == nil {
95- oldMS .Annotations = map [string ]string {}
129+
130+ scaleDownCount := max (ptr .Deref (oldMS .Spec .Replicas , 0 )- ptr .Deref (oldMS .Status .Replicas , 0 ), 0 )
131+ if scaleDownCount > 0 {
132+ newScaleIntent := max (ptr .Deref (oldMS .Spec .Replicas , 0 )- scaleDownCount , 0 )
133+ log .V (5 ).Info (fmt .Sprintf ("Setting scale down intent for MachineSet %s to %d replicas (-%d)" , oldMS .Name , newScaleIntent , scaleDownCount ), "MachineSet" , klog .KObj (oldMS ))
134+ p .scaleIntents [oldMS .Name ] = newScaleIntent
135+
136+ totalScaleDownCount -= scaleDownCount
137+ }
138+ }
139+
140+ // Scale down additional replicas if replicas removed in the for loop above were not enough to align to MD replicas.
141+ for _ , oldMS := range p .oldMSs {
142+ // No op if there is no scaling down left.
143+ if totalScaleDownCount <= 0 {
144+ break
145+ }
146+
147+ // No op if this MS has been already scaled down to zero.
148+ scaleIntent := ptr .Deref (oldMS .Spec .Replicas , 0 )
149+ if v , ok := p .scaleIntents [oldMS .Name ]; ok {
150+ scaleIntent = v
96151 }
152+
153+ if scaleIntent <= 0 {
154+ continue
155+ }
156+
157+ scaleDownCount := min (scaleIntent , totalScaleDownCount )
158+ if scaleDownCount > 0 {
159+ newScaleIntent := max (ptr .Deref (oldMS .Spec .Replicas , 0 )- scaleDownCount , 0 )
160+ log .V (5 ).Info (fmt .Sprintf ("Setting scale down intent for MachineSet %s to %d replicas (-%d)" , oldMS .Name , newScaleIntent , scaleDownCount ), "MachineSet" , klog .KObj (oldMS ))
161+ p .scaleIntents [oldMS .Name ] = newScaleIntent
162+
163+ totalScaleDownCount -= scaleDownCount
164+ }
165+ }
166+ }
167+
168+ // addDisableMachineCreateAnnotation will add the disable machine create annotation to old MachineSets.
169+ func (r * Reconciler ) addDisableMachineCreateAnnotation (ctx context.Context , oldMSs []* clusterv1.MachineSet ) error {
170+ for _ , oldMS := range oldMSs {
171+ log := ctrl .LoggerFrom (ctx , "MachineSet" , klog .KObj (oldMS ))
97172 if _ , ok := oldMS .Annotations [clusterv1 .DisableMachineCreateAnnotation ]; ! ok {
98- log .V (4 ).Info ("setting annotation on old MachineSet to disable machine creation" )
173+ log .V (4 ).Info ("adding annotation on old MachineSet to disable machine creation" )
99174 patchHelper , err := patch .NewHelper (oldMS , r .Client )
100175 if err != nil {
101176 return err
102177 }
178+ if oldMS .Annotations == nil {
179+ oldMS .Annotations = map [string ]string {}
180+ }
103181 oldMS .Annotations [clusterv1 .DisableMachineCreateAnnotation ] = "true"
104- if err := patchHelper .Patch (ctx , oldMS ); err != nil {
182+ err = patchHelper .Patch (ctx , oldMS )
183+ if err != nil {
105184 return err
106185 }
107186 }
108- selectorMap , err := metav1 .LabelSelectorAsMap (& oldMS .Spec .Selector )
109- if err != nil {
110- log .V (4 ).Info ("Failed to convert MachineSet label selector to a map" , "err" , err )
111- continue
112- }
113- log .V (4 ).Info ("Fetching Machines associated with MachineSet" )
114- // Get all Machines linked to this MachineSet.
115- allMachinesInOldMS := & clusterv1.MachineList {}
116- if err := r .Client .List (ctx ,
117- allMachinesInOldMS ,
118- client .InNamespace (oldMS .Namespace ),
119- client .MatchingLabels (selectorMap ),
120- ); err != nil {
121- return errors .Wrap (err , "failed to list machines" )
122- }
123- totalMachineCount := int32 (len (allMachinesInOldMS .Items ))
124- log .V (4 ).Info ("Retrieved machines" , "totalMachineCount" , totalMachineCount )
125- updatedReplicaCount := totalMachineCount - mdutil .GetDeletingMachineCount (allMachinesInOldMS )
126- if updatedReplicaCount < 0 {
127- return errors .Errorf ("negative updated replica count %d for MachineSet %q, this is unexpected" , updatedReplicaCount , oldMS .Name )
128- }
129- machineSetScaleDownAmountDueToMachineDeletion := * oldMS .Spec .Replicas - updatedReplicaCount
130- if machineSetScaleDownAmountDueToMachineDeletion < 0 {
131- log .V (4 ).Info (fmt .Sprintf ("Error reconciling MachineSet %s" , oldMS .Name ), "err" , errors .Errorf ("Unexpected negative scale down amount: %d" , machineSetScaleDownAmountDueToMachineDeletion ))
132- }
133- scaleDownAmount -= machineSetScaleDownAmountDueToMachineDeletion
134- log .V (4 ).Info ("Adjusting replica count for deleted machines" , "oldReplicas" , oldMS .Spec .Replicas , "newReplicas" , updatedReplicaCount )
135- log .V (4 ).Info ("Scaling down" , "replicas" , updatedReplicaCount )
136- if err := r .scaleMachineSet (ctx , oldMS , updatedReplicaCount , deployment ); err != nil {
137- return err
138- }
139187 }
140- log .V (4 ).Info ("Finished reconcile of Old MachineSets to account for deleted machines. Now analyzing if there's more potential to scale down" )
141- for _ , oldMS := range oldMSs {
142- log := log .WithValues ("MachineSet" , klog .KObj (oldMS ))
143- if scaleDownAmount <= 0 {
144- break
145- }
146- if oldMS .Spec .Replicas == nil || * oldMS .Spec .Replicas <= 0 {
147- log .V (4 ).Info ("Fully scaled down" )
148- continue
149- }
150- updatedReplicaCount := * oldMS .Spec .Replicas
151- if updatedReplicaCount >= scaleDownAmount {
152- updatedReplicaCount -= scaleDownAmount
153- scaleDownAmount = 0
154- } else {
155- scaleDownAmount -= updatedReplicaCount
156- updatedReplicaCount = 0
157- }
158- log .V (4 ).Info ("Scaling down" , "replicas" , updatedReplicaCount )
159- if err := r .scaleMachineSet (ctx , oldMS , updatedReplicaCount , deployment ); err != nil {
160- return err
161- }
162- }
163- log .V (4 ).Info ("Finished reconcile of all old MachineSets" )
164188 return nil
165189}
166-
167- // reconcileNewMachineSetOnDelete handles reconciliation of the latest MachineSet associated with the MachineDeployment in the OnDelete rollout strategy.
168- func (r * Reconciler ) reconcileNewMachineSetOnDelete (ctx context.Context , md * clusterv1.MachineDeployment , oldMSs []* clusterv1.MachineSet , newMS * clusterv1.MachineSet ) error {
169- // TODO(in-place): also apply/remove labels should go into rolloutPlanner
170- if err := r .cleanupDisableMachineCreateAnnotation (ctx , newMS ); err != nil {
171- return err
172- }
173-
174- planner := newRolloutPlanner ()
175- planner .md = md
176- planner .newMS = newMS
177- planner .oldMSs = oldMSs
178-
179- if err := planner .reconcileNewMachineSet (ctx ); err != nil {
180- return err
181- }
182-
183- // TODO(in-place): this should be changed as soon as rolloutPlanner support MS creation and adding/removing labels from MS
184- scaleIntent := ptr .Deref (newMS .Spec .Replicas , 0 )
185- if v , ok := planner .scaleIntents [newMS .Name ]; ok {
186- scaleIntent = v
187- }
188- return r .scaleMachineSet (ctx , newMS , scaleIntent , md )
189- }
0 commit comments