Skip to content

Commit 3fe6f96

Browse files
🐛 Fix race conditions ScaleDownOldMS OnDelete (#12830)
* Fix race conditions ScaleDownOldMS OnDelete * Address feedback
1 parent 37dc14b commit 3fe6f96

File tree

34 files changed

+4517
-562
lines changed

34 files changed

+4517
-562
lines changed

internal/controllers/machinedeployment/machinedeployment_controller.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -293,7 +293,7 @@ func (r *Reconciler) reconcile(ctx context.Context, s *scope) error {
293293
}
294294

295295
if md.Spec.Rollout.Strategy.Type == clusterv1.RollingUpdateMachineDeploymentStrategyType {
296-
return r.rolloutRolling(ctx, md, s.machineSets, templateExists)
296+
return r.rolloutRollingUpdate(ctx, md, s.machineSets, templateExists)
297297
}
298298

299299
if md.Spec.Rollout.Strategy.Type == clusterv1.OnDeleteMachineDeploymentStrategyType {

internal/controllers/machinedeployment/machinedeployment_rollout_ondelete.go

Lines changed: 110 additions & 110 deletions
Original file line numberDiff line numberDiff line change
@@ -19,47 +19,53 @@ package machinedeployment
1919
import (
2020
"context"
2121
"fmt"
22+
"sort"
2223

23-
"github.com/pkg/errors"
24-
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2524
"k8s.io/klog/v2"
2625
"k8s.io/utils/ptr"
2726
ctrl "sigs.k8s.io/controller-runtime"
28-
"sigs.k8s.io/controller-runtime/pkg/client"
2927

3028
clusterv1 "sigs.k8s.io/cluster-api/api/core/v1beta2"
3129
"sigs.k8s.io/cluster-api/internal/controllers/machinedeployment/mdutil"
3230
"sigs.k8s.io/cluster-api/util/patch"
3331
)
3432

35-
// rolloutOnDelete implements the logic for the OnDelete rollout strategy.
33+
// rolloutOnDelete reconcile machine sets controlled by a MachineDeployment that is using the OnDelete strategy.
3634
func (r *Reconciler) rolloutOnDelete(ctx context.Context, md *clusterv1.MachineDeployment, msList []*clusterv1.MachineSet, templateExists bool) error {
35+
// TODO(in-place): move create newMS into rolloutPlanner
3736
newMS, oldMSs, err := r.getAllMachineSetsAndSyncRevision(ctx, md, msList, true, templateExists)
3837
if err != nil {
3938
return err
4039
}
4140

42-
// newMS can be nil in case there is already a MachineSet associated with this deployment,
43-
// but there are only either changes in annotations or MinReadySeconds. Or in other words,
44-
// this can be nil if there are changes, but no replacement of existing machines is needed.
45-
if newMS == nil {
46-
return nil
41+
planner := newRolloutPlanner()
42+
planner.md = md
43+
planner.newMS = newMS
44+
planner.oldMSs = oldMSs
45+
46+
if err := planner.planOnDelete(ctx); err != nil {
47+
return err
4748
}
4849

4950
allMSs := append(oldMSs, newMS)
5051

51-
// Scale up, if we can.
52-
if err := r.reconcileNewMachineSetOnDelete(ctx, md, oldMSs, newMS); err != nil {
52+
// TODO(in-place): also apply/remove labels to MS should go into rolloutPlanner
53+
if err := r.cleanupDisableMachineCreateAnnotation(ctx, newMS); err != nil {
5354
return err
5455
}
55-
56-
if err := r.syncDeploymentStatus(allMSs, newMS, md); err != nil {
56+
if err := r.addDisableMachineCreateAnnotation(ctx, oldMSs); err != nil {
5757
return err
5858
}
5959

60-
// Scale down, if we can.
61-
if err := r.reconcileOldMachineSetsOnDelete(ctx, oldMSs, allMSs, md); err != nil {
62-
return err
60+
// TODO(in-place): this should be changed as soon as rolloutPlanner support MS creation and adding/removing labels from MS
61+
for _, ms := range allMSs {
62+
scaleIntent := ptr.Deref(ms.Spec.Replicas, 0)
63+
if v, ok := planner.scaleIntents[ms.Name]; ok {
64+
scaleIntent = v
65+
}
66+
if err := r.scaleMachineSet(ctx, ms, scaleIntent, md); err != nil {
67+
return err
68+
}
6369
}
6470

6571
if err := r.syncDeploymentStatus(allMSs, newMS, md); err != nil {
@@ -75,115 +81,109 @@ func (r *Reconciler) rolloutOnDelete(ctx context.Context, md *clusterv1.MachineD
7581
return nil
7682
}
7783

84+
// planOnDelete determine how to proceed with the rollout when using the OnDelete strategy if we are not yet at the desired state.
85+
func (p *rolloutPlanner) planOnDelete(ctx context.Context) error {
86+
// Scale up, if we can.
87+
if err := p.reconcileNewMachineSet(ctx); err != nil {
88+
return err
89+
}
90+
91+
// Scale down, if we can.
92+
p.reconcileOldMachineSetsOnDelete(ctx)
93+
return nil
94+
}
95+
7896
// reconcileOldMachineSetsOnDelete handles reconciliation of Old MachineSets associated with the MachineDeployment in the OnDelete rollout strategy.
79-
func (r *Reconciler) reconcileOldMachineSetsOnDelete(ctx context.Context, oldMSs []*clusterv1.MachineSet, allMSs []*clusterv1.MachineSet, deployment *clusterv1.MachineDeployment) error {
97+
func (p *rolloutPlanner) reconcileOldMachineSetsOnDelete(ctx context.Context) {
8098
log := ctrl.LoggerFrom(ctx)
81-
if deployment.Spec.Replicas == nil {
82-
return errors.Errorf("spec replicas for MachineDeployment %q/%q is nil, this is unexpected",
83-
deployment.Namespace, deployment.Name)
99+
oldMachinesCount := mdutil.GetReplicaCountForMachineSets(p.oldMSs)
100+
if oldMachinesCount == 0 {
101+
// Can't scale down further
102+
return
84103
}
85-
log.V(4).Info("Checking to see if machines have been deleted or are in the process of deleting for old machine sets")
86-
totalReplicas := mdutil.GetReplicaCountForMachineSets(allMSs)
87-
scaleDownAmount := totalReplicas - *deployment.Spec.Replicas
88-
for _, oldMS := range oldMSs {
89-
log := log.WithValues("MachineSet", klog.KObj(oldMS))
90-
if oldMS.Spec.Replicas == nil || *oldMS.Spec.Replicas <= 0 {
91-
log.V(4).Info("fully scaled down")
104+
105+
// Determine if there are more Machines than MD.spec.replicas, e.g. due to a scale down in MD.
106+
newMSReplicas := ptr.Deref(p.newMS.Spec.Replicas, 0)
107+
if v, ok := p.scaleIntents[p.newMS.Name]; ok {
108+
newMSReplicas = v
109+
}
110+
totReplicas := oldMachinesCount + newMSReplicas
111+
totalScaleDownCount := max(totReplicas-ptr.Deref(p.md.Spec.Replicas, 0), 0)
112+
113+
// Sort oldMSs so the system will start deleting from the oldest MS first.
114+
sort.Sort(mdutil.MachineSetsByCreationTimestamp(p.oldMSs))
115+
116+
// Start scaling down old machine sets to acknowledge spec.replicas without corresponding status.replicas.
117+
// Note: spec.replicas without corresponding status.replicas exists
118+
// - after a user manually deletes a replica
119+
// - when a newMS not yet fully provisioned suddenly becomes an oldMS.
120+
// In both cases spec.replicas without corresponding status.replicas should be dropped, no matter
121+
// if there are replicas to be scaled down due to a scale down in MD or not.
122+
// However, just in case there are replicas to be scaled down due to a scale down in MD, deleted replicas should
123+
// be deducted from the totalScaleDownCount.
124+
for _, oldMS := range p.oldMSs {
125+
// No op if this MS has been already scaled down to zero.
126+
if ptr.Deref(oldMS.Spec.Replicas, 0) <= 0 {
92127
continue
93128
}
94-
if oldMS.Annotations == nil {
95-
oldMS.Annotations = map[string]string{}
129+
130+
scaleDownCount := max(ptr.Deref(oldMS.Spec.Replicas, 0)-ptr.Deref(oldMS.Status.Replicas, 0), 0)
131+
if scaleDownCount > 0 {
132+
newScaleIntent := max(ptr.Deref(oldMS.Spec.Replicas, 0)-scaleDownCount, 0)
133+
log.V(5).Info(fmt.Sprintf("Setting scale down intent for MachineSet %s to %d replicas (-%d)", oldMS.Name, newScaleIntent, scaleDownCount), "MachineSet", klog.KObj(oldMS))
134+
p.scaleIntents[oldMS.Name] = newScaleIntent
135+
136+
totalScaleDownCount -= scaleDownCount
137+
}
138+
}
139+
140+
// Scale down additional replicas if replicas removed in the for loop above were not enough to align to MD replicas.
141+
for _, oldMS := range p.oldMSs {
142+
// No op if there is no scaling down left.
143+
if totalScaleDownCount <= 0 {
144+
break
145+
}
146+
147+
// No op if this MS has been already scaled down to zero.
148+
scaleIntent := ptr.Deref(oldMS.Spec.Replicas, 0)
149+
if v, ok := p.scaleIntents[oldMS.Name]; ok {
150+
scaleIntent = v
96151
}
152+
153+
if scaleIntent <= 0 {
154+
continue
155+
}
156+
157+
scaleDownCount := min(scaleIntent, totalScaleDownCount)
158+
if scaleDownCount > 0 {
159+
newScaleIntent := max(ptr.Deref(oldMS.Spec.Replicas, 0)-scaleDownCount, 0)
160+
log.V(5).Info(fmt.Sprintf("Setting scale down intent for MachineSet %s to %d replicas (-%d)", oldMS.Name, newScaleIntent, scaleDownCount), "MachineSet", klog.KObj(oldMS))
161+
p.scaleIntents[oldMS.Name] = newScaleIntent
162+
163+
totalScaleDownCount -= scaleDownCount
164+
}
165+
}
166+
}
167+
168+
// addDisableMachineCreateAnnotation will add the disable machine create annotation to old MachineSets.
169+
func (r *Reconciler) addDisableMachineCreateAnnotation(ctx context.Context, oldMSs []*clusterv1.MachineSet) error {
170+
for _, oldMS := range oldMSs {
171+
log := ctrl.LoggerFrom(ctx, "MachineSet", klog.KObj(oldMS))
97172
if _, ok := oldMS.Annotations[clusterv1.DisableMachineCreateAnnotation]; !ok {
98-
log.V(4).Info("setting annotation on old MachineSet to disable machine creation")
173+
log.V(4).Info("adding annotation on old MachineSet to disable machine creation")
99174
patchHelper, err := patch.NewHelper(oldMS, r.Client)
100175
if err != nil {
101176
return err
102177
}
178+
if oldMS.Annotations == nil {
179+
oldMS.Annotations = map[string]string{}
180+
}
103181
oldMS.Annotations[clusterv1.DisableMachineCreateAnnotation] = "true"
104-
if err := patchHelper.Patch(ctx, oldMS); err != nil {
182+
err = patchHelper.Patch(ctx, oldMS)
183+
if err != nil {
105184
return err
106185
}
107186
}
108-
selectorMap, err := metav1.LabelSelectorAsMap(&oldMS.Spec.Selector)
109-
if err != nil {
110-
log.V(4).Info("Failed to convert MachineSet label selector to a map", "err", err)
111-
continue
112-
}
113-
log.V(4).Info("Fetching Machines associated with MachineSet")
114-
// Get all Machines linked to this MachineSet.
115-
allMachinesInOldMS := &clusterv1.MachineList{}
116-
if err := r.Client.List(ctx,
117-
allMachinesInOldMS,
118-
client.InNamespace(oldMS.Namespace),
119-
client.MatchingLabels(selectorMap),
120-
); err != nil {
121-
return errors.Wrap(err, "failed to list machines")
122-
}
123-
totalMachineCount := int32(len(allMachinesInOldMS.Items))
124-
log.V(4).Info("Retrieved machines", "totalMachineCount", totalMachineCount)
125-
updatedReplicaCount := totalMachineCount - mdutil.GetDeletingMachineCount(allMachinesInOldMS)
126-
if updatedReplicaCount < 0 {
127-
return errors.Errorf("negative updated replica count %d for MachineSet %q, this is unexpected", updatedReplicaCount, oldMS.Name)
128-
}
129-
machineSetScaleDownAmountDueToMachineDeletion := *oldMS.Spec.Replicas - updatedReplicaCount
130-
if machineSetScaleDownAmountDueToMachineDeletion < 0 {
131-
log.V(4).Info(fmt.Sprintf("Error reconciling MachineSet %s", oldMS.Name), "err", errors.Errorf("Unexpected negative scale down amount: %d", machineSetScaleDownAmountDueToMachineDeletion))
132-
}
133-
scaleDownAmount -= machineSetScaleDownAmountDueToMachineDeletion
134-
log.V(4).Info("Adjusting replica count for deleted machines", "oldReplicas", oldMS.Spec.Replicas, "newReplicas", updatedReplicaCount)
135-
log.V(4).Info("Scaling down", "replicas", updatedReplicaCount)
136-
if err := r.scaleMachineSet(ctx, oldMS, updatedReplicaCount, deployment); err != nil {
137-
return err
138-
}
139187
}
140-
log.V(4).Info("Finished reconcile of Old MachineSets to account for deleted machines. Now analyzing if there's more potential to scale down")
141-
for _, oldMS := range oldMSs {
142-
log := log.WithValues("MachineSet", klog.KObj(oldMS))
143-
if scaleDownAmount <= 0 {
144-
break
145-
}
146-
if oldMS.Spec.Replicas == nil || *oldMS.Spec.Replicas <= 0 {
147-
log.V(4).Info("Fully scaled down")
148-
continue
149-
}
150-
updatedReplicaCount := *oldMS.Spec.Replicas
151-
if updatedReplicaCount >= scaleDownAmount {
152-
updatedReplicaCount -= scaleDownAmount
153-
scaleDownAmount = 0
154-
} else {
155-
scaleDownAmount -= updatedReplicaCount
156-
updatedReplicaCount = 0
157-
}
158-
log.V(4).Info("Scaling down", "replicas", updatedReplicaCount)
159-
if err := r.scaleMachineSet(ctx, oldMS, updatedReplicaCount, deployment); err != nil {
160-
return err
161-
}
162-
}
163-
log.V(4).Info("Finished reconcile of all old MachineSets")
164188
return nil
165189
}
166-
167-
// reconcileNewMachineSetOnDelete handles reconciliation of the latest MachineSet associated with the MachineDeployment in the OnDelete rollout strategy.
168-
func (r *Reconciler) reconcileNewMachineSetOnDelete(ctx context.Context, md *clusterv1.MachineDeployment, oldMSs []*clusterv1.MachineSet, newMS *clusterv1.MachineSet) error {
169-
// TODO(in-place): also apply/remove labels should go into rolloutPlanner
170-
if err := r.cleanupDisableMachineCreateAnnotation(ctx, newMS); err != nil {
171-
return err
172-
}
173-
174-
planner := newRolloutPlanner()
175-
planner.md = md
176-
planner.newMS = newMS
177-
planner.oldMSs = oldMSs
178-
179-
if err := planner.reconcileNewMachineSet(ctx); err != nil {
180-
return err
181-
}
182-
183-
// TODO(in-place): this should be changed as soon as rolloutPlanner support MS creation and adding/removing labels from MS
184-
scaleIntent := ptr.Deref(newMS.Spec.Replicas, 0)
185-
if v, ok := planner.scaleIntents[newMS.Name]; ok {
186-
scaleIntent = v
187-
}
188-
return r.scaleMachineSet(ctx, newMS, scaleIntent, md)
189-
}

0 commit comments

Comments
 (0)