diff --git a/cmd/manager/main.go b/cmd/manager/main.go index db28cee..82a3f79 100644 --- a/cmd/manager/main.go +++ b/cmd/manager/main.go @@ -104,8 +104,9 @@ func main() { } if err = (&controllers.PatchJobReconciler{ - Client: mgr.GetClient(), - Scheme: mgr.GetScheme(), + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + Namespace: operatorNamespace, }).SetupWithManager(mgr); err != nil { setupLog.Error(err, "unable to create controller", "controller", "PatchJob") os.Exit(1) diff --git a/config/samples/simple-upgrade.yaml b/config/samples/simple-upgrade.yaml index c885cb2..ee3907a 100644 --- a/config/samples/simple-upgrade.yaml +++ b/config/samples/simple-upgrade.yaml @@ -1,10 +1,10 @@ apiVersion: kangalpatch.ozalp.dk/v1alpha1 kind: PatchPlan metadata: - name: simple-upgrade + name: simple-upgrade-existing spec: # Target Talos version to upgrade to - targetVersion: "factory.talos.dev/nocloud-installer-secureboot/95d432d6bb450a67e801a6ae77c96a67e38820b62ba4159ae7e997e1695207f7:v1.11.6" + targetVersion: "factory.talos.dev/nocloud-installer-secureboot/95d432d6bb450a67e801a6ae77c96a67e38820b62ba4159ae7e997e1695207f7:v1.11.3" # Node selection nodeSelector: @@ -14,7 +14,7 @@ spec: patchControlPlane: false patchWorkers: true - maxConcurrency: 2 + maxConcurrency: 1 # Upgrade control plane first, then workers controlPlaneFirst: false @@ -39,4 +39,4 @@ spec: - 192.168.1.250:50000 secretRef: name: talos-credentials - namespace: kangal-patch + namespace: default diff --git a/controllers/patchjob_controller.go b/controllers/patchjob_controller.go index b7c41e1..0243015 100644 --- a/controllers/patchjob_controller.go +++ b/controllers/patchjob_controller.go @@ -21,6 +21,7 @@ import ( "fmt" "time" + coordinationv1 "k8s.io/api/coordination/v1" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/runtime" @@ -38,7 +39,8 @@ import ( // PatchJobReconciler reconciles a PatchJob object type PatchJobReconciler struct { client.Client - Scheme *runtime.Scheme + Scheme *runtime.Scheme + Namespace string } // Requeue intervals for different phases @@ -135,6 +137,12 @@ func (r *PatchJobReconciler) initJob(ctx context.Context, patchJob *patchv1alpha // Continue anyway - node is at target version } + // Delete the lease since no patching is needed + if err := r.deletePatchJobLease(ctx, patchJob); err != nil { + logger.Error(err, "failed to delete lease", "node", patchJob.Spec.NodeName) + // Continue anyway - node is at target version + } + patchJob.Status.Phase = patchv1alpha1.PatchJobPhaseCompleted patchJob.Status.CurrentVersion = currentVersion patchJob.Status.TargetVersion = targetVersion @@ -416,3 +424,46 @@ func (r *PatchJobReconciler) waitForReboot(ctx context.Context, patchJob *patchv return ctrl.Result{}, nil } + +// deletePatchJobLease deletes the scheduling lease associated with this PatchJob. +func (r *PatchJobReconciler) deletePatchJobLease(ctx context.Context, patchJob *patchv1alpha1.PatchJob) error { + logger := log.FromContext(ctx) + + planName := patchJob.Spec.PatchPlanRef + if planName == "" { + return fmt.Errorf("patchPlanRef not set") + } + + var patchPlan patchv1alpha1.PatchPlan + if err := r.Get(ctx, types.NamespacedName{Name: planName}, &patchPlan); err != nil { + if errors.IsNotFound(err) { + return nil + } + return fmt.Errorf("failed to get PatchPlan %s: %w", planName, err) + } + + leaseName := fmt.Sprintf("%s-%s-scheduling", planName, patchJob.Spec.NodeName) + + lease := &coordinationv1.Lease{} + leaseKey := types.NamespacedName{ + Name: leaseName, + Namespace: r.Namespace, + } + + if err := r.Get(ctx, leaseKey, lease); err != nil { + if errors.IsNotFound(err) { + return nil + } + return fmt.Errorf("failed to get lease %s: %w", leaseName, err) + } + + if err := r.Delete(ctx, lease); err != nil { + if errors.IsNotFound(err) { + return nil + } + return fmt.Errorf("failed to delete lease %s: %w", leaseName, err) + } + + logger.Info("deleted lease for patch job", "lease", leaseName, "node", patchJob.Spec.NodeName) + return nil +}