diff --git a/internal/controller/bucket/observe.go b/internal/controller/bucket/observe.go index 0b264294..83dfaeb5 100644 --- a/internal/controller/bucket/observe.go +++ b/internal/controller/bucket/observe.go @@ -39,6 +39,29 @@ func (c *external) Observe(ctx context.Context, mg resource.Managed) (managed.Ex } if len(bucket.Status.AtProvider.Backends) == 0 { + if bucket.Spec.Disabled && bucket.GetDeletionTimestamp() == nil { + // This is an edge case: + // 1. The Bucket CR has no corresponding S3 bucket on any backends. + // 2. The Bucket CR is disabled. + // 3. The Bucket CR has NOT been deleted. + // + // Therefore we can assume its S3 buckets were never created on any backends or they + // were successfully removed when the CR was disabled. Either way, there is nothing + // for provider-ceph to do, so we return true/true for the external observation. + // + // This may seem counterintuitive given that the external resource does not exist. + // However, were we to return "ResourceExists: false" in this scenario, the crossplane + // runtime would update the "external-create-pending" annotation on the CR and call + // provider ceph's Create() method which will just return early as the CR is disabled. + // This results in an infinite loop of crossplane-runtime re-queuing the disabled CR + // and updating the same annotation repeatedly even though it is already in the desired + // state for a disabled CR. Over time these annotation writes will fill up the ETCD DB. + return managed.ExternalObservation{ + ResourceExists: true, + ResourceUpToDate: true, + }, nil + } + return managed.ExternalObservation{ ResourceExists: false, ResourceUpToDate: true, diff --git a/internal/controller/bucket/observe_test.go b/internal/controller/bucket/observe_test.go index 82931227..4ebae708 100644 --- a/internal/controller/bucket/observe_test.go +++ b/internal/controller/bucket/observe_test.go @@ -19,6 +19,8 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "sigs.k8s.io/controller-runtime/pkg/client/fake" ) var ( @@ -471,3 +473,70 @@ func TestObserve(t *testing.T) { }) } } + +// TestObserveDisabledBucketNoBackendsDoesNotTriggerCreate proves the fix for an etcd write loop. +// +// Root cause: a disabled bucket with empty backends caused Observe to return ResourceExists:false, +// which triggered the Crossplane reconciler to call Create on every cycle — 3 etcd writes per +// reconcile × thousands of buckets. The fix returns ResourceExists:true so Create is never invoked. +func TestObserveDisabledBucketNoBackendsDoesNotTriggerCreate(t *testing.T) { + t.Parallel() + + scheme := runtime.NewScheme() + scheme.AddKnownTypes(v1alpha1.SchemeGroupVersion, &v1alpha1.Bucket{}, &v1alpha1.BucketList{}) + + bs := backendstore.NewBackendStore() + bs.AddOrUpdateBackend("s3-backend-1", nil, nil, apisv1alpha1.HealthStatusHealthy) + + t.Run("disabled bucket with no backends returns ResourceExists:true preventing Create", func(t *testing.T) { + t.Parallel() + + // Exact state that caused the loop: disabled=true, backends={}, condition not yet Unavailable. + bucket := &v1alpha1.Bucket{ + ObjectMeta: metav1.ObjectMeta{Name: "disabled-no-backends"}, + Spec: v1alpha1.BucketSpec{Disabled: true}, + Status: v1alpha1.BucketStatus{ + ResourceStatus: v1.ResourceStatus{ + ConditionedStatus: v1.ConditionedStatus{ + Conditions: []v1.Condition{v1.Creating()}, + }, + }, + }, + } + + cl := fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(bucket). + WithStatusSubresource(bucket). + Build() + + e := external{kubeClient: cl, kubeReader: cl, backendStore: bs, log: logr.Discard()} + + obs, err := e.Observe(context.Background(), bucket) + require.NoError(t, err) + // ResourceExists:true is the gate that stops the loop — the Crossplane reconciler only + // invokes Create (and its three annotation writes) when ResourceExists:false. + assert.True(t, obs.ResourceExists, "must be true to prevent Create being called") + assert.True(t, obs.ResourceUpToDate) + }) + + t.Run("non-disabled bucket with no backends still returns ResourceExists:false", func(t *testing.T) { + t.Parallel() + + bucket := &v1alpha1.Bucket{ + ObjectMeta: metav1.ObjectMeta{Name: "enabled-no-backends"}, + } + + cl := fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(bucket). + WithStatusSubresource(bucket). + Build() + + e := external{kubeClient: cl, kubeReader: cl, backendStore: bs, log: logr.Discard()} + + obs, err := e.Observe(context.Background(), bucket) + require.NoError(t, err) + assert.False(t, obs.ResourceExists, "active bucket with no backends must still trigger Create") + }) +}