Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions internal/controller/bucket/observe.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,29 @@ func (c *external) Observe(ctx context.Context, mg resource.Managed) (managed.Ex
}

if len(bucket.Status.AtProvider.Backends) == 0 {
if bucket.Spec.Disabled && bucket.GetDeletionTimestamp() == nil {
// This is an edge case:
// 1. The Bucket CR has no corresponding S3 bucket on any backends.
// 2. The Bucket CR is disabled.
// 3. The Bucket CR has NOT been deleted.
//
// Therefore we can assume its S3 buckets were never created on any backends or they
// were successfully removed when the CR was disabled. Either way, there is nothing
// for provider-ceph to do, so we return true/true for the external observation.
//
// This may seem counterintuitive given that the external resource does not exist.
// However, were we to return "ResourceExists: false" in this scenario, the crossplane
// runtime would update the "external-create-pending" annotation on the CR and call
// provider ceph's Create() method which will just return early as the CR is disabled.
// This results in an infinite loop of crossplane-runtime re-queuing the disabled CR
// and updating the same annotation repeatedly even though it is already in the desired
// state for a disabled CR. Over time these annotation writes will fill up the ETCD DB.
return managed.ExternalObservation{
ResourceExists: true,
ResourceUpToDate: true,
}, nil
}

return managed.ExternalObservation{
ResourceExists: false,
ResourceUpToDate: true,
Expand Down
69 changes: 69 additions & 0 deletions internal/controller/bucket/observe_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ import (
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"sigs.k8s.io/controller-runtime/pkg/client/fake"
)

var (
Expand Down Expand Up @@ -471,3 +473,70 @@ func TestObserve(t *testing.T) {
})
}
}

// TestObserveDisabledBucketNoBackendsDoesNotTriggerCreate proves the fix for an etcd write loop.
//
// Root cause: a disabled bucket with empty backends caused Observe to return ResourceExists:false,
// which triggered the Crossplane reconciler to call Create on every cycle — 3 etcd writes per
// reconcile × thousands of buckets. The fix returns ResourceExists:true so Create is never invoked.
func TestObserveDisabledBucketNoBackendsDoesNotTriggerCreate(t *testing.T) {
t.Parallel()

scheme := runtime.NewScheme()
scheme.AddKnownTypes(v1alpha1.SchemeGroupVersion, &v1alpha1.Bucket{}, &v1alpha1.BucketList{})

bs := backendstore.NewBackendStore()
bs.AddOrUpdateBackend("s3-backend-1", nil, nil, apisv1alpha1.HealthStatusHealthy)

t.Run("disabled bucket with no backends returns ResourceExists:true preventing Create", func(t *testing.T) {
t.Parallel()

// Exact state that caused the loop: disabled=true, backends={}, condition not yet Unavailable.
bucket := &v1alpha1.Bucket{
ObjectMeta: metav1.ObjectMeta{Name: "disabled-no-backends"},
Spec: v1alpha1.BucketSpec{Disabled: true},
Status: v1alpha1.BucketStatus{
ResourceStatus: v1.ResourceStatus{
ConditionedStatus: v1.ConditionedStatus{
Conditions: []v1.Condition{v1.Creating()},
},
},
},
}

cl := fake.NewClientBuilder().
WithScheme(scheme).
WithObjects(bucket).
WithStatusSubresource(bucket).
Build()

e := external{kubeClient: cl, kubeReader: cl, backendStore: bs, log: logr.Discard()}

obs, err := e.Observe(context.Background(), bucket)
require.NoError(t, err)
// ResourceExists:true is the gate that stops the loop — the Crossplane reconciler only
// invokes Create (and its three annotation writes) when ResourceExists:false.
assert.True(t, obs.ResourceExists, "must be true to prevent Create being called")
assert.True(t, obs.ResourceUpToDate)
})

t.Run("non-disabled bucket with no backends still returns ResourceExists:false", func(t *testing.T) {
t.Parallel()

bucket := &v1alpha1.Bucket{
ObjectMeta: metav1.ObjectMeta{Name: "enabled-no-backends"},
}

cl := fake.NewClientBuilder().
WithScheme(scheme).
WithObjects(bucket).
WithStatusSubresource(bucket).
Build()

e := external{kubeClient: cl, kubeReader: cl, backendStore: bs, log: logr.Discard()}

obs, err := e.Observe(context.Background(), bucket)
require.NoError(t, err)
assert.False(t, obs.ResourceExists, "active bucket with no backends must still trigger Create")
})
}
Loading