@@ -2,7 +2,9 @@ package operator
 
 import (
 	"context"
+	goerrors "errors"
 	"fmt"
+	"k8s.io/apimachinery/pkg/api/errors"
 	"slices"
 	"sort"
 	"strings"
@@ -11,7 +13,6 @@ import (
 	"github.com/hashicorp/go-multierror"
 	"go.uber.org/zap"
 	"golang.org/x/xerrors"
-	"k8s.io/apimachinery/pkg/api/errors"
 	"k8s.io/apimachinery/pkg/runtime"
 	"k8s.io/apimachinery/pkg/util/intstr"
 	"k8s.io/utils/ptr"
@@ -1611,6 +1612,7 @@ func (r *ShardedClusterReconcileHelper) cleanOpsManagerState(ctx context.Context
 	}
 
 	logDiffOfProcessNames(processNames, r.getHealthyProcessNames(), log.With("ctx", "cleanOpsManagerState"))
+	// we're onDelete, we cannot requeue, so we need to wait
 	if err := om.WaitForReadyState(conn, r.getHealthyProcessNames(), false, log); err != nil {
 		return err
 	}
@@ -1849,13 +1851,12 @@ func (r *ShardedClusterReconcileHelper) updateOmDeploymentShardedCluster(ctx con
 
 	healthyProcessesToWaitForReadyState := r.getHealthyProcessNamesToWaitForReadyState(conn, log)
 	logDiffOfProcessNames(processNames, healthyProcessesToWaitForReadyState, log.With("ctx", "updateOmDeploymentShardedCluster"))
-	if err = om.WaitForReadyState(conn, healthyProcessesToWaitForReadyState, isRecovering, log); err != nil {
-		if !isRecovering {
-			if shardsRemoving {
-				return workflow.Pending("automation agents haven't reached READY state: shards removal in progress: %v", err)
-			}
-			return workflow.Failed(err)
+
+	if !isRecovering {
+		if workflowStatus := om.CheckForReadyState(conn, healthyProcessesToWaitForReadyState, log); !workflowStatus.IsOK() {
+			return workflowStatus
 		}
+	} else {
 		logWarnIgnoredDueToRecovery(log, err)
 	}
 
@@ -1873,12 +1874,16 @@ func (r *ShardedClusterReconcileHelper) updateOmDeploymentShardedCluster(ctx con
 
 		healthyProcessesToWaitForReadyState := r.getHealthyProcessNamesToWaitForReadyState(conn, log)
 		logDiffOfProcessNames(processNames, healthyProcessesToWaitForReadyState, log.With("ctx", "shardsRemoving"))
-		if err = om.WaitForReadyState(conn, healthyProcessesToWaitForReadyState, isRecovering, log); err != nil {
-			if !isRecovering {
-				return workflow.Failed(xerrors.Errorf("automation agents haven't reached READY state while cleaning replica set and processes: %w", err))
-			}
+		if isRecovering {
 			logWarnIgnoredDueToRecovery(log, err)
 		}
+		if err = om.CheckForReadyStateReturningError(conn, healthyProcessesToWaitForReadyState, log); err != nil {
+			pendingErr := om.PendingErr{}
+			if ok := goerrors.As(err, &pendingErr); ok {
+				return workflow.Pending(pendingErr.Error())
+			}
+			return workflow.Failed(err)
+		}
 	}
 
 	currentHosts := r.getAllHostnames(false)
@@ -2042,8 +2047,13 @@ func (r *ShardedClusterReconcileHelper) publishDeployment(ctx context.Context, c
 
 	healthyProcessesToWaitForReadyState = r.getHealthyProcessNamesToWaitForReadyState(conn, log)
 	logDiffOfProcessNames(opts.processNames, healthyProcessesToWaitForReadyState, log.With("ctx", "publishDeployment"))
-	if err := om.WaitForReadyState(conn, healthyProcessesToWaitForReadyState, isRecovering, log); err != nil {
-		return nil, shardsRemoving, workflow.Failed(err)
+
+	if !isRecovering {
+		if workflowStatus := om.CheckForReadyState(conn, healthyProcessesToWaitForReadyState, log); workflowStatus != workflow.OK() {
+			return nil, shardsRemoving, workflowStatus
+		}
+	} else {
+		log.Warnf("Ignoring checking for ready state due to recovering")
 	}
 
 	if additionalReconciliationRequired {
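
The shardsRemoving branch above relies on two om helpers that are not part of this diff: om.CheckForReadyState (returning a workflow status) and om.CheckForReadyStateReturningError together with the om.PendingErr type. The following is a minimal, hypothetical, self-contained sketch of the error contract that caller code assumes — a typed "pending" error that goerrors.As can detect so the reconciler returns workflow.Pending instead of workflow.Failed. The names PendingErr and checkReady are stand-ins; the real definitions live in the om package.

// Hypothetical sketch only; PendingErr and checkReady are illustrative
// stand-ins for om.PendingErr and om.CheckForReadyStateReturningError.
package main

import (
	"errors"
	"fmt"
)

// PendingErr marks a readiness check that has not converged yet and should be
// retried, as opposed to a hard failure.
type PendingErr struct{ Msg string }

func (e PendingErr) Error() string { return e.Msg }

// checkReady stands in for a single, non-blocking readiness check.
func checkReady(ready bool) error {
	if !ready {
		return PendingErr{Msg: "automation agents haven't reached READY state"}
	}
	return nil
}

func main() {
	err := checkReady(false)

	// Same dispatch as the caller in the diff: pending -> requeue, anything else -> fail.
	pendingErr := PendingErr{}
	if errors.As(err, &pendingErr) {
		fmt.Println("requeue (Pending):", pendingErr.Error())
		return
	}
	if err != nil {
		fmt.Println("fail:", err)
	}
}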