Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .mise.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
[tools]
go = "1.25.1"

# Keep Go from automatically switching to (or downloading) a different toolchain; always use the locally installed one
[env]
GOTOOLCHAIN = "local"
2 changes: 1 addition & 1 deletion RELEASE_VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
3.2.6
3.2.7
17 changes: 6 additions & 11 deletions build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -74,11 +74,6 @@ function precheck() {
fi
fi

if [[ -z "$GOPATH" ]]; then
echo "GOPATH not set"
ok=1
fi

if [[ ! -x "$( which go )" ]]; then
echo "go binary not found in PATH"
ok=1
Expand Down Expand Up @@ -214,18 +209,18 @@ package_linux() {
[ $do_tar -eq 1 ] && tar -C $build_path/orchestrator -czf $release_base_path/orchestrator-"${RELEASE_VERSION}"-$target-$arch.tar.gz ./

debug "Creating Distro full packages"
[ $do_rpm -eq 1 ] && fpm -v "${RELEASE_VERSION}" --epoch 1 -f -s dir -n orchestrator -m shlomi-noach --description "MySQL replication topology management and HA" --url "https://github.com/openark/orchestrator" --vendor "GitHub" --license "Apache 2.0" -C $build_path/orchestrator --prefix=/ --config-files /usr/local/orchestrator/resources/public/css/custom.css --config-files /usr/local/orchestrator/resources/public/js/custom.js --depends 'jq >= 1.5' -t rpm .
[ $do_deb -eq 1 ] && fpm -v "${RELEASE_VERSION}" --epoch 1 -f -s dir -n orchestrator -m shlomi-noach --description "MySQL replication topology management and HA" --url "https://github.com/openark/orchestrator" --vendor "GitHub" --license "Apache 2.0" -C $build_path/orchestrator --prefix=/ --config-files /usr/local/orchestrator/resources/public/css/custom.css --config-files /usr/local/orchestrator/resources/public/js/custom.js --depends 'jq >= 1.5' -t deb --deb-no-default-config-files .
[ $do_rpm -eq 1 ] && fpm -a "$arch" -v "${RELEASE_VERSION}" --epoch 1 -f -s dir -n orchestrator -m shlomi-noach --description "MySQL replication topology management and HA" --url "https://github.com/openark/orchestrator" --vendor "GitHub" --license "Apache 2.0" -C $build_path/orchestrator --prefix=/ --config-files /usr/local/orchestrator/resources/public/css/custom.css --config-files /usr/local/orchestrator/resources/public/js/custom.js --depends 'jq >= 1.5' -t rpm .
[ $do_deb -eq 1 ] && fpm -a "$arch" -v "${RELEASE_VERSION}" --epoch 1 -f -s dir -n orchestrator -m shlomi-noach --description "MySQL replication topology management and HA" --url "https://github.com/openark/orchestrator" --vendor "GitHub" --license "Apache 2.0" -C $build_path/orchestrator --prefix=/ --config-files /usr/local/orchestrator/resources/public/css/custom.css --config-files /usr/local/orchestrator/resources/public/js/custom.js --depends 'jq >= 1.5' -t deb --deb-no-default-config-files .

debug "Creating Distro cli packages"
# orchestrator-cli packaging -- executable only
[ $do_rpm -eq 1 ] && fpm -v "${RELEASE_VERSION}" --epoch 1 -f -s dir -n orchestrator-cli -m shlomi-noach --description "MySQL replication topology management and HA: binary only" --url "https://github.com/openark/orchestrator" --vendor "GitHub" --license "Apache 2.0" -C $build_path/orchestrator-cli --prefix=/ --depends 'jq >= 1.5' -t rpm .
[ $do_deb -eq 1 ] && fpm -v "${RELEASE_VERSION}" --epoch 1 -f -s dir -n orchestrator-cli -m shlomi-noach --description "MySQL replication topology management and HA: binary only" --url "https://github.com/openark/orchestrator" --vendor "GitHub" --license "Apache 2.0" -C $build_path/orchestrator-cli --prefix=/ --depends 'jq >= 1.5' -t deb --deb-no-default-config-files .
[ $do_rpm -eq 1 ] && fpm -a "$arch" -v "${RELEASE_VERSION}" --epoch 1 -f -s dir -n orchestrator-cli -m shlomi-noach --description "MySQL replication topology management and HA: binary only" --url "https://github.com/openark/orchestrator" --vendor "GitHub" --license "Apache 2.0" -C $build_path/orchestrator-cli --prefix=/ --depends 'jq >= 1.5' -t rpm .
[ $do_deb -eq 1 ] && fpm -a "$arch" -v "${RELEASE_VERSION}" --epoch 1 -f -s dir -n orchestrator-cli -m shlomi-noach --description "MySQL replication topology management and HA: binary only" --url "https://github.com/openark/orchestrator" --vendor "GitHub" --license "Apache 2.0" -C $build_path/orchestrator-cli --prefix=/ --depends 'jq >= 1.5' -t deb --deb-no-default-config-files .

debug "Creating Distro orchestrator-client packages"
# orchestrator-client packaging -- shell script only
[ $do_rpm -eq 1 ] && fpm -v "${RELEASE_VERSION}" --epoch 1 -f -s dir -n orchestrator-client -m shlomi-noach --description "MySQL replication topology management and HA: client script" --url "https://github.com/openark/orchestrator" --vendor "GitHub" --license "Apache 2.0" -C $build_path/orchestrator-client --prefix=/ --depends 'jq >= 1.5' -t rpm .
[ $do_deb -eq 1 ] && fpm -v "${RELEASE_VERSION}" --epoch 1 -f -s dir -n orchestrator-client -m shlomi-noach --description "MySQL replication topology management and HA: client script" --url "https://github.com/openark/orchestrator" --vendor "GitHub" --license "Apache 2.0" -C $build_path/orchestrator-client --prefix=/ --depends 'jq >= 1.5' -t deb --deb-no-default-config-files .
[ $do_rpm -eq 1 ] && fpm -a "$arch" -v "${RELEASE_VERSION}" --epoch 1 -f -s dir -n orchestrator-client -m shlomi-noach --description "MySQL replication topology management and HA: client script" --url "https://github.com/openark/orchestrator" --vendor "GitHub" --license "Apache 2.0" -C $build_path/orchestrator-client --prefix=/ --depends 'jq >= 1.5' -t rpm .
[ $do_deb -eq 1 ] && fpm -a "$arch" -v "${RELEASE_VERSION}" --epoch 1 -f -s dir -n orchestrator-client -m shlomi-noach --description "MySQL replication topology management and HA: client script" --url "https://github.com/openark/orchestrator" --vendor "GitHub" --license "Apache 2.0" -C $build_path/orchestrator-client --prefix=/ --depends 'jq >= 1.5' -t deb --deb-no-default-config-files .

if [ -n "$package_name_extra" ] ; then
# Strip the version core out of a string like "3.2.6-pre123+g1234567", yielding "3.2.6".
Expand Down
8 changes: 4 additions & 4 deletions go/app/cli.go
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,7 @@ func Cli(command string, strict bool, instance string, destination string, owner
}
validateInstanceIsFound(instanceKey)

lostReplicas, equalReplicas, aheadReplicas, cannotReplicateReplicas, promotedReplica, err := inst.RegroupReplicas(instanceKey, false, func(candidateReplica *inst.Instance) { fmt.Println(candidateReplica.Key.DisplayString()) }, postponedFunctionsContainer)
lostReplicas, equalReplicas, aheadReplicas, cannotReplicateReplicas, promotedReplica, err := inst.RegroupReplicas(instanceKey, false, true, func(candidateReplica *inst.Instance) { fmt.Println(candidateReplica.Key.DisplayString()) }, postponedFunctionsContainer)
lostReplicas = append(lostReplicas, cannotReplicateReplicas...)

postponedFunctionsContainer.Wait()
Expand Down Expand Up @@ -387,7 +387,7 @@ func Cli(command string, strict bool, instance string, destination string, owner
log.Fatal("Cannot deduce instance:", instance)
}

instance, _, _, _, _, err := inst.GetCandidateReplica(instanceKey, false)
instance, _, _, _, _, err := inst.GetCandidateReplica(instanceKey, false, false)
if err != nil {
log.Fatale(err)
} else {
Expand Down Expand Up @@ -450,7 +450,7 @@ func Cli(command string, strict bool, instance string, destination string, owner
}
validateInstanceIsFound(instanceKey)

lostReplicas, movedReplicas, cannotReplicateReplicas, promotedReplica, err := inst.RegroupReplicasGTID(instanceKey, false, true, func(candidateReplica *inst.Instance) { fmt.Println(candidateReplica.Key.DisplayString()) }, postponedFunctionsContainer, nil)
lostReplicas, movedReplicas, cannotReplicateReplicas, promotedReplica, err := inst.RegroupReplicasGTID(instanceKey, false, true, true, func(candidateReplica *inst.Instance) { fmt.Println(candidateReplica.Key.DisplayString()) }, postponedFunctionsContainer, nil)
lostReplicas = append(lostReplicas, cannotReplicateReplicas...)

if promotedReplica == nil {
Expand Down Expand Up @@ -1474,7 +1474,7 @@ func Cli(command string, strict bool, instance string, destination string, owner
log.Fatal("Cannot deduce instance:", instance)
}

recoveryAttempted, promotedInstanceKey, err := logic.CheckAndRecover(instanceKey, destinationKey, (command == "recover-lite"))
recoveryAttempted, promotedInstanceKey, err := logic.CheckAndRecover(instanceKey, destinationKey, (command == "recover-lite"), true)
if err != nil {
log.Fatale(err)
}
Expand Down
1 change: 1 addition & 0 deletions go/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,7 @@ type Configuration struct {
SkipBinlogEventsContaining []string // When scanning/comparing binlogs for Pseudo-GTID, skip entries containing given texts. These are NOT regular expressions (would consume too much CPU while scanning binlogs), just substrings to find.
ReduceReplicationAnalysisCount bool // When true, replication analysis will only report instances where possibility of handled problems is possible in the first place (e.g. will not report most leaf nodes, that are mostly uninteresting). When false, provides an entry for every known instance
FailureDetectionPeriodBlockMinutes int // The time for which an instance's failure discovery is kept "active", so as to avoid concurrent "discoveries" of the instance's failure; this precedes any recovery process, if any.
RecoveryBlockCrossDatacenterFailovers bool // When true, recovery of a master in one datacenter will not result in failover to a replica in another datacenter. Such failovers must be triggered manually via the UI or API
RecoveryPeriodBlockMinutes int // (supported for backwards compatibility but please use newer `RecoveryPeriodBlockSeconds` instead) The time for which an instance's recovery is kept "active", so as to avoid concurrent recoveries on same instance as well as flapping
RecoveryPeriodBlockSeconds int // (overrides `RecoveryPeriodBlockMinutes`) The time for which an instance's recovery is kept "active", so as to avoid concurrent recoveries on same instance as well as flapping
RecoveryIgnoreHostnameFilters []string // Recovery analysis will completely ignore hosts matching given patterns
Expand Down
6 changes: 3 additions & 3 deletions go/http/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -1159,7 +1159,7 @@ func (this *HttpAPI) RegroupReplicas(params martini.Params, r render.Render, req
return
}

lostReplicas, equalReplicas, aheadReplicas, cannotReplicateReplicas, promotedReplica, err := inst.RegroupReplicas(&instanceKey, false, nil, nil)
lostReplicas, equalReplicas, aheadReplicas, cannotReplicateReplicas, promotedReplica, err := inst.RegroupReplicas(&instanceKey, false, true, nil, nil)
lostReplicas = append(lostReplicas, cannotReplicateReplicas...)
if err != nil {
Respond(r, &APIResponse{Code: ERROR, Message: err.Error()})
Expand Down Expand Up @@ -1207,7 +1207,7 @@ func (this *HttpAPI) RegroupReplicasGTID(params martini.Params, r render.Render,
return
}

lostReplicas, movedReplicas, cannotReplicateReplicas, promotedReplica, err := inst.RegroupReplicasGTID(&instanceKey, false, true, nil, nil, nil)
lostReplicas, movedReplicas, cannotReplicateReplicas, promotedReplica, err := inst.RegroupReplicasGTID(&instanceKey, false, true, true, nil, nil, nil)
lostReplicas = append(lostReplicas, cannotReplicateReplicas...)

if err != nil {
Expand Down Expand Up @@ -3237,7 +3237,7 @@ func (this *HttpAPI) Recover(params martini.Params, r render.Render, req *http.R
}

skipProcesses := (req.URL.Query().Get("skipProcesses") == "true") || (params["skipProcesses"] == "true")
recoveryAttempted, promotedInstanceKey, err := logic.CheckAndRecover(&instanceKey, candidateKey, skipProcesses)
recoveryAttempted, promotedInstanceKey, err := logic.CheckAndRecover(&instanceKey, candidateKey, skipProcesses, true)
if err != nil {
Respond(r, &APIResponse{Code: ERROR, Message: err.Error(), Details: instanceKey})
return
Expand Down
2 changes: 1 addition & 1 deletion go/inst/analysis_dao.go
Original file line number Diff line number Diff line change
Expand Up @@ -500,7 +500,7 @@ func GetReplicationAnalysis(clusterName string, hints *ReplicationAnalysisHints)
a.ClusterDetails.ClusterName, a.IsMaster, a.LastCheckValid, a.LastCheckPartialSuccess, a.CountReplicas, a.CountValidReplicas, a.CountValidReplicatingReplicas, a.CountLaggingReplicas, a.CountDelayedReplicas, a.CountReplicasFailingToConnectToMaster,
)
if util.ClearToLog("analysis_dao", analysisMessage) {
log.Debugf(analysisMessage)
log.Debug(analysisMessage)
}
}
if !a.IsReplicationGroupMember /* Traditional Async/Semi-sync replication issue detection */ {
Expand Down
2 changes: 1 addition & 1 deletion go/inst/audit_dao.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ func AuditOperation(auditType string, instanceKey *InstanceKey, message string)
}()
}
if !auditWrittenToFile {
log.Infof(logMessage)
log.Info(logMessage)
}
auditOperationCounter.Inc(1)

Expand Down
2 changes: 1 addition & 1 deletion go/inst/instance_dao.go
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,7 @@ func logReadTopologyInstanceError(instanceKey *InstanceKey, hint string, err err
strings.Replace(hint, "%", "%%", -1), // escape %
err)
}
return log.Errorf(msg)
return log.Error(msg)
}

// ReadTopologyInstance collects information on the state of a MySQL
Expand Down
21 changes: 14 additions & 7 deletions go/inst/instance_topology.go
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,7 @@ Cleanup:

if err == nil {
message := fmt.Sprintf("moved %+v via equivalence coordinates below %+v", *instanceKey, *otherKey)
log.Debugf(message)
log.Debug(message)
AuditOperation("move-equivalent", instanceKey, message)
}
return instance, err
Expand Down Expand Up @@ -2264,7 +2264,7 @@ func chooseCandidateReplica(replicas [](*Instance)) (candidateReplica *Instance,
}

// GetCandidateReplica chooses the best replica to promote given a (possibly dead) master
func GetCandidateReplica(masterKey *InstanceKey, forRematchPurposes bool) (*Instance, [](*Instance), [](*Instance), [](*Instance), [](*Instance), error) {
func GetCandidateReplica(masterKey *InstanceKey, forRematchPurposes bool, isGraceful bool) (*Instance, [](*Instance), [](*Instance), [](*Instance), [](*Instance), error) {
var candidateReplica *Instance
aheadReplicas := [](*Instance){}
equalReplicas := [](*Instance){}
Expand Down Expand Up @@ -2292,6 +2292,12 @@ func GetCandidateReplica(masterKey *InstanceKey, forRematchPurposes bool) (*Inst
return candidateReplica, aheadReplicas, equalReplicas, laterReplicas, cannotReplicateReplicas, err
}
if candidateReplica != nil {
AuditOperation("get-candidate-replica", masterKey, fmt.Sprintf("Graceful: %v, should block cross DC failovers: %v in DC: %v", isGraceful, config.Config.RecoveryBlockCrossDatacenterFailovers, dataCenterHint))
// In automatic failover cases, respect cross-datacenter failover configuration
if !isGraceful && config.Config.RecoveryBlockCrossDatacenterFailovers && dataCenterHint != "" && candidateReplica.DataCenter != dataCenterHint {
AuditOperation("get-candidate-replica", masterKey, fmt.Sprintf("Candidate replica %+v is in different data center (%v) than master %+v (%v), automatic failover blocked", candidateReplica.Key, candidateReplica.DataCenter, *masterKey, dataCenterHint))
return nil, aheadReplicas, equalReplicas, laterReplicas, cannotReplicateReplicas, fmt.Errorf("candidate replica %+v is in different data center (%v) than master %+v (%v), automatic failover blocked", candidateReplica.Key, candidateReplica.DataCenter, *masterKey, dataCenterHint)
}
mostUpToDateReplica := replicas[0]
if candidateReplica.ExecBinlogCoordinates.SmallerThan(&mostUpToDateReplica.ExecBinlogCoordinates) {
log.Warningf("GetCandidateReplica: chosen replica: %+v is behind most-up-to-date replica: %+v", candidateReplica.Key, mostUpToDateReplica.Key)
Expand Down Expand Up @@ -2344,7 +2350,7 @@ func RegroupReplicasPseudoGTID(
candidateReplica *Instance,
err error,
) {
candidateReplica, aheadReplicas, equalReplicas, laterReplicas, cannotReplicateReplicas, err = GetCandidateReplica(masterKey, true)
candidateReplica, aheadReplicas, equalReplicas, laterReplicas, cannotReplicateReplicas, err = GetCandidateReplica(masterKey, true, false)
if err != nil {
if !returnReplicaEvenOnFailureToRegroup {
candidateReplica = nil
Expand Down Expand Up @@ -2461,7 +2467,7 @@ func RegroupReplicasPseudoGTIDIncludingSubReplicasOfBinlogServers(
log.Debugf("RegroupReplicasIncludingSubReplicasOfBinlogServers: most up to date binlog server of %+v: %+v", *masterKey, mostUpToDateBinlogServer.Key)

// Find the most up to date candidate replica:
candidateReplica, _, _, _, _, err := GetCandidateReplica(masterKey, true)
candidateReplica, _, _, _, _, err := GetCandidateReplica(masterKey, true, false)
if err != nil {
return log.Errore(err)
}
Expand Down Expand Up @@ -2515,6 +2521,7 @@ func RegroupReplicasGTID(
masterKey *InstanceKey,
returnReplicaEvenOnFailureToRegroup bool,
startReplicationOnCandidate bool,
isGraceful bool,
onCandidateReplicaChosen func(*Instance),
postponedFunctionsContainer *PostponedFunctionsContainer,
postponeAllMatchOperations func(*Instance, bool) bool,
Expand All @@ -2527,7 +2534,7 @@ func RegroupReplicasGTID(
) {
var emptyReplicas [](*Instance)
var unmovedReplicas [](*Instance)
candidateReplica, aheadReplicas, equalReplicas, laterReplicas, cannotReplicateReplicas, err := GetCandidateReplica(masterKey, true)
candidateReplica, aheadReplicas, equalReplicas, laterReplicas, cannotReplicateReplicas, err := GetCandidateReplica(masterKey, true, isGraceful)
if err != nil {
if !returnReplicaEvenOnFailureToRegroup {
candidateReplica = nil
Expand Down Expand Up @@ -2597,7 +2604,7 @@ func RegroupReplicasBinlogServers(masterKey *InstanceKey, returnReplicaEvenOnFai

// RegroupReplicas is a "smart" method of promoting one replica over the others ("promoting" it on top of its siblings)
// This method decides which strategy to use: GTID, Pseudo-GTID, Binlog Servers.
func RegroupReplicas(masterKey *InstanceKey, returnReplicaEvenOnFailureToRegroup bool,
func RegroupReplicas(masterKey *InstanceKey, returnReplicaEvenOnFailureToRegroup bool, isGraceful bool,
onCandidateReplicaChosen func(*Instance),
postponedFunctionsContainer *PostponedFunctionsContainer) (

Expand Down Expand Up @@ -2637,7 +2644,7 @@ func RegroupReplicas(masterKey *InstanceKey, returnReplicaEvenOnFailureToRegroup
}
if allGTID {
log.Debugf("RegroupReplicas: using GTID to regroup replicas of %+v", *masterKey)
unmovedReplicas, movedReplicas, cannotReplicateReplicas, candidateReplica, err := RegroupReplicasGTID(masterKey, returnReplicaEvenOnFailureToRegroup, true, onCandidateReplicaChosen, nil, nil)
unmovedReplicas, movedReplicas, cannotReplicateReplicas, candidateReplica, err := RegroupReplicasGTID(masterKey, returnReplicaEvenOnFailureToRegroup, true, isGraceful, onCandidateReplicaChosen, nil, nil)
return unmovedReplicas, emptyReplicas, movedReplicas, cannotReplicateReplicas, candidateReplica, err
}
if allBinlogServers {
Expand Down
3 changes: 2 additions & 1 deletion go/inst/instance_topology_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,11 @@ package inst
import (
"math/rand"

"testing"

"github.com/openark/golib/log"
test "github.com/openark/golib/tests"
"github.com/openark/orchestrator/go/config"
"testing"
)

var (
Expand Down
2 changes: 1 addition & 1 deletion go/logic/orchestrator.go
Original file line number Diff line number Diff line change
Expand Up @@ -681,7 +681,7 @@ func ContinuousDiscovery() {
return
}
if runCheckAndRecoverOperationsTimeRipe() {
CheckAndRecover(nil, nil, false)
CheckAndRecover(nil, nil, false, false)
} else {
log.Debugf("Waiting for %+v seconds to pass before running failure detection/recovery", checkAndRecoverWaitPeriod.Seconds())
}
Expand Down
Loading
Loading