Skip to content

Commit 8bd3bba

Browse files
author
ukumawat
committed
HBASE-28158 handle balancer throttling
1 parent 99caa2c commit 8bd3bba

File tree

8 files changed

+38
-24
lines changed

8 files changed

+38
-24
lines changed

hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2007,7 +2007,7 @@ private void balanceThrottling(long nextBalanceStartTime, int maxRegionsInTransi
20072007
// But if there are zero regions in transition, it can skip sleep to speed up.
20082008
while (
20092009
!interrupted && EnvironmentEdgeManager.currentTime() < nextBalanceStartTime
2010-
&& this.assignmentManager.hasRegionsInTransition()
2010+
&& this.assignmentManager.getOngoingTRSPCount()>0
20112011
) {
20122012
try {
20132013
Thread.sleep(100);
@@ -2019,7 +2019,7 @@ private void balanceThrottling(long nextBalanceStartTime, int maxRegionsInTransi
20192019
// Throttling by max number regions in transition
20202020
while (
20212021
!interrupted && maxRegionsInTransition > 0
2022-
&& this.assignmentManager.getRegionsInTransitionCount() >= maxRegionsInTransition
2022+
&& this.assignmentManager.getOngoingTRSPCount() >= maxRegionsInTransition
20232023
&& EnvironmentEdgeManager.currentTime() <= cutoffTime
20242024
) {
20252025
try {
@@ -2098,7 +2098,7 @@ public BalanceResponse balance(BalanceRequest request) throws IOException {
20982098

20992099
synchronized (this.balancer) {
21002100
// Only allow one balance run at a time.
2101-
if (this.assignmentManager.hasRegionsInTransition()) {
2101+
if (this.assignmentManager.getOngoingTRSPCount() >0) {
21022102
List<RegionStateNode> regionsInTransition = assignmentManager.getRegionsInTransition();
21032103
// if hbase:meta region is in transition, result of assignment cannot be recorded
21042104
// ignore the force flag in that case
@@ -2113,7 +2113,7 @@ public BalanceResponse balance(BalanceRequest request) throws IOException {
21132113

21142114
if (!request.isIgnoreRegionsInTransition() || metaInTransition) {
21152115
LOG.info("Not running balancer (ignoreRIT=false" + ", metaRIT=" + metaInTransition
2116-
+ ") because " + regionsInTransition.size() + " region(s) in transition: " + toPrint
2116+
+ ") because " + assignmentManager.getOngoingTRSPCount() + " region(s) in transition: " + toPrint
21172117
+ (truncated ? "(truncated list)" : ""));
21182118
return responseBuilder.build();
21192119
}
@@ -2250,7 +2250,7 @@ public boolean normalizeRegions(final NormalizeTableFilterParams ntfp,
22502250
if (skipRegionManagementAction("region normalizer")) {
22512251
return false;
22522252
}
2253-
if (assignmentManager.hasRegionsInTransition()) {
2253+
if (assignmentManager.getOngoingTRSPCount() > 0) {
22542254
return false;
22552255
}
22562256

hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2080,14 +2080,16 @@ public boolean hasRegionsInTransition() {
20802080
}
20812081

20822082
public List<RegionStateNode> getRegionsInTransition() {
2083-
return new ArrayList<RegionStateNode>(
2084-
regionInTransitionTracker.getRegionsInTransition().values());
2083+
return regionInTransitionTracker.getRegionsInTransition();
20852084
}
20862085

20872086
public boolean isRegionInTransition(final RegionInfo regionInfo) {
20882087
return regionInTransitionTracker.isRegionInTransition(regionInfo);
20892088
}
20902089

2090+
public int getOngoingTRSPCount() {
2091+
return regionStates.getOngoingTRSPCount();
2092+
}
20912093
/**
20922094
* Get the number of regions in transition.
20932095
*/
@@ -2096,11 +2098,7 @@ public int getRegionsInTransitionCount() {
20962098
}
20972099

20982100
public List<RegionState> getRegionsStateInTransition() {
2099-
final List<RegionState> rit = new ArrayList<RegionState>(getRegionsInTransitionCount());
2100-
for (RegionStateNode node : getRegionsInTransition()) {
2101-
rit.add(node.toRegionState());
2102-
}
2103-
return rit;
2101+
return getRegionsInTransition().stream().map(RegionStateNode::toRegionState).toList();
21042102
}
21052103

21062104
public SortedSet<RegionState> getRegionsInTransitionOrderedByTimestamp() {
@@ -2365,16 +2363,14 @@ public void markRegionAsSplit(final RegionInfo parent, final ServerName serverNa
23652363
public void markRegionAsMerged(final RegionInfo child, final ServerName serverName,
23662364
RegionInfo[] mergeParents) throws IOException {
23672365
final RegionStateNode node = regionStates.getOrCreateRegionStateNode(child);
2368-
node.setState(State.MERGED);
23692366
for (RegionInfo ri : mergeParents) {
23702367
regionStates.deleteRegion(ri);
2368+
regionInTransitionTracker.handleRegionDelete(ri);
23712369
}
23722370
// TODO need to handle delete and new region
23732371
TableDescriptor td = master.getTableDescriptors().get(child.getTable());
23742372
regionStateStore.mergeRegions(child, mergeParents, serverName, td);
23752373
regionInTransitionTracker.handleRegionStateNodeOperation(node);
2376-
Arrays.stream(mergeParents).forEach(regionInfo -> regionInTransitionTracker
2377-
.handleRegionStateNodeOperation(regionStates.getRegionStateNode(regionInfo)));
23782374
if (shouldAssignFavoredNodes(child)) {
23792375
getFavoredNodePromoter().generateFavoredNodesForMergedRegion(child, mergeParents);
23802376
}

hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManagerUtil.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ private static void unlock(List<RegionStateNode> regionNodes) {
9090

9191
static TransitRegionStateProcedure[] createUnassignProceduresForSplitOrMerge(
9292
MasterProcedureEnv env, Stream<RegionInfo> regions, int regionReplication) throws IOException {
93+
// TODO: code to get all the replicas
9394
List<RegionStateNode> regionNodes = regions
9495
.flatMap(hri -> IntStream.range(0, regionReplication)
9596
.mapToObj(i -> RegionReplicaUtil.getRegionInfoForReplica(hri, i)))
@@ -281,6 +282,7 @@ static void reopenRegionsForRollback(MasterProcedureEnv env, List<RegionInfo> re
281282
static void removeNonDefaultReplicas(MasterProcedureEnv env, Stream<RegionInfo> regions,
282283
int regionReplication) {
283284
// Remove from in-memory states
285+
// TODO: should we confirm here whether the replica regions are closed?
284286
regions.flatMap(hri -> IntStream.range(1, regionReplication)
285287
.mapToObj(i -> RegionReplicaUtil.getRegionInfoForReplica(hri, i))).forEach(hri -> {
286288
env.getAssignmentManager().getRegionStates().deleteRegion(hri);

hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionInTransitionTracker.java

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
*/
1818
package org.apache.hadoop.hbase.master.assignment;
1919

20+
import java.util.ArrayList;
2021
import java.util.List;
2122
import java.util.concurrent.ConcurrentSkipListMap;
2223
import org.apache.hadoop.hbase.client.RegionInfo;
@@ -51,7 +52,11 @@ public boolean isRegionInTransition(final RegionInfo regionInfo) {
5152

5253
public void handleRegionStateNodeOperation(RegionStateNode regionStateNode) {
5354
RegionState.State currentState = regionStateNode.getState();
54-
// if reiong is merged or split it should not be in RIT list
55+
// Only consider the default replica for availability
56+
if(regionStateNode.getRegionInfo().getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID){
57+
return;
58+
}
59+
// if region is merged or split it should not be in RIT list
5560
if (
5661
currentState == RegionState.State.SPLIT || currentState == RegionState.State.MERGED
5762
|| regionStateNode.getRegionInfo().isSplit()
@@ -64,8 +69,8 @@ public void handleRegionStateNodeOperation(RegionStateNode regionStateNode) {
6469
}
6570
}
6671

67-
public void handleRegionDelete(RegionStateNode regionStateNode) {
68-
removeRegionInTransition(regionStateNode.getRegionInfo());
72+
public void handleRegionDelete(RegionInfo regionInfo) {
73+
removeRegionInTransition(regionInfo);
6974
}
7075

7176
private List<RegionState.State> getExceptedRegionStates(RegionStateNode regionStateNode) {
@@ -99,8 +104,8 @@ public boolean hasRegionsInTransition() {
99104
return !regionInTransition.isEmpty();
100105
}
101106

102-
public ConcurrentSkipListMap<RegionInfo, RegionStateNode> getRegionsInTransition() {
103-
return regionInTransition;
107+
public List<RegionStateNode> getRegionsInTransition() {
108+
return new ArrayList<>(regionInTransition.values());
104109
}
105110

106111
}

hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStateNode.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
package org.apache.hadoop.hbase.master.assignment;
1919

2020
import java.util.Arrays;
21+
import java.util.concurrent.atomic.AtomicInteger;
2122
import org.apache.hadoop.hbase.HConstants;
2223
import org.apache.hadoop.hbase.ServerName;
2324
import org.apache.hadoop.hbase.TableName;
@@ -67,6 +68,7 @@
6768
public class RegionStateNode implements Comparable<RegionStateNode> {
6869

6970
private static final Logger LOG = LoggerFactory.getLogger(RegionStateNode.class);
71+
private final AtomicInteger trspCounter;
7072

7173
private static final class AssignmentProcedureEvent extends ProcedureEvent<RegionInfo> {
7274
public AssignmentProcedureEvent(final RegionInfo regionInfo) {
@@ -100,10 +102,11 @@ public AssignmentProcedureEvent(final RegionInfo regionInfo) {
100102

101103
private volatile long openSeqNum = HConstants.NO_SEQNUM;
102104

103-
RegionStateNode(RegionInfo regionInfo) {
105+
RegionStateNode(RegionInfo regionInfo, AtomicInteger trspCounter) {
104106
this.regionInfo = regionInfo;
105107
this.event = new AssignmentProcedureEvent(regionInfo);
106108
this.lock = new RegionStateNodeLock(regionInfo);
109+
this.trspCounter = trspCounter;
107110
}
108111

109112
/**
@@ -204,11 +207,13 @@ public ServerName setRegionLocation(final ServerName serverName) {
204207
public TransitRegionStateProcedure setProcedure(TransitRegionStateProcedure proc) {
205208
assert this.procedure == null;
206209
this.procedure = proc;
210+
trspCounter.incrementAndGet();
207211
return proc;
208212
}
209213

210214
public void unsetProcedure(TransitRegionStateProcedure proc) {
211215
assert this.procedure == proc;
216+
trspCounter.decrementAndGet();
212217
this.procedure = null;
213218
}
214219

hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,8 @@ public class RegionStates {
5353

5454
private final Object regionsMapLock = new Object();
5555

56+
private final AtomicInteger trspCounter = new AtomicInteger(0);
57+
5658
// TODO: Replace the ConcurrentSkipListMaps
5759
/**
5860
* A Map from {@link RegionInfo#getRegionName()} to {@link RegionStateNode}
@@ -103,7 +105,7 @@ public boolean isRegionInRegionStates(final RegionInfo hri) {
103105
RegionStateNode createRegionStateNode(RegionInfo regionInfo) {
104106
synchronized (regionsMapLock) {
105107
RegionStateNode node = regionsMap.computeIfAbsent(regionInfo.getRegionName(),
106-
key -> new RegionStateNode(regionInfo));
108+
key -> new RegionStateNode(regionInfo,trspCounter));
107109

108110
if (encodedRegionsMap.get(regionInfo.getEncodedName()) != node) {
109111
encodedRegionsMap.put(regionInfo.getEncodedName(), node);
@@ -599,6 +601,10 @@ public void addToOfflineRegions(final RegionStateNode regionNode) {
599601
regionOffline.put(regionNode.getRegionInfo(), regionNode);
600602
}
601603

604+
public int getOngoingTRSPCount() {
605+
return trspCounter.get();
606+
}
607+
602608
// ==========================================================================
603609
// Region FAIL_OPEN helpers
604610
// ==========================================================================

hbase-server/src/main/resources/hbase-webapps/master/assignmentManagerStatus.jsp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@
3232
AssignmentManager assignmentManager = master.getAssignmentManager();
3333
int limit = 100;
3434
35-
SortedSet<RegionState> rit = assignmentManager.getRegionStates().getRegionsInTransitionOrderedByTimestamp();
35+
SortedSet<RegionState> rit = assignmentManager.getRegionsInTransitionOrderedByTimestamp();
3636
3737
if (!rit.isEmpty()) {
3838
long currentTime = System.currentTimeMillis();

hbase-server/src/test/java/org/apache/hadoop/hbase/master/http/TestMasterStatusUtil.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ public void setupBasicMocks() {
9797
regionsInTransition
9898
.add(new RegionState(FAKE_HRI, RegionState.State.CLOSING, 12345L, FAKE_HOST));
9999
Mockito.doReturn(rs).when(am).getRegionStates();
100-
Mockito.doReturn(regionsInTransition).when(rs).getRegionsInTransition();
100+
Mockito.doReturn(regionsInTransition).when(am).getRegionsInTransition();
101101
Mockito.doReturn(am).when(master).getAssignmentManager();
102102
Mockito.doReturn(serverManager).when(master).getServerManager();
103103

0 commit comments

Comments
 (0)