Skip to content

Commit e439efc

Browse files
author
ukumawat
committed
HBASE-28158 Decouple RIT list management from TRSP
1 parent a47fa6a commit e439efc

30 files changed

+321
-167
lines changed

hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1033,6 +1033,8 @@ private void finishActiveMasterInitialization() throws IOException, InterruptedE
10331033
Map<Class<?>, List<Procedure<MasterProcedureEnv>>> procsByType = procedureExecutor
10341034
.getActiveProceduresNoCopy().stream().collect(Collectors.groupingBy(p -> p.getClass()));
10351035

1036+
// This manager must be accessed AFTER hbase:meta is confirmed on line..
1037+
this.tableStateManager = new TableStateManager(this);
10361038
// Create Assignment Manager
10371039
this.assignmentManager = createAssignmentManager(this, masterRegion);
10381040
this.assignmentManager.start();
@@ -1056,8 +1058,6 @@ private void finishActiveMasterInitialization() throws IOException, InterruptedE
10561058
.map(p -> (ServerCrashProcedure) p).map(p -> p.getServerName()).collect(Collectors.toSet()),
10571059
Sets.union(rsListStorage.getAll(), walManager.getLiveServersFromWALDir()),
10581060
walManager.getSplittingServersFromWALDir());
1059-
// This manager must be accessed AFTER hbase:meta is confirmed on line..
1060-
this.tableStateManager = new TableStateManager(this);
10611061

10621062
startupTaskGroup.addTask("Initializing ZK system trackers");
10631063
initializeZKBasedSystemTrackers();
@@ -2007,7 +2007,7 @@ private void balanceThrottling(long nextBalanceStartTime, int maxRegionsInTransi
20072007
// But if there are zero regions in transition, it can skip sleep to speed up.
20082008
while (
20092009
!interrupted && EnvironmentEdgeManager.currentTime() < nextBalanceStartTime
2010-
&& this.assignmentManager.getRegionStates().hasRegionsInTransition()
2010+
&& this.assignmentManager.getRegionTransitScheduledCount() > 0
20112011
) {
20122012
try {
20132013
Thread.sleep(100);
@@ -2019,8 +2019,7 @@ private void balanceThrottling(long nextBalanceStartTime, int maxRegionsInTransi
20192019
// Throttling by max number regions in transition
20202020
while (
20212021
!interrupted && maxRegionsInTransition > 0
2022-
&& this.assignmentManager.getRegionStates().getRegionsInTransitionCount()
2023-
>= maxRegionsInTransition
2022+
&& this.assignmentManager.getRegionTransitScheduledCount() >= maxRegionsInTransition
20242023
&& EnvironmentEdgeManager.currentTime() <= cutoffTime
20252024
) {
20262025
try {
@@ -2099,7 +2098,7 @@ public BalanceResponse balance(BalanceRequest request) throws IOException {
20992098

21002099
synchronized (this.balancer) {
21012100
// Only allow one balance run at a time.
2102-
if (this.assignmentManager.hasRegionsInTransition()) {
2101+
if (this.assignmentManager.getRegionTransitScheduledCount() > 0) {
21032102
List<RegionStateNode> regionsInTransition = assignmentManager.getRegionsInTransition();
21042103
// if hbase:meta region is in transition, result of assignment cannot be recorded
21052104
// ignore the force flag in that case
@@ -2114,8 +2113,8 @@ public BalanceResponse balance(BalanceRequest request) throws IOException {
21142113

21152114
if (!request.isIgnoreRegionsInTransition() || metaInTransition) {
21162115
LOG.info("Not running balancer (ignoreRIT=false" + ", metaRIT=" + metaInTransition
2117-
+ ") because " + regionsInTransition.size() + " region(s) in transition: " + toPrint
2118-
+ (truncated ? "(truncated list)" : ""));
2116+
+ ") because " + assignmentManager.getRegionTransitScheduledCount()
2117+
+ " region(s) in transition: " + toPrint + (truncated ? "(truncated list)" : ""));
21192118
return responseBuilder.build();
21202119
}
21212120
}
@@ -2251,7 +2250,7 @@ public boolean normalizeRegions(final NormalizeTableFilterParams ntfp,
22512250
if (skipRegionManagementAction("region normalizer")) {
22522251
return false;
22532252
}
2254-
if (assignmentManager.hasRegionsInTransition()) {
2253+
if (assignmentManager.getRegionTransitScheduledCount() > 0) {
22552254
return false;
22562255
}
22572256

@@ -3081,7 +3080,7 @@ public ClusterMetrics getClusterMetricsWithoutCoprocessor(EnumSet<Option> option
30813080
case REGIONS_IN_TRANSITION: {
30823081
if (assignmentManager != null) {
30833082
builder.setRegionsInTransition(
3084-
assignmentManager.getRegionStates().getRegionsStateInTransition());
3083+
new ArrayList<>(assignmentManager.getRegionsStateInTransition()));
30853084
}
30863085
break;
30873086
}

hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java

Lines changed: 78 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,14 @@
2222
import java.util.ArrayList;
2323
import java.util.Collection;
2424
import java.util.Collections;
25+
import java.util.Comparator;
2526
import java.util.HashMap;
2627
import java.util.HashSet;
2728
import java.util.List;
2829
import java.util.Map;
2930
import java.util.Set;
31+
import java.util.SortedSet;
32+
import java.util.TreeSet;
3033
import java.util.concurrent.CompletableFuture;
3134
import java.util.concurrent.Future;
3235
import java.util.concurrent.TimeUnit;
@@ -232,12 +235,15 @@ public class AssignmentManager {
232235

233236
private final int forceRegionRetainmentRetries;
234237

238+
private final RegionInTransitionTracker regionInTransitionTracker;
239+
235240
public AssignmentManager(MasterServices master, MasterRegion masterRegion) {
236241
this(master, masterRegion, new RegionStateStore(master, masterRegion));
237242
}
238243

239244
AssignmentManager(MasterServices master, MasterRegion masterRegion, RegionStateStore stateStore) {
240245
this.master = master;
246+
regionInTransitionTracker = new RegionInTransitionTracker(master.getTableStateManager());
241247
this.regionStateStore = stateStore;
242248
this.metrics = new MetricsAssignmentManager();
243249
this.masterRegion = masterRegion;
@@ -331,6 +337,8 @@ public void start() throws IOException, KeeperException {
331337
regionNode.setLastHost(lastHost);
332338
regionNode.setRegionLocation(regionLocation);
333339
regionNode.setOpenSeqNum(openSeqNum);
340+
regionInTransitionTracker.handleRegionStateNodeOperation(regionNode);
341+
334342
if (regionNode.getProcedure() != null) {
335343
regionNode.getProcedure().stateLoaded(this, regionNode);
336344
}
@@ -382,7 +390,7 @@ public void setupRIT(List<TransitRegionStateProcedure> procs) {
382390
return;
383391
}
384392
}
385-
LOG.info("Attach {} to {} to restore RIT", proc, regionNode);
393+
LOG.info("Attach {} to {} to restore", proc, regionNode);
386394
regionNode.setProcedure(proc);
387395
});
388396
}
@@ -411,6 +419,7 @@ public void stop() {
411419

412420
// Stop the RegionStateStore
413421
regionStates.clear();
422+
regionInTransitionTracker.stop();
414423

415424
// Update meta events (for testing)
416425
if (hasProcExecutor) {
@@ -1093,7 +1102,7 @@ private int submitUnassignProcedure(TableName tableName,
10931102
regionNode.lock();
10941103
try {
10951104
if (shouldSubmit.apply(regionNode)) {
1096-
if (regionNode.isInTransition()) {
1105+
if (regionNode.isTransitionScheduled()) {
10971106
logRIT.accept(regionNode);
10981107
inTransitionCount++;
10991108
continue;
@@ -1702,10 +1711,8 @@ public boolean isRegionTwiceOverThreshold(final RegionInfo regionInfo) {
17021711
}
17031712

17041713
protected void update(final AssignmentManager am) {
1705-
final RegionStates regionStates = am.getRegionStates();
17061714
this.statTimestamp = EnvironmentEdgeManager.currentTime();
1707-
update(regionStates.getRegionsStateInTransition(), statTimestamp);
1708-
update(regionStates.getRegionFailedOpen(), statTimestamp);
1715+
update(am.getRegionsStateInTransition(), statTimestamp);
17091716

17101717
if (LOG.isDebugEnabled() && ritsOverThreshold != null && !ritsOverThreshold.isEmpty()) {
17111718
LOG.debug("RITs over threshold: {}",
@@ -1873,6 +1880,11 @@ public void visitRegionState(Result result, final RegionInfo regionInfo, final S
18731880
if (regionNode.getProcedure() != null) {
18741881
regionNode.getProcedure().stateLoaded(AssignmentManager.this, regionNode);
18751882
}
1883+
// add regions to RIT while visiting the meta
1884+
regionInTransitionTracker.handleRegionStateNodeOperation(regionNode);
1885+
if (master.getServerManager().isServerDead(regionNode.getRegionLocation())) {
1886+
regionInTransitionTracker.regionCrashed(regionNode);
1887+
}
18761888
}
18771889
};
18781890

@@ -2046,15 +2058,52 @@ public Pair<Integer, Integer> getReopenStatus(TableName tableName) {
20462058
return new Pair<Integer, Integer>(ritCount, states.size());
20472059
}
20482060

2061+
// This comparator sorts the RegionStates by time stamp then Region name.
2062+
// Comparing by timestamp alone can lead us to discard different RegionStates that happen
2063+
// to share a timestamp.
2064+
private static class RegionStateStampComparator implements Comparator<RegionState> {
2065+
@Override
2066+
public int compare(final RegionState l, final RegionState r) {
2067+
int stampCmp = Long.compare(l.getStamp(), r.getStamp());
2068+
return stampCmp != 0 ? stampCmp : RegionInfo.COMPARATOR.compare(l.getRegion(), r.getRegion());
2069+
}
2070+
}
2071+
2072+
public final static RegionStateStampComparator REGION_STATE_STAMP_COMPARATOR =
2073+
new RegionStateStampComparator();
2074+
20492075
// ============================================================================================
20502076
// TODO: Region State In Transition
20512077
// ============================================================================================
20522078
public boolean hasRegionsInTransition() {
2053-
return regionStates.hasRegionsInTransition();
2079+
return regionInTransitionTracker.hasRegionsInTransition();
20542080
}
20552081

20562082
public List<RegionStateNode> getRegionsInTransition() {
2057-
return regionStates.getRegionsInTransition();
2083+
return regionInTransitionTracker.getRegionsInTransition();
2084+
}
2085+
2086+
public boolean isRegionInTransition(final RegionInfo regionInfo) {
2087+
return regionInTransitionTracker.isRegionInTransition(regionInfo);
2088+
}
2089+
2090+
public int getRegionTransitScheduledCount() {
2091+
return regionStates.getRegionTransitScheduledCount();
2092+
}
2093+
2094+
/**
2095+
* Get the number of regions in transition.
2096+
*/
2097+
public int getRegionsInTransitionCount() {
2098+
return regionInTransitionTracker.getRegionsInTransition().size();
2099+
}
2100+
2101+
public SortedSet<RegionState> getRegionsStateInTransition() {
2102+
final SortedSet<RegionState> rit = new TreeSet<RegionState>(REGION_STATE_STAMP_COMPARATOR);
2103+
for (RegionStateNode node : getRegionsInTransition()) {
2104+
rit.add(node.toRegionState());
2105+
}
2106+
return rit;
20582107
}
20592108

20602109
public List<RegionInfo> getAssignedRegions() {
@@ -2122,6 +2171,8 @@ private CompletableFuture<Void> transitStateAndUpdate(RegionStateNode regionNode
21222171
if (e != null) {
21232172
// revert
21242173
regionNode.setState(state);
2174+
} else {
2175+
regionInTransitionTracker.handleRegionStateNodeOperation(regionNode);
21252176
}
21262177
});
21272178
return future;
@@ -2170,6 +2221,7 @@ CompletableFuture<Void> regionFailedOpen(RegionStateNode regionNode, boolean giv
21702221
if (regionLocation != null) {
21712222
regionStates.removeRegionFromServer(regionLocation, regionNode);
21722223
}
2224+
regionInTransitionTracker.handleRegionStateNodeOperation(regionNode);
21732225
} else {
21742226
// revert
21752227
regionNode.setState(state);
@@ -2230,6 +2282,7 @@ CompletableFuture<Void> persistToMeta(RegionStateNode regionNode) {
22302282
// on table that contains state.
22312283
setMetaAssigned(regionInfo, true);
22322284
}
2285+
regionInTransitionTracker.handleRegionStateNodeOperation(regionNode);
22332286
});
22342287
}
22352288

@@ -2247,6 +2300,7 @@ public CompletableFuture<Void> regionClosedAbnormally(RegionStateNode regionNode
22472300
regionNode.setLastHost(regionLocation);
22482301
regionStates.removeRegionFromServer(regionLocation, regionNode);
22492302
}
2303+
regionInTransitionTracker.handleRegionStateNodeOperation(regionNode);
22502304
} else {
22512305
// revert
22522306
regionNode.setState(state);
@@ -2260,6 +2314,17 @@ public CompletableFuture<Void> regionClosedAbnormally(RegionStateNode regionNode
22602314
// The above methods can only be called in TransitRegionStateProcedure(and related procedures)
22612315
// ============================================================================================
22622316

2317+
// As soon as a server a crashed, region hosting on that are un-available, this method helps to
2318+
// track those un-available regions. This method can only be called from ServerCrashProcedure.
2319+
public void markRegionsAsCrashed(List<RegionInfo> regionsOnCrashedServer,
2320+
ServerName crashedServerName) {
2321+
for (RegionInfo regionInfo : regionsOnCrashedServer) {
2322+
RegionStateNode node = regionStates.getOrCreateRegionStateNode(regionInfo);
2323+
if (node.getRegionLocation() == crashedServerName)
2324+
regionInTransitionTracker.regionCrashed(node);
2325+
}
2326+
}
2327+
22632328
public void markRegionAsSplit(final RegionInfo parent, final ServerName serverName,
22642329
final RegionInfo daughterA, final RegionInfo daughterB) throws IOException {
22652330
// Update hbase:meta. Parent will be marked offline and split up in hbase:meta.
@@ -2284,6 +2349,9 @@ public void markRegionAsSplit(final RegionInfo parent, final ServerName serverNa
22842349
// it is a split parent. And usually only one of them can match, as after restart, the region
22852350
// state will be changed from SPLIT to CLOSED.
22862351
regionStateStore.splitRegion(parent, daughterA, daughterB, serverName, td);
2352+
regionInTransitionTracker.handleRegionStateNodeOperation(node);
2353+
regionInTransitionTracker.handleRegionStateNodeOperation(nodeA);
2354+
regionInTransitionTracker.handleRegionStateNodeOperation(nodeB);
22872355
if (shouldAssignFavoredNodes(parent)) {
22882356
List<ServerName> onlineServers = this.master.getServerManager().getOnlineServersList();
22892357
getFavoredNodePromoter().generateFavoredNodesForDaughter(onlineServers, parent, daughterA,
@@ -2303,12 +2371,14 @@ public void markRegionAsSplit(final RegionInfo parent, final ServerName serverNa
23032371
public void markRegionAsMerged(final RegionInfo child, final ServerName serverName,
23042372
RegionInfo[] mergeParents) throws IOException {
23052373
final RegionStateNode node = regionStates.getOrCreateRegionStateNode(child);
2306-
node.setState(State.MERGED);
23072374
for (RegionInfo ri : mergeParents) {
23082375
regionStates.deleteRegion(ri);
2376+
regionInTransitionTracker.handleRegionDelete(ri);
23092377
}
2378+
23102379
TableDescriptor td = master.getTableDescriptors().get(child.getTable());
23112380
regionStateStore.mergeRegions(child, mergeParents, serverName, td);
2381+
regionInTransitionTracker.handleRegionStateNodeOperation(node);
23122382
if (shouldAssignFavoredNodes(child)) {
23132383
getFavoredNodePromoter().generateFavoredNodesForMergedRegion(child, mergeParents);
23142384
}

hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManagerUtil.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -154,15 +154,15 @@ private static TransitRegionStateProcedure[] createAssignProcedures(MasterProced
154154
regionNode.lock();
155155
try {
156156
if (ignoreIfInTransition) {
157-
if (regionNode.isInTransition()) {
157+
if (regionNode.isTransitionScheduled()) {
158158
return null;
159159
}
160160
} else {
161161
// should never fail, as we have the exclusive region lock, and the region is newly
162162
// created, or has been successfully closed so should not be on any servers, so SCP
163163
// will
164164
// not process it either.
165-
assert !regionNode.isInTransition();
165+
assert !regionNode.isTransitionScheduled();
166166
}
167167
regionNode.setProcedure(proc);
168168
} finally {
@@ -184,7 +184,7 @@ private static TransitRegionStateProcedure[] createAssignProcedures(MasterProced
184184
// apply ignoreRITs to replica regions as well.
185185
if (
186186
!ignoreIfInTransition || !env.getAssignmentManager().getRegionStates()
187-
.getOrCreateRegionStateNode(ri).isInTransition()
187+
.getOrCreateRegionStateNode(ri).isTransitionScheduled()
188188
) {
189189
replicaRegionInfos.add(ri);
190190
}
@@ -232,7 +232,7 @@ private static TransitRegionStateProcedure[] createRoundRobinAssignProcedures(
232232
for (RegionInfo region : regionsAndReplicas) {
233233
if (
234234
env.getAssignmentManager().getRegionStates().getOrCreateRegionStateNode(region)
235-
.isInTransition()
235+
.isTransitionScheduled()
236236
) {
237237
return null;
238238
}

0 commit comments

Comments
 (0)