Skip to content

Commit 36f2eba

Browse files
author
ukumawat
committed
HBASE-28158 resolve Test failures
1 parent 09ae915 commit 36f2eba

File tree

11 files changed

+97
-27
lines changed

11 files changed

+97
-27
lines changed

hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2007,7 +2007,7 @@ private void balanceThrottling(long nextBalanceStartTime, int maxRegionsInTransi
20072007
// But if there are zero regions in transition, it can skip sleep to speed up.
20082008
while (
20092009
!interrupted && EnvironmentEdgeManager.currentTime() < nextBalanceStartTime
2010-
&& this.assignmentManager.getScheduledRegionTransitionCount() > 0
2010+
&& this.assignmentManager.getRegionTransitScheduledCount() > 0
20112011
) {
20122012
try {
20132013
Thread.sleep(100);
@@ -2019,7 +2019,7 @@ private void balanceThrottling(long nextBalanceStartTime, int maxRegionsInTransi
20192019
// Throttling by max number regions in transition
20202020
while (
20212021
!interrupted && maxRegionsInTransition > 0
2022-
&& this.assignmentManager.getScheduledRegionTransitionCount() >= maxRegionsInTransition
2022+
&& this.assignmentManager.getRegionTransitScheduledCount() >= maxRegionsInTransition
20232023
&& EnvironmentEdgeManager.currentTime() <= cutoffTime
20242024
) {
20252025
try {
@@ -2098,7 +2098,7 @@ public BalanceResponse balance(BalanceRequest request) throws IOException {
20982098

20992099
synchronized (this.balancer) {
21002100
// Only allow one balance run at a time.
2101-
if (this.assignmentManager.getScheduledRegionTransitionCount() > 0) {
2101+
if (this.assignmentManager.getRegionTransitScheduledCount() > 0) {
21022102
List<RegionStateNode> regionsInTransition = assignmentManager.getRegionsInTransition();
21032103
// if hbase:meta region is in transition, result of assignment cannot be recorded
21042104
// ignore the force flag in that case
@@ -2113,8 +2113,8 @@ public BalanceResponse balance(BalanceRequest request) throws IOException {
21132113

21142114
if (!request.isIgnoreRegionsInTransition() || metaInTransition) {
21152115
LOG.info("Not running balancer (ignoreRIT=false" + ", metaRIT=" + metaInTransition
2116-
+ ") because " + assignmentManager.getScheduledRegionTransitionCount() + " region(s) in transition: "
2117-
+ toPrint + (truncated ? "(truncated list)" : ""));
2116+
+ ") because " + assignmentManager.getRegionTransitScheduledCount()
2117+
+ " region(s) in transition: " + toPrint + (truncated ? "(truncated list)" : ""));
21182118
return responseBuilder.build();
21192119
}
21202120
}
@@ -2250,7 +2250,7 @@ public boolean normalizeRegions(final NormalizeTableFilterParams ntfp,
22502250
if (skipRegionManagementAction("region normalizer")) {
22512251
return false;
22522252
}
2253-
if (assignmentManager.getScheduledRegionTransitionCount() > 0) {
2253+
if (assignmentManager.getRegionTransitScheduledCount() > 0) {
22542254
return false;
22552255
}
22562256

hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -390,7 +390,7 @@ public void setupRIT(List<TransitRegionStateProcedure> procs) {
390390
return;
391391
}
392392
}
393-
LOG.info("Attach {} to {} to restore RIT", proc, regionNode);
393+
LOG.info("Attach {} to {} to restore", proc, regionNode);
394394
regionNode.setProcedure(proc);
395395
});
396396
}
@@ -1714,7 +1714,6 @@ protected void update(final AssignmentManager am) {
17141714
final RegionStates regionStates = am.getRegionStates();
17151715
this.statTimestamp = EnvironmentEdgeManager.currentTime();
17161716
update(am.getRegionsStateInTransition(), statTimestamp);
1717-
update(regionStates.getRegionFailedOpen(), statTimestamp);
17181717

17191718
if (LOG.isDebugEnabled() && ritsOverThreshold != null && !ritsOverThreshold.isEmpty()) {
17201719
LOG.debug("RITs over threshold: {}",
@@ -1882,7 +1881,11 @@ public void visitRegionState(Result result, final RegionInfo regionInfo, final S
18821881
if (regionNode.getProcedure() != null) {
18831882
regionNode.getProcedure().stateLoaded(AssignmentManager.this, regionNode);
18841883
}
1884+
// add regions to RIT while visiting the meta
18851885
regionInTransitionTracker.handleRegionStateNodeOperation(regionNode);
1886+
if (master.getServerManager().isServerDead(regionNode.getRegionLocation())) {
1887+
regionInTransitionTracker.regionCrashed(regionNode);
1888+
}
18861889
}
18871890
};
18881891

@@ -2085,8 +2088,8 @@ public boolean isRegionInTransition(final RegionInfo regionInfo) {
20852088
return regionInTransitionTracker.isRegionInTransition(regionInfo);
20862089
}
20872090

2088-
public int getScheduledRegionTransitionCount() {
2089-
return regionStates.getScheduledTransitionCount();
2091+
public int getRegionTransitScheduledCount() {
2092+
return regionStates.getRegionTransitScheduledCount();
20902093
}
20912094

20922095
/**
@@ -2096,7 +2099,7 @@ public int getRegionsInTransitionCount() {
20962099
return regionInTransitionTracker.getRegionsInTransition().size();
20972100
}
20982101

2099-
public SortedSet<RegionState> getRegionsStateInTransition() {
2102+
public SortedSet<RegionState> getRegionsStateInTransition() {
21002103
final SortedSet<RegionState> rit = new TreeSet<RegionState>(REGION_STATE_STAMP_COMPARATOR);
21012104
for (RegionStateNode node : getRegionsInTransition()) {
21022105
rit.add(node.toRegionState());
@@ -2312,6 +2315,17 @@ public CompletableFuture<Void> regionClosedAbnormally(RegionStateNode regionNode
23122315
// The above methods can only be called in TransitRegionStateProcedure(and related procedures)
23132316
// ============================================================================================
23142317

2318+
// As soon as a server has crashed, the regions hosted on it are unavailable; this method helps to
2319+
// track those unavailable regions. This method can only be called from ServerCrashProcedure.
2320+
public void markRegionsAsCrashed(List<RegionInfo> regionsOnCrashedServer,
2321+
ServerName crashedServerName) {
2322+
for (RegionInfo regionInfo : regionsOnCrashedServer) {
2323+
RegionStateNode node = regionStates.getOrCreateRegionStateNode(regionInfo);
2324+
if (node.getRegionLocation() == crashedServerName)
2325+
regionInTransitionTracker.regionCrashed(node);
2326+
}
2327+
}
2328+
23152329
public void markRegionAsSplit(final RegionInfo parent, final ServerName serverName,
23162330
final RegionInfo daughterA, final RegionInfo daughterB) throws IOException {
23172331
// Update hbase:meta. Parent will be marked offline and split up in hbase:meta.

hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionInTransitionTracker.java

Lines changed: 20 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -50,32 +50,43 @@ public boolean isRegionInTransition(final RegionInfo regionInfo) {
5050
return regionInTransition.containsKey(regionInfo);
5151
}
5252

53+
public void regionCrashed(RegionStateNode regionStateNode) {
54+
if (addRegionInTransition(regionStateNode)) {
55+
LOG.debug("{} added to RIT list because hosting region server is crashed ",
56+
regionStateNode.getRegionInfo().getEncodedName());
57+
}
58+
}
59+
5360
public void handleRegionStateNodeOperation(RegionStateNode regionStateNode) {
5461
// only consider default replica for availability
5562
if (regionStateNode.getRegionInfo().getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) {
5663
return;
5764
}
5865

5966
RegionState.State currentState = regionStateNode.getState();
60-
boolean isTableEnabled = tableStateManager.isTableState(regionStateNode.getTable(), TableState.State.ENABLED,
61-
TableState.State.ENABLING);
62-
List<RegionState.State> terminalStates = isTableEnabled ? ENABLE_TABLE_REGION_STATE : DISABLE_TABLE_REGION_STATE;
67+
boolean isTableEnabled = tableStateManager.isTableState(regionStateNode.getTable(),
68+
TableState.State.ENABLED, TableState.State.ENABLING);
69+
List<RegionState.State> terminalStates =
70+
isTableEnabled ? ENABLE_TABLE_REGION_STATE : DISABLE_TABLE_REGION_STATE;
6371

6472
// if region is merged or split it should not be in RIT list
6573
if (
6674
currentState == RegionState.State.SPLIT || currentState == RegionState.State.MERGED
6775
|| regionStateNode.getRegionInfo().isSplit()
6876
) {
69-
if (removeRegionInTransition(regionStateNode.getRegionInfo())){
70-
LOG.debug("Removed {} from RIT list as it is split or merged",regionStateNode.getRegionInfo().getEncodedName());
77+
if (removeRegionInTransition(regionStateNode.getRegionInfo())) {
78+
LOG.debug("Removed {} from RIT list as it is split or merged",
79+
regionStateNode.getRegionInfo().getEncodedName());
7180
}
7281
} else if (!terminalStates.contains(currentState)) {
73-
if (addRegionInTransition(regionStateNode)){
74-
LOG.debug("{} added to RIT list because it is in-between state, region state : {} ", regionStateNode.getRegionInfo().getEncodedName(), currentState);
82+
if (addRegionInTransition(regionStateNode)) {
83+
LOG.debug("{} added to RIT list because it is in-between state, region state : {} ",
84+
regionStateNode.getRegionInfo().getEncodedName(), currentState);
7585
}
7686
} else {
77-
if (removeRegionInTransition(regionStateNode.getRegionInfo())){
78-
LOG.debug("Removed {} from RIT list as reached to terminal state {}",regionStateNode.getRegionInfo().getEncodedName(), currentState);
87+
if (removeRegionInTransition(regionStateNode.getRegionInfo())) {
88+
LOG.debug("Removed {} from RIT list as reached to terminal state {}",
89+
regionStateNode.getRegionInfo().getEncodedName(), currentState);
7990
}
8091
}
8192
}

hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -601,7 +601,7 @@ public void addToOfflineRegions(final RegionStateNode regionNode) {
601601
regionOffline.put(regionNode.getRegionInfo(), regionNode);
602602
}
603603

604-
public int getScheduledTransitionCount() {
604+
public int getRegionTransitScheduledCount() {
605605
return trspCounter.get();
606606
}
607607

hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ProcedureSyncWait.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -252,7 +252,8 @@ protected static void waitRegionInTransition(final MasterProcedureEnv env,
252252
new ProcedureSyncWait.Predicate<Boolean>() {
253253
@Override
254254
public Boolean evaluate() throws IOException {
255-
return !assignmentManager.getRegionStates().getRegionStateNode(region).isTransitionScheduled();
255+
return !assignmentManager.getRegionStates().getRegionStateNode(region)
256+
.isTransitionScheduled();
256257
}
257258
});
258259
}

hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,7 @@ protected Flow executeFromState(MasterProcedureEnv env, ServerCrashState state)
201201
break;
202202
case SERVER_CRASH_GET_REGIONS:
203203
this.regionsOnCrashedServer = getRegionsOnCrashedServer(env);
204+
env.getAssignmentManager().markRegionsAsCrashed(regionsOnCrashedServer, this.serverName);
204205
// Where to go next? Depends on whether we should split logs at all or
205206
// if we should do distributed log splitting.
206207
if (regionsOnCrashedServer != null) {

hbase-server/src/main/resources/hbase-webapps/master/assignmentManagerStatus.jsp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@
3232
AssignmentManager assignmentManager = master.getAssignmentManager();
3333
int limit = 100;
3434
35-
SortedSet<RegionState> rit = assignmentManager.getRegionsInTransitionOrderedByTimestamp();
35+
SortedSet<RegionState> rit = assignmentManager.getRegionsStateInTransition();
3636
3737
if (!rit.isEmpty()) {
3838
long currentTime = System.currentTimeMillis();

hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtil.java

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3448,6 +3448,30 @@ public boolean evaluate() throws IOException {
34483448
};
34493449
}
34503450

3451+
/**
3452+
* Returns a {@link Predicate} for checking that there are no region transition procedures scheduled in
3453+
* master
3454+
*/
3455+
public ExplainingPredicate<IOException> predicateNoRegionTransitScheduled() {
3456+
return new ExplainingPredicate<IOException>() {
3457+
@Override
3458+
public String explainFailure() throws IOException {
3459+
final AssignmentManager am = getMiniHBaseCluster().getMaster().getAssignmentManager();
3460+
return "Number of procedure scheduled for region transit: "
3461+
+ am.getRegionTransitScheduledCount();
3462+
}
3463+
3464+
@Override
3465+
public boolean evaluate() throws IOException {
3466+
HMaster master = getMiniHBaseCluster().getMaster();
3467+
if (master == null) return false;
3468+
AssignmentManager am = master.getAssignmentManager();
3469+
if (am == null) return false;
3470+
return am.getRegionTransitScheduledCount() == 0;
3471+
}
3472+
};
3473+
}
3474+
34513475
/**
34523476
* Returns a {@link Predicate} for checking that table is enabled
34533477
*/
@@ -3525,13 +3549,28 @@ public void waitUntilNoRegionsInTransition(final long timeout) throws IOExceptio
35253549
waitFor(timeout, predicateNoRegionsInTransition());
35263550
}
35273551

3552+
/**
3553+
* Wait until no region transition procedure (TRSP) is scheduled.
3554+
* @param timeout How long to wait.
3555+
*/
3556+
public void waitUntilNoRegionTransitScheduled(final long timeout) throws IOException {
3557+
waitFor(timeout, predicateNoRegionTransitScheduled());
3558+
}
3559+
35283560
/**
35293561
* Wait until no regions in transition. (time limit 15min)
35303562
*/
35313563
public void waitUntilNoRegionsInTransition() throws IOException {
35323564
waitUntilNoRegionsInTransition(15 * 60000);
35333565
}
35343566

3567+
/**
3568+
* Wait until no TRSP is present. (time limit 15min)
3569+
*/
3570+
public void waitUntilNoRegionTransitScheduled() throws IOException {
3571+
waitUntilNoRegionTransitScheduled(15 * 60000);
3572+
}
3573+
35353574
/**
35363575
* Wait until labels is ready in VisibilityLabelsCache.
35373576
*/

hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerMetrics.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,7 @@ public void testRITAssignmentManagerMetrics() throws Exception {
154154
// Sleep 5 seconds, wait for doMetrics chore catching up
155155
// the rit count consists of rit and failed opens. see RegionInTransitionStat#update
156156
// Waiting for the completion of rit makes the assert stable.
157-
TEST_UTIL.waitUntilNoRegionsInTransition();
157+
TEST_UTIL.waitUntilNoRegionTransitScheduled();
158158
Thread.sleep(MSG_INTERVAL * 5);
159159
METRICS_HELPER.assertGauge(MetricsAssignmentManagerSource.RIT_COUNT_NAME, 1, amSource);
160160
METRICS_HELPER.assertGauge(MetricsAssignmentManagerSource.RIT_COUNT_OVER_THRESHOLD_NAME, 1,

hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestSnapshotProcedureWithLockTimeout.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@ public void teardown() throws Exception {
9393
}
9494

9595
@Test
96+
// umesh testing
9697
public void testTakeZkCoordinatedSnapshot() {
9798
for (int i = 0; i < 10; i++) {
9899
try {

0 commit comments

Comments
 (0)