Skip to content

Commit 69b2bf2

Browse files
committed
<fix>[compute]: Fix cancelLongJob error
Add a retry mechanism to the cancellation of host tasks during VM live migration. By default the cancellation is retried 3 times, with an interval of 1s between attempts. Resolves: ZSTAC-53214 Change-Id: I7174900153214677a6b74646a786c7065696b62 <fix>[compute]: Fix cancelLongJob error When cancelling a host migration task, treat the cancellation as successful if the migration long job has already ended up in the Canceled state. Also update the internal message parameter names. Resolves: ZSTAC-53214 Change-Id: I7174900153214677a6b74646a786c7062696b62
1 parent 1ad9a57 commit 69b2bf2

File tree

9 files changed

+141
-6
lines changed

9 files changed

+141
-6
lines changed

compute/src/main/java/org/zstack/compute/host/HostManagerImpl.java

+4
Original file line numberDiff line numberDiff line change
@@ -510,6 +510,8 @@ private void handle(CancelHostTasksMsg msg) {
510510
nmsg.setHostUuids(msg.getHostUuids());
511511
nmsg.setSearchedMnIds(msg.getSearchedMnIds());
512512
nmsg.setCancellationApiId(msg.getCancellationApiId());
513+
nmsg.setInterval(msg.getInterval());
514+
nmsg.setTimes(msg.getTimes());
513515
bus.makeServiceIdByManagementNodeId(nmsg, HostConstant.SERVICE_ID, restMnIds.get(0));
514516
bus.send(nmsg, new CloudBusCallBack(msg) {
515517
@Override
@@ -534,6 +536,8 @@ public void run(MessageReply r) {
534536
CancelHostTaskMsg cmsg = new CancelHostTaskMsg();
535537
cmsg.setHostUuid(hostUuid);
536538
cmsg.setCancellationApiId(msg.getCancellationApiId());
539+
cmsg.setInterval(msg.getInterval());
540+
cmsg.setTimes(msg.getTimes());
537541
bus.makeLocalServiceId(cmsg, HostConstant.SERVICE_ID);
538542
bus.send(cmsg, new CloudBusCallBack(compl) {
539543
@Override

compute/src/main/java/org/zstack/compute/vm/VmInstanceBase.java

+2
Original file line numberDiff line numberDiff line change
@@ -6672,6 +6672,8 @@ protected void handle(CancelMigrateVmMsg msg) {
66726672

66736673
CancelHostTasksMsg cmsg = new CancelHostTasksMsg();
66746674
cmsg.setCancellationApiId(msg.getCancellationApiId());
6675+
cmsg.setInterval(1);
6676+
cmsg.setTimes(3);
66756677
bus.makeLocalServiceId(cmsg, HostConstant.SERVICE_ID);
66766678
bus.send(cmsg, new CloudBusCallBack(msg) {
66776679
@Override

header/src/main/java/org/zstack/header/host/CancelHostTaskMsg.java

+19
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,26 @@
77
*/
88
public class CancelHostTaskMsg extends CancelMessage implements HostMessage {
99
private String hostUuid;
10+
private Integer times;
11+
private Integer interval;
1012

13+
public Integer getInterval() {
14+
return interval;
15+
}
16+
17+
public void setInterval(Integer interval) {
18+
this.interval = interval;
19+
}
20+
21+
public Integer getTimes() {
22+
return times;
23+
}
24+
25+
public void setTimes(Integer times) {
26+
this.times = times;
27+
}
28+
29+
@Override
1130
public String getHostUuid() {
1231
return hostUuid;
1332
}

header/src/main/java/org/zstack/header/host/CancelHostTasksMsg.java

+18
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,24 @@
1313
public class CancelHostTasksMsg extends CancelMessage {
1414
private List<String> searchedMnIds = new ArrayList<>();
1515
private List<String> hostUuids = new ArrayList<>();
16+
private Integer times;
17+
private Integer interval;
18+
19+
public Integer getInterval() {
20+
return interval;
21+
}
22+
23+
public void setInterval(Integer interval) {
24+
this.interval = interval;
25+
}
26+
27+
public Integer getTimes() {
28+
return times;
29+
}
30+
31+
public void setTimes(Integer times) {
32+
this.times = times;
33+
}
1634

1735
public List<String> getSearchedMnIds() {
1836
return searchedMnIds;

header/src/main/java/org/zstack/header/longjob/LongJobConstants.java

+2
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ public class LongJobConstants {
77
public static final String SERVICE_ID = "longjob";
88
public static final String ACTION_CATEGORY = "longjob";
99

10+
public static final String NO_JOB_TO_CANCEL = "no matched job to cancel";
11+
1012
public enum LongJobOperation {
1113
Start,
1214
Resume,

longjob/src/main/java/org/zstack/longjob/LongJobManagerImpl.java

+5
Original file line numberDiff line numberDiff line change
@@ -335,6 +335,11 @@ public void success(Boolean cancelled) {
335335

336336
@Override
337337
public void fail(ErrorCode errorCode) {
338+
if (Q.New(LongJobVO.class).eq(LongJobVO_.uuid, vo.getUuid()).eq(LongJobVO_.state, LongJobState.Canceled).isExists()){
339+
completion.success();
340+
return;
341+
}
342+
338343
logger.error(String.format("failed to cancel longjob [uuid:%s, name:%s]", vo.getUuid(), vo.getName()));
339344
completion.fail(errorCode);
340345
}

plugin/kvm/src/main/java/org/zstack/kvm/KVMAgentCommands.java

+18
Original file line numberDiff line numberDiff line change
@@ -3575,6 +3575,24 @@ public static class PrimaryStorageCommand extends AgentCommand {
35753575

35763576
public static class CancelCmd extends AgentCommand implements CancelCommand {
35773577
private String cancellationApiId;
3578+
private Integer times;
3579+
private Integer interval;
3580+
3581+
public Integer getTimes() {
3582+
return times;
3583+
}
3584+
3585+
public void setTimes(Integer times) {
3586+
this.times = times;
3587+
}
3588+
3589+
public Integer getInterval() {
3590+
return interval;
3591+
}
3592+
3593+
public void setInterval(Integer interval) {
3594+
this.interval = interval;
3595+
}
35783596

35793597
@Override
35803598
public void setCancellationApiId(String cancellationApiId) {

plugin/kvm/src/main/java/org/zstack/kvm/KVMHost.java

+7-3
Original file line numberDiff line numberDiff line change
@@ -4554,7 +4554,7 @@ public String getName() {
45544554

45554555
private void handle(final CancelHostTaskMsg msg) {
45564556
CancelHostTaskReply reply = new CancelHostTaskReply();
4557-
cancelJob(msg.getCancellationApiId(), new Completion(msg) {
4557+
cancelJob(msg, new Completion(msg) {
45584558
@Override
45594559
public void success() {
45604560
bus.reply(msg, reply);
@@ -4568,9 +4568,13 @@ public void fail(ErrorCode errorCode) {
45684568
});
45694569
}
45704570

4571-
private void cancelJob(String apiId, Completion completion) {
4571+
private void cancelJob(CancelHostTaskMsg msg, Completion completion) {
45724572
CancelCmd cmd = new CancelCmd();
4573-
cmd.setCancellationApiId(apiId);
4573+
cmd.setCancellationApiId(msg.getCancellationApiId());
4574+
if (msg.getInterval() != null && msg.getTimes() != null) {
4575+
cmd.setInterval(msg.getInterval());
4576+
cmd.setTimes(msg.getTimes());
4577+
}
45744578
new Http<>(cancelJob, cmd, CancelRsp.class).call(new ReturnValueCompletion<CancelRsp>(completion) {
45754579
@Override
45764580
public void success(CancelRsp ret) {

test/src/test/groovy/org/zstack/test/integration/longjob/LiveMigrateVmJobCase.groovy

+66-3
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import org.zstack.header.allocator.HostCapacityVO
1313
import org.zstack.header.allocator.HostCapacityVO_
1414
import org.zstack.header.host.HostVO
1515
import org.zstack.header.host.HostVO_
16+
import org.zstack.header.longjob.LongJobConstants
1617
import org.zstack.header.longjob.LongJobVO
1718
import org.zstack.header.longjob.LongJobVO_
1819
import org.zstack.header.network.service.NetworkServiceType
@@ -28,7 +29,6 @@ import org.zstack.test.integration.ZStackTest
2829
import org.zstack.testlib.EnvSpec
2930
import org.zstack.testlib.SubCase
3031
import org.zstack.utils.data.SizeUnit
31-
import org.zstack.utils.gson.JSONObjectUtil
3232

3333
/**
3434
* Created by camile on 18-3-7.
@@ -190,6 +190,7 @@ class LiveMigrateVmJobCase extends SubCase {
190190
testLiveMigrateVmLongJobCancel()
191191
testLiveMigrateVmLongJobCancelFail()
192192
testLiveMigrateVmLongJobCancelBeforeMigrate()
193+
testLiveMigrateVmNoJobToCancel()
193194
}
194195
}
195196

@@ -294,6 +295,68 @@ class LiveMigrateVmJobCase extends SubCase {
294295
assert canceled
295296
}
296297

298+
void testLiveMigrateVmNoJobToCancel() {
299+
env.cleanSimulatorHandlers()
300+
301+
APIMigrateVmMsg msg = new APIMigrateVmMsg()
302+
msg.hostUuid = host2.uuid
303+
msg.vmInstanceUuid = vm1.uuid
304+
def canceled = false
305+
def migrating = false
306+
307+
env.simulator(KVMConstant.KVM_MIGRATE_VM_PATH) { HttpEntity<String> e ->
308+
migrating = true
309+
while (!canceled) {
310+
sleep(500)
311+
}
312+
return new KVMAgentCommands.MigrateVmResponse()
313+
}
314+
KVMAgentCommands.CancelCmd cmd = null
315+
env.simulator(AgentConstant.CANCEL_JOB) { HttpEntity<String> e ->
316+
cmd = json(e.body, KVMAgentCommands.CancelCmd.class)
317+
canceled = true
318+
def rsp = new KVMAgentCommands.CancelRsp()
319+
rsp.setError(LongJobConstants.NO_JOB_TO_CANCEL)
320+
return rsp
321+
}
322+
323+
LongJobInventory jobInv = submitLongJob {
324+
jobName = msg.getClass().getSimpleName()
325+
jobData = gson.toJson(msg)
326+
} as LongJobInventory
327+
328+
assert jobInv.jobName == msg.getClass().getSimpleName()
329+
assert jobInv.state == LongJobState.Running
330+
331+
while (!migrating) {
332+
sleep(500)
333+
}
334+
expectError {
335+
cancelLongJob {
336+
uuid = jobInv.uuid
337+
}
338+
}
339+
340+
retryInSecs() {
341+
LongJobVO job = dbFindByUuid(jobInv.getUuid(), LongJobVO.class)
342+
assert job.state.toString() == LongJobState.Succeeded.toString()
343+
}
344+
assert host2.uuid == dbf.findByUuid(vm1.uuid, VmInstanceVO.class).hostUuid
345+
assert canceled
346+
assert cmd.getInterval() == 1
347+
assert cmd.getTimes() == 3
348+
349+
SQL.New(LongJobVO.class).eq(LongJobVO_.uuid, jobInv.getUuid()).set(LongJobVO_.state, org.zstack.header.longjob.LongJobState.Canceled).update();
350+
351+
cancelLongJob {
352+
uuid = jobInv.uuid
353+
}
354+
355+
SQL.New(LongJobVO.class).eq(LongJobVO_.uuid, jobInv.getUuid()).set(LongJobVO_.state, org.zstack.header.longjob.LongJobState.Succeeded).update();
356+
357+
env.cleanSimulatorHandlers()
358+
}
359+
297360
void testLiveMigrateVmLongJobCancelFail() {
298361
APIMigrateVmMsg msg = new APIMigrateVmMsg()
299362
msg.hostUuid = host1.uuid
@@ -366,7 +429,7 @@ class LiveMigrateVmJobCase extends SubCase {
366429
return new KVMAgentCommands.MigrateVmResponse()
367430
}
368431

369-
env.message(AllocateHostMsg.class){ AllocateHostMsg amsg, CloudBus bus ->
432+
env.message(AllocateHostMsg.class) { AllocateHostMsg amsg, CloudBus bus ->
370433
while (!canceled) {
371434
sleep(500)
372435
}
@@ -376,7 +439,7 @@ class LiveMigrateVmJobCase extends SubCase {
376439
long afterAllocateMemInByte = mem - instance.memorySize
377440
long afterAllocateCpuCount = cpu - instance.cpuNum
378441

379-
SQL.New(HostCapacityVO.class).eq(HostCapacityVO_.uuid, host2.uuid)
442+
SQL.New(HostCapacityVO.class).eq(HostCapacityVO_.uuid, host2.uuid)
380443
.set(HostCapacityVO_.availableMemory, afterAllocateMemInByte)
381444
.set(HostCapacityVO_.availableCpu, afterAllocateCpuCount)
382445
.update()

0 commit comments

Comments
 (0)