Skip to content

Commit 96edadc

Browse files
committed
add support for proper cleanup of snapshots and prevent vol snapshot of running vm
1 parent 9e03f4b commit 96edadc

File tree

2 files changed

+462
-44
lines changed

2 files changed

+462
-44
lines changed

plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/storage/KVMStorageProcessor.java

Lines changed: 293 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@
5858
import com.fasterxml.jackson.core.JsonProcessingException;
5959
import com.fasterxml.jackson.databind.JsonNode;
6060
import com.fasterxml.jackson.databind.ObjectMapper;
61+
import com.cloud.utils.script.OutputInterpreter;
6162
import org.apache.cloudstack.agent.directdownload.DirectDownloadAnswer;
6263
import org.apache.cloudstack.agent.directdownload.DirectDownloadCommand;
6364
import org.apache.cloudstack.direct.download.DirectDownloadHelper;
@@ -1193,61 +1194,304 @@ private void deleteSnapshotOnPrimary(final CopyCommand cmd, final SnapshotObject
11931194
}
11941195

11951196
/**
1196-
* Delete a CLVM snapshot using lvremove command.
1197+
* Delete a CLVM snapshot using comprehensive cleanup.
11971198
* For CLVM, the snapshot path stored in DB is: /dev/vgname/volumeuuid/snapshotuuid
1198-
* However, managesnapshot.sh creates the actual snapshot using MD5 hash of the snapshot UUID.
1199-
* The actual device is at: /dev/mapper/vgname-MD5(snapshotuuid)
1200-
* We need to compute the MD5 hash and remove both the snapshot LV and its COW volume.
1199+
* This method handles:
1200+
* 1. Checking if snapshot artifacts still exist
1201+
* 2. Device-mapper snapshot entry removal
1202+
* 3. COW volume removal
1203+
* 4. -real device restoration if this is the last snapshot
1204+
*
1205+
* @param snapshotPath The snapshot path from database
1206+
* @param checkExistence If true, checks if snapshot exists before cleanup (for explicit deletion)
1207+
* If false, always performs cleanup (for post-backup cleanup)
1208+
* @return true if cleanup was performed, false if snapshot didn't exist (when checkExistence=true)
12011209
*/
1202-
private void deleteClvmSnapshot(String snapshotPath) {
1210+
private boolean deleteClvmSnapshot(String snapshotPath, boolean checkExistence) {
1211+
logger.info("Starting CLVM snapshot deletion for path: {}, checkExistence: {}", snapshotPath, checkExistence);
1212+
12031213
try {
12041214
// Parse the snapshot path: /dev/acsvg/volume-uuid/snapshot-uuid
1205-
// Extract VG name and snapshot UUID
12061215
String[] pathParts = snapshotPath.split("/");
12071216
if (pathParts.length < 5) {
1208-
logger.warn("Invalid CLVM snapshot path format: " + snapshotPath + ", skipping deletion");
1209-
return;
1217+
logger.warn("Invalid CLVM snapshot path format: {}, expected format: /dev/vgname/volume-uuid/snapshot-uuid", snapshotPath);
1218+
return false;
12101219
}
12111220

12121221
String vgName = pathParts[2];
1222+
String volumeUuid = pathParts[3];
12131223
String snapshotUuid = pathParts[4];
12141224

1225+
logger.info("Parsed snapshot path - VG: {}, Volume: {}, Snapshot: {}", vgName, volumeUuid, snapshotUuid);
1226+
12151227
// Compute MD5 hash of snapshot UUID (same as managesnapshot.sh does)
12161228
String md5Hash = computeMd5Hash(snapshotUuid);
1229+
logger.debug("Computed MD5 hash for snapshot UUID {}: {}", snapshotUuid, md5Hash);
12171230

1218-
logger.debug("Deleting CLVM snapshot for UUID: " + snapshotUuid + " (MD5: " + md5Hash + ")");
1231+
// Check if snapshot exists (if requested)
1232+
if (checkExistence) {
1233+
String cowLvPath = "/dev/" + vgName + "/" + md5Hash + "-cow";
1234+
Script checkCow = new Script("/usr/sbin/lvs", 5000, logger);
1235+
checkCow.add("--noheadings");
1236+
checkCow.add(cowLvPath);
1237+
String checkResult = checkCow.execute();
12191238

1220-
// Remove the snapshot device mapper entry
1221-
// The snapshot device is at: /dev/mapper/vgname-md5hash
1222-
String vgNameEscaped = vgName.replace("-", "--");
1223-
String snapshotDevice = vgNameEscaped + "-" + md5Hash;
1239+
if (checkResult != null) {
1240+
// COW volume doesn't exist - snapshot was already cleaned up
1241+
logger.info("CLVM snapshot {} was already deleted, no cleanup needed", snapshotUuid);
1242+
return false;
1243+
}
1244+
logger.info("CLVM snapshot artifacts still exist for {}, performing cleanup", snapshotUuid);
1245+
}
1246+
1247+
// Check if this is the last snapshot for the volume
1248+
boolean isLastSnapshot = isLastSnapshotForVolume(vgName, volumeUuid);
1249+
logger.info("Is last snapshot for volume {}: {}", volumeUuid, isLastSnapshot);
1250+
1251+
// Perform clean-up
1252+
cleanupClvmSnapshotArtifacts(vgName, volumeUuid, md5Hash, isLastSnapshot);
1253+
1254+
logger.info("Successfully deleted CLVM snapshot: {}", snapshotPath);
1255+
return true;
12241256

1225-
Script dmRemoveCmd = new Script("/usr/sbin/dmsetup", 30000, logger);
1226-
dmRemoveCmd.add("remove");
1227-
dmRemoveCmd.add(snapshotDevice);
1228-
String dmResult = dmRemoveCmd.execute();
1229-
if (dmResult != null) {
1230-
logger.debug("dmsetup remove returned: {} (may already be removed)", dmResult);
1257+
} catch (Exception ex) {
1258+
logger.error("Exception while deleting CLVM snapshot {}", snapshotPath, ex);
1259+
return false;
1260+
}
1261+
}
1262+
1263+
/**
1264+
* Delete a CLVM snapshot after backup (always performs cleanup without checking existence).
1265+
* Convenience method for backward compatibility.
1266+
*/
1267+
private void deleteClvmSnapshot(String snapshotPath) {
1268+
deleteClvmSnapshot(snapshotPath, false);
1269+
}
1270+
1271+
/**
1272+
* Check if this is the last snapshot for a given volume in the VG.
1273+
*
1274+
* @param vgName The volume group name
1275+
* @param volumeUuid The origin volume UUID
1276+
* @return true if this is the last (or only) snapshot for the volume
1277+
*/
1278+
private boolean isLastSnapshotForVolume(String vgName, String volumeUuid) {
1279+
try {
1280+
Script listSnapshots = new Script("/usr/sbin/lvs", 10000, logger);
1281+
listSnapshots.add("--noheadings");
1282+
listSnapshots.add("-o");
1283+
listSnapshots.add("lv_name,origin");
1284+
listSnapshots.add(vgName);
1285+
1286+
logger.debug("Checking snapshot count for volume {} in VG {}", volumeUuid, vgName);
1287+
1288+
final OutputInterpreter.AllLinesParser parser = new OutputInterpreter.AllLinesParser();
1289+
String result = listSnapshots.execute(parser);
1290+
1291+
if (result == null) {
1292+
String output = parser.getLines();
1293+
if (output != null && !output.isEmpty()) {
1294+
int snapshotCount = 0;
1295+
String[] lines = output.split("\n");
1296+
String escapedUuid = volumeUuid.replace("-", "--");
1297+
1298+
for (String line : lines) {
1299+
String trimmedLine = line.trim();
1300+
if (!trimmedLine.isEmpty()) {
1301+
String[] parts = trimmedLine.split("\\s+");
1302+
if (parts.length >= 2) {
1303+
String origin = parts[1];
1304+
if (origin.equals(volumeUuid) || origin.equals(escapedUuid)) {
1305+
snapshotCount++;
1306+
}
1307+
}
1308+
}
1309+
}
1310+
1311+
logger.debug("Found {} snapshot(s) for volume {}", snapshotCount, volumeUuid);
1312+
return snapshotCount <= 1;
1313+
}
12311314
}
1315+
logger.debug("Could not determine snapshot count, assuming not last snapshot");
1316+
return false;
1317+
1318+
} catch (Exception e) {
1319+
logger.warn("Exception while checking if last snapshot: {}", e.getMessage());
1320+
return false;
1321+
}
1322+
}
1323+
1324+
/**
1325+
* Clean up CLVM snapshot artifacts including device-mapper entries, COW volumes,
1326+
* and potentially restore the -real device if this is the last snapshot.
1327+
*
1328+
* @param vgName The volume group name
1329+
* @param originVolumeUuid The UUID of the origin volume
1330+
* @param snapshotMd5Hash The MD5 hash of the snapshot UUID
1331+
* @param isLastSnapshot Whether this is the last snapshot of the origin volume
1332+
*/
1333+
private void cleanupClvmSnapshotArtifacts(String vgName, String originVolumeUuid, String snapshotMd5Hash, boolean isLastSnapshot) {
1334+
logger.info("Cleaning up CLVM snapshot artifacts: VG={}, Origin={}, SnapshotHash={}, IsLastSnapshot={}",
1335+
vgName, originVolumeUuid, snapshotMd5Hash, isLastSnapshot);
1336+
1337+
try {
1338+
String vgNameEscaped = vgName.replace("-", "--");
1339+
String originEscaped = originVolumeUuid.replace("-", "--");
1340+
1341+
String snapshotDevice = vgNameEscaped + "-" + snapshotMd5Hash;
12321342

1233-
// Remove the COW (copy-on-write) volume: /dev/vgname/md5hash-cow
1234-
String cowLvPath = "/dev/" + vgName + "/" + md5Hash + "-cow";
1235-
Script removeCowCmd = new Script("/usr/sbin/lvremove", 30000, logger);
1236-
removeCowCmd.add("-f");
1237-
removeCowCmd.add(cowLvPath);
1343+
removeSnapshotDeviceMapperEntry(snapshotDevice);
12381344

1239-
String cowResult = removeCowCmd.execute();
1240-
if (cowResult != null) {
1241-
logger.warn("Failed to remove CLVM COW volume {} : {}",cowLvPath, cowResult);
1345+
removeCowVolume(vgName, snapshotMd5Hash);
1346+
1347+
if (isLastSnapshot) {
1348+
logger.info("Step 3: This is the last snapshot, restoring origin volume {} from snapshot-origin state", originVolumeUuid);
1349+
restoreOriginVolumeFromSnapshotState(vgName, originVolumeUuid, vgNameEscaped, originEscaped);
12421350
} else {
1243-
logger.debug("Successfully deleted CLVM snapshot COW volume: {}", cowLvPath);
1351+
logger.info("Step 3: Skipped - other snapshots still exist for volume {}", originVolumeUuid);
12441352
}
12451353

1354+
logger.info("Successfully cleaned up CLVM snapshot artifacts");
1355+
1356+
} catch (Exception ex) {kvmstoragep
1357+
logger.error("Exception during CLVM snapshot artifact cleanup: {}", ex.getMessage(), ex);
1358+
}
1359+
}
1360+
1361+
private void removeSnapshotDeviceMapperEntry(String snapshotDevice) {
1362+
logger.info("Step 1: Removing snapshot device-mapper entry: {}", snapshotDevice);
1363+
1364+
Script dmRemoveSnapshot = new Script("/usr/sbin/dmsetup", 10000, logger);
1365+
dmRemoveSnapshot.add("remove");
1366+
dmRemoveSnapshot.add(snapshotDevice);
1367+
1368+
logger.debug("Executing: dmsetup remove {}", snapshotDevice);
1369+
String dmResult = dmRemoveSnapshot.execute();
1370+
if (dmResult == null) {
1371+
logger.info("Successfully removed device-mapper entry: {}", snapshotDevice);
1372+
} else {
1373+
logger.debug("dmsetup remove returned: {} (may already be removed)", dmResult);
1374+
}
1375+
}
1376+
1377+
private void removeCowVolume(String vgName, String snapshotMd5Hash) {
1378+
String cowLvName = snapshotMd5Hash + "-cow";
1379+
String cowLvPath = "/dev/" + vgName + "/" + cowLvName;
1380+
logger.info("Step 2: Removing COW volume: {}", cowLvPath);
1381+
1382+
Script removeCow = new Script("/usr/sbin/lvremove", 10000, logger);
1383+
removeCow.add("-f");
1384+
removeCow.add(cowLvPath);
1385+
1386+
logger.debug("Executing: lvremove -f {}", cowLvPath);
1387+
String cowResult = removeCow.execute();
1388+
if (cowResult == null) {
1389+
logger.info("Successfully removed COW volume: {}", cowLvPath);
1390+
} else {
1391+
logger.warn("Failed to remove COW volume {}: {}", cowLvPath, cowResult);
1392+
}
1393+
}
1394+
1395+
/**
1396+
* Restore an origin volume from snapshot-origin state back to normal state.
1397+
* This removes the -real device and reconfigures the volume device-mapper entry.
1398+
* Should only be called when deleting the last snapshot of a volume.
1399+
*
1400+
* @param vgName The volume group name
1401+
* @param volumeUuid The volume UUID
1402+
* @param vgNameEscaped The VG name with hyphens doubled for device-mapper
1403+
* @param volumeEscaped The volume UUID with hyphens doubled for device-mapper
1404+
*/
1405+
private void restoreOriginVolumeFromSnapshotState(String vgName, String volumeUuid, String vgNameEscaped, String volumeEscaped) {
1406+
try {
1407+
String originDevice = vgNameEscaped + "-" + volumeEscaped;
1408+
String realDevice = originDevice + "-real";
1409+
1410+
logger.info("Restoring volume {} from snapshot-origin state", volumeUuid);
1411+
1412+
// Check if -real device exists
1413+
Script checkReal = new Script("/usr/sbin/dmsetup", 5000, logger);
1414+
checkReal.add("info");
1415+
checkReal.add(realDevice);
1416+
1417+
logger.debug("Checking if -real device exists: dmsetup info {}", realDevice);
1418+
String checkResult = checkReal.execute();
1419+
if (checkResult != null) {
1420+
logger.debug("No -real device found for {}, volume may already be in normal state", volumeUuid);
1421+
return;
1422+
}
1423+
1424+
logger.info("Found -real device, proceeding with restoration");
1425+
1426+
suspendOriginDevice(originDevice);
1427+
1428+
logger.debug("Getting device-mapper table from -real device");
1429+
Script getTable = new Script("/usr/sbin/dmsetup", 5000, logger);
1430+
getTable.add("table");
1431+
getTable.add(realDevice);
1432+
1433+
OutputInterpreter.AllLinesParser tableParser = new OutputInterpreter.AllLinesParser();
1434+
String tableResult = getTable.execute(tableParser);
1435+
String realTable = tableParser.getLines();
1436+
1437+
resumeAndRemoveRealDevice(originDevice, realDevice, tableResult, realTable, volumeUuid);
1438+
12461439
} catch (Exception ex) {
1247-
logger.error("Exception while deleting CLVM snapshot {}", snapshotPath, ex);
1440+
logger.error("Exception during volume restoration from snapshot-origin state: {}", ex.getMessage(), ex);
12481441
}
12491442
}
12501443

1444+
private void suspendOriginDevice(String originDevice) {
1445+
logger.debug("Suspending origin device: {}", originDevice);
1446+
Script suspendOrigin = new Script("/usr/sbin/dmsetup", 5000, logger);
1447+
suspendOrigin.add("suspend");
1448+
suspendOrigin.add(originDevice);
1449+
String suspendResult = suspendOrigin.execute();
1450+
if (suspendResult != null) {
1451+
logger.warn("Failed to suspend origin device {}: {}", originDevice, suspendResult);
1452+
}
1453+
}
1454+
1455+
private void resumeAndRemoveRealDevice(String originDevice, String realDevice, String tableResult, String realTable, String volumeUuid) {
1456+
if (tableResult == null && realTable != null && !realTable.isEmpty()) {
1457+
logger.debug("Restoring original table to origin device: {}", realTable);
1458+
1459+
Script loadTable = new Script("/bin/bash", 10000, logger);
1460+
loadTable.add("-c");
1461+
loadTable.add("echo '" + realTable + "' | /usr/sbin/dmsetup load " + originDevice);
1462+
1463+
String loadResult = loadTable.execute();
1464+
if (loadResult != null) {
1465+
logger.warn("Failed to load table to origin device: {}", loadResult);
1466+
}
1467+
1468+
logger.debug("Resuming origin device");
1469+
Script resumeOrigin = new Script("/usr/sbin/dmsetup", 5000, logger);
1470+
resumeOrigin.add("resume");
1471+
resumeOrigin.add(originDevice);
1472+
String resumeResult = resumeOrigin.execute();
1473+
if (resumeResult != null) {
1474+
logger.warn("Failed to resume origin device: {}", resumeResult);
1475+
}
1476+
1477+
logger.debug("Removing -real device");
1478+
Script removeReal = new Script("/usr/sbin/dmsetup", 5000, logger);
1479+
removeReal.add("remove");
1480+
removeReal.add(realDevice);
1481+
String removeResult = removeReal.execute();
1482+
if (removeResult == null) {
1483+
logger.info("Successfully removed -real device and restored origin volume {}", volumeUuid);
1484+
} else {
1485+
logger.warn("Failed to remove -real device: {}", removeResult);
1486+
}
1487+
} else {
1488+
logger.warn("Failed to get table from -real device, aborting restoration");
1489+
Script resumeOrigin = new Script("/usr/sbin/dmsetup", 5000, logger);
1490+
resumeOrigin.add("resume");
1491+
resumeOrigin.add(originDevice);
1492+
resumeOrigin.execute();
1493+
}
1494+
}
12511495
/**
12521496
* Compute MD5 hash of a string, matching what managesnapshot.sh does:
12531497
* echo "${snapshot}" | md5sum -t | awk '{ print $1 }'
@@ -1933,7 +2177,7 @@ public Answer createSnapshot(final CreateObjectCommand cmd) {
19332177
throw new CloudRuntimeException("VM is running, encrypted volume snapshots aren't supported");
19342178
}
19352179

1936-
if (StoragePoolType.CLVM.name().equals(primaryStore.getType())) {
2180+
if (StoragePoolType.CLVM == primaryStore.getPoolType()) {
19372181
throw new CloudRuntimeException("VM is running, live snapshots aren't supported with CLVM primary storage");
19382182
}
19392183
}
@@ -2972,6 +3216,25 @@ public Answer deleteSnapshot(final DeleteCommand cmd) {
29723216
if (snapshotTO.isKvmIncrementalSnapshot()) {
29733217
deleteCheckpoint(snapshotTO);
29743218
}
3219+
} else if (primaryPool.getType() == StoragePoolType.CLVM) {
3220+
// For CLVM, snapshots are typically already deleted from primary storage during backup
3221+
// via deleteSnapshotOnPrimary in the backupSnapshot finally block.
3222+
// This is called when the user explicitly deletes the snapshot via UI/API.
3223+
// We check if the snapshot still exists and clean it up if needed.
3224+
logger.info("Processing CLVM snapshot deletion (id={}, name={}, path={}) on primary storage",
3225+
snapshotTO.getId(), snapshotTO.getName(), snapshotTO.getPath());
3226+
3227+
String snapshotPath = snapshotTO.getPath();
3228+
if (snapshotPath != null && !snapshotPath.isEmpty()) {
3229+
boolean wasDeleted = deleteClvmSnapshot(snapshotPath, true);
3230+
if (wasDeleted) {
3231+
logger.info("Successfully cleaned up CLVM snapshot {} from primary storage", snapshotName);
3232+
} else {
3233+
logger.info("CLVM snapshot {} was already deleted from primary storage during backup, no cleanup needed", snapshotName);
3234+
}
3235+
} else {
3236+
logger.debug("CLVM snapshot path is null or empty, assuming already cleaned up");
3237+
}
29753238
} else {
29763239
logger.warn("Operation not implemented for storage pool type of " + primaryPool.getType().toString());
29773240
throw new InternalErrorException("Operation not implemented for storage pool type of " + primaryPool.getType().toString());

0 commit comments

Comments
 (0)