Skip to content

Commit

Permalink
HDDS-10590. [Snapshot] Synchronized snapshot purge, set snapshot prop…
Browse files Browse the repository at this point in the history
…erty and SstFilteringService (apache#6456)

(cherry picked from commit 7da5ecb)
  • Loading branch information
hemantk-12 authored and xichen01 committed Apr 23, 2024
1 parent 2f002cd commit 190dcde
Show file tree
Hide file tree
Showing 4 changed files with 174 additions and 87 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -761,7 +761,7 @@ public SnapshotInfo copyObject() {
public String toString() {
return "SnapshotInfo{" +
"snapshotId: '" + snapshotId + '\'' +
", name: '" + name + "'," +
", name: '" + name + '\'' +
", volumeName: '" + volumeName + '\'' +
", bucketName: '" + bucketName + '\'' +
", snapshotStatus: '" + snapshotStatus + '\'' +
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@

package org.apache.hadoop.ozone.om.request.snapshot;

import org.apache.commons.lang3.tuple.Triple;
import org.apache.hadoop.ozone.om.OMMetadataManager;
import org.apache.hadoop.ozone.om.exceptions.OMException;
import org.apache.ratis.server.protocol.TermIndex;
import org.apache.hadoop.hdds.utils.db.cache.CacheKey;
import org.apache.hadoop.hdds.utils.db.cache.CacheValue;
import org.apache.hadoop.ozone.om.OmMetadataManagerImpl;
Expand All @@ -39,11 +43,15 @@

import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Set;
import java.util.UUID;

import static org.apache.hadoop.ozone.om.lock.OzoneManagerLock.Resource.SNAPSHOT_LOCK;

/**
* Handles OMSnapshotPurge Request.
* This is an OM internal request. Does not need @RequireSnapshotFeatureState.
Expand Down Expand Up @@ -79,34 +87,63 @@ public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager,
Map<String, SnapshotInfo> updatedPathPreviousAndGlobalSnapshots =
new HashMap<>();

// Snapshots that are purged by the SnapshotDeletingService
// will update the next snapshot so that is can be deep cleaned
// by the KeyDeletingService in the next run.
// Each snapshot purge operation does three things:
// 1. Update the snapshot chain,
// 2. Update the deep clean flag for the next active snapshot (So that it can be
// deep cleaned by the KeyDeletingService in the next run),
// 3. Finally, purge the snapshot.
// All of these steps have to be performed only when it acquires all the necessary
// locks (lock on the snapshot to be purged, lock on the next active snapshot, and
// lock on the next path and global previous snapshots). Ideally, there is no need
// for locks for snapshot purge and can rely on OMStateMachine because OMStateMachine
// is going to process each request sequentially.
//
// But there is a problem with that. After filtering unnecessary SST files for a snapshot,
// SstFilteringService updates that snapshot's SstFilter flag. SstFilteringService cannot
// use SetSnapshotProperty API because it runs on each OM independently and One OM does
// not know if the snapshot has been filtered on the other OM in HA environment.
//
// If locks are not taken snapshot purge and SstFilteringService will cause a race condition
// and override one's update with another.
for (String snapTableKey : snapshotDbKeys) {
SnapshotInfo fromSnapshot = omMetadataManager.getSnapshotInfoTable()
.get(snapTableKey);

if (fromSnapshot == null) {
// Snapshot may have been purged in the previous iteration of SnapshotDeletingService.
LOG.warn("The snapshot {} is not longer in snapshot table, It maybe removed in the previous " +
"Snapshot purge request.", snapTableKey);
continue;
}
// To acquire all the locks, a set is maintained which is keyed by snapshotTableKey.
// snapshotTableKey is nothing but /volumeName/bucketName/snapshotName.
// Once all the locks are acquired, it performs the three steps mentioned above and
// release all the locks after that.
Set<Triple<String, String, String>> lockSet = new HashSet<>(4, 1);
try {
if (omMetadataManager.getSnapshotInfoTable().get(snapTableKey) == null) {
// Snapshot may have been purged in the previous iteration of SnapshotDeletingService.
LOG.warn("The snapshot {} is not longer in snapshot table, It maybe removed in the previous " +
"Snapshot purge request.", snapTableKey);
continue;
}

acquireLock(lockSet, snapTableKey, omMetadataManager);
SnapshotInfo fromSnapshot = omMetadataManager.getSnapshotInfoTable().get(snapTableKey);

SnapshotInfo nextSnapshot =
SnapshotUtils.getNextActiveSnapshot(fromSnapshot, snapshotChainManager, omSnapshotManager);

SnapshotInfo nextSnapshot = SnapshotUtils
.getNextActiveSnapshot(fromSnapshot,
snapshotChainManager, omSnapshotManager);

updateSnapshotInfoAndCache(nextSnapshot, omMetadataManager,
trxnLogIndex, updatedSnapInfos);
updateSnapshotChainAndCache(omMetadataManager, fromSnapshot,
trxnLogIndex, updatedPathPreviousAndGlobalSnapshots);
// Remove and close snapshot's RocksDB instance from SnapshotCache.
ozoneManager.getOmSnapshotManager().getSnapshotCache()
.invalidate(snapTableKey);
// Update SnapshotInfoTable cache.
ozoneManager.getMetadataManager().getSnapshotInfoTable()
.addCacheEntry(new CacheKey<>(fromSnapshot.getTableKey()), CacheValue.get(trxnLogIndex));
if (nextSnapshot != null) {
acquireLock(lockSet, nextSnapshot.getTableKey(), omMetadataManager);
}

// Update the chain first so that it has all the necessary locks before updating deep clean.
updateSnapshotChainAndCache(lockSet, omMetadataManager, fromSnapshot, trxnLogIndex,
updatedPathPreviousAndGlobalSnapshots);
updateSnapshotInfoAndCache(nextSnapshot, omMetadataManager, trxnLogIndex, updatedSnapInfos);
// Remove and close snapshot's RocksDB instance from SnapshotCache.
omSnapshotManager.getSnapshotCache().invalidate(snapTableKey);
// Update SnapshotInfoTable cache.
omMetadataManager.getSnapshotInfoTable()
.addCacheEntry(new CacheKey<>(fromSnapshot.getTableKey()), CacheValue.get(trxnLogIndex));
} finally {
for (Triple<String, String, String> lockKey: lockSet) {
omMetadataManager.getLock()
.releaseWriteLock(SNAPSHOT_LOCK, lockKey.getLeft(), lockKey.getMiddle(), lockKey.getRight());
}
}
}

omClientResponse = new OMSnapshotPurgeResponse(omResponse.build(),
Expand All @@ -120,20 +157,41 @@ public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager,
return omClientResponse;
}

private void acquireLock(Set<Triple<String, String, String>> lockSet, String snapshotTableKey,
OMMetadataManager omMetadataManager) throws IOException {
SnapshotInfo snapshotInfo = omMetadataManager.getSnapshotInfoTable().get(snapshotTableKey);

// It should not be the case that lock is required for non-existing snapshot.
if (snapshotInfo == null) {
LOG.error("Snapshot: '{}' doesn't not exist in snapshot table.", snapshotTableKey);
throw new OMException("Snapshot: '{" + snapshotTableKey + "}' doesn't not exist in snapshot table.",
OMException.ResultCodes.FILE_NOT_FOUND);
}
Triple<String, String, String> lockKey = Triple.of(snapshotInfo.getVolumeName(), snapshotInfo.getBucketName(),
snapshotInfo.getName());
if (!lockSet.contains(lockKey)) {
mergeOmLockDetails(omMetadataManager.getLock()
.acquireWriteLock(SNAPSHOT_LOCK, lockKey.getLeft(), lockKey.getMiddle(), lockKey.getRight()));
lockSet.add(lockKey);
}
}

private void updateSnapshotInfoAndCache(SnapshotInfo snapInfo,
OmMetadataManagerImpl omMetadataManager, long trxnLogIndex,
Map<String, SnapshotInfo> updatedSnapInfos) {
Map<String, SnapshotInfo> updatedSnapInfos) throws IOException {
if (snapInfo != null) {
// Fetch the latest value again after acquiring lock.
SnapshotInfo updatedSnapshotInfo = omMetadataManager.getSnapshotInfoTable().get(snapInfo.getTableKey());

// Setting next snapshot deep clean to false, Since the
// current snapshot is deleted. We can potentially
// reclaim more keys in the next snapshot.
snapInfo.setDeepClean(false);
updatedSnapshotInfo.setDeepClean(false);

// Update table cache first
omMetadataManager.getSnapshotInfoTable().addCacheEntry(
new CacheKey<>(snapInfo.getTableKey()),
CacheValue.get(trxnLogIndex, snapInfo));
updatedSnapInfos.put(snapInfo.getTableKey(), snapInfo);
omMetadataManager.getSnapshotInfoTable().addCacheEntry(new CacheKey<>(updatedSnapshotInfo.getTableKey()),
CacheValue.get(trxnLogIndex, updatedSnapshotInfo));
updatedSnapInfos.put(updatedSnapshotInfo.getTableKey(), updatedSnapshotInfo);
}
}

Expand All @@ -144,6 +202,7 @@ private void updateSnapshotInfoAndCache(SnapshotInfo snapInfo,
* update in DB.
*/
private void updateSnapshotChainAndCache(
Set<Triple<String, String, String>> lockSet,
OmMetadataManagerImpl metadataManager,
SnapshotInfo snapInfo,
long trxnLogIndex,
Expand All @@ -155,7 +214,6 @@ private void updateSnapshotChainAndCache(

SnapshotChainManager snapshotChainManager = metadataManager
.getSnapshotChainManager();
SnapshotInfo nextPathSnapInfo = null;

// If the snapshot is deleted in the previous run, then the in-memory
// SnapshotChainManager might throw NoSuchElementException as the snapshot
Expand All @@ -171,58 +229,63 @@ private void updateSnapshotChainAndCache(
return;
}

// Updates next path snapshot's previous snapshot ID
String nextPathSnapshotKey = null;

if (hasNextPathSnapshot) {
UUID nextPathSnapshotId = snapshotChainManager.nextPathSnapshot(
snapInfo.getSnapshotPath(), snapInfo.getSnapshotId());

String snapshotTableKey = snapshotChainManager
nextPathSnapshotKey = snapshotChainManager
.getTableKey(nextPathSnapshotId);
nextPathSnapInfo = metadataManager.getSnapshotInfoTable()
.get(snapshotTableKey);
if (nextPathSnapInfo != null) {
nextPathSnapInfo.setPathPreviousSnapshotId(
snapInfo.getPathPreviousSnapshotId());
metadataManager.getSnapshotInfoTable().addCacheEntry(
new CacheKey<>(nextPathSnapInfo.getTableKey()),
CacheValue.get(trxnLogIndex, nextPathSnapInfo));
updatedPathPreviousAndGlobalSnapshots
.put(nextPathSnapInfo.getTableKey(), nextPathSnapInfo);
}

// Acquire lock from the snapshot
acquireLock(lockSet, nextPathSnapshotKey, metadataManager);
}

// Updates next global snapshot's previous snapshot ID
String nextGlobalSnapshotKey = null;
if (hasNextGlobalSnapshot) {
UUID nextGlobalSnapshotId =
snapshotChainManager.nextGlobalSnapshot(snapInfo.getSnapshotId());

String snapshotTableKey = snapshotChainManager
.getTableKey(nextGlobalSnapshotId);

SnapshotInfo nextGlobalSnapInfo = metadataManager.getSnapshotInfoTable()
.get(snapshotTableKey);
// If both next global and path snapshot are same, it may overwrite
// nextPathSnapInfo.setPathPreviousSnapshotID(), adding this check
// will prevent it.
if (nextGlobalSnapInfo != null && nextPathSnapInfo != null &&
nextGlobalSnapInfo.getSnapshotId().equals(
nextPathSnapInfo.getSnapshotId())) {
nextPathSnapInfo.setGlobalPreviousSnapshotId(
snapInfo.getGlobalPreviousSnapshotId());
metadataManager.getSnapshotInfoTable().addCacheEntry(
new CacheKey<>(nextPathSnapInfo.getTableKey()),
CacheValue.get(trxnLogIndex, nextPathSnapInfo));
updatedPathPreviousAndGlobalSnapshots
.put(nextPathSnapInfo.getTableKey(), nextPathSnapInfo);
} else if (nextGlobalSnapInfo != null) {
nextGlobalSnapInfo.setGlobalPreviousSnapshotId(
snapInfo.getGlobalPreviousSnapshotId());
metadataManager.getSnapshotInfoTable().addCacheEntry(
new CacheKey<>(nextGlobalSnapInfo.getTableKey()),
CacheValue.get(trxnLogIndex, nextGlobalSnapInfo));
updatedPathPreviousAndGlobalSnapshots
.put(nextGlobalSnapInfo.getTableKey(), nextGlobalSnapInfo);
}
UUID nextGlobalSnapshotId = snapshotChainManager.nextGlobalSnapshot(snapInfo.getSnapshotId());
nextGlobalSnapshotKey = snapshotChainManager.getTableKey(nextGlobalSnapshotId);

// Acquire lock from the snapshot
acquireLock(lockSet, nextGlobalSnapshotKey, metadataManager);
}

SnapshotInfo nextPathSnapInfo =
nextPathSnapshotKey != null ? metadataManager.getSnapshotInfoTable().get(nextPathSnapshotKey) : null;

SnapshotInfo nextGlobalSnapInfo =
nextGlobalSnapshotKey != null ? metadataManager.getSnapshotInfoTable().get(nextGlobalSnapshotKey) : null;

// Updates next path snapshot's previous snapshot ID
if (nextPathSnapInfo != null) {
nextPathSnapInfo.setPathPreviousSnapshotId(snapInfo.getPathPreviousSnapshotId());
metadataManager.getSnapshotInfoTable().addCacheEntry(
new CacheKey<>(nextPathSnapInfo.getTableKey()),
CacheValue.get(trxnLogIndex, nextPathSnapInfo));
updatedPathPreviousAndGlobalSnapshots
.put(nextPathSnapInfo.getTableKey(), nextPathSnapInfo);
}

// Updates next global snapshot's previous snapshot ID
// If both next global and path snapshot are same, it may overwrite
// nextPathSnapInfo.setPathPreviousSnapshotID(), adding this check
// will prevent it.
if (nextGlobalSnapInfo != null && nextPathSnapInfo != null &&
nextGlobalSnapInfo.getSnapshotId().equals(nextPathSnapInfo.getSnapshotId())) {
nextPathSnapInfo.setGlobalPreviousSnapshotId(snapInfo.getGlobalPreviousSnapshotId());
metadataManager.getSnapshotInfoTable().addCacheEntry(
new CacheKey<>(nextPathSnapInfo.getTableKey()),
CacheValue.get(trxnLogIndex, nextPathSnapInfo));
updatedPathPreviousAndGlobalSnapshots
.put(nextPathSnapInfo.getTableKey(), nextPathSnapInfo);
} else if (nextGlobalSnapInfo != null) {
nextGlobalSnapInfo.setGlobalPreviousSnapshotId(
snapInfo.getGlobalPreviousSnapshotId());
metadataManager.getSnapshotInfoTable().addCacheEntry(
new CacheKey<>(nextGlobalSnapInfo.getTableKey()),
CacheValue.get(trxnLogIndex, nextGlobalSnapInfo));
updatedPathPreviousAndGlobalSnapshots
.put(nextGlobalSnapInfo.getTableKey(), nextGlobalSnapInfo);
}

snapshotChainManager.deleteSnapshot(snapInfo);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@

import java.io.IOException;

import static org.apache.hadoop.ozone.om.exceptions.OMException.ResultCodes.INVALID_SNAPSHOT_ERROR;
import static org.apache.hadoop.ozone.om.exceptions.OMException.ResultCodes.FILE_NOT_FOUND;
import static org.apache.hadoop.ozone.om.lock.OzoneManagerLock.Resource.SNAPSHOT_LOCK;

/**
* Updates the exclusive size of the snapshot.
Expand All @@ -62,16 +63,31 @@ public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager,
.getSetSnapshotPropertyRequest();
SnapshotInfo updatedSnapInfo = null;

String snapshotKey = setSnapshotPropertyRequest.getSnapshotKey();
boolean acquiredSnapshotLock = false;
String volumeName = null;
String bucketName = null;
String snapshotName = null;

try {
String snapshotKey = setSnapshotPropertyRequest.getSnapshotKey();
SnapshotInfo snapshotInfo = metadataManager.getSnapshotInfoTable().get(snapshotKey);
if (snapshotInfo == null) {
LOG.error("Snapshot: '{}' doesn't not exist in snapshot table.", snapshotKey);
throw new OMException("Snapshot: '{" + snapshotKey + "}' doesn't not exist in snapshot table.", FILE_NOT_FOUND);
}

volumeName = snapshotInfo.getVolumeName();
bucketName = snapshotInfo.getBucketName();
snapshotName = snapshotInfo.getName();

mergeOmLockDetails(metadataManager.getLock()
.acquireWriteLock(SNAPSHOT_LOCK, volumeName, bucketName, snapshotName));

acquiredSnapshotLock = getOmLockDetails().isLockAcquired();

updatedSnapInfo = metadataManager.getSnapshotInfoTable()
.get(snapshotKey);

if (updatedSnapInfo == null) {
LOG.error("SnapshotInfo for Snapshot: {} is not found", snapshotKey);
throw new OMException("SnapshotInfo for Snapshot: " + snapshotKey +
" is not found", INVALID_SNAPSHOT_ERROR);
}

if (setSnapshotPropertyRequest.hasDeepCleanedDeletedDir()) {
updatedSnapInfo.setDeepCleanedDeletedDir(setSnapshotPropertyRequest
Expand Down Expand Up @@ -104,6 +120,14 @@ public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager,
} catch (IOException ex) {
omClientResponse = new OMSnapshotSetPropertyResponse(
createErrorOMResponse(omResponse, ex));
} finally {
if (acquiredSnapshotLock) {
mergeOmLockDetails(metadataManager.getLock()
.releaseWriteLock(SNAPSHOT_LOCK, volumeName, bucketName, snapshotName));
}
if (omClientResponse != null) {
omClientResponse.setOmLockDetails(getOmLockDetails());
}
}

return omClientResponse;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,9 +80,9 @@ protected void addToDBBatch(OMMetadataManager omMetadataManager,

OmMetadataManagerImpl metadataManager = (OmMetadataManagerImpl)
omMetadataManager;
updateSnapInfo(metadataManager, batchOperation, updatedSnapInfos);
updateSnapInfo(metadataManager, batchOperation,
updatedPreviousAndGlobalSnapInfos);
updateSnapInfo(metadataManager, batchOperation, updatedSnapInfos);
for (String dbKey: snapshotDbKeys) {
// Skip the cache here because snapshot is purged from cache in OMSnapshotPurgeRequest.
SnapshotInfo snapshotInfo = omMetadataManager
Expand Down

0 comments on commit 190dcde

Please sign in to comment.