Skip to content

Commit

Permalink
HDDS-11137. Removed locks from SnapshotPurge and SnapshotSetProperty …
Browse files Browse the repository at this point in the history
…APIs
  • Loading branch information
hemantk-12 committed Aug 6, 2024
1 parent 24b6849 commit e85485d
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 113 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,7 @@
package org.apache.hadoop.ozone.om.request.snapshot;

import org.apache.commons.lang3.tuple.Pair;
import org.apache.commons.lang3.tuple.Triple;
import org.apache.hadoop.hdds.utils.db.Table;
import org.apache.hadoop.ozone.om.OMMetadataManager;
import org.apache.hadoop.ozone.om.OMMetrics;
import org.apache.hadoop.ozone.om.exceptions.OMException;
import org.apache.hadoop.ozone.om.response.snapshot.OMSnapshotPurgeResponse.OmPurgeResponse;
Expand All @@ -47,15 +45,11 @@

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.Objects;
import java.util.Set;
import java.util.UUID;

import static org.apache.hadoop.ozone.om.lock.OzoneManagerLock.Resource.SNAPSHOT_LOCK;

/**
* Handles OMSnapshotPurge Request.
* This is an OM internal request. Does not need @RequireSnapshotFeatureState.
Expand Down Expand Up @@ -118,19 +112,9 @@ public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, TermIn
// 2. Update the deep clean flag for the next active snapshot (So that it can be
// deep cleaned by the KeyDeletingService in the next run),
// 3. Finally, purge the snapshot.
// All of these steps have to be performed only when it acquires all the necessary
// locks (lock on the snapshot to be purged, lock on the next active snapshot, and
// lock on the next path and global previous snapshots). Ideally, there is no need
// for locks for snapshot purge and can rely on OMStateMachine because OMStateMachine
// is going to process each request sequentially.
//
// But there is a problem with that. After filtering unnecessary SST files for a snapshot,
// SstFilteringService updates that snapshot's SstFilter flag. SstFilteringService cannot
// use the SetSnapshotProperty API because it runs on each OM independently, and one OM does
// not know whether the snapshot has been filtered on the other OMs in an HA environment.
//
// If locks are not taken, snapshot purge and SstFilteringService can race with each other
// and one's update can overwrite the other's.
// There is no need to take a lock for snapshot purge as of now. We can simply rely on OMStateMachine
// because it executes transactions sequentially.
private OmPurgeResponse purgeSnapshot(String snapshotKey,
OzoneManager ozoneManager,
long trxnLogIndex) throws IOException {
Expand All @@ -139,97 +123,58 @@ private OmPurgeResponse purgeSnapshot(String snapshotKey,
OmMetadataManagerImpl omMetadataManager = (OmMetadataManagerImpl) ozoneManager.getMetadataManager();
SnapshotChainManager snapshotChainManager = omMetadataManager.getSnapshotChainManager();

if (omMetadataManager.getSnapshotInfoTable().get(snapshotKey) == null) {
SnapshotInfo fromSnapshot = omMetadataManager.getSnapshotInfoTable().get(snapshotKey);
if (fromSnapshot == null) {
// Snapshot may have been purged in the previous iteration of SnapshotDeletingService.
throw new OMException("Snapshot: '" + snapshotKey + "}' is no longer exist " +
"in snapshot table. Might be removed in previous run.", OMException.ResultCodes.FILE_NOT_FOUND);
}

// To acquire all the locks, a set is maintained which is keyed by a triple of volumeName, bucketName and
// snapshotName. SnapshotInfoTable key (which is /volumeName/bucketName/snapshotName) is not used directly
// because volumeName, bucketName and snapshotName can't be obtained after purging the snapshot from the cache.
// Once all the necessary locks are acquired, the three steps mentioned above are performed and
// the locks are released after that.
Set<Triple<String, String, String>> lockSet = new HashSet<>(4, 1);

try {
acquireLock(lockSet, snapshotKey, omMetadataManager);

SnapshotInfo fromSnapshot = omMetadataManager.getSnapshotInfoTable().get(snapshotKey);
SnapshotInfo nextSnapshot =
SnapshotUtils.getNextActiveSnapshot(fromSnapshot, snapshotChainManager, omSnapshotManager);
SnapshotInfo nextSnapshot =
SnapshotUtils.getNextActiveSnapshot(fromSnapshot, snapshotChainManager, omSnapshotManager);

if (nextSnapshot != null) {
acquireLock(lockSet, nextSnapshot.getTableKey(), omMetadataManager);
}

// Step 1: Update the snapshot chain.
Pair<SnapshotInfo, SnapshotInfo> pathToGlobalSnapshotInto =
updateSnapshotChainAndCache(lockSet, omMetadataManager, fromSnapshot, trxnLogIndex);
SnapshotInfo nextPathSnapshotInfo = null;
SnapshotInfo nextGlobalSnapshotInfo = null;

if (pathToGlobalSnapshotInto != null) {
nextPathSnapshotInfo = pathToGlobalSnapshotInto.getLeft();
nextGlobalSnapshotInfo = pathToGlobalSnapshotInto.getRight();
}
// Step 1: Update the snapshot chain.
Pair<SnapshotInfo, SnapshotInfo> pathToGlobalSnapshotInto =
updateSnapshotChainAndCache(omMetadataManager, fromSnapshot, trxnLogIndex);
SnapshotInfo nextPathSnapshotInfo = null;
SnapshotInfo nextGlobalSnapshotInfo = null;

// Step 2: Update the deep clean flag for the next active snapshot
SnapshotInfo nextActiveSnapshotInfo = updateSnapshotInfoAndCache(nextSnapshot, omMetadataManager, trxnLogIndex);
if (pathToGlobalSnapshotInto != null) {
nextPathSnapshotInfo = pathToGlobalSnapshotInto.getLeft();
nextGlobalSnapshotInfo = pathToGlobalSnapshotInto.getRight();
}

// Remove and close snapshot's RocksDB instance from SnapshotCache.
ozoneManager.getOmSnapshotManager().invalidateCacheEntry(fromSnapshot.getSnapshotId());
// Step 2: Update the deep clean flag for the next active snapshot
SnapshotInfo nextActiveSnapshotInfo = updateSnapshotInfoAndCache(nextSnapshot, omMetadataManager, trxnLogIndex);

// Step 3: Purge the snapshot from SnapshotInfoTable cache.
ozoneManager.getMetadataManager().getSnapshotInfoTable()
.addCacheEntry(new CacheKey<>(fromSnapshot.getTableKey()), CacheValue.get(trxnLogIndex));
// Remove and close snapshot's RocksDB instance from SnapshotCache.
ozoneManager.getOmSnapshotManager().invalidateCacheEntry(fromSnapshot.getSnapshotId());

return new OmPurgeResponse(snapshotKey, nextPathSnapshotInfo, nextGlobalSnapshotInfo,
nextActiveSnapshotInfo);
} finally {
lockSet.forEach(lockKey -> omMetadataManager.getLock()
.releaseWriteLock(SNAPSHOT_LOCK, lockKey.getLeft(), lockKey.getMiddle(), lockKey.getRight()));
}
}
// Step 3: Purge the snapshot from SnapshotInfoTable cache.
ozoneManager.getMetadataManager().getSnapshotInfoTable()
.addCacheEntry(new CacheKey<>(fromSnapshot.getTableKey()), CacheValue.get(trxnLogIndex));

private void acquireLock(Set<Triple<String, String, String>> lockSet, String snapshotTableKey,
OMMetadataManager omMetadataManager) throws IOException {
SnapshotInfo snapshotInfo = omMetadataManager.getSnapshotInfoTable().get(snapshotTableKey);
return new OmPurgeResponse(snapshotKey, nextPathSnapshotInfo, nextGlobalSnapshotInfo,
nextActiveSnapshotInfo);

// It should not be the case that lock is required for non-existing snapshot.
if (snapshotInfo == null) {
LOG.error("Snapshot: '{}' doesn't not exist in snapshot table.", snapshotTableKey);
throw new OMException("Snapshot: '{" + snapshotTableKey + "}' doesn't not exist in snapshot table.",
OMException.ResultCodes.FILE_NOT_FOUND);
}
Triple<String, String, String> lockKey = Triple.of(snapshotInfo.getVolumeName(), snapshotInfo.getBucketName(),
snapshotInfo.getName());
if (!lockSet.contains(lockKey)) {
mergeOmLockDetails(omMetadataManager.getLock()
.acquireWriteLock(SNAPSHOT_LOCK, lockKey.getLeft(), lockKey.getMiddle(), lockKey.getRight()));
lockSet.add(lockKey);
}
}

private SnapshotInfo updateSnapshotInfoAndCache(SnapshotInfo snapInfo, OmMetadataManagerImpl omMetadataManager,
long trxnLogIndex) throws IOException {
long trxnLogIndex) {
if (snapInfo == null) {
return null;
}

// Fetch the latest value again after acquiring lock.
SnapshotInfo updatedSnapshotInfo = omMetadataManager.getSnapshotInfoTable().get(snapInfo.getTableKey());

// Set the next snapshot's deep clean flag to false since the
// current snapshot is deleted; we can potentially
// reclaim more keys in the next snapshot.
updatedSnapshotInfo.setDeepClean(false);
snapInfo.setDeepClean(false);

// Update table cache first
omMetadataManager.getSnapshotInfoTable()
.addCacheEntry(new CacheKey<>(updatedSnapshotInfo.getTableKey()),
CacheValue.get(trxnLogIndex, updatedSnapshotInfo));
return updatedSnapshotInfo;
.addCacheEntry(new CacheKey<>(snapInfo.getTableKey()),
CacheValue.get(trxnLogIndex, snapInfo));
return snapInfo;
}

/**
Expand All @@ -239,7 +184,6 @@ private SnapshotInfo updateSnapshotInfoAndCache(SnapshotInfo snapInfo, OmMetadat
* update in DB.
*/
private Pair<SnapshotInfo, SnapshotInfo> updateSnapshotChainAndCache(
Set<Triple<String, String, String>> lockSet,
OmMetadataManagerImpl metadataManager,
SnapshotInfo snapInfo,
long trxnLogIndex
Expand Down Expand Up @@ -267,18 +211,12 @@ private Pair<SnapshotInfo, SnapshotInfo> updateSnapshotChainAndCache(
UUID nextPathSnapshotId = snapshotChainManager.nextPathSnapshot(
snapInfo.getSnapshotPath(), snapInfo.getSnapshotId());
nextPathSnapshotKey = snapshotChainManager.getTableKey(nextPathSnapshotId);

// Acquire lock from the snapshot
acquireLock(lockSet, nextPathSnapshotKey, metadataManager);
}

String nextGlobalSnapshotKey = null;
if (hasNextGlobalSnapshot) {
UUID nextGlobalSnapshotId = snapshotChainManager.nextGlobalSnapshot(snapInfo.getSnapshotId());
nextGlobalSnapshotKey = snapshotChainManager.getTableKey(nextGlobalSnapshotId);

// Acquire lock from the snapshot
acquireLock(lockSet, nextGlobalSnapshotKey, metadataManager);
}

SnapshotInfo nextPathSnapInfo =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@
import java.io.IOException;

import static org.apache.hadoop.ozone.om.exceptions.OMException.ResultCodes.FILE_NOT_FOUND;
import static org.apache.hadoop.ozone.om.lock.OzoneManagerLock.Resource.SNAPSHOT_LOCK;

/**
* Updates the exclusive size of the snapshot.
Expand All @@ -63,13 +62,8 @@ public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, TermIn
OzoneManagerProtocolProtos.SetSnapshotPropertyRequest
setSnapshotPropertyRequest = getOmRequest()
.getSetSnapshotPropertyRequest();
SnapshotInfo updatedSnapInfo = null;

String snapshotKey = setSnapshotPropertyRequest.getSnapshotKey();
boolean acquiredSnapshotLock = false;
String volumeName = null;
String bucketName = null;
String snapshotName = null;

try {
SnapshotInfo snapshotInfo = metadataManager.getSnapshotInfoTable().get(snapshotKey);
Expand All @@ -78,18 +72,7 @@ public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, TermIn
throw new OMException("Snapshot: '{" + snapshotKey + "}' doesn't not exist in snapshot table.", FILE_NOT_FOUND);
}

volumeName = snapshotInfo.getVolumeName();
bucketName = snapshotInfo.getBucketName();
snapshotName = snapshotInfo.getName();

mergeOmLockDetails(metadataManager.getLock()
.acquireWriteLock(SNAPSHOT_LOCK, volumeName, bucketName, snapshotName));

acquiredSnapshotLock = getOmLockDetails().isLockAcquired();

updatedSnapInfo = metadataManager.getSnapshotInfoTable()
.get(snapshotKey);

SnapshotInfo updatedSnapInfo = metadataManager.getSnapshotInfoTable().get(snapshotKey);

if (setSnapshotPropertyRequest.hasDeepCleanedDeletedDir()) {
updatedSnapInfo.setDeepCleanedDeletedDir(setSnapshotPropertyRequest
Expand Down Expand Up @@ -127,10 +110,6 @@ public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, TermIn
omMetrics.incNumSnapshotSetPropertyFails();
LOG.error("Failed to execute snapshotSetPropertyRequest: {{}}.", setSnapshotPropertyRequest, ex);
} finally {
if (acquiredSnapshotLock) {
mergeOmLockDetails(metadataManager.getLock()
.releaseWriteLock(SNAPSHOT_LOCK, volumeName, bucketName, snapshotName));
}
if (omClientResponse != null) {
omClientResponse.setOmLockDetails(getOmLockDetails());
}
Expand Down

0 comments on commit e85485d

Please sign in to comment.