Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HDDS-10590. [Snapshot] Synchronized snapshot purge, set snapshot property and SstFilteringService #6456

Merged
merged 3 commits into from
Apr 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -719,7 +719,7 @@ public SnapshotInfo copyObject() {
public String toString() {
return "SnapshotInfo{" +
"snapshotId: '" + snapshotId + '\'' +
", name: '" + name + "'," +
", name: '" + name + '\'' +
", volumeName: '" + volumeName + '\'' +
", bucketName: '" + bucketName + '\'' +
", snapshotStatus: '" + snapshotStatus + '\'' +
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@

package org.apache.hadoop.ozone.om.request.snapshot;

import org.apache.commons.lang3.tuple.Triple;
import org.apache.hadoop.ozone.om.OMMetadataManager;
import org.apache.hadoop.ozone.om.exceptions.OMException;
import org.apache.ratis.server.protocol.TermIndex;
import org.apache.hadoop.hdds.utils.db.cache.CacheKey;
import org.apache.hadoop.hdds.utils.db.cache.CacheValue;
Expand All @@ -40,11 +43,15 @@

import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Set;
import java.util.UUID;

import static org.apache.hadoop.ozone.om.lock.OzoneManagerLock.Resource.SNAPSHOT_LOCK;

/**
* Handles OMSnapshotPurge Request.
* This is an OM internal request. Does not need @RequireSnapshotFeatureState.
Expand All @@ -60,6 +67,7 @@ public OMSnapshotPurgeRequest(OMRequest omRequest) {
@Override
public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, TermIndex termIndex) {
final long trxnLogIndex = termIndex.getIndex();

OmSnapshotManager omSnapshotManager = ozoneManager.getOmSnapshotManager();
OmMetadataManagerImpl omMetadataManager = (OmMetadataManagerImpl)
ozoneManager.getMetadataManager();
Expand All @@ -80,33 +88,63 @@ public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, TermIn
Map<String, SnapshotInfo> updatedPathPreviousAndGlobalSnapshots =
new HashMap<>();

// Snapshots that are purged by the SnapshotDeletingService
// will update the next snapshot so that is can be deep cleaned
// by the KeyDeletingService in the next run.
// Each snapshot purge operation does three things:
// 1. Update the snapshot chain,
// 2. Update the deep clean flag for the next active snapshot (So that it can be
// deep cleaned by the KeyDeletingService in the next run),
// 3. Finally, purge the snapshot.
// All of these steps have to be performed only when it acquires all the necessary
// locks (lock on the snapshot to be purged, lock on the next active snapshot, and
// lock on the next path and global previous snapshots). Ideally, there is no need
// for locks for snapshot purge and can rely on OMStateMachine because OMStateMachine
// is going to process each request sequentially.
//
// But there is a problem with that. After filtering unnecessary SST files for a snapshot,
// SstFilteringService updates that snapshot's SstFilter flag. SstFilteringService cannot
// use SetSnapshotProperty API because it runs on each OM independently and One OM does
// not know if the snapshot has been filtered on the other OM in HA environment.
//
// If locks are not taken snapshot purge and SstFilteringService will cause a race condition
// and override one's update with another.
for (String snapTableKey : snapshotDbKeys) {
SnapshotInfo fromSnapshot = omMetadataManager.getSnapshotInfoTable()
.get(snapTableKey);

if (fromSnapshot == null) {
// Snapshot may have been purged in the previous iteration of SnapshotDeletingService.
LOG.warn("The snapshot {} is not longer in snapshot table, It maybe removed in the previous " +
"Snapshot purge request.", snapTableKey);
continue;
}
// To acquire all the locks, a set is maintained which is keyed by snapshotTableKey.
// snapshotTableKey is nothing but /volumeName/bucketName/snapshotName.
// Once all the locks are acquired, it performs the three steps mentioned above and
// release all the locks after that.
Set<Triple<String, String, String>> lockSet = new HashSet<>(4, 1);
try {
if (omMetadataManager.getSnapshotInfoTable().get(snapTableKey) == null) {
// Snapshot may have been purged in the previous iteration of SnapshotDeletingService.
LOG.warn("The snapshot {} is not longer in snapshot table, It maybe removed in the previous " +
"Snapshot purge request.", snapTableKey);
continue;
}

acquireLock(lockSet, snapTableKey, omMetadataManager);
SnapshotInfo fromSnapshot = omMetadataManager.getSnapshotInfoTable().get(snapTableKey);

SnapshotInfo nextSnapshot = SnapshotUtils
.getNextActiveSnapshot(fromSnapshot,
snapshotChainManager, omSnapshotManager);

updateSnapshotInfoAndCache(nextSnapshot, omMetadataManager,
trxnLogIndex, updatedSnapInfos);
updateSnapshotChainAndCache(omMetadataManager, fromSnapshot,
trxnLogIndex, updatedPathPreviousAndGlobalSnapshots);
// Remove and close snapshot's RocksDB instance from SnapshotCache.
ozoneManager.getOmSnapshotManager().invalidateCacheEntry(fromSnapshot.getSnapshotId());
// Update SnapshotInfoTable cache.
ozoneManager.getMetadataManager().getSnapshotInfoTable()
.addCacheEntry(new CacheKey<>(fromSnapshot.getTableKey()), CacheValue.get(trxnLogIndex));
SnapshotInfo nextSnapshot =
SnapshotUtils.getNextActiveSnapshot(fromSnapshot, snapshotChainManager, omSnapshotManager);

if (nextSnapshot != null) {
acquireLock(lockSet, nextSnapshot.getTableKey(), omMetadataManager);
}

// Update the chain first so that it has all the necessary locks before updating deep clean.
updateSnapshotChainAndCache(lockSet, omMetadataManager, fromSnapshot, trxnLogIndex,
updatedPathPreviousAndGlobalSnapshots);
updateSnapshotInfoAndCache(nextSnapshot, omMetadataManager, trxnLogIndex, updatedSnapInfos);
// Remove and close snapshot's RocksDB instance from SnapshotCache.
omSnapshotManager.invalidateCacheEntry(fromSnapshot.getSnapshotId());
// Update SnapshotInfoTable cache.
omMetadataManager.getSnapshotInfoTable()
.addCacheEntry(new CacheKey<>(fromSnapshot.getTableKey()), CacheValue.get(trxnLogIndex));
} finally {
for (Triple<String, String, String> lockKey: lockSet) {
omMetadataManager.getLock()
.releaseWriteLock(SNAPSHOT_LOCK, lockKey.getLeft(), lockKey.getMiddle(), lockKey.getRight());
}
}
}

omClientResponse = new OMSnapshotPurgeResponse(omResponse.build(),
Expand All @@ -120,20 +158,41 @@ public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, TermIn
return omClientResponse;
}

private void acquireLock(Set<Triple<String, String, String>> lockSet, String snapshotTableKey,
OMMetadataManager omMetadataManager) throws IOException {
SnapshotInfo snapshotInfo = omMetadataManager.getSnapshotInfoTable().get(snapshotTableKey);

// It should not be the case that lock is required for non-existing snapshot.
if (snapshotInfo == null) {
LOG.error("Snapshot: '{}' doesn't not exist in snapshot table.", snapshotTableKey);
throw new OMException("Snapshot: '{" + snapshotTableKey + "}' doesn't not exist in snapshot table.",
OMException.ResultCodes.FILE_NOT_FOUND);
}
Triple<String, String, String> lockKey = Triple.of(snapshotInfo.getVolumeName(), snapshotInfo.getBucketName(),
snapshotInfo.getName());
if (!lockSet.contains(lockKey)) {
mergeOmLockDetails(omMetadataManager.getLock()
.acquireWriteLock(SNAPSHOT_LOCK, lockKey.getLeft(), lockKey.getMiddle(), lockKey.getRight()));
lockSet.add(lockKey);
}
}

private void updateSnapshotInfoAndCache(SnapshotInfo snapInfo,
OmMetadataManagerImpl omMetadataManager, long trxnLogIndex,
Map<String, SnapshotInfo> updatedSnapInfos) {
Map<String, SnapshotInfo> updatedSnapInfos) throws IOException {
if (snapInfo != null) {
// Fetch the latest value again after acquiring lock.
SnapshotInfo updatedSnapshotInfo = omMetadataManager.getSnapshotInfoTable().get(snapInfo.getTableKey());

// Setting next snapshot deep clean to false, Since the
// current snapshot is deleted. We can potentially
// reclaim more keys in the next snapshot.
snapInfo.setDeepClean(false);
updatedSnapshotInfo.setDeepClean(false);

// Update table cache first
omMetadataManager.getSnapshotInfoTable().addCacheEntry(
new CacheKey<>(snapInfo.getTableKey()),
CacheValue.get(trxnLogIndex, snapInfo));
updatedSnapInfos.put(snapInfo.getTableKey(), snapInfo);
omMetadataManager.getSnapshotInfoTable().addCacheEntry(new CacheKey<>(updatedSnapshotInfo.getTableKey()),
CacheValue.get(trxnLogIndex, updatedSnapshotInfo));
updatedSnapInfos.put(updatedSnapshotInfo.getTableKey(), updatedSnapshotInfo);
}
}

Expand All @@ -144,6 +203,7 @@ private void updateSnapshotInfoAndCache(SnapshotInfo snapInfo,
* update in DB.
*/
private void updateSnapshotChainAndCache(
Set<Triple<String, String, String>> lockSet,
OmMetadataManagerImpl metadataManager,
SnapshotInfo snapInfo,
long trxnLogIndex,
Expand All @@ -155,7 +215,6 @@ private void updateSnapshotChainAndCache(

SnapshotChainManager snapshotChainManager = metadataManager
.getSnapshotChainManager();
SnapshotInfo nextPathSnapInfo = null;

// If the snapshot is deleted in the previous run, then the in-memory
// SnapshotChainManager might throw NoSuchElementException as the snapshot
Expand All @@ -171,58 +230,63 @@ private void updateSnapshotChainAndCache(
return;
}

// Updates next path snapshot's previous snapshot ID
String nextPathSnapshotKey = null;

if (hasNextPathSnapshot) {
UUID nextPathSnapshotId = snapshotChainManager.nextPathSnapshot(
snapInfo.getSnapshotPath(), snapInfo.getSnapshotId());

String snapshotTableKey = snapshotChainManager
nextPathSnapshotKey = snapshotChainManager
.getTableKey(nextPathSnapshotId);
nextPathSnapInfo = metadataManager.getSnapshotInfoTable()
.get(snapshotTableKey);
if (nextPathSnapInfo != null) {
nextPathSnapInfo.setPathPreviousSnapshotId(
snapInfo.getPathPreviousSnapshotId());
metadataManager.getSnapshotInfoTable().addCacheEntry(
new CacheKey<>(nextPathSnapInfo.getTableKey()),
CacheValue.get(trxnLogIndex, nextPathSnapInfo));
updatedPathPreviousAndGlobalSnapshots
.put(nextPathSnapInfo.getTableKey(), nextPathSnapInfo);
}

// Acquire lock from the snapshot
acquireLock(lockSet, nextPathSnapshotKey, metadataManager);
}

// Updates next global snapshot's previous snapshot ID
String nextGlobalSnapshotKey = null;
if (hasNextGlobalSnapshot) {
UUID nextGlobalSnapshotId =
snapshotChainManager.nextGlobalSnapshot(snapInfo.getSnapshotId());

String snapshotTableKey = snapshotChainManager
.getTableKey(nextGlobalSnapshotId);

SnapshotInfo nextGlobalSnapInfo = metadataManager.getSnapshotInfoTable()
.get(snapshotTableKey);
// If both next global and path snapshot are same, it may overwrite
// nextPathSnapInfo.setPathPreviousSnapshotID(), adding this check
// will prevent it.
if (nextGlobalSnapInfo != null && nextPathSnapInfo != null &&
nextGlobalSnapInfo.getSnapshotId().equals(
nextPathSnapInfo.getSnapshotId())) {
nextPathSnapInfo.setGlobalPreviousSnapshotId(
snapInfo.getGlobalPreviousSnapshotId());
metadataManager.getSnapshotInfoTable().addCacheEntry(
new CacheKey<>(nextPathSnapInfo.getTableKey()),
CacheValue.get(trxnLogIndex, nextPathSnapInfo));
updatedPathPreviousAndGlobalSnapshots
.put(nextPathSnapInfo.getTableKey(), nextPathSnapInfo);
} else if (nextGlobalSnapInfo != null) {
nextGlobalSnapInfo.setGlobalPreviousSnapshotId(
snapInfo.getGlobalPreviousSnapshotId());
metadataManager.getSnapshotInfoTable().addCacheEntry(
new CacheKey<>(nextGlobalSnapInfo.getTableKey()),
CacheValue.get(trxnLogIndex, nextGlobalSnapInfo));
updatedPathPreviousAndGlobalSnapshots
.put(nextGlobalSnapInfo.getTableKey(), nextGlobalSnapInfo);
}
UUID nextGlobalSnapshotId = snapshotChainManager.nextGlobalSnapshot(snapInfo.getSnapshotId());
nextGlobalSnapshotKey = snapshotChainManager.getTableKey(nextGlobalSnapshotId);

// Acquire lock from the snapshot
acquireLock(lockSet, nextGlobalSnapshotKey, metadataManager);
}

SnapshotInfo nextPathSnapInfo =
nextPathSnapshotKey != null ? metadataManager.getSnapshotInfoTable().get(nextPathSnapshotKey) : null;

SnapshotInfo nextGlobalSnapInfo =
nextGlobalSnapshotKey != null ? metadataManager.getSnapshotInfoTable().get(nextGlobalSnapshotKey) : null;

// Updates next path snapshot's previous snapshot ID
if (nextPathSnapInfo != null) {
nextPathSnapInfo.setPathPreviousSnapshotId(snapInfo.getPathPreviousSnapshotId());
metadataManager.getSnapshotInfoTable().addCacheEntry(
new CacheKey<>(nextPathSnapInfo.getTableKey()),
CacheValue.get(trxnLogIndex, nextPathSnapInfo));
updatedPathPreviousAndGlobalSnapshots
.put(nextPathSnapInfo.getTableKey(), nextPathSnapInfo);
}

// Updates next global snapshot's previous snapshot ID
// If both next global and path snapshot are same, it may overwrite
// nextPathSnapInfo.setPathPreviousSnapshotID(), adding this check
// will prevent it.
if (nextGlobalSnapInfo != null && nextPathSnapInfo != null &&
nextGlobalSnapInfo.getSnapshotId().equals(nextPathSnapInfo.getSnapshotId())) {
nextPathSnapInfo.setGlobalPreviousSnapshotId(snapInfo.getGlobalPreviousSnapshotId());
metadataManager.getSnapshotInfoTable().addCacheEntry(
new CacheKey<>(nextPathSnapInfo.getTableKey()),
CacheValue.get(trxnLogIndex, nextPathSnapInfo));
updatedPathPreviousAndGlobalSnapshots
.put(nextPathSnapInfo.getTableKey(), nextPathSnapInfo);
} else if (nextGlobalSnapInfo != null) {
nextGlobalSnapInfo.setGlobalPreviousSnapshotId(
snapInfo.getGlobalPreviousSnapshotId());
metadataManager.getSnapshotInfoTable().addCacheEntry(
new CacheKey<>(nextGlobalSnapInfo.getTableKey()),
CacheValue.get(trxnLogIndex, nextGlobalSnapInfo));
updatedPathPreviousAndGlobalSnapshots
.put(nextGlobalSnapInfo.getTableKey(), nextGlobalSnapInfo);
}

snapshotChainManager.deleteSnapshot(snapInfo);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@

import java.io.IOException;

import static org.apache.hadoop.ozone.om.exceptions.OMException.ResultCodes.INVALID_SNAPSHOT_ERROR;
import static org.apache.hadoop.ozone.om.exceptions.OMException.ResultCodes.FILE_NOT_FOUND;
import static org.apache.hadoop.ozone.om.lock.OzoneManagerLock.Resource.SNAPSHOT_LOCK;

/**
* Updates the exclusive size of the snapshot.
Expand All @@ -62,16 +63,31 @@ public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, TermIn
.getSetSnapshotPropertyRequest();
SnapshotInfo updatedSnapInfo = null;

String snapshotKey = setSnapshotPropertyRequest.getSnapshotKey();
boolean acquiredSnapshotLock = false;
String volumeName = null;
String bucketName = null;
String snapshotName = null;

try {
String snapshotKey = setSnapshotPropertyRequest.getSnapshotKey();
SnapshotInfo snapshotInfo = metadataManager.getSnapshotInfoTable().get(snapshotKey);
if (snapshotInfo == null) {
LOG.error("Snapshot: '{}' doesn't not exist in snapshot table.", snapshotKey);
throw new OMException("Snapshot: '{" + snapshotKey + "}' doesn't not exist in snapshot table.", FILE_NOT_FOUND);
}

volumeName = snapshotInfo.getVolumeName();
bucketName = snapshotInfo.getBucketName();
snapshotName = snapshotInfo.getName();

mergeOmLockDetails(metadataManager.getLock()
.acquireWriteLock(SNAPSHOT_LOCK, volumeName, bucketName, snapshotName));

acquiredSnapshotLock = getOmLockDetails().isLockAcquired();

updatedSnapInfo = metadataManager.getSnapshotInfoTable()
.get(snapshotKey);

if (updatedSnapInfo == null) {
LOG.error("SnapshotInfo for Snapshot: {} is not found", snapshotKey);
throw new OMException("SnapshotInfo for Snapshot: " + snapshotKey +
" is not found", INVALID_SNAPSHOT_ERROR);
}

if (setSnapshotPropertyRequest.hasDeepCleanedDeletedDir()) {
updatedSnapInfo.setDeepCleanedDeletedDir(setSnapshotPropertyRequest
Expand Down Expand Up @@ -104,6 +120,14 @@ public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, TermIn
} catch (IOException ex) {
omClientResponse = new OMSnapshotSetPropertyResponse(
createErrorOMResponse(omResponse, ex));
} finally {
if (acquiredSnapshotLock) {
mergeOmLockDetails(metadataManager.getLock()
.releaseWriteLock(SNAPSHOT_LOCK, volumeName, bucketName, snapshotName));
}
if (omClientResponse != null) {
omClientResponse.setOmLockDetails(getOmLockDetails());
}
}

return omClientResponse;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,9 +80,9 @@ protected void addToDBBatch(OMMetadataManager omMetadataManager,

OmMetadataManagerImpl metadataManager = (OmMetadataManagerImpl)
omMetadataManager;
updateSnapInfo(metadataManager, batchOperation, updatedSnapInfos);
updateSnapInfo(metadataManager, batchOperation,
updatedPreviousAndGlobalSnapInfos);
updateSnapInfo(metadataManager, batchOperation, updatedSnapInfos);
for (String dbKey: snapshotDbKeys) {
// Skip the cache here because snapshot is purged from cache in OMSnapshotPurgeRequest.
SnapshotInfo snapshotInfo = omMetadataManager
Expand Down