HDDS-11408. Snapshot rename table entries are propagated incorrectly on snapshot deletes #7200

Merged · 38 commits · Sep 20, 2024
Changes from 4 commits
Commits
0335ed2
HDDS-11408. Snapshot rename table entries are propagated incorrectly …
swamirishi Sep 13, 2024
53a5b5d
HDDS-11408. Add test cases
swamirishi Sep 16, 2024
87ea520
HDDS-11408. Address review comments
swamirishi Sep 16, 2024
6675538
HDDS-11408. Address review comments
swamirishi Sep 16, 2024
96c3daa
HDDS-11408. Address review comments
swamirishi Sep 16, 2024
535a6cd
HDDS-11408. Address review comments
swamirishi Sep 16, 2024
7d49b64
HDDS-11408. Address review comments
swamirishi Sep 17, 2024
f79c5c7
HDDS-11408. Address review comments
swamirishi Sep 17, 2024
7124b1e
HDDS-11408. Address review comments
swamirishi Sep 17, 2024
ae0085c
HDDS-11408. Add test cases
swamirishi Sep 17, 2024
5fb5d8c
HDDS-11408. Add test cases
swamirishi Sep 17, 2024
67b5a05
HDDS-11408. Add race condition test case
swamirishi Sep 18, 2024
4b724fe
HDDS-11408. fix findbugs
swamirishi Sep 18, 2024
e2f0461
HDDS-11408. Address review comments
swamirishi Sep 18, 2024
8600803
HDDS-11408. Address review comments
swamirishi Sep 18, 2024
8b477b6
HDDS-11408. Address review comments
swamirishi Sep 18, 2024
09b9523
HDDS-11408. Address review comments
swamirishi Sep 18, 2024
7ee74dd
HDDS-11408. Address review comments
swamirishi Sep 18, 2024
da7701f
HDDS-11408. Address review comments
swamirishi Sep 18, 2024
b7380e1
HDDS-11408. Address review comments
swamirishi Sep 18, 2024
e31c153
HDDS-11408. Address review comments
swamirishi Sep 18, 2024
f6104fd
HDDS-11408. Address review comments
swamirishi Sep 18, 2024
bc4de22
HDDS-11408. Address review comments
swamirishi Sep 18, 2024
57d665a
HDDS-11408. Address review comments
swamirishi Sep 18, 2024
075441b
HDDS-11408. Address review comments
swamirishi Sep 19, 2024
c3a11a5
HDDS-11408. Address review comments
swamirishi Sep 19, 2024
bd28a4c
HDDS-11408. Address review comments
swamirishi Sep 19, 2024
1fd5f1d
HDDS-11408. Address review comments
swamirishi Sep 19, 2024
d1c0a77
HDDS-11408. Debug test failure
swamirishi Sep 19, 2024
0d0e3e7
HDDS-11408. Debug test failure
swamirishi Sep 19, 2024
554e452
HDDS-11408. Debug test failure
swamirishi Sep 19, 2024
1aa55ff
HDDS-11408. Debug test failure
swamirishi Sep 19, 2024
364663c
HDDS-11408. Debug test failure
swamirishi Sep 19, 2024
9af1e99
HDDS-11408. Add log
swamirishi Sep 19, 2024
0a8183d
HDDS-11408. Add log
swamirishi Sep 19, 2024
73572ad
HDDS-11408. Add log
swamirishi Sep 19, 2024
063f7b7
HDDS-11408. Address review comments
swamirishi Sep 19, 2024
302491d
HDDS-11408. Address review comments
swamirishi Sep 20, 2024
@@ -153,6 +153,7 @@ enum Type {
GetServerDefaults = 134;
GetQuotaRepairStatus = 135;
StartQuotaRepair = 136;
SnapshotMoveTableKeys = 137;
}

enum SafeMode {
@@ -295,6 +296,7 @@ message OMRequest {
optional ServerDefaultsRequest ServerDefaultsRequest = 132;
optional GetQuotaRepairStatusRequest GetQuotaRepairStatusRequest = 133;
optional StartQuotaRepairRequest StartQuotaRepairRequest = 134;
optional SnapshotMoveTableKeysRequest SnapshotMoveTableKeysRequest = 135;
}

message OMResponse {
@@ -1981,6 +1983,13 @@ message SnapshotMoveDeletedKeysRequest {
repeated string deletedDirsToMove = 5;
}

message SnapshotMoveTableKeysRequest {
required hadoop.hdds.UUID fromSnapshotID = 1;
repeated SnapshotMoveKeyInfos deletedKeys = 2;
repeated SnapshotMoveKeyInfos deletedDirs = 3;
repeated hadoop.hdds.KeyValue renamedKeys = 4;
}

message SnapshotMoveKeyInfos {
optional string key = 1;
repeated KeyInfo keyInfos = 2;
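For orientation, a minimal sketch (not part of this PR) of how the new message might be assembled from the generated protobuf classes. Builder method names follow standard protobuf-java conventions; the HddsProtos.UUID field names (mostSigBits/leastSigBits) are an assumption, not confirmed by this diff.

import java.util.List;
import java.util.UUID;

import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.SnapshotMoveKeyInfos;
import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.SnapshotMoveTableKeysRequest;

public final class SnapshotMoveTableKeysRequestSketch {
  private SnapshotMoveTableKeysRequestSketch() {
  }

  // Maps the snapshot's java.util.UUID and the three key lists onto the
  // fromSnapshotID, deletedKeys, deletedDirs and renamedKeys fields above.
  static SnapshotMoveTableKeysRequest build(UUID fromSnapshotId,
      List<SnapshotMoveKeyInfos> deletedKeys,
      List<SnapshotMoveKeyInfos> deletedDirs,
      List<HddsProtos.KeyValue> renamedKeys) {
    return SnapshotMoveTableKeysRequest.newBuilder()
        .setFromSnapshotID(HddsProtos.UUID.newBuilder()
            .setMostSigBits(fromSnapshotId.getMostSignificantBits())
            .setLeastSigBits(fromSnapshotId.getLeastSignificantBits())
            .build())
        .addAllDeletedKeys(deletedKeys)
        .addAllDeletedDirs(deletedDirs)
        .addAllRenamedKeys(renamedKeys)
        .build();
  }
}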
@@ -116,6 +116,22 @@ public interface OMMetadataManager extends DBStoreHAManager {
*/
String getBucketKey(String volume, String bucket);

/**
* Given a volume and bucket, return the corresponding DB key prefix.
*
* @param volume - Volume name
* @param bucket - Bucket name
*/
String getBucketKeyPrefix(String volume, String bucket);

/**
* Given a volume and bucket, return the corresponding DB key prefix for FSO buckets.
*
* @param volume - Volume name
* @param bucket - Bucket name
*/
String getBucketKeyPrefixFSO(String volume, String bucket) throws IOException;

/**
* Given a volume, bucket and a key, return the corresponding DB key.
*
@@ -18,6 +18,7 @@

import org.apache.hadoop.hdds.conf.OzoneConfiguration;
import org.apache.hadoop.hdds.utils.db.Table;
import org.apache.hadoop.hdds.utils.db.TableIterator;
import org.apache.hadoop.ozone.common.BlockGroup;
import org.apache.hadoop.ozone.om.exceptions.OMException;
import org.apache.hadoop.ozone.om.helpers.BucketLayout;
@@ -28,13 +29,15 @@
import org.apache.hadoop.ozone.om.helpers.OmMultipartUploadListParts;
import org.apache.hadoop.ozone.om.fs.OzoneManagerFS;
import org.apache.hadoop.hdds.utils.BackgroundService;
import org.apache.hadoop.ozone.om.service.DirectoryDeletingService;
import org.apache.hadoop.ozone.om.service.KeyDeletingService;
import org.apache.hadoop.ozone.om.service.SnapshotDeletingService;
import org.apache.hadoop.ozone.om.service.SnapshotDirectoryCleaningService;
import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.ExpiredMultipartUploadsBucket;

import java.io.IOException;
import java.time.Duration;
import java.util.ArrayList;
import java.util.List;

/**
@@ -119,6 +122,29 @@ ListKeysResult listKeys(String volumeName, String bucketName, String startKey,
*/
PendingKeysDeletion getPendingDeletionKeys(int count) throws IOException;

  /**
   * Returns a list of rename entries from the snapshotRenamedTable.
   *
   * @param volume volume name.
   * @param bucket bucket name.
   * @param startKey key to seek to before iterating.
   * @param count max number of entries to return.
   * @return a list of {@link org.apache.hadoop.hdds.utils.db.Table.KeyValue} entries representing the rename keys
   * in the underlying metadataManager.
   * @throws IOException
   */
  List<Table.KeyValue<String, String>> getRenamesKeyEntries(
      String volume, String bucket, String startKey, int count) throws IOException;

Review thread on getRenamesKeyEntries:
Contributor: nit: Is there a reason to return a List of pairs and not a Map? If it is just to maintain the order, a LinkedHashMap could be used. I feel that the map improves the readability.
Author (swamirishi): There is no reason to create another map, since we are going to iterate over it anyway.
Contributor: I'm not asking to create another map. I'm saying the return type could be a map rather than a list of pairs.
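For reference, the alternative shape the reviewer is suggesting would look roughly like this (a sketch with a hypothetical name, not what the PR adopts):

  // Hypothetical alternative signature per the review suggestion: an
  // insertion-ordered map instead of a list of pairs, e.g. collected into a
  // LinkedHashMap so the RocksDB iteration order is preserved.
  Map<String, String> getRenamesKeyEntriesAsMap(
      String volume, String bucket, String startKey, int count) throws IOException;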


  /**
   * Returns a list of deleted key entries from the deletedTable.
   *
   * @param volume volume name.
   * @param bucket bucket name.
   * @param startKey key to seek to before iterating.
   * @param count max number of entries to return.
   * @return a list of {@link org.apache.hadoop.hdds.utils.db.Table.KeyValue} entries representing the deleted keys
   * in the underlying metadataManager.
   * @throws IOException
   */
  List<Table.KeyValue<String, List<OmKeyInfo>>> getDeletedKeyEntries(
      String volume, String bucket, String startKey, int count) throws IOException;

/**
* Returns the names of up to {@code count} open keys whose age is
* greater than or equal to {@code expireThreshold}.
@@ -216,6 +242,26 @@ OmMultipartUploadListParts listParts(String volumeName, String bucketName,
*/
Table.KeyValue<String, OmKeyInfo> getPendingDeletionDir() throws IOException;

/**
* Returns an iterator for pending deleted directories.
* @throws IOException
*/
TableIterator<String, ? extends Table.KeyValue<String, OmKeyInfo>> getDeletedDirEntries(
String volume, String bucket) throws IOException;

  /**
   * Returns up to {@code count} pending deleted directory entries for the given volume and bucket.
   */
  default List<Table.KeyValue<String, OmKeyInfo>> getDeletedDirEntries(String volume, String bucket, int count)
throws IOException {
List<Table.KeyValue<String, OmKeyInfo>> deletedDirEntries = new ArrayList<>(count);
try (TableIterator<String, ? extends Table.KeyValue<String, OmKeyInfo>> iterator =
getDeletedDirEntries(volume, bucket)) {
while (deletedDirEntries.size() < count && iterator.hasNext()) {
Table.KeyValue<String, OmKeyInfo> kv = iterator.next();
deletedDirEntries.add(Table.newKeyValue(kv.getKey(), kv.getValue()));
}
return deletedDirEntries;
}
}

/**
* Returns all sub directories under the given parent directory.
*
@@ -243,7 +289,7 @@ List<OmKeyInfo> getPendingDeletionSubFiles(long volumeId,
* Returns the instance of Directory Deleting Service.
* @return Background service.
*/
BackgroundService getDirDeletingService();
DirectoryDeletingService getDirDeletingService();

/**
* Returns the instance of Open Key Cleanup Service.
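A sketch of how a caller might page through the new API (the keyManager reference, handler and batch size are assumptions; the sketch also assumes seek(startKey) is inclusive, hence the explicit skip):

  // Hypothetical consumer of KeyManager#getRenamesKeyEntries (sketch only).
  static void drainRenameEntries(KeyManager keyManager, String volume, String bucket) throws IOException {
    final int batch = 1000;
    String startKey = null;
    while (true) {
      List<Table.KeyValue<String, String>> page =
          keyManager.getRenamesKeyEntries(volume, bucket, startKey, batch);
      for (Table.KeyValue<String, String> kv : page) {
        if (kv.getKey().equals(startKey)) {
          continue; // seek(startKey) re-reads the resume key; skip it
        }
        System.out.println(kv.getKey() + " -> " + kv.getValue());
      }
      if (page.size() < batch) {
        break; // short page: the bucket prefix is exhausted
      }
      startKey = page.get(page.size() - 1).getKey(); // resume from the last key seen
    }
  }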
@@ -37,6 +37,7 @@
import java.util.Stack;
import java.util.TreeMap;
import java.util.concurrent.TimeUnit;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.Stream;

@@ -86,6 +87,7 @@
import org.apache.hadoop.ozone.om.helpers.OzoneFSUtils;
import org.apache.hadoop.ozone.om.helpers.OzoneFileStatus;
import org.apache.hadoop.ozone.om.helpers.BucketLayout;
import org.apache.hadoop.ozone.om.helpers.RepeatedOmKeyInfo;
import org.apache.hadoop.ozone.om.request.OMClientRequest;
import org.apache.hadoop.ozone.om.request.file.OMFileRequest;
import org.apache.hadoop.ozone.om.request.util.OMMultipartUploadUtils;
@@ -189,7 +191,7 @@ public class KeyManagerImpl implements KeyManager {

private final KeyProviderCryptoExtension kmsProvider;
private final boolean enableFileSystemPaths;
private BackgroundService dirDeletingService;
private DirectoryDeletingService dirDeletingService;
private final OMPerformanceMetrics metrics;

private BackgroundService openKeyCleanupService;
@@ -662,6 +664,53 @@ public PendingKeysDeletion getPendingDeletionKeys(final int count)
.getPendingDeletionKeys(count, ozoneManager.getOmSnapshotManager());
}

  private <V, R> List<Table.KeyValue<String, R>> getTableEntries(String startKey,
      TableIterator<String, ? extends Table.KeyValue<String, V>> tableIterator,
      Function<V, R> valueFunction, int count) throws IOException {
    List<Table.KeyValue<String, R>> entries = new ArrayList<>();
    /* Seek to the start key if it is not null. The next key picked up is guaranteed to start with the bucket
       prefix, since {@link org.apache.hadoop.hdds.utils.db.Table#iterator(bucketPrefix)} ensures this.
     */
    if (startKey != null) {
      tableIterator.seek(startKey);
    }
    int currentCount = 0;
    while (tableIterator.hasNext() && currentCount < count) {
      Table.KeyValue<String, V> kv = tableIterator.next();
      if (kv != null) {
        entries.add(Table.newKeyValue(kv.getKey(), valueFunction.apply(kv.getValue())));
        currentCount++;
      }
    }
    return entries;
  }

Review thread on the null check (if (kv != null)):
Contributor: Do we need this null check? tableIterator.hasNext() should do that check?
Author (swamirishi, Sep 18, 2024): No harm in having one.
Contributor: Yes, there is no harm, but it isn't how an iterator is supposed to be used unless null is an allowed value in the collection.


  @Override
  public List<Table.KeyValue<String, String>> getRenamesKeyEntries(
      String volume, String bucket, String startKey, int count) throws IOException {
    // Bucket prefix would be empty if volume is empty, i.e. either null or "".
    Optional<String> bucketPrefix = Optional.ofNullable(volume).map(vol -> vol.isEmpty() ? null : vol)
        .map(vol -> metadataManager.getBucketKeyPrefix(vol, bucket));
    try (TableIterator<String, ? extends Table.KeyValue<String, String>>
        renamedKeyIter = metadataManager.getSnapshotRenamedTable().iterator(bucketPrefix.orElse(""))) {
      return getTableEntries(startKey, renamedKeyIter, Function.identity(), count);
    }
  }

  @Override
  public List<Table.KeyValue<String, List<OmKeyInfo>>> getDeletedKeyEntries(
      String volume, String bucket, String startKey, int count) throws IOException {
    // Bucket prefix would be empty if volume is empty, i.e. either null or "".
    Optional<String> bucketPrefix = Optional.ofNullable(volume).map(vol -> vol.isEmpty() ? null : vol)
        .map(vol -> metadataManager.getBucketKeyPrefix(vol, bucket));
    try (TableIterator<String, ? extends Table.KeyValue<String, RepeatedOmKeyInfo>>
        delKeyIter = metadataManager.getDeletedTable().iterator(bucketPrefix.orElse(""))) {
      return getTableEntries(startKey, delKeyIter, RepeatedOmKeyInfo::cloneOmKeyInfoList, count);
    }
  }

@Override
public ExpiredOpenKeys getExpiredOpenKeys(Duration expireThreshold,
int count, BucketLayout bucketLayout, Duration leaseThreshold) throws IOException {
@@ -688,7 +737,7 @@ public KeyDeletingService getDeletingService() {
}

@Override
public BackgroundService getDirDeletingService() {
public DirectoryDeletingService getDirDeletingService() {
return dirDeletingService;
}

@@ -1976,6 +2025,20 @@ public Table.KeyValue<String, OmKeyInfo> getPendingDeletionDir()
return null;
}

  @Override
  public TableIterator<String, ? extends Table.KeyValue<String, OmKeyInfo>> getDeletedDirEntries(
      String volume, String bucket) throws IOException {

    // Either both volume & bucket should be blank or neither of them.
    if (!StringUtils.isBlank(volume) && StringUtils.isBlank(bucket) ||
        StringUtils.isBlank(volume) && !StringUtils.isBlank(bucket)) {
      throw new IOException("One of volume: " + volume + ", bucket: " + bucket + " is empty. Either both " +
          "arguments should be empty or neither of them.");
    }
    return StringUtils.isBlank(volume) ? metadataManager.getDeletedDirTable().iterator() :
        metadataManager.getDeletedDirTable().iterator(metadataManager.getBucketKeyPrefixFSO(volume, bucket));
  }

@Override
public List<OmKeyInfo> getPendingDeletionSubDirs(long volumeId, long bucketId,
OmKeyInfo parentInfo, long numEntries) throws IOException {
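A hypothetical caller (sketch only; names assumed) illustrating the blank/non-blank contract of getDeletedDirEntries:

  // Both blank: iterate the whole deletedDirTable. Both set: iterate only the
  // FSO prefix of that bucket. Mixed blank/non-blank throws IOException.
  static void listDeletedDirs(KeyManager keyManager) throws IOException {
    try (TableIterator<String, ? extends Table.KeyValue<String, OmKeyInfo>> all =
        keyManager.getDeletedDirEntries("", "")) {
      while (all.hasNext()) {
        System.out.println(all.next().getKey()); // every pending deleted dir
      }
    }
    try (TableIterator<String, ? extends Table.KeyValue<String, OmKeyInfo>> scoped =
        keyManager.getDeletedDirEntries("vol1", "bucket1")) {
      while (scoped.hasNext()) {
        System.out.println(scoped.next().getKey()); // only vol1/bucket1 entries
      }
    }
  }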
@@ -824,7 +824,7 @@ public String getUserKey(String user) {
/**
* Given a volume and bucket, return the corresponding DB key.
*
* @param volume - User name
* @param volume - Volume name
* @param bucket - Bucket name
*/
@Override
@@ -838,6 +838,28 @@ public String getBucketKey(String volume, String bucket) {
return builder.toString();
}

  /**
   * Given a volume and bucket, return the corresponding DB key prefix.
   *
   * @param volume - Volume name
   * @param bucket - Bucket name
   */
  @Override
  public String getBucketKeyPrefix(String volume, String bucket) {
    return OzoneFSUtils.addTrailingSlashIfNeeded(getBucketKey(volume, bucket));
  }

  /**
   * Given a volume and bucket, return the corresponding DB key prefix for FSO buckets.
   *
   * @param volume - Volume name
   * @param bucket - Bucket name
   */
  @Override
  public String getBucketKeyPrefixFSO(String volume, String bucket) throws IOException {
    return getOzoneKeyFSO(volume, bucket, OM_KEY_PREFIX);
  }

@Override
public String getOzoneKey(String volume, String bucket, String key) {
StringBuilder builder = new StringBuilder()
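To make the two prefix shapes concrete, a small sketch (the returned values are illustrative, not taken from this diff):

  // Name-based prefix for OBS/legacy buckets, id-based prefix for FSO buckets.
  String obsPrefix = metadataManager.getBucketKeyPrefix("vol1", "bucket1");
  // e.g. "/vol1/bucket1/"
  String fsoPrefix = metadataManager.getBucketKeyPrefixFSO("vol1", "bucket1");
  // e.g. "/<volumeObjectId>/<bucketObjectId>/", resolved via getOzoneKeyFSO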
@@ -24,8 +24,10 @@
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.NoSuchElementException;
@@ -56,6 +58,7 @@ public class SnapshotChainManager {
private final ConcurrentMap<UUID, String> snapshotIdToTableKey;
private UUID latestGlobalSnapshotId;
private final boolean snapshotChainCorrupted;
private UUID oldestGlobalSnapshotId;

public SnapshotChainManager(OMMetadataManager metadataManager) {
globalSnapshotChain = Collections.synchronizedMap(new LinkedHashMap<>());
@@ -104,6 +107,8 @@ private void addSnapshotGlobal(UUID snapshotID, UUID prevGlobalID)
// On add snapshot, set previous snapshot entry nextSnapshotID =
// snapshotID
globalSnapshotChain.get(prevGlobalID).setNextSnapshotId(snapshotID);
} else {
oldestGlobalSnapshotId = snapshotID;
}

globalSnapshotChain.put(snapshotID,
@@ -171,7 +176,9 @@ private boolean deleteSnapshotGlobal(UUID snapshotID) throws IOException {
// for node removal
UUID next = globalSnapshotChain.get(snapshotID).getNextSnapshotId();
UUID prev = globalSnapshotChain.get(snapshotID).getPreviousSnapshotId();

if (snapshotID.equals(oldestGlobalSnapshotId)) {
oldestGlobalSnapshotId = next;
}
if (prev != null && !globalSnapshotChain.containsKey(prev)) {
throw new IOException(String.format(
"Global snapshot chain corruption. " +
@@ -382,6 +389,41 @@ public UUID getLatestGlobalSnapshotId() throws IOException {
return latestGlobalSnapshotId;
}

/**
 * Get the oldest global snapshot in the snapshot chain.
*/
public UUID getOldestGlobalSnapshotId() throws IOException {
validateSnapshotChain();
return oldestGlobalSnapshotId;
}

  public Iterator<UUID> iterator(final boolean reverse) throws IOException {
    validateSnapshotChain();
    return new Iterator<UUID>() {
      private UUID currentSnapshotId = reverse ? getLatestGlobalSnapshotId() : getOldestGlobalSnapshotId();

      @Override
      public boolean hasNext() {
        try {
          return reverse ? hasPreviousGlobalSnapshot(currentSnapshotId) : hasNextGlobalSnapshot(currentSnapshotId);
        } catch (IOException e) {
          return false;
        }
      }

      @Override
      public UUID next() {
        try {
          UUID prevSnapshotId = currentSnapshotId;
          currentSnapshotId =
              reverse ? previousGlobalSnapshot(currentSnapshotId) : nextGlobalSnapshot(currentSnapshotId);
          return prevSnapshotId;
        } catch (IOException e) {
          throw new UncheckedIOException("Error while getting next snapshot for " + currentSnapshotId, e);
        }
      }
    };
  }

/**
* Get latest path snapshot in snapshot chain.
*/
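A sketch of how the new SnapshotChainManager#iterator might be consumed (hypothetical caller code; java.util imports assumed):

  // Walks the global chain newest-to-oldest, e.g. to locate snapshots whose
  // table entries still need to be moved.
  static List<UUID> snapshotsNewestFirst(SnapshotChainManager chainManager) throws IOException {
    List<UUID> ids = new ArrayList<>();
    Iterator<UUID> iterator = chainManager.iterator(true); // true = reverse, latest first
    while (iterator.hasNext()) {
      ids.add(iterator.next());
    }
    return ids;
  }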
@@ -78,6 +78,7 @@
import org.apache.hadoop.ozone.om.request.snapshot.OMSnapshotCreateRequest;
import org.apache.hadoop.ozone.om.request.snapshot.OMSnapshotDeleteRequest;
import org.apache.hadoop.ozone.om.request.snapshot.OMSnapshotMoveDeletedKeysRequest;
import org.apache.hadoop.ozone.om.request.snapshot.OMSnapshotMoveTableKeysRequest;
import org.apache.hadoop.ozone.om.request.snapshot.OMSnapshotPurgeRequest;
import org.apache.hadoop.ozone.om.request.snapshot.OMSnapshotRenameRequest;
import org.apache.hadoop.ozone.om.request.snapshot.OMSnapshotSetPropertyRequest;
@@ -232,6 +233,8 @@ public static OMClientRequest createClientRequest(OMRequest omRequest,
return new OMSnapshotRenameRequest(omRequest);
case SnapshotMoveDeletedKeys:
return new OMSnapshotMoveDeletedKeysRequest(omRequest);
case SnapshotMoveTableKeys:
return new OMSnapshotMoveTableKeysRequest(omRequest);
case SnapshotPurge:
return new OMSnapshotPurgeRequest(omRequest);
case SetSnapshotProperty:
@@ -82,7 +82,7 @@ public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, TermIn
// Check the snapshot exists.
SnapshotUtils.getSnapshotInfo(ozoneManager, fromSnapshot.getTableKey());

nextSnapshot = SnapshotUtils.getNextActiveSnapshot(fromSnapshot, snapshotChainManager, ozoneManager);
nextSnapshot = SnapshotUtils.getNextSnapshot(ozoneManager, snapshotChainManager, fromSnapshot);

// Get next non-deleted snapshot.
List<SnapshotMoveKeyInfos> nextDBKeysList = moveDeletedKeysRequest.getNextDBKeysList();