From 19f9afb42a1becfc6fefcf725374db08d42c3bc7 Mon Sep 17 00:00:00 2001
From: Sumit Agrawal
Date: Wed, 24 Jul 2024 13:04:49 +0530
Subject: [PATCH] HDDS-11215. Quota count can go wrong when double buffer
 flush takes time. (#6978)

---
 .../TestDirectoryDeletingServiceWithFSO.java  | 80 +++++++++++++++++++
 .../hadoop/ozone/om/KeyManagerImpl.java       |  6 ++
 .../om/ratis/OzoneManagerDoubleBuffer.java    |  4 +-
 .../key/OMDirectoriesPurgeRequestWithFSO.java | 10 +++
 4 files changed, 98 insertions(+), 2 deletions(-)

diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestDirectoryDeletingServiceWithFSO.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestDirectoryDeletingServiceWithFSO.java
index 382f4b72034..0abfb133654 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestDirectoryDeletingServiceWithFSO.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestDirectoryDeletingServiceWithFSO.java
@@ -18,6 +18,7 @@
 
 package org.apache.hadoop.fs.ozone;
 
+import java.util.concurrent.CompletableFuture;
 import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileStatus;
@@ -33,8 +34,13 @@
 import org.apache.hadoop.ozone.TestDataUtil;
 import org.apache.hadoop.ozone.client.OzoneBucket;
 import org.apache.hadoop.ozone.client.OzoneClient;
+import org.apache.hadoop.ozone.client.OzoneVolume;
 import org.apache.hadoop.ozone.om.OMMetadataManager;
+import org.apache.hadoop.ozone.om.helpers.OzoneFileStatus;
 import org.apache.hadoop.ozone.om.helpers.SnapshotInfo;
+import org.apache.hadoop.ozone.om.ratis.OzoneManagerDoubleBuffer;
+import org.apache.hadoop.ozone.om.ratis.OzoneManagerStateMachine;
+import org.apache.hadoop.ozone.om.request.file.OMFileRequest;
 import org.apache.hadoop.ozone.om.service.DirectoryDeletingService;
 import org.apache.hadoop.ozone.om.service.KeyDeletingService;
 import org.apache.hadoop.ozone.om.OMConfigKeys;
@@ -299,6 +305,80 @@ public void testDeleteWithMultiLevels() throws Exception {
     assertThat(dirDeletingService.getRunCount().get()).isGreaterThan(1);
   }
 
+  @Test
+  public void testDeleteWithMultiLevelsBlockDoubleBuffer() throws Exception {
+    Path root = new Path("/rootDirdd");
+    Path appRoot = new Path(root, "appRoot");
+
+    for (int i = 1; i <= 3; i++) {
+      Path parent = new Path(appRoot, "parentDir" + i);
+      Path child = new Path(parent, "childFile");
+      ContractTestUtils.touch(fs, child);
+    }
+
+    OMMetadataManager metadataManager = cluster.getOzoneManager().getMetadataManager();
+    Table<String, OmKeyInfo> deletedDirTable = metadataManager.getDeletedDirTable();
+    Table<String, OmKeyInfo> keyTable = metadataManager.getKeyTable(getFSOBucketLayout());
+    Table<String, OmDirectoryInfo> dirTable = metadataManager.getDirectoryTable();
+
+    DirectoryDeletingService dirDeletingService = (DirectoryDeletingService) cluster.getOzoneManager().getKeyManager()
+        .getDirDeletingService();
+
+    // Before delete
+    assertTableRowCount(deletedDirTable, 0);
+    assertTableRowCount(dirTable, 5);
+    assertTableRowCount(keyTable, 3);
+
+    // stop daemon to simulate blocked double buffer
+    OzoneManagerStateMachine omStateMachine = cluster.getOzoneManager().getOmRatisServer().getOmStateMachine();
+    OzoneManagerDoubleBuffer omDoubleBuffer = omStateMachine.getOzoneManagerDoubleBuffer();
+    omDoubleBuffer.awaitFlush();
+    omDoubleBuffer.stopDaemon();
+
+    OzoneVolume volume = client.getObjectStore().getVolume(volumeName);
+    OzoneBucket bucket = volume.getBucket(bucketName); long volumeId = metadataManager.getVolumeId(volumeName);
+
+    // manually delete dir and add to deleted table. namespace count occupied "1" as manual deletion do not reduce
+    long bucketId = metadataManager.getBucketId(volumeName, bucketName);
+    OzoneFileStatus keyStatus = OMFileRequest.getOMKeyInfoIfExists(
+        metadataManager, volumeName, bucketName, "rootDirdd", 0,
+        cluster.getOzoneManager().getDefaultReplicationConfig());
+    String ozoneDbKey = metadataManager.getOzonePathKey(volumeId, bucketId, keyStatus.getKeyInfo().getParentObjectID(),
+        keyStatus.getKeyInfo().getFileName());
+    deletedDirTable.put(ozoneDbKey, keyStatus.getKeyInfo());
+    dirTable.delete(ozoneDbKey);
+    assertTableRowCount(deletedDirTable, 1);
+    assertTableRowCount(dirTable, 4);
+
+    System.out.println("starting count: " + bucket.getUsedNamespace());
+
+    // wait for running 2 more iteration
+    long currentCount = dirDeletingService.getRunCount().get();
+    GenericTestUtils.waitFor(() -> dirDeletingService.getRunCount().get() > currentCount + 2, 1000,
+        10000);
+
+    // verify bucket used namespace is not going -ve
+    assertTrue(volume.getBucket(bucketName).getUsedNamespace() >= 0);
+
+    // re-init double buffer in state machine to resume further processing
+    omStateMachine.pause();
+    omStateMachine.unpause(omStateMachine.getLastAppliedTermIndex().getIndex(),
+        omStateMachine.getLatestSnapshot().getIndex());
+    // flush previous pending transaction manually
+    omDoubleBuffer.resume();
+    CompletableFuture.supplyAsync(() -> {
+      omDoubleBuffer.flushTransactions();
+      return null;
+    });
+    omDoubleBuffer.awaitFlush();
+    omDoubleBuffer.stop();
+    // verify deletion progress completion
+    assertTableRowCount(deletedDirTable, 0);
+    assertTableRowCount(keyTable, 0);
+    assertTrue(volume.getBucket(bucketName).getUsedNamespace() >= 0);
+    assertTrue(volume.getBucket(bucketName).getUsedBytes() == 0);
+  }
+
   @Test
   public void testDeleteFilesAndSubFiles() throws Exception {
 
diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/KeyManagerImpl.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/KeyManagerImpl.java
index a0967c781ee..8e9c7eb8b4d 100644
--- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/KeyManagerImpl.java
+++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/KeyManagerImpl.java
@@ -2031,6 +2031,9 @@ private List<OmKeyInfo> gatherSubDirsWithIterator(OmKeyInfo parentInfo,
           parentInfo.getObjectID())) {
         break;
       }
+      if (!metadataManager.getDirectoryTable().isExist(entry.getKey())) {
+        continue;
+      }
       String dirName = OMFileRequest.getAbsolutePath(parentInfo.getKeyName(),
           dirInfo.getName());
       OmKeyInfo omKeyInfo = OMFileRequest.getOmKeyInfo(
@@ -2065,6 +2068,9 @@ public List<OmKeyInfo> getPendingDeletionSubFiles(long volumeId,
           parentInfo.getObjectID())) {
         break;
       }
+      if (!metadataManager.getFileTable().isExist(entry.getKey())) {
+        continue;
+      }
       fileInfo.setFileName(fileInfo.getKeyName());
       String fullKeyPath = OMFileRequest.getAbsolutePath(
           parentInfo.getKeyName(), fileInfo.getKeyName());
diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerDoubleBuffer.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerDoubleBuffer.java
index 857005bd929..a6fcc40dda1 100644
--- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerDoubleBuffer.java
+++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerDoubleBuffer.java
@@ -290,7 +290,7 @@ private void addToBatchTransactionInfoWithTrace(String parentName,
    * and commit to DB.
    */
   @VisibleForTesting
-  void flushTransactions() {
+  public void flushTransactions() {
     while (isRunning.get() && canFlush()) {
       flushCurrentBuffer();
     }
@@ -617,7 +617,7 @@ int getReadyBufferSize() {
   }
 
   @VisibleForTesting
-  void resume() {
+  public void resume() {
     isRunning.set(true);
   }
 
diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMDirectoriesPurgeRequestWithFSO.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMDirectoriesPurgeRequestWithFSO.java
index 505b6287307..74f7bb54c16 100644
--- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMDirectoriesPurgeRequestWithFSO.java
+++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMDirectoriesPurgeRequestWithFSO.java
@@ -24,6 +24,8 @@
 import java.util.Map;
 import java.util.Set;
 import org.apache.commons.lang3.tuple.Pair;
+import org.apache.hadoop.hdds.utils.db.cache.CacheKey;
+import org.apache.hadoop.hdds.utils.db.cache.CacheValue;
 import org.apache.hadoop.ozone.om.OMMetrics;
 import org.apache.ratis.server.protocol.TermIndex;
 import org.apache.hadoop.ozone.om.OMMetadataManager;
@@ -95,6 +97,10 @@ public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, TermIn
       if (null != omBucketInfo
           && omBucketInfo.getObjectID() == path.getBucketId()) {
         omBucketInfo.incrUsedNamespace(-1L);
+        String ozoneDbKey = omMetadataManager.getOzonePathKey(path.getVolumeId(),
+            path.getBucketId(), keyInfo.getParentObjectID(), keyInfo.getFileName());
+        omMetadataManager.getDirectoryTable().addCacheEntry(new CacheKey<>(ozoneDbKey),
+            CacheValue.get(termIndex.getIndex()));
         volBucketInfoMap.putIfAbsent(volBucketPair, omBucketInfo);
       }
     }
@@ -119,6 +125,10 @@ public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, TermIn
           && omBucketInfo.getObjectID() == path.getBucketId()) {
         omBucketInfo.incrUsedBytes(-sumBlockLengths(keyInfo));
         omBucketInfo.incrUsedNamespace(-1L);
+        String ozoneDbKey = omMetadataManager.getOzonePathKey(path.getVolumeId(),
+            path.getBucketId(), keyInfo.getParentObjectID(), keyInfo.getFileName());
+        omMetadataManager.getFileTable().addCacheEntry(new CacheKey<>(ozoneDbKey),
+            CacheValue.get(termIndex.getIndex()));
         volBucketInfoMap.putIfAbsent(volBucketPair, omBucketInfo);
       }
     }