Commit fc9946e

HADOOP-17244. S3A directory delete tombstones dir markers prematurely.

* Fix S3A test failures caused by DeleteOperation bugs added
* Extend S3A tests
* Fix FileContext base test so that raised IOEs are always rethrown

Regarding change apache#3: I think those IOEs should be thrown first, but as the existing code does the asserts in finally(), I'm leaving it alone.

Change-Id: Ia3159db7760424264d84fa2e38d2cbb452a9854a

1 parent 63a6f22 commit fc9946e

4 files changed (+46 −34 lines)

hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/FileContextMainOperationsBaseTest.java

Lines changed: 3 additions & 26 deletions

```diff
@@ -21,7 +21,6 @@
 import java.io.File;
 import java.io.FileNotFoundException;
 import java.io.IOException;
-import java.util.Arrays;
 import java.util.EnumSet;
 import java.util.NoSuchElementException;
 import java.util.concurrent.CompletableFuture;
@@ -1323,39 +1322,17 @@ protected void rename(Path src, Path dst, boolean srcExists,
       LOG.debug("Probing source and destination");
       Assert.assertEquals("Source exists", srcExists, exists(fc, src));
       Assert.assertEquals("Destination exists", dstExists, exists(fc, dst));
-      if (!dstExists) {
-        verifyNoListing(fc, dst);
-      }
     } catch (AssertionError e) {
       if (ioe != null && e.getCause() == null) {
         e.initCause(ioe);
       }
       throw e;
-
     }
-  }
-
-  /**
-   * List a path, verify that there are no direct child entries.
-   * @param path path to scan
-   */
-  protected void verifyNoListing(FileContext fc, final Path path)
-      throws IOException {
-    try {
-      intercept(FileNotFoundException.class, () -> {
-        RemoteIterator<FileStatus> st = fc.listStatus(path);
-        if (st.hasNext()) {
-          return st.next().toString();
-        }
-        return "No exception raised";
-      });
-    } catch (IOException e) {
-      throw e;
-    } catch (Exception e) {
-      throw new AssertionError(e);
-
+    if (ioe != null) {
+      throw ioe;
     }
   }
+
   private boolean containsPath(Path path, FileStatus[] filteredPaths)
       throws IOException {
     for(int i = 0; i < filteredPaths.length; i ++) {
```
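
The behavioural change in the second hunk is easy to miss: the helper captured a raised IOException, used it only to annotate assertion failures, and otherwise dropped it, so a rename that failed with an IOE while leaving the probed paths in the expected state would pass silently. The fix rethrows the captured exception after the probes. Below is a minimal standalone sketch of that capture-probe-rethrow pattern; the class and helper names are hypothetical, not the Hadoop test itself.

```java
import java.io.IOException;

/** Hypothetical, simplified illustration; not the Hadoop test code. */
public class RethrowCapturedIOE {

  static void renameAndVerify() throws IOException {
    IOException ioe = null;
    try {
      doRename();               // the operation under test; may raise an IOE
    } catch (IOException e) {
      ioe = e;                  // capture so the state probes can still run
    }
    // ... assertions on source/destination state would run here ...
    if (ioe != null) {
      throw ioe;                // the fix: never swallow the captured IOE
    }
  }

  static void doRename() throws IOException {
    throw new IOException("simulated rename failure");
  }

  public static void main(String[] args) {
    try {
      renameAndVerify();
    } catch (IOException e) {
      System.out.println("rethrown as expected: " + e.getMessage());
    }
  }
}
```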

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/DeleteOperation.java

Lines changed: 8 additions & 4 deletions

```diff
@@ -366,7 +366,7 @@ protected void deleteDirectoryTree(final Path path,
         S3AFileStatus next = objects.next();
         LOG.debug("Found Unlisted entry {}", next);
         queueForDeletion(deletionKey(next), null,
-            objects.next().isDirectory());
+            next.isDirectory());
       }
       if (extraFilesDeleted > 0) {
         LOG.debug("Raw S3 Scan found {} extra file(s) to delete",
@@ -548,7 +548,9 @@ private void asyncDeleteAction(
                 undeletedObjects,
                 state,
                 !auditDeletedKeys));
-        deletedObjects.addAll(result.getDeletedObjects());
+        if (result != null) {
+          deletedObjects.addAll(result.getDeletedObjects());
+        }
         // now the dirs
         List<DeleteObjectsRequest.KeyVersion> dirs = keyList.stream()
             .filter(e -> e.isDirMarker)
@@ -564,15 +566,17 @@ private void asyncDeleteAction(
                 undeletedObjects,
                 state,
                 !auditDeletedKeys));
-        deletedObjects.addAll(result.getDeletedObjects());
+        if (result != null) {
+          deletedObjects.addAll(result.getDeletedObjects());
+        }
       }
       if (!pathList.isEmpty()) {
         // delete file paths only. This stops tombstones
        // being added until the final directory cleanup
        // (HADOOP-17244)
        metadataStore.deletePaths(pathList, state);
      }
-      if (auditDeletedKeys && result != null) {
+      if (auditDeletedKeys) {
        // audit the deleted keys
        if (deletedObjects.size() != keyList.size()) {
          // size mismatch
```
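
Two distinct bugs are fixed above. The second and third hunks simply stop assuming the bulk-delete `result` is non-null before collecting its deleted objects. The first hunk is a classic iterator slip: the element had already been read into `next`, so the extra `objects.next()` consumed and discarded a second entry on every pass. A minimal standalone sketch of that pitfall, with hypothetical names (plain strings stand in for the S3AFileStatus entries):

```java
import java.util.Iterator;
import java.util.List;

/** Hypothetical, minimal illustration of the double-next() pitfall. */
public class DoubleNextPitfall {
  public static void main(String[] args) {
    List<String> entries = List.of("a/", "b.txt", "c/");

    // Buggy shape: a second it.next() in the loop body advances the
    // iterator again, so every other entry is skipped, and the extra
    // call can throw NoSuchElementException on an odd-length list:
    //
    //   while (it.hasNext()) {
    //     String next = it.next();
    //     queue(next, it.next().endsWith("/"));   // WRONG
    //   }

    // Fixed shape, as in the commit: reuse the element already read.
    Iterator<String> it = entries.iterator();
    while (it.hasNext()) {
      String next = it.next();
      boolean isDir = next.endsWith("/");          // was: it.next()...
      System.out.printf("queueing %s (dir=%b)%n", next, isDir);
    }
  }
}
```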

hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardEmptyDirs.java

Lines changed: 24 additions & 0 deletions

```diff
@@ -32,6 +32,7 @@
 import org.assertj.core.api.Assertions;
 import org.junit.Test;
 
+import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.s3a.impl.StatusProbeEnum;
 import org.apache.hadoop.fs.s3a.impl.StoreContext;
@@ -286,4 +287,27 @@ public int read() {
     s3.putObject(putObjectRequest);
   }
 
+  @Test
+  public void testDirMarkerDelete() throws Throwable {
+    S3AFileSystem fs = getFileSystem();
+    assumeFilesystemHasMetadatastore(getFileSystem());
+    Path baseDir = methodPath();
+    Path subFile = new Path(baseDir, "subdir/file.txt");
+    // adds the s3guard entry
+    fs.mkdirs(baseDir);
+    touch(fs, subFile);
+    // PUT a marker
+    createEmptyObject(fs, fs.pathToKey(baseDir) + "/");
+    fs.delete(baseDir, true);
+    assertPathDoesNotExist("Should have been deleted", baseDir);
+
+    // now create the dir again
+    fs.mkdirs(baseDir);
+    FileStatus fileStatus = fs.getFileStatus(baseDir);
+    Assertions.assertThat(fileStatus)
+        .matches(FileStatus::isDirectory, "Not a directory");
+    Assertions.assertThat(fs.listStatus(baseDir))
+        .describedAs("listing of %s", baseDir)
+        .isEmpty();
+  }
 }
```
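
The createEmptyObject() call in the new test reproduces what S3A calls a directory marker: a zero-byte object whose key ends in "/". For readers outside the Hadoop test suite, a rough sketch of the same PUT using the AWS SDK for Java v1 (the SDK generation S3A used at the time of this commit); the bucket name and key here are hypothetical:

```java
import java.io.ByteArrayInputStream;

import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.AmazonS3ClientBuilder;
import com.amazonaws.services.s3.model.ObjectMetadata;
import com.amazonaws.services.s3.model.PutObjectRequest;

/** Hypothetical sketch: PUT a zero-byte S3 "directory marker". */
public class PutDirMarker {
  public static void main(String[] args) {
    String bucket = args[0];        // e.g. "my-test-bucket" (hypothetical)
    String dirKey = args[1] + "/";  // marker keys always end in "/"

    AmazonS3 s3 = AmazonS3ClientBuilder.defaultClient();
    ObjectMetadata meta = new ObjectMetadata();
    meta.setContentLength(0);       // zero bytes: the marker carries no data
    s3.putObject(new PutObjectRequest(
        bucket, dirKey, new ByteArrayInputStream(new byte[0]), meta));
  }
}
```

A recursive delete of baseDir then has to remove both the real child object and this marker, which is the path on which the premature tombstoning of HADOOP-17244 showed up.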

hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/ITestS3ADeleteCost.java

Lines changed: 11 additions & 4 deletions

```diff
@@ -23,6 +23,7 @@
 import java.util.Arrays;
 import java.util.Collection;
 
+import org.assertj.core.api.Assertions;
 import org.junit.Test;
 import org.junit.runner.RunWith;
 import org.junit.runners.Parameterized;
@@ -195,14 +196,17 @@ public void testDirMarkersSubdir() throws Throwable {
         withWhenDeleting(FAKE_DIRECTORIES_DELETED,
             directoriesInPath(subDir) - 1));
 
+    int dirDeleteRequests = 1;
+    int fileDeleteRequests = 0;
+    int totalDeleteRequests = dirDeleteRequests + fileDeleteRequests;
 
     // now delete the deep tree.
     verifyMetrics(() ->
         fs.delete(parent, true),
 
         // keeping: the parent dir marker needs deletion alongside
         // the subdir one.
-        with(OBJECT_DELETE_REQUESTS, DELETE_MARKER_REQUEST),
+        with(OBJECT_DELETE_REQUESTS, totalDeleteRequests),
         withWhenKeeping(OBJECT_DELETE_OBJECTS, dirsCreated),
 
         // deleting: only the marker at the bottom needs deleting
@@ -211,9 +215,12 @@ public void testDirMarkersSubdir() throws Throwable {
     // followup with list calls to make sure all is clear.
     verifyNoListing(parent);
     verifyNoListing(subDir);
-    verifyNoListing(new Path(parent, "1"));
-    verifyNoListing(new Path(parent, "1/2/"));
-    verifyNoListing(new Path(parent, "1/2/3"));
+    // now reinstate the directory, which in HADOOP-17244 hitting problems
+    fs.mkdirs(parent);
+    FileStatus[] children = fs.listStatus(parent);
+    Assertions.assertThat(children)
+        .describedAs("Children of %s", parent)
+        .isEmpty();
   }
 
   /**
```
