diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardEmptyDirs.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardEmptyDirs.java index fb6e3701d3227..178412c1eacdd 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardEmptyDirs.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardEmptyDirs.java @@ -18,13 +18,28 @@ package org.apache.hadoop.fs.s3a; +import java.io.IOException; +import java.io.InputStream; +import java.util.stream.Stream; + +import com.amazonaws.services.s3.AmazonS3; +import com.amazonaws.services.s3.model.ListObjectsRequest; +import com.amazonaws.services.s3.model.ObjectListing; +import com.amazonaws.services.s3.model.PutObjectRequest; +import org.assertj.core.api.Assertions; +import org.junit.Test; + import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.s3a.impl.StoreContext; +import org.apache.hadoop.fs.s3a.s3guard.DDBPathMetadata; +import org.apache.hadoop.fs.s3a.s3guard.DynamoDBMetadataStore; import org.apache.hadoop.fs.s3a.s3guard.MetadataStore; import org.apache.hadoop.fs.s3a.s3guard.NullMetadataStore; -import org.junit.Assume; -import org.junit.Test; import static org.apache.hadoop.fs.contract.ContractTestUtils.touch; +import static org.apache.hadoop.fs.s3a.S3ATestUtils.assume; +import static org.apache.hadoop.fs.s3a.S3ATestUtils.assumeFilesystemHasMetadatastore; +import static org.apache.hadoop.fs.s3a.S3ATestUtils.getStatusWithEmptyDirFlag; /** * Test logic around whether or not a directory is empty, with S3Guard enabled. 
@@ -37,10 +52,15 @@ */ public class ITestS3GuardEmptyDirs extends AbstractS3ATestBase { + @Override + public void setup() throws Exception { + super.setup(); + assumeFilesystemHasMetadatastore(getFileSystem()); + } + @Test public void testEmptyDirs() throws Exception { S3AFileSystem fs = getFileSystem(); - Assume.assumeTrue(fs.hasMetadataStore()); MetadataStore configuredMs = fs.getMetadataStore(); Path existingDir = path("existing-dir"); Path existingFile = path("existing-dir/existing-file"); @@ -82,4 +102,119 @@ public void testEmptyDirs() throws Exception { configuredMs.forgetMetadata(existingDir); } } + + /** + * Test tombstones don't get in the way of a listing of the + * root dir. + * This test needs to create a path which appears first in the listing, + * and an entry which can come later. To allow the test to proceed + * while other tests are running, the filename "0000" is used for that + * deleted entry. + */ + @Test + public void testRootTombstones() throws Throwable { + S3AFileSystem fs = getFileSystem(); + + // Create the first and last files. + Path root = fs.makeQualified(new Path("/")); + // use something ahead of all the ASCII alphabet characters so + // even during parallel test runs, this test is expected to work. + String first = "0000"; + Path firstPath = new Path(root, first); + + // this path is near the bottom of the ASCII string space. + // This isn't so critical. 
+ String last = "zzzz"; + Path lastPath = new Path(root, last); + touch(fs, firstPath); + touch(fs, lastPath); + // Delete first entry (+assert tombstone) + assertDeleted(firstPath, false); + DynamoDBMetadataStore ddbMs = getRequiredDDBMetastore(fs); + DDBPathMetadata firstMD = ddbMs.get(firstPath); + assertNotNull("No MD for " + firstPath, firstMD); + assertTrue("Not a tombstone " + firstMD, + firstMD.isDeleted()); + // PUT child to store + Path child = new Path(firstPath, "child"); + StoreContext ctx = fs.createStoreContext(); + String childKey = ctx.pathToKey(child); + String rootKey = ctx.pathToKey(root); + AmazonS3 s3 = fs.getAmazonS3ClientForTesting("LIST"); + String bucket = ctx.getBucket(); + try { + createEmptyObject(fs, childKey); + + // Do a list + ListObjectsRequest listReq = new ListObjectsRequest( + bucket, rootKey, "", "/", 10); + ObjectListing listing = s3.listObjects(listReq); + + // the listing has the first path as a prefix, because of the child + Assertions.assertThat(listing.getCommonPrefixes()) + .describedAs("The prefixes of a LIST of %s", root) + .contains(first + "/"); + + // and the last file is one of the files + Stream files = listing.getObjectSummaries() + .stream() + .map(s -> s.getKey()); + Assertions.assertThat(files) + .describedAs("The files of a LIST of %s", root) + .contains(last); + + // verify absolutely that the last file exists + assertPathExists("last file", lastPath); + + // do a getFile status with empty dir flag + S3AFileStatus rootStatus = getStatusWithEmptyDirFlag(fs, root); + assertNonEmptyDir(rootStatus); + } finally { + // try to recover from the defective state. 
+ s3.deleteObject(bucket, childKey); + fs.delete(lastPath, true); + ddbMs.forgetMetadata(firstPath); + } + } + + protected void assertNonEmptyDir(final S3AFileStatus status) { + assertEquals("Should not be empty dir: " + status, Tristate.FALSE, + status.isEmptyDirectory()); + } + + /** + * Get the DynamoDB metastore; skip the test if it is of a different + * type. + * @return extracted and cast metadata store. + */ + @SuppressWarnings("ConstantConditions") + private DynamoDBMetadataStore getRequiredDDBMetastore(S3AFileSystem fs) { + MetadataStore ms = fs.getMetadataStore(); + assume("Not a DynamoDBMetadataStore: " + ms, + ms instanceof DynamoDBMetadataStore); + return (DynamoDBMetadataStore) ms; + } + + /** + * From {@code S3AFileSystem.createEmptyObject()}. + * @param fs filesystem + * @param key key + * @throws IOException failure + */ + private void createEmptyObject(S3AFileSystem fs, String key) + throws IOException { + final InputStream im = new InputStream() { + @Override + public int read() throws IOException { + return -1; + } + }; + + PutObjectRequest putObjectRequest = fs.newPutObjectRequest(key, + fs.newObjectMetadata(0L), + im); + AmazonS3 s3 = fs.getAmazonS3ClientForTesting("PUT"); + s3.putObject(putObjectRequest); + } + } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java index a789e83d1dda9..c65f6bbdc7e3c 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java @@ -505,6 +505,16 @@ public static boolean isMetadataStoreAuthoritative(Configuration conf) { Constants.DEFAULT_METADATASTORE_AUTHORITATIVE); } + /** + * Require a filesystem to have a metadata store; skip test + * if not.
+ * @param fs filesystem to check + */ + public static void assumeFilesystemHasMetadatastore(S3AFileSystem fs) { + assume("Filesystem does not have a metastore", + fs.hasMetadataStore()); + } + /** * Reset all metrics in a list. * @param metrics metrics to reset @@ -818,6 +828,22 @@ public static T terminateService(final T service) { return null; } + /** + * Get a file status from S3A with the {@code needEmptyDirectoryFlag} + * state probed. + * This accesses a package-private method in the + * S3A filesystem. + * @param fs filesystem + * @param dir directory + * @return a status + * @throws IOException failure + */ + public static S3AFileStatus getStatusWithEmptyDirFlag( + final S3AFileSystem fs, + final Path dir) throws IOException { + return fs.innerGetFileStatus(dir, true); + } + /** * Helper class to do diffs of metrics. */