Skip to content

Commit

Permalink
HADOOP-18508. S3A: Support parallel integration test runs on same buc…
Browse files Browse the repository at this point in the history
…ket (#5081)

It is now possible to provide a job ID in the maven "job.id" property
hadoop-aws test runs to isolate paths under a the test bucket
under which all tests will be executed.

This will allow independent builds *in different source trees*
to test against the same bucket in parallel, and is designed for
CI testing.

Example:

mvn verify -Dparallel-tests -Droot.tests.enabled=false -Djob.id=1
mvn verify -Droot.tests.enabled=false -Djob.id=2

- Root tests must be be disabled to stop them cleaning up
  the test paths of other test runs.
- Do still regularly run the root tests just to force cleanup
  of the output of any interrupted test suites.

Contributed by Steve Loughran
  • Loading branch information
steveloughran committed Jun 14, 2024
1 parent acec539 commit 3173b72
Show file tree
Hide file tree
Showing 18 changed files with 197 additions and 170 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,9 @@ public void setUp() throws Exception {

@After
public void tearDown() throws Exception {
fSys.delete(new Path(getAbsoluteTestRootPath(fSys), new Path("test")), true);
if (fSys != null) {
fSys.delete(new Path(getAbsoluteTestRootPath(fSys), new Path("test")), true);
}
}


Expand Down Expand Up @@ -192,7 +194,7 @@ public void testWorkingDirectory() throws Exception {

@Test
public void testWDAbsolute() throws IOException {
Path absoluteDir = new Path(fSys.getUri() + "/test/existingDir");
Path absoluteDir = getTestRootPath(fSys, "test/existingDir");
fSys.mkdirs(absoluteDir);
fSys.setWorkingDirectory(absoluteDir);
Assert.assertEquals(absoluteDir, fSys.getWorkingDirectory());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,12 @@ public abstract class FileContextMainOperationsBaseTest {
protected final FileContextTestHelper fileContextTestHelper =
createFileContextHelper();

/**
* Create the test helper.
* Important: this is invoked during the construction of the base class,
* so is very brittle.
* @return a test helper.
*/
protected FileContextTestHelper createFileContextHelper() {
return new FileContextTestHelper();
}
Expand All @@ -107,7 +113,7 @@ public boolean accept(Path file) {

private static final byte[] data = getFileData(numBlocks,
getDefaultBlockSize());

@Before
public void setUp() throws Exception {
File testBuildData = GenericTestUtils.getRandomizedTestDir();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,23 +21,13 @@
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;

public class TestFSMainOperationsLocalFileSystem extends FSMainOperationsBaseTest {

@Override
protected FileSystem createFileSystem() throws IOException {
return FileSystem.getLocal(new Configuration());
}

@Override
@Before
public void setUp() throws Exception {
super.setUp();
}

static Path wd = null;
@Override
Expand All @@ -46,19 +36,5 @@ protected Path getDefaultWorkingDirectory() throws IOException {
wd = FileSystem.getLocal(new Configuration()).getWorkingDirectory();
return wd;
}

@Override
@After
public void tearDown() throws Exception {
super.tearDown();
}

@Test
@Override
public void testWDAbsolute() throws IOException {
Path absoluteDir = getTestRootPath(fSys, "test/existingDir");
fSys.mkdirs(absoluteDir);
fSys.setWorkingDirectory(absoluteDir);
Assert.assertEquals(absoluteDir, fSys.getWorkingDirectory());
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -53,14 +53,5 @@ public void tearDown() throws Exception {
super.tearDown();
ViewFileSystemTestSetup.tearDown(this, fcTarget);
}

@Test
@Override
public void testWDAbsolute() throws IOException {
Path absoluteDir = getTestRootPath(fSys, "test/existingDir");
fSys.mkdirs(absoluteDir);
fSys.setWorkingDirectory(absoluteDir);
Assert.assertEquals(absoluteDir, fSys.getWorkingDirectory());

}
}
27 changes: 15 additions & 12 deletions hadoop-tools/hadoop-aws/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,11 @@

<!-- Is prefetch enabled? -->
<fs.s3a.prefetch.enabled>unset</fs.s3a.prefetch.enabled>
<!-- Job ID; allows for parallel jobs on same bucket -->
<!-- job.id is used to build the path for tests; default is 00.-->
<job.id>00</job.id>
<!-- are root tests enabled. Set to false when running parallel jobs on same bucket -->
<root.tests.enabled>unset</root.tests.enabled>
</properties>

<profiles>
Expand Down Expand Up @@ -115,14 +120,8 @@
<test.build.data>${test.build.data}/${surefire.forkNumber}</test.build.data>
<test.build.dir>${test.build.dir}/${surefire.forkNumber}</test.build.dir>
<hadoop.tmp.dir>${hadoop.tmp.dir}/${surefire.forkNumber}</hadoop.tmp.dir>
<test.unique.fork.id>job-${job.id}-fork-000${surefire.forkNumber}</test.unique.fork.id>

<!-- Due to a Maven quirk, setting this to just -->
<!-- surefire.forkNumber won't do the parameter -->
<!-- substitution. Putting a prefix in front of it like -->
<!-- "fork-" makes it work. -->
<!-- Important: Those leading 0s are needed to guarantee that -->
<!-- trailing three chars are always numeric and unique -->
<test.unique.fork.id>fork-000${surefire.forkNumber}</test.unique.fork.id>
<!-- Propagate scale parameters -->
<fs.s3a.scale.test.enabled>${fs.s3a.scale.test.enabled}</fs.s3a.scale.test.enabled>
<fs.s3a.scale.test.huge.filesize>${fs.s3a.scale.test.huge.filesize}</fs.s3a.scale.test.huge.filesize>
Expand Down Expand Up @@ -163,7 +162,7 @@
<!-- surefire.forkNumber won't do the parameter -->
<!-- substitution. Putting a prefix in front of it like -->
<!-- "fork-" makes it work. -->
<test.unique.fork.id>fork-000${surefire.forkNumber}</test.unique.fork.id>
<test.unique.fork.id>job-${job.id}-fork-000${surefire.forkNumber}</test.unique.fork.id>
<!-- Propagate scale parameters -->
<fs.s3a.scale.test.enabled>${fs.s3a.scale.test.enabled}</fs.s3a.scale.test.enabled>
<fs.s3a.scale.test.huge.filesize>${fs.s3a.scale.test.huge.filesize}</fs.s3a.scale.test.huge.filesize>
Expand All @@ -174,14 +173,14 @@
<test.default.timeout>${test.integration.timeout}</test.default.timeout>
<!-- Prefetch -->
<fs.s3a.prefetch.enabled>${fs.s3a.prefetch.enabled}</fs.s3a.prefetch.enabled>
<!-- are root tests enabled. Set to false when running parallel jobs on same bucket -->
<fs.s3a.root.tests.enabled>${root.tests.enabled}</fs.s3a.root.tests.enabled>

</systemPropertyVariables>
<!-- Some tests cannot run in parallel. Tests that cover -->
<!-- access to the root directory must run in isolation -->
<!-- from anything else that could modify the bucket. -->
<!-- S3A tests that cover multi-part upload must run in -->
<!-- isolation, because the file system is configured to -->
<!-- purge existing multi-part upload data on -->
<!-- initialization. MiniYARNCluster has not yet been -->
<!-- MiniYARNCluster has not yet been -->
<!-- changed to handle parallel test execution gracefully. -->
<!-- Exclude all of these tests from parallel execution, -->
<!-- and instead run them sequentially in a separate -->
Expand Down Expand Up @@ -228,6 +227,9 @@
<fs.s3a.directory.marker.audit>${fs.s3a.directory.marker.audit}</fs.s3a.directory.marker.audit>
<!-- Prefetch -->
<fs.s3a.prefetch.enabled>${fs.s3a.prefetch.enabled}</fs.s3a.prefetch.enabled>
<!-- are root tests enabled. Set to false when running parallel jobs on same bucket -->
<fs.s3a.root.tests.enabled>${root.tests.enabled}</fs.s3a.root.tests.enabled>
<test.unique.fork.id>job-${job.id}</test.unique.fork.id>
</systemPropertyVariables>
<!-- Do a sequential run for tests that cannot handle -->
<!-- parallel execution. -->
Expand Down Expand Up @@ -289,6 +291,7 @@
<fs.s3a.directory.marker.audit>${fs.s3a.directory.marker.audit}</fs.s3a.directory.marker.audit>
<!-- Prefetch -->
<fs.s3a.prefetch.enabled>${fs.s3a.prefetch.enabled}</fs.s3a.prefetch.enabled>
<test.unique.fork.id>job-${job.id}</test.unique.fork.id>
</systemPropertyVariables>
<forkedProcessTimeoutInSeconds>${fs.s3a.scale.test.timeout}</forkedProcessTimeoutInSeconds>
</configuration>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ is a more specific lie and harder to make. And, if you get caught out: you
lose all credibility with the project.

You don't need to test from a VM within the AWS infrastructure; with the
`-Dparallel=tests` option the non-scale tests complete in under ten minutes.
`-Dparallel-tests` option the non-scale tests complete in under twenty minutes.
Because the tests clean up after themselves, they are also designed to be low
cost. It's neither hard nor expensive to run the tests; if you can't,
there's no guarantee your patch works. The reviewers have enough to do, and
Expand Down Expand Up @@ -539,12 +539,51 @@ Otherwise, set a large timeout in `fs.s3a.scale.test.timeout`
The tests are executed in an order to only clean up created files after
the end of all the tests. If the tests are interrupted, the test data will remain.

## <a name="CI"/> Testing through continuous integration

### Parallel CI builds.
For CI testing of the module, including the integration tests,
it is generally necessary to support testing multiple PRs simultaneously.

To do this
1. A job ID must be supplied in the `job.id` property, so each job works on an isolated directory
tree. This should be a number or unique string, which will be used within a path element, so
must only contain characters valid in an S3/hadoop path element.
2. Root directory tests need to be disabled by setting `fs.s3a.root.tests.enabled` to
`false`, either in the command line to maven or in the XML configurations.

```
mvn verify -T 1C -Dparallel-tests -DtestsThreadCount=14 -Dscale -Dfs.s3a.root.tests.enabled=false -Djob.id=001
```

This parallel execution feature is only for isolated builds sharing a single S3 bucket; it does
not support parallel builds and tests from the same local source tree.

Without the root tests being executed, set up a scheduled job to purge the test bucket of all
data on a regular basis, to keep costs down.
The easiest way to do this is to have a bucket lifecycle rule for the bucket to delete all files more than a few days old,
alongside one to abort all pending uploads more than 24h old.


### Securing CI builds

It's clearly unsafe to have CI infrastructure testing PRs submitted to apache github account
with AWS credentials -which is why it isn't done by the Yetus-initiated builds.

Anyone doing this privately should:
* Review incoming patches before triggering the tests.
* Have a dedicated IAM role with restricted access to the test bucket, any KMS keys used, and the
external bucket containing the CSV test file.
* Have a build process which generates short-lived session credentials for this role.
* Run the tests in an EC2 VM/container which collects the restricted IAM credentials
from the IAM instance/container credentials provider.

## <a name="load"></a> Load tests.

Some are designed to overload AWS services with more
Some tests are designed to overload AWS services with more
requests per second than an AWS account is permitted.

The operation of these test maybe observable to other users of the same
The operation of these tests may be observable to other users of the same
account -especially if they are working in the AWS region to which the
tests are targeted.

Expand All @@ -556,6 +595,10 @@ They do not run automatically: they must be explicitly run from the command line

Look in the source for these and reads the Javadocs before executing.

Note: one fear here was that asking for two many session/role credentials in a short period
of time would actually lock an account out of a region. It doesn't: it simply triggers
throttling of STS requests.

## <a name="alternate_s3"></a> Testing against non-AWS S3 Stores.

The S3A filesystem is designed to work with S3 stores which implement
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@
import org.apache.hadoop.fs.contract.AbstractFSContract;
import org.apache.hadoop.fs.s3a.S3AFileSystem;

import static org.apache.hadoop.fs.s3a.S3ATestUtils.maybeSkipRootTests;

/**
* root dir operations against an S3 bucket.
*/
Expand All @@ -36,6 +38,12 @@ public class ITestS3AContractRootDir extends
private static final Logger LOG =
LoggerFactory.getLogger(ITestS3AContractRootDir.class);

@Override
public void setup() throws Exception {
super.setup();
maybeSkipRootTests(getFileSystem().getConf());
}

@Override
protected AbstractFSContract createContract(Configuration conf) {
return new S3AContract(conf);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -386,8 +386,10 @@ public void shouldBeAbleToSwitchOnS3PathStyleAccessViaConfigProperty()
s3Configuration.pathStyleAccessEnabled());
byte[] file = ContractTestUtils.toAsciiByteArray("test file");
ContractTestUtils.writeAndRead(fs,
new Path("/path/style/access/testFile"), file, file.length,
(int) conf.getLongBytes(Constants.FS_S3A_BLOCK_SIZE, file.length), false, true);
createTestPath(new Path("/path/style/access/testFile")),
file, file.length,
(int) conf.getLongBytes(Constants.FS_S3A_BLOCK_SIZE, file.length),
false, true);
} catch (final AWSRedirectException e) {
LOG.error("Caught exception: ", e);
// Catch/pass standard path style access behaviour when live bucket
Expand Down
Loading

0 comments on commit 3173b72

Please sign in to comment.