diff --git a/BUILDING.txt b/BUILDING.txt
index 191df097b219c..bf08f55a23ab8 100644
--- a/BUILDING.txt
+++ b/BUILDING.txt
@@ -146,7 +146,7 @@ Maven build goals:
* Run clover : mvn test -Pclover
* Run Rat : mvn apache-rat:check
* Build javadocs : mvn javadoc:javadoc
- * Build distribution : mvn package [-Pdist][-Pdocs][-Psrc][-Pnative][-Dtar][-Preleasedocs][-Pyarn-ui]
+ * Build distribution : mvn package [-Pdist][-Pdocs][-Psrc][-Pnative][-Dtar][-Preleasedocs][-Pyarn-ui][-Pawssdk]
* Change Hadoop version : mvn versions:set -DnewVersion=NEWVERSION
Build options:
@@ -159,6 +159,7 @@ Maven build goals:
* Use -Pyarn-ui to build YARN UI v2. (Requires Internet connectivity)
* Use -DskipShade to disable client jar shading to speed up build times (in
development environments only, not to build release artifacts)
+ * Use -Pawssdk to include the AWS V2 SDK in the release distribution
YARN Application Timeline Service V2 build options:
@@ -371,6 +372,13 @@ Create binary distribution with native code:
$ mvn package -Pdist,native -DskipTests -Dtar
+Create binary distribution with AWS SDK:
+
+ $ mvn package -Pdist,awssdk -DskipTests -Dtar
+
+This ensures that the hadoop-aws module has all its dependencies,
+but approximately doubles the size of the tar file.
+
Create source distribution:
$ mvn package -Psrc -DskipTests
diff --git a/LICENSE-binary b/LICENSE-binary
index c0eb82f3dabfb..d82c3970fe7f1 100644
--- a/LICENSE-binary
+++ b/LICENSE-binary
@@ -362,6 +362,8 @@ org.objenesis:objenesis:2.6
org.xerial.snappy:snappy-java:1.1.10.4
org.yaml:snakeyaml:2.0
org.wildfly.openssl:wildfly-openssl:1.1.3.Final
+
+In distributions which include the AWS V2 SDK:
software.amazon.awssdk:bundle:jar:2.25.53
diff --git a/dev-support/bin/create-release b/dev-support/bin/create-release
index 148b88a9912fe..96fe5f6ccbd6e 100755
--- a/dev-support/bin/create-release
+++ b/dev-support/bin/create-release
@@ -283,6 +283,7 @@ function usage
{
echo "--artifactsdir=[path] Path to use to store release bits"
echo "--asfrelease Make an ASF release"
+ echo "--aws-sdk Include the AWS SDK bundle.jar in the release"
echo "--deploy Deploy Maven artifacts using ~/.m2/settings.xml"
echo "--docker Use Hadoop's Dockerfile for guaranteed environment"
echo "--dockercache Use a Docker-private maven cache"
@@ -311,6 +312,9 @@ function option_parse
--artifactsdir=*)
ARTIFACTS_DIR=${i#*=}
;;
+ --aws-sdk)
+ AWSSDK=true
+ ;;
--deploy)
DEPLOY=true
;;
@@ -580,11 +584,18 @@ function makearelease
target="deploy"
fi
+ # create profiles for the main build
+ PROFILES=dist,src,yarn-ui
+ if [[ "${AWSSDK}" = true ]]; then
+ # Add (large) AWS SDK to the build
+ PROFILES="${PROFILES},awssdk"
+ fi
+
# Create SRC and BIN tarballs for release,
# shellcheck disable=SC2046,SC2086
run_and_redirect "${LOGDIR}/mvn_${target}.log" \
"${MVN}" ${MVN_ARGS[*]} ${target} \
- -Pdist,src,yarn-ui \
+ -P${PROFILES} \
"${signflags[@]}" \
-DskipTests -Dtar $(hadoop_native_flags)
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java
index 57fd879c38cf6..0e61c11c945d8 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java
@@ -422,8 +422,14 @@ public int run(String[] args, PrintStream out)
CommandFormat commands = getCommandFormat();
URI fsURI = toUri(s3Path);
- S3AFileSystem fs = bindFilesystem(
- FileSystem.newInstance(fsURI, getConf()));
+ S3AFileSystem fs;
+ try {
+ fs = bindFilesystem(FileSystem.newInstance(fsURI, getConf()));
+ } catch (NoClassDefFoundError e) {
+ println(out, "Failed to instantiate S3A filesystem due to missing class: %s", e);
+ println(out, "Make sure the AWS v2 SDK is on the classpath");
+ throw e;
+ }
Configuration conf = fs.getConf();
URI fsUri = fs.getUri();
println(out, "Filesystem %s", fsUri);
diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
index 1b4b2e8b21b38..ba588bf4876e5 100644
--- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
+++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
@@ -53,8 +53,46 @@ full details.
## Overview
-Apache Hadoop's `hadoop-aws` module provides support for AWS integration.
-applications to easily use this support.
+Apache Hadoop's `hadoop-aws` module provides support for AWS integration,
+primarily the S3A open source connector to Amazon S3 Storage, including
+Amazon S3 Express One Zone storage as well as third-party stores with S3
+compatibility.
+
+## Installation
+
+### SDK Download
+
+This release uses the AWS SDK for Java 2.0.
+
+Unless using a hadoop release with the AWS SDK `bundle.jar` JAR included
+in the binary distribution, the library MUST be downloaded and installed
+into the hadoop distribution.
+
+The exact version of the SDK to be used is listed in the file:
+```
+LICENSE-binary
+```
+The [mvn repository](https://mvnrepository.com/)
+site will list it as a "Compile Dependency" of the
+[hadoop-aws](https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-aws) artifact.
+
+AWS SDK releases can be downloaded from GitHub at [AWS SDK for Java 2.0](https://github.com/aws/aws-sdk-java-v2).
+
+Or from the [Maven central repository](https://repo1.maven.org/maven2/software/amazon/awssdk/bundle/).
+
+Download the release and place it in the directory `share/hadoop/tools/lib`
+of the hadoop distribution.
+
+* Using an earlier SDK than the one this module was compiled and tested against
+ will not work.
+* Using a later SDK *should* work, but there are no guarantees.
+* The V1 SDK will not work.
+
+Any project declaring a dependency on `hadoop-aws` in their Maven/Ivy/SBT/Gradle
+build will automatically get the specific version of the AWS SDK which this
+module was compiled against.
+
+### Inclusion on classpath
To include the S3A client in Apache Hadoop's default classpath:
diff --git a/hadoop-tools/hadoop-tools-dist/pom.xml b/hadoop-tools/hadoop-tools-dist/pom.xml
index 97b112ffa67f4..bdf79c6261797 100644
--- a/hadoop-tools/hadoop-tools-dist/pom.xml
+++ b/hadoop-tools/hadoop-tools-dist/pom.xml
@@ -97,6 +97,12 @@
hadoop-aws
compile
${project.version}
+
+
+ software.amazon.awssdk
+ bundle
+
+
org.apache.hadoop
@@ -197,5 +203,18 @@
+
+
+
+ awssdk
+
+
+ software.amazon.awssdk
+ bundle
+ compile
+
+
+