diff --git a/BUILDING.txt b/BUILDING.txt index 191df097b219c..bf08f55a23ab8 100644 --- a/BUILDING.txt +++ b/BUILDING.txt @@ -146,7 +146,7 @@ Maven build goals: * Run clover : mvn test -Pclover * Run Rat : mvn apache-rat:check * Build javadocs : mvn javadoc:javadoc - * Build distribution : mvn package [-Pdist][-Pdocs][-Psrc][-Pnative][-Dtar][-Preleasedocs][-Pyarn-ui] + * Build distribution : mvn package [-Pdist][-Pdocs][-Psrc][-Pnative][-Dtar][-Preleasedocs][-Pyarn-ui][-Pawssdk] * Change Hadoop version : mvn versions:set -DnewVersion=NEWVERSION Build options: @@ -159,6 +159,7 @@ Maven build goals: * Use -Pyarn-ui to build YARN UI v2. (Requires Internet connectivity) * Use -DskipShade to disable client jar shading to speed up build times (in development environments only, not to build release artifacts) + * Use -Pawssdk to include the AWS V2 SDK in the release distribution YARN Application Timeline Service V2 build options: @@ -371,6 +372,13 @@ Create binary distribution with native code: $ mvn package -Pdist,native -DskipTests -Dtar +Create binary distribution with AWS SDK: + + $ mvn package -Pdist,awssdk -DskipTests -Dtar + +This ensures that the hadoop-aws module has all its dependencies, +but approximately doubles the size of the tar file. 
+ Create source distribution: $ mvn package -Psrc -DskipTests diff --git a/LICENSE-binary b/LICENSE-binary index c0eb82f3dabfb..d82c3970fe7f1 100644 --- a/LICENSE-binary +++ b/LICENSE-binary @@ -362,6 +362,8 @@ org.objenesis:objenesis:2.6 org.xerial.snappy:snappy-java:1.1.10.4 org.yaml:snakeyaml:2.0 org.wildfly.openssl:wildfly-openssl:1.1.3.Final + +In distributions which include the aws V2 SDK: software.amazon.awssdk:bundle:jar:2.25.53 diff --git a/dev-support/bin/create-release b/dev-support/bin/create-release index 148b88a9912fe..96fe5f6ccbd6e 100755 --- a/dev-support/bin/create-release +++ b/dev-support/bin/create-release @@ -283,6 +283,7 @@ function usage { echo "--artifactsdir=[path] Path to use to store release bits" echo "--asfrelease Make an ASF release" + echo "--aws-sdk Include the AWS SDK bundle.jar in the release" echo "--deploy Deploy Maven artifacts using ~/.m2/settings.xml" echo "--docker Use Hadoop's Dockerfile for guaranteed environment" echo "--dockercache Use a Docker-private maven cache" @@ -311,6 +312,9 @@ function option_parse --artifactsdir=*) ARTIFACTS_DIR=${i#*=} ;; + --aws-sdk) + AWSSDK=true + ;; --deploy) DEPLOY=true ;; @@ -580,11 +584,18 @@ function makearelease target="deploy" fi + # create profiles for the main build + PROFILES=dist,src,yarn-ui + if [[ "${AWSSDK}" = true ]]; then + # Add (large) AWS SDK to the build + PROFILES="${PROFILES},awssdk" + fi + # Create SRC and BIN tarballs for release, # shellcheck disable=SC2046,SC2086 run_and_redirect "${LOGDIR}/mvn_${target}.log" \ "${MVN}" ${MVN_ARGS[*]} ${target} \ - -Pdist,src,yarn-ui \ + -P${PROFILES} \ "${signflags[@]}" \ -DskipTests -Dtar $(hadoop_native_flags) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java index 57fd879c38cf6..0e61c11c945d8 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java 
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java @@ -422,8 +422,14 @@ public int run(String[] args, PrintStream out) CommandFormat commands = getCommandFormat(); URI fsURI = toUri(s3Path); - S3AFileSystem fs = bindFilesystem( - FileSystem.newInstance(fsURI, getConf())); + S3AFileSystem fs; + try { + fs = bindFilesystem(FileSystem.newInstance(fsURI, getConf())); + } catch (NoClassDefFoundError e) { + println(out, "Failed to instantiate S3A filesystem due to missing class: %s", e); + println(out, "Make sure the AWS v2 SDK is on the classpath"); + throw e; + } Configuration conf = fs.getConf(); URI fsUri = fs.getUri(); println(out, "Filesystem %s", fsUri); diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md index 1b4b2e8b21b38..ba588bf4876e5 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md @@ -53,8 +53,46 @@ full details. ## Overview -Apache Hadoop's `hadoop-aws` module provides support for AWS integration. -applications to easily use this support. +Apache Hadoop's `hadoop-aws` module provides support for AWS integration, +primarily the S3A open source connector to Amazon S3 Storage, including +Amazon S3 Express One Zone storage as well as third-party stores with S3 +compatibility. + +## Installation + +### SDK Download + +This release uses the AWS SDK for Java 2.0. + +Unless using a Hadoop release with the AWS SDK `bundle.jar` JAR included +in the binary distribution, the library MUST be downloaded and installed +into the Hadoop distribution. + +The exact version of the SDK to be used is listed in the file: +``` +LICENSE-binary +``` +The [mvn repository](https://mvnrepository.com/) +site will list it as a "Compile Dependency" of the +[hadoop-aws](https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-aws) artifact. 
+ +AWS SDK releases can be downloaded from GitHub at [AWS SDK for Java 2.0](https://github.com/aws/aws-sdk-java-v2) + +Or from the [Maven central repository](https://repo1.maven.org/maven2/software/amazon/awssdk/bundle/). + +Download the release and place it in the directory `share/hadoop/tools/lib` +of the Hadoop distribution. + +* Using an earlier SDK than the one this module was compiled and tested against + will not work. +* Using a later SDK *should* work, but there are no guarantees. +* The V1 SDK will not work. + +Any project declaring a dependency on `hadoop-aws` in their Maven/Ivy/SBT/Gradle +build will automatically get the specific version of the AWS SDK which this +module was compiled against. + +### Inclusion on classpath To include the S3A client in Apache Hadoop's default classpath: diff --git a/hadoop-tools/hadoop-tools-dist/pom.xml b/hadoop-tools/hadoop-tools-dist/pom.xml index 97b112ffa67f4..bdf79c6261797 100644 --- a/hadoop-tools/hadoop-tools-dist/pom.xml +++ b/hadoop-tools/hadoop-tools-dist/pom.xml @@ -97,6 +97,12 @@ hadoop-aws compile ${project.version} + + + software.amazon.awssdk + bundle + + org.apache.hadoop @@ -197,5 +203,18 @@ + + + + awssdk + + + software.amazon.awssdk + bundle + compile + + +