diff --git a/dev/appveyor-install-dependencies.ps1 b/dev/appveyor-install-dependencies.ps1
index 7c7bdd623477a..e655d4bfebf26 100644
--- a/dev/appveyor-install-dependencies.ps1
+++ b/dev/appveyor-install-dependencies.ps1
@@ -81,7 +81,7 @@ if (!(Test-Path $tools)) {
 # ========================== Maven
 Push-Location $tools
 
-$mavenVer = "3.6.0"
+$mavenVer = "3.6.2"
 Start-FileDownload "https://archive.apache.org/dist/maven/maven-3/$mavenVer/binaries/apache-maven-$mavenVer-bin.zip" "maven.zip"
 
 # extract
diff --git a/docs/building-spark.md b/docs/building-spark.md
index b16083f9f8282..8b7b3f2180b71 100644
--- a/docs/building-spark.md
+++ b/docs/building-spark.md
@@ -12,8 +12,8 @@ redirect_from: "building-with-maven.html"
 ## Apache Maven
 
 The Maven-based build is the build of reference for Apache Spark.
-Building Spark using Maven requires Maven 3.6.0 and Java 8.
-Note that support for Java 7 was removed as of Spark 2.2.0.
+Building Spark using Maven requires Maven 3.6.2 and Java 8.
+Spark requires Scala 2.12; support for Scala 2.11 was removed in Spark 3.0.0.
 
 ### Setting up Maven's Memory Usage
 
diff --git a/pom.xml b/pom.xml
index eb6b379585b81..50ab77d568a0d 100644
--- a/pom.xml
+++ b/pom.xml
@@ -123,7 +123,7 @@
     <java.version>1.8</java.version>
     <maven.compiler.source>${java.version}</maven.compiler.source>
     <maven.compiler.target>${java.version}</maven.compiler.target>
-    <maven.version>3.6.0</maven.version>
+    <maven.version>3.6.2</maven.version>
     <sbt.project.name>spark</sbt.project.name>
     <slf4j.version>1.7.25</slf4j.version>
     <log4j.version>1.2.17</log4j.version>
diff --git a/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/Dockerfile b/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/Dockerfile
index 6f7095a310987..f7754c9c29d56 100644
--- a/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/Dockerfile
+++ b/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/Dockerfile
@@ -15,7 +15,7 @@
 # limitations under the License.
 #
 
-FROM openjdk:8-alpine
+FROM openjdk:8-jdk-slim
 
 ARG spark_uid=185
 
@@ -27,14 +27,17 @@ ARG spark_uid=185
 # docker build -t spark:latest -f kubernetes/dockerfiles/spark/Dockerfile .
 
 RUN set -ex && \
-    apk upgrade --no-cache && \
-    apk add --no-cache bash tini krb5 krb5-libs && \
+    apt-get update && \
+    ln -s /lib /lib64 && \
+    apt install -y bash tini libc6 libpam-modules krb5-user libnss3 && \
     mkdir -p /opt/spark && \
     mkdir -p /opt/spark/work-dir && \
     touch /opt/spark/RELEASE && \
     rm /bin/sh && \
     ln -sv /bin/bash /bin/sh && \
-    chgrp root /etc/passwd && chmod ug+rw /etc/passwd
+    echo "auth required pam_wheel.so use_uid" >> /etc/pam.d/su && \
+    chgrp root /etc/passwd && chmod ug+rw /etc/passwd && \
+    rm -rf /var/cache/apt/*
 
 COPY jars /opt/spark/jars
 COPY bin /opt/spark/bin
diff --git a/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/bindings/R/Dockerfile b/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/bindings/R/Dockerfile
index 34d449c9f08b9..c65824c3f9a88 100644
--- a/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/bindings/R/Dockerfile
+++ b/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/bindings/R/Dockerfile
@@ -25,7 +25,7 @@ USER 0
 
 RUN mkdir ${SPARK_HOME}/R
 
-RUN apk add --no-cache R R-dev
+RUN apt install -y r-base r-base-dev && rm -rf /var/cache/apt/*
 
 COPY R ${SPARK_HOME}/R
 ENV R_HOME /usr/lib/R
diff --git a/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/bindings/python/Dockerfile b/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/bindings/python/Dockerfile
index 5044900d1d8a6..938407213ce7d 100644
--- a/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/bindings/python/Dockerfile
+++ b/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/bindings/python/Dockerfile
@@ -25,17 +25,15 @@ USER 0
 
 RUN mkdir ${SPARK_HOME}/python
 # TODO: Investigate running both pip and pip3 via virtualenvs
-RUN apk add --no-cache python && \
-    apk add --no-cache python3 && \
-    python -m ensurepip && \
-    python3 -m ensurepip && \
+RUN apt install -y python python-pip && \
+    apt install -y python3 python3-pip && \
     # We remove ensurepip since it adds no functionality since pip is
     # installed on the image and it just takes up 1.6MB on the image
     rm -r /usr/lib/python*/ensurepip && \
     pip install --upgrade pip setuptools && \
     # You may install with python3 packages by using pip3.6
     # Removed the .cache to save space
-    rm -r /root/.cache
+    rm -r /root/.cache && rm -rf /var/cache/apt/*
 
 COPY python/pyspark ${SPARK_HOME}/python/pyspark
 COPY python/lib ${SPARK_HOME}/python/lib
diff --git a/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/entrypoint.sh b/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/entrypoint.sh
index 462c849932880..e49214dfee460 100755
--- a/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/entrypoint.sh
+++ b/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/entrypoint.sh
@@ -111,4 +111,4 @@ case "$1" in
 esac
 
 # Execute the container CMD under tini for better hygiene
-exec /sbin/tini -s -- "${CMD[@]}"
+exec /usr/bin/tini -s -- "${CMD[@]}"
diff --git a/spark-docker-image-generator/src/main/java/org/apache/spark/deploy/kubernetes/docker/gradle/GenerateDockerFileTask.java b/spark-docker-image-generator/src/main/java/org/apache/spark/deploy/kubernetes/docker/gradle/GenerateDockerFileTask.java
index 63d9160ee23a4..6e0c66c2a983d 100644
--- a/spark-docker-image-generator/src/main/java/org/apache/spark/deploy/kubernetes/docker/gradle/GenerateDockerFileTask.java
+++ b/spark-docker-image-generator/src/main/java/org/apache/spark/deploy/kubernetes/docker/gradle/GenerateDockerFileTask.java
@@ -22,6 +22,7 @@
 import java.nio.file.Files;
 import java.nio.file.StandardCopyOption;
 import java.util.List;
+import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.stream.Collectors;
 import java.util.stream.Stream;
 import org.gradle.api.DefaultTask;
@@ -73,8 +74,10 @@ public final void generateDockerFile() throws IOException {
         File currentDestDockerFile = getDestDockerFile();
         List<String> fileLines;
         try (Stream<String> rawLines = Files.lines(currentSrcDockerFile.toPath(), StandardCharsets.UTF_8)) {
+            AtomicBoolean isFirstFromCommand = new AtomicBoolean(true);
             fileLines = rawLines.map(line -> {
-                if (line.equals("FROM openjdk:8-alpine")) {
+                // The first command in any valid dockerfile must be a from instruction
+                if (line.startsWith("FROM ") && isFirstFromCommand.getAndSet(false)) {
                     return String.format("FROM %s", baseImage.get());
                 } else {
                     return line;
diff --git a/spark-docker-image-generator/src/test/resources/ExpectedDockerfile b/spark-docker-image-generator/src/test/resources/ExpectedDockerfile
index 057bd66baa1e8..64d6b5ad61fe5 100644
--- a/spark-docker-image-generator/src/test/resources/ExpectedDockerfile
+++ b/spark-docker-image-generator/src/test/resources/ExpectedDockerfile
@@ -27,14 +27,17 @@ ARG spark_uid=185
 # docker build -t spark:latest -f kubernetes/dockerfiles/spark/Dockerfile .
 
 RUN set -ex && \
-    apk upgrade --no-cache && \
-    apk add --no-cache bash tini krb5 krb5-libs && \
+    apt-get update && \
+    ln -s /lib /lib64 && \
+    apt install -y bash tini libc6 libpam-modules krb5-user libnss3 && \
     mkdir -p /opt/spark && \
     mkdir -p /opt/spark/work-dir && \
     touch /opt/spark/RELEASE && \
     rm /bin/sh && \
     ln -sv /bin/bash /bin/sh && \
-    chgrp root /etc/passwd && chmod ug+rw /etc/passwd
+    echo "auth required pam_wheel.so use_uid" >> /etc/pam.d/su && \
+    chgrp root /etc/passwd && chmod ug+rw /etc/passwd && \
+    rm -rf /var/cache/apt/*
 
 COPY jars /opt/spark/jars
 COPY bin /opt/spark/bin
diff --git a/spark-docker-image-generator/src/test/resources/plugin-test-project/build.gradle b/spark-docker-image-generator/src/test/resources/plugin-test-project/build.gradle
index 26bb4c8480569..12fb2ad3bd26f 100644
--- a/spark-docker-image-generator/src/test/resources/plugin-test-project/build.gradle
+++ b/spark-docker-image-generator/src/test/resources/plugin-test-project/build.gradle
@@ -43,7 +43,7 @@ dependencies {
 }
 
 sparkDocker {
-    baseImage 'anapsix/alpine-java:8'
+    baseImage 'openjdk:8-jdk-slim'
     imageName 'docker.palantir.test/spark/spark-test-app'
     tags System.getProperty('docker-tag')
 }