From 76ba46589658c2e496135934313542796c9e7332 Mon Sep 17 00:00:00 2001
From: yangjie01
Date: Fri, 20 Dec 2024 19:19:09 +0800
Subject: [PATCH 1/3] Publish 3.5.4 to docker registry

---
 .github/workflows/build_3.5.4.yaml          |  43 ++++++
 .github/workflows/publish.yml               |   4 +-
 .github/workflows/test.yml                  |   3 +-
 .../Dockerfile                              |  29 ++++
 .../Dockerfile                              |  26 ++++
 3.5.4/scala2.12-java11-r-ubuntu/Dockerfile  |  28 ++++
 3.5.4/scala2.12-java11-ubuntu/Dockerfile    |  81 +++++++++++
 3.5.4/scala2.12-java11-ubuntu/entrypoint.sh | 130 ++++++++++++++++++
 .../Dockerfile                              |  29 ++++
 .../Dockerfile                              |  26 ++++
 3.5.4/scala2.12-java17-r-ubuntu/Dockerfile  |  28 ++++
 3.5.4/scala2.12-java17-ubuntu/Dockerfile    |  81 +++++++++++
 3.5.4/scala2.12-java17-ubuntu/entrypoint.sh | 130 ++++++++++++++++++
 tools/template.py                           |   2 +
 versions.json                               |  74 ++++++++--
 15 files changed, 702 insertions(+), 12 deletions(-)
 create mode 100644 .github/workflows/build_3.5.4.yaml
 create mode 100644 3.5.4/scala2.12-java11-python3-r-ubuntu/Dockerfile
 create mode 100644 3.5.4/scala2.12-java11-python3-ubuntu/Dockerfile
 create mode 100644 3.5.4/scala2.12-java11-r-ubuntu/Dockerfile
 create mode 100644 3.5.4/scala2.12-java11-ubuntu/Dockerfile
 create mode 100755 3.5.4/scala2.12-java11-ubuntu/entrypoint.sh
 create mode 100644 3.5.4/scala2.12-java17-python3-r-ubuntu/Dockerfile
 create mode 100644 3.5.4/scala2.12-java17-python3-ubuntu/Dockerfile
 create mode 100644 3.5.4/scala2.12-java17-r-ubuntu/Dockerfile
 create mode 100644 3.5.4/scala2.12-java17-ubuntu/Dockerfile
 create mode 100755 3.5.4/scala2.12-java17-ubuntu/entrypoint.sh

diff --git a/.github/workflows/build_3.5.4.yaml b/.github/workflows/build_3.5.4.yaml
new file mode 100644
index 0000000..a945fab
--- /dev/null
+++ b/.github/workflows/build_3.5.4.yaml
@@ -0,0 +1,43 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+name: "Build and Test (3.5.4)"
+
+on:
+  pull_request:
+    branches:
+      - 'master'
+    paths:
+      - '3.5.4/**'
+
+jobs:
+  run-build:
+    strategy:
+      matrix:
+        image-type: ["all", "python", "scala", "r"]
+        java: [11, 17]
+    name: Run
+    secrets: inherit
+    uses: ./.github/workflows/main.yml
+    with:
+      spark: 3.5.4
+      scala: 2.12
+      java: ${{ matrix.java }}
+      image-type: ${{ matrix.image-type }}
+
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
index 6031f49..6be9bc3 100644
--- a/.github/workflows/publish.yml
+++ b/.github/workflows/publish.yml
@@ -25,10 +25,10 @@
       spark:
         description: 'The Spark version of Spark image.'
         required: true
-        default: '3.5.3'
+        default: '3.5.4'
         type: choice
        options:
-          - 3.5.3
+          - 3.5.4
       publish:
         description: 'Publish the image or not.'
         default: false
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 1bdfde4..02ef44d 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -25,11 +25,12 @@
       spark:
         description: 'The Spark version of Spark image.'
         required: true
-        default: '3.5.3'
+        default: '3.5.4'
         type: choice
         options:
           - 4.0.0-preview2
           - 4.0.0-preview1
+          - 3.5.4
           - 3.5.3
           - 3.5.2
           - 3.5.1
diff --git a/3.5.4/scala2.12-java11-python3-r-ubuntu/Dockerfile b/3.5.4/scala2.12-java11-python3-r-ubuntu/Dockerfile
new file mode 100644
index 0000000..0778a40
--- /dev/null
+++ b/3.5.4/scala2.12-java11-python3-r-ubuntu/Dockerfile
@@ -0,0 +1,29 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+FROM spark:3.5.4-scala2.12-java11-ubuntu
+
+USER root
+
+RUN set -ex; \
+    apt-get update; \
+    apt-get install -y python3 python3-pip; \
+    apt-get install -y r-base r-base-dev; \
+    rm -rf /var/lib/apt/lists/*
+
+ENV R_HOME /usr/lib/R
+
+USER spark
diff --git a/3.5.4/scala2.12-java11-python3-ubuntu/Dockerfile b/3.5.4/scala2.12-java11-python3-ubuntu/Dockerfile
new file mode 100644
index 0000000..ddb7c57
--- /dev/null
+++ b/3.5.4/scala2.12-java11-python3-ubuntu/Dockerfile
@@ -0,0 +1,26 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+FROM spark:3.5.4-scala2.12-java11-ubuntu
+
+USER root
+
+RUN set -ex; \
+    apt-get update; \
+    apt-get install -y python3 python3-pip; \
+    rm -rf /var/lib/apt/lists/*
+
+USER spark
diff --git a/3.5.4/scala2.12-java11-r-ubuntu/Dockerfile b/3.5.4/scala2.12-java11-r-ubuntu/Dockerfile
new file mode 100644
index 0000000..c5da428
--- /dev/null
+++ b/3.5.4/scala2.12-java11-r-ubuntu/Dockerfile
@@ -0,0 +1,28 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+FROM spark:3.5.4-scala2.12-java11-ubuntu
+
+USER root
+
+RUN set -ex; \
+    apt-get update; \
+    apt-get install -y r-base r-base-dev; \
+    rm -rf /var/lib/apt/lists/*
+
+ENV R_HOME /usr/lib/R
+
+USER spark
diff --git a/3.5.4/scala2.12-java11-ubuntu/Dockerfile b/3.5.4/scala2.12-java11-ubuntu/Dockerfile
new file mode 100644
index 0000000..b5d6a4a
--- /dev/null
+++ b/3.5.4/scala2.12-java11-ubuntu/Dockerfile
@@ -0,0 +1,81 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+FROM eclipse-temurin:11-jre-focal
+
+ARG spark_uid=185
+
+RUN groupadd --system --gid=${spark_uid} spark && \
+    useradd --system --uid=${spark_uid} --gid=spark spark
+
+RUN set -ex; \
+    apt-get update; \
+    apt-get install -y gnupg2 wget bash tini libc6 libpam-modules krb5-user libnss3 procps net-tools gosu libnss-wrapper; \
+    mkdir -p /opt/spark; \
+    mkdir /opt/spark/python; \
+    mkdir -p /opt/spark/examples; \
+    mkdir -p /opt/spark/work-dir; \
+    chmod g+w /opt/spark/work-dir; \
+    touch /opt/spark/RELEASE; \
+    chown -R spark:spark /opt/spark; \
+    echo "auth required pam_wheel.so use_uid" >> /etc/pam.d/su; \
+    rm -rf /var/lib/apt/lists/*
+
+# Install Apache Spark
+# https://downloads.apache.org/spark/KEYS
+ENV SPARK_TGZ_URL=https://archive.apache.org/dist/spark/spark-3.5.4/spark-3.5.4-bin-hadoop3.tgz \
+    SPARK_TGZ_ASC_URL=https://archive.apache.org/dist/spark/spark-3.5.4/spark-3.5.4-bin-hadoop3.tgz.asc \
+    GPG_KEY=19F745C40A0E550420BB2C522541488DA93FE4B4
+
+RUN set -ex; \
+    export SPARK_TMP="$(mktemp -d)"; \
+    cd $SPARK_TMP; \
+    wget -nv -O spark.tgz "$SPARK_TGZ_URL"; \
+    wget -nv -O spark.tgz.asc "$SPARK_TGZ_ASC_URL"; \
+    export GNUPGHOME="$(mktemp -d)"; \
+    gpg --batch --keyserver hkps://keys.openpgp.org --recv-key "$GPG_KEY" || \
+    gpg --batch --keyserver hkps://keyserver.ubuntu.com --recv-keys "$GPG_KEY"; \
+    gpg --batch --verify spark.tgz.asc spark.tgz; \
+    gpgconf --kill all; \
+    rm -rf "$GNUPGHOME" spark.tgz.asc; \
+    \
+    tar -xf spark.tgz --strip-components=1; \
+    chown -R spark:spark .; \
+    mv jars /opt/spark/; \
+    mv RELEASE /opt/spark/; \
+    mv bin /opt/spark/; \
+    mv sbin /opt/spark/; \
+    mv kubernetes/dockerfiles/spark/decom.sh /opt/; \
+    mv examples /opt/spark/; \
+    ln -s "$(basename /opt/spark/examples/jars/spark-examples_*.jar)" /opt/spark/examples/jars/spark-examples.jar; \
+    mv kubernetes/tests /opt/spark/; \
+    mv data /opt/spark/; \
+    mv python/pyspark /opt/spark/python/pyspark/; \
+    mv python/lib /opt/spark/python/lib/; \
+    mv R /opt/spark/; \
+    chmod a+x /opt/decom.sh; \
+    cd ..; \
+    rm -rf "$SPARK_TMP";
+
+COPY entrypoint.sh /opt/
+
+ENV SPARK_HOME /opt/spark
+
+WORKDIR /opt/spark/work-dir
+
+USER spark
+
+ENTRYPOINT [ "/opt/entrypoint.sh" ]
diff --git a/3.5.4/scala2.12-java11-ubuntu/entrypoint.sh b/3.5.4/scala2.12-java11-ubuntu/entrypoint.sh
new file mode 100755
index 0000000..c576d8f
--- /dev/null
+++ b/3.5.4/scala2.12-java11-ubuntu/entrypoint.sh
@@ -0,0 +1,130 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Prevent any errors from being silently ignored
+set -eo pipefail
+
+attempt_setup_fake_passwd_entry() {
+  # Check whether there is a passwd entry for the container UID
+  local myuid; myuid="$(id -u)"
+  # If there is no passwd entry for the container UID, attempt to fake one
+  # to handle the OpenShift random-UID case; for background, see
+  # https://github.com/docker-library/official-images/pull/13089#issuecomment-1534706523
+  # See also: https://github.com/docker-library/postgres/pull/448
+  if ! getent passwd "$myuid" &> /dev/null; then
+    local wrapper
+    for wrapper in {/usr,}/lib{/*,}/libnss_wrapper.so; do
+      if [ -s "$wrapper" ]; then
+        NSS_WRAPPER_PASSWD="$(mktemp)"
+        NSS_WRAPPER_GROUP="$(mktemp)"
+        export LD_PRELOAD="$wrapper" NSS_WRAPPER_PASSWD NSS_WRAPPER_GROUP
+        local mygid; mygid="$(id -g)"
+        printf 'spark:x:%s:%s:${SPARK_USER_NAME:-anonymous uid}:%s:/bin/false\n' "$myuid" "$mygid" "$SPARK_HOME" > "$NSS_WRAPPER_PASSWD"
+        printf 'spark:x:%s:\n' "$mygid" > "$NSS_WRAPPER_GROUP"
+        break
+      fi
+    done
+  fi
+}
+
+if [ -z "$JAVA_HOME" ]; then
+  JAVA_HOME=$(java -XshowSettings:properties -version 2>&1 > /dev/null | grep 'java.home' | awk '{print $3}')
+fi
+
+SPARK_CLASSPATH="$SPARK_CLASSPATH:${SPARK_HOME}/jars/*"
+for v in "${!SPARK_JAVA_OPT_@}"; do
+  SPARK_EXECUTOR_JAVA_OPTS+=( "${!v}" )
+done
+
+if [ -n "$SPARK_EXTRA_CLASSPATH" ]; then
+  SPARK_CLASSPATH="$SPARK_CLASSPATH:$SPARK_EXTRA_CLASSPATH"
+fi
+
+if ! [ -z "${PYSPARK_PYTHON+x}" ]; then
+  export PYSPARK_PYTHON
+fi
+if ! [ -z "${PYSPARK_DRIVER_PYTHON+x}" ]; then
+  export PYSPARK_DRIVER_PYTHON
+fi
+
+# If HADOOP_HOME is set and SPARK_DIST_CLASSPATH is not set, set it here so Hadoop jars are available to the executor.
+# It does not override SPARK_DIST_CLASSPATH if already set, to avoid clobbering customizations of this value made elsewhere, e.g. in Docker/K8s.
+if [ -n "${HADOOP_HOME}" ] && [ -z "${SPARK_DIST_CLASSPATH}" ]; then
+  export SPARK_DIST_CLASSPATH="$($HADOOP_HOME/bin/hadoop classpath)"
+fi
+
+if ! [ -z "${HADOOP_CONF_DIR+x}" ]; then
+  SPARK_CLASSPATH="$HADOOP_CONF_DIR:$SPARK_CLASSPATH";
+fi
+
+if ! [ -z "${SPARK_CONF_DIR+x}" ]; then
+  SPARK_CLASSPATH="$SPARK_CONF_DIR:$SPARK_CLASSPATH";
+elif ! [ -z "${SPARK_HOME+x}" ]; then
+  SPARK_CLASSPATH="$SPARK_HOME/conf:$SPARK_CLASSPATH";
+fi
+
+# SPARK-43540: add current working directory into executor classpath
+SPARK_CLASSPATH="$SPARK_CLASSPATH:$PWD"
+
+# Switch to the spark user if no USER was specified (root by default); otherwise run as that USER directly
+switch_spark_if_root() {
+  if [ $(id -u) -eq 0 ]; then
+    echo gosu spark
+  fi
+}
+
+case "$1" in
+  driver)
+    shift 1
+    CMD=(
+      "$SPARK_HOME/bin/spark-submit"
+      --conf "spark.driver.bindAddress=$SPARK_DRIVER_BIND_ADDRESS"
+      --conf "spark.executorEnv.SPARK_DRIVER_POD_IP=$SPARK_DRIVER_BIND_ADDRESS"
+      --deploy-mode client
+      "$@"
+    )
+    attempt_setup_fake_passwd_entry
+    # Execute the container CMD under tini for better hygiene
+    exec $(switch_spark_if_root) /usr/bin/tini -s -- "${CMD[@]}"
+    ;;
+  executor)
+    shift 1
+    CMD=(
+      ${JAVA_HOME}/bin/java
+      "${SPARK_EXECUTOR_JAVA_OPTS[@]}"
+      -Xms"$SPARK_EXECUTOR_MEMORY"
+      -Xmx"$SPARK_EXECUTOR_MEMORY"
+      -cp "$SPARK_CLASSPATH:$SPARK_DIST_CLASSPATH"
+      org.apache.spark.scheduler.cluster.k8s.KubernetesExecutorBackend
+      --driver-url "$SPARK_DRIVER_URL"
+      --executor-id "$SPARK_EXECUTOR_ID"
+      --cores "$SPARK_EXECUTOR_CORES"
+      --app-id "$SPARK_APPLICATION_ID"
+      --hostname "$SPARK_EXECUTOR_POD_IP"
+      --resourceProfileId "$SPARK_RESOURCE_PROFILE_ID"
+      --podName "$SPARK_EXECUTOR_POD_NAME"
+    )
+    attempt_setup_fake_passwd_entry
+    # Execute the container CMD under tini for better hygiene
+    exec $(switch_spark_if_root) /usr/bin/tini -s -- "${CMD[@]}"
+    ;;
+
+  *)
+    # Non-spark-on-k8s command provided, proceeding in pass-through mode...
+    exec "$@"
+    ;;
+esac
diff --git a/3.5.4/scala2.12-java17-python3-r-ubuntu/Dockerfile b/3.5.4/scala2.12-java17-python3-r-ubuntu/Dockerfile
new file mode 100644
index 0000000..5113809
--- /dev/null
+++ b/3.5.4/scala2.12-java17-python3-r-ubuntu/Dockerfile
@@ -0,0 +1,29 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+FROM spark:3.5.4-scala2.12-java17-ubuntu
+
+USER root
+
+RUN set -ex; \
+    apt-get update; \
+    apt-get install -y python3 python3-pip; \
+    apt-get install -y r-base r-base-dev; \
+    rm -rf /var/lib/apt/lists/*
+
+ENV R_HOME /usr/lib/R
+
+USER spark
diff --git a/3.5.4/scala2.12-java17-python3-ubuntu/Dockerfile b/3.5.4/scala2.12-java17-python3-ubuntu/Dockerfile
new file mode 100644
index 0000000..8a2cd5d
--- /dev/null
+++ b/3.5.4/scala2.12-java17-python3-ubuntu/Dockerfile
@@ -0,0 +1,26 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+FROM spark:3.5.4-scala2.12-java17-ubuntu
+
+USER root
+
+RUN set -ex; \
+    apt-get update; \
+    apt-get install -y python3 python3-pip; \
+    rm -rf /var/lib/apt/lists/*
+
+USER spark
diff --git a/3.5.4/scala2.12-java17-r-ubuntu/Dockerfile b/3.5.4/scala2.12-java17-r-ubuntu/Dockerfile
new file mode 100644
index 0000000..0d42b81
--- /dev/null
+++ b/3.5.4/scala2.12-java17-r-ubuntu/Dockerfile
@@ -0,0 +1,28 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+FROM spark:3.5.4-scala2.12-java17-ubuntu
+
+USER root
+
+RUN set -ex; \
+    apt-get update; \
+    apt-get install -y r-base r-base-dev; \
+    rm -rf /var/lib/apt/lists/*
+
+ENV R_HOME /usr/lib/R
+
+USER spark
diff --git a/3.5.4/scala2.12-java17-ubuntu/Dockerfile b/3.5.4/scala2.12-java17-ubuntu/Dockerfile
new file mode 100644
index 0000000..4125654
--- /dev/null
+++ b/3.5.4/scala2.12-java17-ubuntu/Dockerfile
@@ -0,0 +1,81 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+FROM eclipse-temurin:17-jammy
+
+ARG spark_uid=185
+
+RUN groupadd --system --gid=${spark_uid} spark && \
+    useradd --system --uid=${spark_uid} --gid=spark spark
+
+RUN set -ex; \
+    apt-get update; \
+    apt-get install -y gnupg2 wget bash tini libc6 libpam-modules krb5-user libnss3 procps net-tools gosu libnss-wrapper; \
+    mkdir -p /opt/spark; \
+    mkdir /opt/spark/python; \
+    mkdir -p /opt/spark/examples; \
+    mkdir -p /opt/spark/work-dir; \
+    chmod g+w /opt/spark/work-dir; \
+    touch /opt/spark/RELEASE; \
+    chown -R spark:spark /opt/spark; \
+    echo "auth required pam_wheel.so use_uid" >> /etc/pam.d/su; \
+    rm -rf /var/lib/apt/lists/*
+
+# Install Apache Spark
+# https://downloads.apache.org/spark/KEYS
+ENV SPARK_TGZ_URL=https://archive.apache.org/dist/spark/spark-3.5.4/spark-3.5.4-bin-hadoop3.tgz \
+    SPARK_TGZ_ASC_URL=https://archive.apache.org/dist/spark/spark-3.5.4/spark-3.5.4-bin-hadoop3.tgz.asc \
+    GPG_KEY=19F745C40A0E550420BB2C522541488DA93FE4B4
+
+RUN set -ex; \
+    export SPARK_TMP="$(mktemp -d)"; \
+    cd $SPARK_TMP; \
+    wget -nv -O spark.tgz "$SPARK_TGZ_URL"; \
+    wget -nv -O spark.tgz.asc "$SPARK_TGZ_ASC_URL"; \
+    export GNUPGHOME="$(mktemp -d)"; \
+    gpg --batch --keyserver hkps://keys.openpgp.org --recv-key "$GPG_KEY" || \
+    gpg --batch --keyserver hkps://keyserver.ubuntu.com --recv-keys "$GPG_KEY"; \
+    gpg --batch --verify spark.tgz.asc spark.tgz; \
+    gpgconf --kill all; \
+    rm -rf "$GNUPGHOME" spark.tgz.asc; \
+    \
+    tar -xf spark.tgz --strip-components=1; \
+    chown -R spark:spark .; \
+    mv jars /opt/spark/; \
+    mv RELEASE /opt/spark/; \
+    mv bin /opt/spark/; \
+    mv sbin /opt/spark/; \
+    mv kubernetes/dockerfiles/spark/decom.sh /opt/; \
+    mv examples /opt/spark/; \
+    ln -s "$(basename /opt/spark/examples/jars/spark-examples_*.jar)" /opt/spark/examples/jars/spark-examples.jar; \
+    mv kubernetes/tests /opt/spark/; \
+    mv data /opt/spark/; \
+    mv python/pyspark /opt/spark/python/pyspark/; \
+    mv python/lib /opt/spark/python/lib/; \
+    mv R /opt/spark/; \
+    chmod a+x /opt/decom.sh; \
+    cd ..; \
+    rm -rf "$SPARK_TMP";
+
+COPY entrypoint.sh /opt/
+
+ENV SPARK_HOME /opt/spark
+
+WORKDIR /opt/spark/work-dir
+
+USER spark
+
+ENTRYPOINT [ "/opt/entrypoint.sh" ]
diff --git a/3.5.4/scala2.12-java17-ubuntu/entrypoint.sh b/3.5.4/scala2.12-java17-ubuntu/entrypoint.sh
new file mode 100755
index 0000000..c576d8f
--- /dev/null
+++ b/3.5.4/scala2.12-java17-ubuntu/entrypoint.sh
@@ -0,0 +1,130 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Prevent any errors from being silently ignored
+set -eo pipefail
+
+attempt_setup_fake_passwd_entry() {
+  # Check whether there is a passwd entry for the container UID
+  local myuid; myuid="$(id -u)"
+  # If there is no passwd entry for the container UID, attempt to fake one
+  # to handle the OpenShift random-UID case; for background, see
+  # https://github.com/docker-library/official-images/pull/13089#issuecomment-1534706523
+  # See also: https://github.com/docker-library/postgres/pull/448
+  if ! getent passwd "$myuid" &> /dev/null; then
+    local wrapper
+    for wrapper in {/usr,}/lib{/*,}/libnss_wrapper.so; do
+      if [ -s "$wrapper" ]; then
+        NSS_WRAPPER_PASSWD="$(mktemp)"
+        NSS_WRAPPER_GROUP="$(mktemp)"
+        export LD_PRELOAD="$wrapper" NSS_WRAPPER_PASSWD NSS_WRAPPER_GROUP
+        local mygid; mygid="$(id -g)"
+        printf 'spark:x:%s:%s:${SPARK_USER_NAME:-anonymous uid}:%s:/bin/false\n' "$myuid" "$mygid" "$SPARK_HOME" > "$NSS_WRAPPER_PASSWD"
+        printf 'spark:x:%s:\n' "$mygid" > "$NSS_WRAPPER_GROUP"
+        break
+      fi
+    done
+  fi
+}
+
+if [ -z "$JAVA_HOME" ]; then
+  JAVA_HOME=$(java -XshowSettings:properties -version 2>&1 > /dev/null | grep 'java.home' | awk '{print $3}')
+fi
+
+SPARK_CLASSPATH="$SPARK_CLASSPATH:${SPARK_HOME}/jars/*"
+for v in "${!SPARK_JAVA_OPT_@}"; do
+  SPARK_EXECUTOR_JAVA_OPTS+=( "${!v}" )
+done
+
+if [ -n "$SPARK_EXTRA_CLASSPATH" ]; then
+  SPARK_CLASSPATH="$SPARK_CLASSPATH:$SPARK_EXTRA_CLASSPATH"
+fi
+
+if ! [ -z "${PYSPARK_PYTHON+x}" ]; then
+  export PYSPARK_PYTHON
+fi
+if ! [ -z "${PYSPARK_DRIVER_PYTHON+x}" ]; then
+  export PYSPARK_DRIVER_PYTHON
+fi
+
+# If HADOOP_HOME is set and SPARK_DIST_CLASSPATH is not set, set it here so Hadoop jars are available to the executor.
+# It does not override SPARK_DIST_CLASSPATH if already set, to avoid clobbering customizations of this value made elsewhere, e.g. in Docker/K8s.
+if [ -n "${HADOOP_HOME}" ] && [ -z "${SPARK_DIST_CLASSPATH}" ]; then
+  export SPARK_DIST_CLASSPATH="$($HADOOP_HOME/bin/hadoop classpath)"
+fi
+
+if ! [ -z "${HADOOP_CONF_DIR+x}" ]; then
+  SPARK_CLASSPATH="$HADOOP_CONF_DIR:$SPARK_CLASSPATH";
+fi
+
+if ! [ -z "${SPARK_CONF_DIR+x}" ]; then
+  SPARK_CLASSPATH="$SPARK_CONF_DIR:$SPARK_CLASSPATH";
+elif ! [ -z "${SPARK_HOME+x}" ]; then
+  SPARK_CLASSPATH="$SPARK_HOME/conf:$SPARK_CLASSPATH";
+fi
+
+# SPARK-43540: add current working directory into executor classpath
+SPARK_CLASSPATH="$SPARK_CLASSPATH:$PWD"
+
+# Switch to the spark user if no USER was specified (root by default); otherwise run as that USER directly
+switch_spark_if_root() {
+  if [ $(id -u) -eq 0 ]; then
+    echo gosu spark
+  fi
+}
+
+case "$1" in
+  driver)
+    shift 1
+    CMD=(
+      "$SPARK_HOME/bin/spark-submit"
+      --conf "spark.driver.bindAddress=$SPARK_DRIVER_BIND_ADDRESS"
+      --conf "spark.executorEnv.SPARK_DRIVER_POD_IP=$SPARK_DRIVER_BIND_ADDRESS"
+      --deploy-mode client
+      "$@"
+    )
+    attempt_setup_fake_passwd_entry
+    # Execute the container CMD under tini for better hygiene
+    exec $(switch_spark_if_root) /usr/bin/tini -s -- "${CMD[@]}"
+    ;;
+  executor)
+    shift 1
+    CMD=(
+      ${JAVA_HOME}/bin/java
+      "${SPARK_EXECUTOR_JAVA_OPTS[@]}"
+      -Xms"$SPARK_EXECUTOR_MEMORY"
+      -Xmx"$SPARK_EXECUTOR_MEMORY"
+      -cp "$SPARK_CLASSPATH:$SPARK_DIST_CLASSPATH"
+      org.apache.spark.scheduler.cluster.k8s.KubernetesExecutorBackend
+      --driver-url "$SPARK_DRIVER_URL"
+      --executor-id "$SPARK_EXECUTOR_ID"
+      --cores "$SPARK_EXECUTOR_CORES"
+      --app-id "$SPARK_APPLICATION_ID"
+      --hostname "$SPARK_EXECUTOR_POD_IP"
+      --resourceProfileId "$SPARK_RESOURCE_PROFILE_ID"
+      --podName "$SPARK_EXECUTOR_POD_NAME"
+    )
+    attempt_setup_fake_passwd_entry
+    # Execute the container CMD under tini for better hygiene
+    exec $(switch_spark_if_root) /usr/bin/tini -s -- "${CMD[@]}"
+    ;;
+
+  *)
+    # Non-spark-on-k8s command provided, proceeding in pass-through mode...
+    exec "$@"
+    ;;
+esac
diff --git a/tools/template.py b/tools/template.py
index 0c31a2c..b45e552 100755
--- a/tools/template.py
+++ b/tools/template.py
@@ -48,6 +48,8 @@
     "3.5.2": "D76E23B9F11B5BF6864613C4F7051850A0AF904D",
     # issuer "haejoon@apache.org"
     "3.5.3": "0A2D660358B6F6F8071FD16F6606986CF5A8447C",
+    # issuer "yangjie01@apache.org"
+    "3.5.4": "19F745C40A0E550420BB2C522541488DA93FE4B4",
     # issuer "wenchen@apache.org"
     "4.0.0-preview1": "4DC9676CEF9A83E98FCA02784D6620843CD87F5A",
     # issuer "dongjoon@apache.org"
diff --git a/versions.json b/versions.json
index 230ce26..355ba98 100644
--- a/versions.json
+++ b/versions.json
@@ -112,13 +112,73 @@
       "4.0.0-preview1-scala2.13-java17-python3-r-ubuntu"
     ]
   },
+  {
+    "path": "3.5.4/scala2.12-java17-python3-ubuntu",
+    "tags": [
+      "3.5.4-scala2.12-java17-python3-ubuntu",
+      "3.5.4-java17-python3",
+      "3.5.4-java17",
+      "python3-java17"
+    ]
+  },
+  {
+    "path": "3.5.4/scala2.12-java17-r-ubuntu",
+    "tags": [
+      "3.5.4-scala2.12-java17-r-ubuntu",
+      "3.5.4-java17-r"
+    ]
+  },
+  {
+    "path": "3.5.4/scala2.12-java17-ubuntu",
+    "tags": [
+      "3.5.4-scala2.12-java17-ubuntu",
+      "3.5.4-java17-scala"
+    ]
+  },
+  {
+    "path": "3.5.4/scala2.12-java17-python3-r-ubuntu",
+    "tags": [
+      "3.5.4-scala2.12-java17-python3-r-ubuntu"
+    ]
+  },
+  {
+    "path": "3.5.4/scala2.12-java11-python3-ubuntu",
+    "tags": [
+      "3.5.4-scala2.12-java11-python3-ubuntu",
+      "3.5.4-python3",
+      "3.5.4",
+      "python3",
+      "latest"
+    ]
+  },
+  {
+    "path": "3.5.4/scala2.12-java11-r-ubuntu",
+    "tags": [
+      "3.5.4-scala2.12-java11-r-ubuntu",
+      "3.5.4-r",
+      "r"
+    ]
+  },
+  {
+    "path": "3.5.4/scala2.12-java11-ubuntu",
+    "tags": [
+      "3.5.4-scala2.12-java11-ubuntu",
+      "3.5.4-scala",
+      "scala"
+    ]
+  },
+  {
+    "path": "3.5.4/scala2.12-java11-python3-r-ubuntu",
+    "tags": [
+      "3.5.4-scala2.12-java11-python3-r-ubuntu"
+    ]
+  },
   {
     "path": "3.5.3/scala2.12-java17-python3-ubuntu",
     "tags": [
       "3.5.3-scala2.12-java17-python3-ubuntu",
       "3.5.3-java17-python3",
-      "3.5.3-java17",
-      "python3-java17"
+      "3.5.3-java17"
"3.5.3-java17" ] }, { @@ -146,25 +206,21 @@ "tags": [ "3.5.3-scala2.12-java11-python3-ubuntu", "3.5.3-python3", - "3.5.3", - "python3", - "latest" + "3.5.3" ] }, { "path": "3.5.3/scala2.12-java11-r-ubuntu", "tags": [ "3.5.3-scala2.12-java11-r-ubuntu", - "3.5.3-r", - "r" + "3.5.3-r" ] }, { "path": "3.5.3/scala2.12-java11-ubuntu", "tags": [ "3.5.3-scala2.12-java11-ubuntu", - "3.5.3-scala", - "scala" + "3.5.3-scala" ] }, { From 1b36487a6d60b7b99530d907275f1b816cb466ad Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Sat, 21 Dec 2024 01:53:15 +0800 Subject: [PATCH 2/3] test install r --- .github/workflows/main.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 33c75fb..834d391 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -257,6 +257,11 @@ jobs: path: ~/.cache/coursier key: build-${{ inputs.spark }}-scala${{ inputs.scala }}-java${{ inputs.java }}-coursier + - name: Install R + run: | + sudo apt update + sudo apt-get install r-base + - name: Test - Start minikube run: | # See more in "Installation" https://minikube.sigs.k8s.io/docs/start/ From 9ec4275d1259c1bd98b2ba8f945b89691b14f39c Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Sun, 22 Dec 2024 12:35:22 +0800 Subject: [PATCH 3/3] Revert "test install r" This reverts commit 1b36487a6d60b7b99530d907275f1b816cb466ad. --- .github/workflows/main.yml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 834d391..33c75fb 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -257,11 +257,6 @@ jobs: path: ~/.cache/coursier key: build-${{ inputs.spark }}-scala${{ inputs.scala }}-java${{ inputs.java }}-coursier - - name: Install R - run: | - sudo apt update - sudo apt-get install r-base - - name: Test - Start minikube run: | # See more in "Installation" https://minikube.sigs.k8s.io/docs/start/