Skip to content

Commit

Permalink
Use official spark docker.
Browse files Browse the repository at this point in the history
  • Loading branch information
julien bignon committed Aug 26, 2024
1 parent 0e2613d commit 622be70
Show file tree
Hide file tree
Showing 16 changed files with 163 additions and 397 deletions.
61 changes: 0 additions & 61 deletions technologies/job/spark/spark-3.5/Dockerfile

This file was deleted.

2 changes: 1 addition & 1 deletion technologies/job/spark/spark-3.5/context.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@ id: "3.5"
label: "3.5"
available: true
recommended: true
trustLevel: stable
trustLevel: stable
4 changes: 0 additions & 4 deletions technologies/job/spark/spark-3.5/dockerInfo.yaml

This file was deleted.

54 changes: 0 additions & 54 deletions technologies/job/spark/spark-3.5/entrypoint.sh

This file was deleted.

62 changes: 0 additions & 62 deletions technologies/job/spark/spark-3.5/image_test.yaml

This file was deleted.

56 changes: 9 additions & 47 deletions technologies/job/spark/spark-3.5/innerContexts/jre/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,59 +1,21 @@
ARG jre_major=11
FROM openjdk:${jre_major}-slim-bullseye
FROM spark:3.5.2

ARG SPARK_VERSION=3.5.2
ARG HADOOP_VERSION=3
ARG TINI_VERSION="v0.18.0"

ENV DEBIAN_FRONTEND noninteractive

ENV SPARK_HOME /opt/spark
ENV PATH "$PATH:$SPARK_HOME/bin"
ENV HADOOP_CONF_DIR=/etc/hadoop/conf
ENV LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/usr/lib/hadoop/lib/native"

ENV LANG C.UTF-8

# LIGHT DEPENDENCIES START
RUN echo "deb http://deb.debian.org/debian/ bullseye-backports main contrib non-free" | tee /etc/apt/sources.list.d/bulleseye-backports.list && \
apt update -qq && apt install -yqq --no-install-recommends \
ftp wget curl unzip telnet openssh-client krb5-user zip procps && \
rm -rf /var/lib/apt/lists/*
# LIGHT DEPENDENCIES END
USER root
RUN apt update -qq && apt install -yqq --no-install-recommends \
wget curl unzip krb5-user zip && \
rm -rf /var/lib/apt/lists/*

RUN set -ex && \
mkdir -p /opt/spark && \
mkdir -p /opt/spark/work-dir && \
touch /opt/spark/RELEASE && \
rm /bin/sh && \
ln -sv /bin/bash /bin/sh && \
echo "auth required pam_wheel.so use_uid" >> /etc/pam.d/su && \
chgrp root /etc/passwd && chmod ug+rw /etc/passwd && \
export TINI_HOME="/usr/local/sbin" && \
curl -fSL "https://github.com/krallin/tini/releases/download/$TINI_VERSION/tini" -o "${TINI_HOME}/tini" && \
curl -fSL "https://github.com/krallin/tini/releases/download/$TINI_VERSION/tini.asc" -o "${TINI_HOME}/tini.asc" && \
chmod +x "${TINI_HOME}/tini" && \
ln -s ${TINI_HOME}/tini /sbin/tini && \
"${TINI_HOME}/tini" -h
COPY entrypoint.sh /opt/
RUN chmod 755 /opt/entrypoint.sh

RUN mkdir -p /tmp/spark && \
cd /tmp/spark && \
wget -nv https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz && \
tar xf spark-*.tgz && \
rm spark-*.tgz && \
cp -R /tmp/spark/*/jars /opt/spark && \
cp -R /tmp/spark/*/bin /opt/spark && \
cp -R /tmp/spark/*/sbin /opt/spark && \
rm -Rf /tmp/spark
USER spark

#See hadoop version used by spark and update if necessary.
#See https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-aws/3.3.4 to get right version of aws-java-sdk-bundle
RUN wget -nv https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-bundle/1.12.262/aws-java-sdk-bundle-1.12.262.jar && \
wget -nv https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-aws/3.3.4/hadoop-aws-3.3.4.jar && \
mv *.jar /opt/spark/jars/

COPY entrypoint.sh /opt/
RUN chmod 755 /opt/entrypoint.sh

WORKDIR /opt/spark/work-dir

ENTRYPOINT [ "/opt/entrypoint.sh" ]
Loading

0 comments on commit 622be70

Please sign in to comment.