Skip to content

Commit

Permalink
[ZEPPELIN-5945] Build Zeppelin-Distribution with JDK11 and use Spark …
Browse files Browse the repository at this point in the history
…3.3 as default (#4639)
  • Loading branch information
Reamer authored Aug 22, 2023
1 parent 8ed7bff commit ed958b5
Show file tree
Hide file tree
Showing 7 changed files with 70 additions and 60 deletions.
6 changes: 3 additions & 3 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -14,20 +14,20 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
FROM openjdk:8 as builder
FROM openjdk:11 as builder
ADD . /workspace/zeppelin
WORKDIR /workspace/zeppelin
ENV MAVEN_OPTS="-Xms1024M -Xmx2048M -XX:MaxMetaspaceSize=1024m -XX:-UseGCOverheadLimit -Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn"
# Allow npm and bower to run with root privileges
RUN echo "unsafe-perm=true" > ~/.npmrc && \
echo '{ "allow_root": true }' > ~/.bowerrc && \
./mvnw -B package -DskipTests -Pbuild-distr -Pspark-3.2 -Pinclude-hadoop -Phadoop3 -Pspark-scala-2.12 -Pweb-angular -Pweb-dist && \
./mvnw -B package -DskipTests -Pbuild-distr -Pspark-3.3 -Pinclude-hadoop -Phadoop3 -Pspark-scala-2.12 -Pweb-angular -Pweb-dist && \
# Example which doesn't compile all interpreters
# ./mvnw -B package -DskipTests -Pbuild-distr -Pspark-3.2 -Pinclude-hadoop -Phadoop3 -Pspark-scala-2.12 -Pweb-angular -Pweb-dist -pl '!groovy,!submarine,!livy,!hbase,!file,!flink' && \
mv /workspace/zeppelin/zeppelin-distribution/target/zeppelin-*/zeppelin-* /opt/zeppelin/ && \
# Removing stuff saves time, because docker creates a temporary layer
rm -rf ~/.m2 && \
rm -rf /workspace/zeppelin/*

FROM ubuntu:20.04
FROM ubuntu:22.04
COPY --from=builder /opt/zeppelin /opt/zeppelin
52 changes: 21 additions & 31 deletions scripts/docker/zeppelin-interpreter/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
ARG ZEPPELIN_DISTRIBUTION_IMAGE=zeppelin-distribution:latest
FROM $ZEPPELIN_DISTRIBUTION_IMAGE AS zeppelin-distribution

FROM ubuntu:20.04
FROM ubuntu:22.04

LABEL maintainer="Apache Software Foundation <dev@zeppelin.apache.org>"

Expand All @@ -25,13 +25,16 @@ ARG version="0.10.0"
ENV VERSION="${version}" \
ZEPPELIN_HOME="/opt/zeppelin"

# Install Java for zeppelin interpreter
# Install micromamba to install a python environment via conda
RUN set -ex && \
apt-get -y update && \
DEBIAN_FRONTEND=noninteractive apt-get install -y openjdk-8-jre-headless wget tini && \
/usr/bin/apt-get update && \
DEBIAN_FRONTEND=noninteractive /usr/bin/apt-get install -y openjdk-11-jre-headless wget tini bzip2 && \
/usr/bin/wget -qO- https://micromamba.snakepit.net/api/micromamba/linux-64/latest | tar -xvj bin/micromamba && \
# Cleanup
rm -rf /var/lib/apt/lists/* && \
apt-get autoclean && \
apt-get clean
/usr/bin/apt-get clean && \
/bin/rm -rf /var/lib/apt/lists/*


COPY --from=zeppelin-distribution /opt/zeppelin/bin ${ZEPPELIN_HOME}/bin
COPY log4j.properties ${ZEPPELIN_HOME}/conf/
Expand All @@ -46,33 +49,20 @@ COPY --from=zeppelin-distribution /opt/zeppelin/interpreter ${ZEPPELIN_HOME}/int
### COPY --from=zeppelin-distribution /opt/zeppelin/interpreter/${interpreter_name} ${ZEPPELIN_HOME}/interpreter/${interpreter_name}


# Decide: Install conda to manage python and R packages. Maybe adjust the packages in pip_packages.txt or conda_packages.txt
ARG miniconda_version="py38_4.8.3"
ARG miniconda_sha256="879457af6a0bf5b34b48c12de31d4df0ee2f06a8e68768e5758c3293b2daf688"
# Decide: Install conda to manage python and R packages. Maybe adjust the packages in env_python_3_with_R.yml
# Install python and R packages via conda
COPY conda_packages.txt /conda_packages.txt
# Some python packages are not available via conda, so we are using pip
COPY pip_packages.txt /pip_packages.txt
COPY env_python_3_with_R.yml /env_python_3_with_R.yml
# To improve the build time, the Zeppelin team recommends a conda proxy
# COPY condarc /etc/conda/condarc
RUN set -ex && \
wget -nv https://repo.anaconda.com/miniconda/Miniconda3-${miniconda_version}-Linux-x86_64.sh -O miniconda.sh && \
echo "${miniconda_sha256} miniconda.sh" > anaconda.sha256 && \
sha256sum --strict -c anaconda.sha256 && \
bash miniconda.sh -b -p /opt/conda && \
export PATH=/opt/conda/bin:$PATH && \
conda config --set always_yes yes --set changeps1 no && \
conda info -a && \
conda config --add channels conda-forge && \
conda install -y --quiet --file /conda_packages.txt && \
pip install -q -r /pip_packages.txt && \
# Cleanup
rm -v miniconda.sh anaconda.sha256 && \
# Cleanup based on https://github.com/ContinuumIO/docker-images/commit/cac3352bf21a26fa0b97925b578fb24a0fe8c383
find /opt/conda/ -follow -type f -name '*.a' -delete && \
find /opt/conda/ -follow -type f -name '*.js.map' -delete && \
conda clean -ay
# Allow to modify conda packages. This allows malicious code to be injected into other interpreter sessions, therefore it is disabled by default
# chmod -R ug+rwX /opt/conda
ENV PATH /opt/conda/bin:$PATH
micromamba create -y -p /opt/conda -f env_python_3_with_R.yml && \
micromamba clean -ay

ENV PATH=/opt/conda/bin:$PATH \
SPARK_HOME=/opt/conda/lib/python3.9/site-packages/pyspark

# Allow to modify conda packages. This allows malicious code to be injected into other interpreter sessions, therefore it is disabled by default
# chmod -R ug+rwX /opt/conda

RUN mkdir -p "${ZEPPELIN_HOME}/logs" "${ZEPPELIN_HOME}/run" "${ZEPPELIN_HOME}/local-repo" && \
# Allow process to edit /etc/passwd, to create a user entry for zeppelin
Expand Down
22 changes: 0 additions & 22 deletions scripts/docker/zeppelin-interpreter/conda_packages.txt

This file was deleted.

5 changes: 5 additions & 0 deletions scripts/docker/zeppelin-interpreter/condarc
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Example of using a proxy for conda.
# Replace proxy.mycompany.com with the host of your own repository manager.
# channel_alias is the base URL that conda prepends to bare channel names
# (e.g. "conda-forge"), so those lookups are routed through the proxy as well.
channel_alias: https://proxy.mycompany.com/repository/anaconda-proxy
# default_channels lists the URLs that back the special "defaults" channel.
default_channels:
  - https://proxy.mycompany.com/repository/anaconda-proxy/main
  - https://proxy.mycompany.com/repository/anaconda-proxy/r
38 changes: 38 additions & 0 deletions scripts/docker/zeppelin-interpreter/env_python_3_with_R.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Conda environment for the Zeppelin interpreter image: Python 3 plus R.
# The interpreter Dockerfile creates it with
#   micromamba create -y -p /opt/conda -f env_python_3_with_R.yml
name: python_3_with_R
channels:
  - conda-forge
  - defaults
dependencies:
  # Keep Python on 3.9: the interpreter Dockerfile sets SPARK_HOME to
  # /opt/conda/lib/python3.9/site-packages/pyspark, so the minor version
  # here has to match that path.
  - python >=3.9,<3.10
  # pyspark supplies the Spark distribution that SPARK_HOME points at.
  - pyspark=3.3.2
  - pycodestyle
  - scipy
  - numpy
  - grpcio
  - protobuf
  - pandasql
  - ipython
  - ipykernel
  - jupyter_client
  - hvplot
  - plotnine
  - seaborn
  - intake
  - intake-parquet
  - intake-xarray
  - altair
  - vega_datasets
  - plotly
  # pip itself must be listed as a conda dependency so the nested
  # "pip:" section below can be installed.
  - pip
  - pip:
      # works for regular pip packages
      - bkzep==0.6.1
  # R runtime and the R packages
  - r-base=3
  - r-data.table
  - r-evaluate
  - r-base64enc
  - r-knitr
  - r-ggplot2
  - r-irkernel
  - r-shiny
  - r-googlevis
1 change: 0 additions & 1 deletion scripts/docker/zeppelin-interpreter/pip_packages.txt

This file was deleted.

6 changes: 3 additions & 3 deletions scripts/docker/zeppelin-server/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -18,19 +18,19 @@ FROM $ZEPPELIN_DISTRIBUTION_IMAGE AS zeppelin-distribution

# Prepare all interpreter settings for Zeppelin server
# These steps are not needed if you add only specific interpreter settings to your image
FROM alpine:3.11 AS interpreter-settings
FROM alpine:3.13 AS interpreter-settings
COPY --from=zeppelin-distribution /opt/zeppelin/interpreter /tmp/interpreter
RUN mkdir -p /opt/zeppelin/interpreter && \
cd /tmp/interpreter && \
find . -name 'interpreter-setting.json' -exec cp --parents \{\} /opt/zeppelin/interpreter \;

FROM ubuntu:20.04
FROM ubuntu:22.04
LABEL maintainer="Apache Software Foundation <dev@zeppelin.apache.org>"

RUN set -ex && \
apt-get -y update && \
# Install language and other base packages
DEBIAN_FRONTEND=noninteractive apt-get install -y language-pack-en openjdk-8-jre-headless tini wget && \
DEBIAN_FRONTEND=noninteractive apt-get install -y language-pack-en openjdk-11-jre-headless tini wget && \
# Cleanup
rm -rf /var/lib/apt/lists/* && \
apt-get autoclean && \
Expand Down

0 comments on commit ed958b5

Please sign in to comment.