Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Build Zeppelin-Distribution with JDK11 and use Spark 3.3 as default #4639

Merged
merged 1 commit into from
Aug 22, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -14,20 +14,20 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
FROM openjdk:8 as builder
FROM openjdk:11 as builder
ADD . /workspace/zeppelin
WORKDIR /workspace/zeppelin
ENV MAVEN_OPTS="-Xms1024M -Xmx2048M -XX:MaxMetaspaceSize=1024m -XX:-UseGCOverheadLimit -Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn"
# Allow npm and bower to run with root privileges
RUN echo "unsafe-perm=true" > ~/.npmrc && \
echo '{ "allow_root": true }' > ~/.bowerrc && \
./mvnw -B package -DskipTests -Pbuild-distr -Pspark-3.2 -Pinclude-hadoop -Phadoop3 -Pspark-scala-2.12 -Pweb-angular -Pweb-dist && \
./mvnw -B package -DskipTests -Pbuild-distr -Pspark-3.3 -Pinclude-hadoop -Phadoop3 -Pspark-scala-2.12 -Pweb-angular -Pweb-dist && \
# Example that doesn't compile all interpreters
# ./mvnw -B package -DskipTests -Pbuild-distr -Pspark-3.2 -Pinclude-hadoop -Phadoop3 -Pspark-scala-2.12 -Pweb-angular -Pweb-dist -pl '!groovy,!submarine,!livy,!hbase,!file,!flink' && \
mv /workspace/zeppelin/zeppelin-distribution/target/zeppelin-*/zeppelin-* /opt/zeppelin/ && \
# Removing stuff saves time, because docker creates a temporary layer
rm -rf ~/.m2 && \
rm -rf /workspace/zeppelin/*

FROM ubuntu:20.04
FROM ubuntu:22.04
COPY --from=builder /opt/zeppelin /opt/zeppelin
52 changes: 21 additions & 31 deletions scripts/docker/zeppelin-interpreter/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
ARG ZEPPELIN_DISTRIBUTION_IMAGE=zeppelin-distribution:latest
FROM $ZEPPELIN_DISTRIBUTION_IMAGE AS zeppelin-distribution

FROM ubuntu:20.04
FROM ubuntu:22.04

LABEL maintainer="Apache Software Foundation <dev@zeppelin.apache.org>"

Expand All @@ -25,13 +25,16 @@ ARG version="0.10.0"
ENV VERSION="${version}" \
ZEPPELIN_HOME="/opt/zeppelin"

# Install Java for zeppelin interpreter
# Install micromamba to install a python environment via conda
RUN set -ex && \
apt-get -y update && \
DEBIAN_FRONTEND=noninteractive apt-get install -y openjdk-8-jre-headless wget tini && \
/usr/bin/apt-get update && \
DEBIAN_FRONTEND=noninteractive /usr/bin/apt-get install -y openjdk-11-jre-headless wget tini bzip2 && \
/usr/bin/wget -qO- https://micromamba.snakepit.net/api/micromamba/linux-64/latest | tar -xvj bin/micromamba && \
# Cleanup
rm -rf /var/lib/apt/lists/* && \
apt-get autoclean && \
apt-get clean
/usr/bin/apt-get clean && \
/bin/rm -rf /var/lib/apt/lists/*


COPY --from=zeppelin-distribution /opt/zeppelin/bin ${ZEPPELIN_HOME}/bin
COPY log4j.properties ${ZEPPELIN_HOME}/conf/
Expand All @@ -46,33 +49,20 @@ COPY --from=zeppelin-distribution /opt/zeppelin/interpreter ${ZEPPELIN_HOME}/int
### COPY --from=zeppelin-distribution /opt/zeppelin/interpreter/${interpreter_name} ${ZEPPELIN_HOME}/interpreter/${interpreter_name}


# Decide: Install conda to manage python and R packages. Maybe adjust the packages in pip_packages.txt or conda_packages.txt
ARG miniconda_version="py38_4.8.3"
ARG miniconda_sha256="879457af6a0bf5b34b48c12de31d4df0ee2f06a8e68768e5758c3293b2daf688"
# Decide: Install conda to manage python and R packages. Maybe adjust the packages in env_python_3_with_R.yml
# Install python and R packages via conda
COPY conda_packages.txt /conda_packages.txt
# Some python packages are not available via conda, so we are using pip
COPY pip_packages.txt /pip_packages.txt
COPY env_python_3_with_R.yml /env_python_3_with_R.yml
# To improve the build time, the Zeppelin team recommends a conda proxy
# COPY condarc /etc/conda/condarc
RUN set -ex && \
wget -nv https://repo.anaconda.com/miniconda/Miniconda3-${miniconda_version}-Linux-x86_64.sh -O miniconda.sh && \
echo "${miniconda_sha256} miniconda.sh" > anaconda.sha256 && \
sha256sum --strict -c anaconda.sha256 && \
bash miniconda.sh -b -p /opt/conda && \
export PATH=/opt/conda/bin:$PATH && \
conda config --set always_yes yes --set changeps1 no && \
conda info -a && \
conda config --add channels conda-forge && \
conda install -y --quiet --file /conda_packages.txt && \
pip install -q -r /pip_packages.txt && \
# Cleanup
rm -v miniconda.sh anaconda.sha256 && \
# Cleanup based on https://github.com/ContinuumIO/docker-images/commit/cac3352bf21a26fa0b97925b578fb24a0fe8c383
find /opt/conda/ -follow -type f -name '*.a' -delete && \
find /opt/conda/ -follow -type f -name '*.js.map' -delete && \
conda clean -ay
# Allow to modify conda packages. This allows malicious code to be injected into other interpreter sessions, therefore it is disabled by default
# chmod -R ug+rwX /opt/conda
ENV PATH /opt/conda/bin:$PATH
micromamba create -y -p /opt/conda -f env_python_3_with_R.yml && \
micromamba clean -ay

ENV PATH=/opt/conda/bin:$PATH \
SPARK_HOME=/opt/conda/lib/python3.9/site-packages/pyspark

# Allow modifying conda packages. This would let malicious code be injected into other interpreter sessions, therefore it is disabled by default
# chmod -R ug+rwX /opt/conda

RUN mkdir -p "${ZEPPELIN_HOME}/logs" "${ZEPPELIN_HOME}/run" "${ZEPPELIN_HOME}/local-repo" && \
# Allow process to edit /etc/passwd, to create a user entry for zeppelin
Expand Down
22 changes: 0 additions & 22 deletions scripts/docker/zeppelin-interpreter/conda_packages.txt

This file was deleted.

5 changes: 5 additions & 0 deletions scripts/docker/zeppelin-interpreter/condarc
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Example of using a proxy for conda.
# proxy.mycompany.com is a placeholder: replace it with the host of your own
# repository proxy. channel_alias and default_channels should point at the
# same proxy so that every channel lookup goes through it.
channel_alias: https://proxy.mycompany.com/repository/anaconda-proxy
default_channels:
  - https://proxy.mycompany.com/repository/anaconda-proxy/main
  - https://proxy.mycompany.com/repository/anaconda-proxy/r
38 changes: 38 additions & 0 deletions scripts/docker/zeppelin-interpreter/env_python_3_with_R.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Conda environment definition for the Zeppelin interpreter image:
# Python 3 with PySpark and plotting libraries, plus an R toolchain.
name: python_3_with_R
channels:
  - conda-forge
  - defaults
dependencies:
  # The interpreter Dockerfile sets SPARK_HOME to a path under
  # /opt/conda/lib/python3.9, so the Python minor version pinned here and
  # that path must be changed together.
  - python >=3.9,<3.10
  - pyspark=3.3.2
  - pycodestyle
  - scipy
  - numpy
  - grpcio
  - protobuf
  - pandasql
  - ipython
  - ipykernel
  - jupyter_client
  - hvplot
  - plotnine
  - seaborn
  - intake
  - intake-parquet
  - intake-xarray
  - altair
  - vega_datasets
  - plotly
  # "pip" installs the pip tool itself; the "pip:" mapping below lists the
  # packages that are installed through pip instead of conda.
  - pip
  - pip:
      # works for regular pip packages
      - bkzep==0.6.1
  # R runtime and R packages
  - r-base=3
  - r-data.table
  - r-evaluate
  - r-base64enc
  - r-knitr
  - r-ggplot2
  - r-irkernel
  - r-shiny
  - r-googlevis
1 change: 0 additions & 1 deletion scripts/docker/zeppelin-interpreter/pip_packages.txt

This file was deleted.

6 changes: 3 additions & 3 deletions scripts/docker/zeppelin-server/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -18,19 +18,19 @@ FROM $ZEPPELIN_DISTRIBUTION_IMAGE AS zeppelin-distribution

# Prepare all interpreter settings for Zeppelin server
# These steps are not needed if you add only specific interpreter settings to your image
FROM alpine:3.11 AS interpreter-settings
FROM alpine:3.13 AS interpreter-settings
COPY --from=zeppelin-distribution /opt/zeppelin/interpreter /tmp/interpreter
RUN mkdir -p /opt/zeppelin/interpreter && \
cd /tmp/interpreter && \
find . -name 'interpreter-setting.json' -exec cp --parents \{\} /opt/zeppelin/interpreter \;

FROM ubuntu:20.04
FROM ubuntu:22.04
LABEL maintainer="Apache Software Foundation <dev@zeppelin.apache.org>"

RUN set -ex && \
apt-get -y update && \
# Install language and other base packages
DEBIAN_FRONTEND=noninteractive apt-get install -y language-pack-en openjdk-8-jre-headless tini wget && \
DEBIAN_FRONTEND=noninteractive apt-get install -y language-pack-en openjdk-11-jre-headless tini wget && \
# Cleanup
rm -rf /var/lib/apt/lists/* && \
apt-get autoclean && \
Expand Down