diff --git a/Dockerfile b/Dockerfile index 5d78aff9a5b..5d97d7dcc0f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -14,14 +14,14 @@ # See the License for the specific language governing permissions and # limitations under the License. # -FROM openjdk:8 as builder +FROM openjdk:11 as builder ADD . /workspace/zeppelin WORKDIR /workspace/zeppelin ENV MAVEN_OPTS="-Xms1024M -Xmx2048M -XX:MaxMetaspaceSize=1024m -XX:-UseGCOverheadLimit -Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn" # Allow npm and bower to run with root privileges RUN echo "unsafe-perm=true" > ~/.npmrc && \ echo '{ "allow_root": true }' > ~/.bowerrc && \ - ./mvnw -B package -DskipTests -Pbuild-distr -Pspark-3.2 -Pinclude-hadoop -Phadoop3 -Pspark-scala-2.12 -Pweb-angular -Pweb-dist && \ + ./mvnw -B package -DskipTests -Pbuild-distr -Pspark-3.3 -Pinclude-hadoop -Phadoop3 -Pspark-scala-2.12 -Pweb-angular -Pweb-dist && \ # Example with doesn't compile all interpreters # ./mvnw -B package -DskipTests -Pbuild-distr -Pspark-3.2 -Pinclude-hadoop -Phadoop3 -Pspark-scala-2.12 -Pweb-angular -Pweb-dist -pl '!groovy,!submarine,!livy,!hbase,!file,!flink' && \ mv /workspace/zeppelin/zeppelin-distribution/target/zeppelin-*/zeppelin-* /opt/zeppelin/ && \ @@ -29,5 +29,5 @@ RUN echo "unsafe-perm=true" > ~/.npmrc && \ rm -rf ~/.m2 && \ rm -rf /workspace/zeppelin/* -FROM ubuntu:20.04 +FROM ubuntu:22.04 COPY --from=builder /opt/zeppelin /opt/zeppelin diff --git a/scripts/docker/zeppelin-interpreter/Dockerfile b/scripts/docker/zeppelin-interpreter/Dockerfile index 8779982acb7..453411bb452 100644 --- a/scripts/docker/zeppelin-interpreter/Dockerfile +++ b/scripts/docker/zeppelin-interpreter/Dockerfile @@ -16,7 +16,7 @@ ARG ZEPPELIN_DISTRIBUTION_IMAGE=zeppelin-distribution:latest FROM $ZEPPELIN_DISTRIBUTION_IMAGE AS zeppelin-distribution -FROM ubuntu:20.04 +FROM ubuntu:22.04 LABEL maintainer="Apache Software Foundation " @@ -25,13 +25,16 @@ ARG version="0.10.0" ENV VERSION="${version}" \ 
ZEPPELIN_HOME="/opt/zeppelin" +# Install Java for zeppelin interpreter +# Install micromamba to install a python environment via conda RUN set -ex && \ - apt-get -y update && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y openjdk-8-jre-headless wget tini && \ + /usr/bin/apt-get update && \ + DEBIAN_FRONTEND=noninteractive /usr/bin/apt-get install -y openjdk-11-jre-headless wget tini bzip2 && \ + /usr/bin/wget -qO- https://micromamba.snakepit.net/api/micromamba/linux-64/latest | tar -xvj bin/micromamba && \ # Cleanup - rm -rf /var/lib/apt/lists/* && \ - apt-get autoclean && \ - apt-get clean + /usr/bin/apt-get clean && \ + /bin/rm -rf /var/lib/apt/lists/* + COPY --from=zeppelin-distribution /opt/zeppelin/bin ${ZEPPELIN_HOME}/bin COPY log4j.properties ${ZEPPELIN_HOME}/conf/ @@ -46,33 +49,20 @@ COPY --from=zeppelin-distribution /opt/zeppelin/interpreter ${ZEPPELIN_HOME}/int ### COPY --from=zeppelin-distribution /opt/zeppelin/interpreter/${interpreter_name} ${ZEPPELIN_HOME}/interpreter/${interpreter_name} -# Decide: Install conda to manage python and R packages. Maybe adjust the packages in pip_packages.txt or conda_packages.txt -ARG miniconda_version="py38_4.8.3" -ARG miniconda_sha256="879457af6a0bf5b34b48c12de31d4df0ee2f06a8e68768e5758c3293b2daf688" +# Decide: Install conda to manage python and R packages. 
Maybe adjust the packages in env_python_3_with_R.yml # Install python and R packages via conda -COPY conda_packages.txt /conda_packages.txt -# Some python packages are not available via conda, so we are using pip -COPY pip_packages.txt /pip_packages.txt +COPY env_python_3_with_R.yml /env_python_3_with_R.yml +# To improve the build time, the Zeppelin team recommends a conda proxy +# COPY condarc /etc/conda/condarc RUN set -ex && \ - wget -nv https://repo.anaconda.com/miniconda/Miniconda3-${miniconda_version}-Linux-x86_64.sh -O miniconda.sh && \ - echo "${miniconda_sha256} miniconda.sh" > anaconda.sha256 && \ - sha256sum --strict -c anaconda.sha256 && \ - bash miniconda.sh -b -p /opt/conda && \ - export PATH=/opt/conda/bin:$PATH && \ - conda config --set always_yes yes --set changeps1 no && \ - conda info -a && \ - conda config --add channels conda-forge && \ - conda install -y --quiet --file /conda_packages.txt && \ - pip install -q -r /pip_packages.txt && \ - # Cleanup - rm -v miniconda.sh anaconda.sha256 && \ - # Cleanup based on https://github.com/ContinuumIO/docker-images/commit/cac3352bf21a26fa0b97925b578fb24a0fe8c383 - find /opt/conda/ -follow -type f -name '*.a' -delete && \ - find /opt/conda/ -follow -type f -name '*.js.map' -delete && \ - conda clean -ay - # Allow to modify conda packages. This allows malicious code to be injected into other interpreter sessions, therefore it is disabled by default - # chmod -R ug+rwX /opt/conda -ENV PATH /opt/conda/bin:$PATH + micromamba create -y -p /opt/conda -f /env_python_3_with_R.yml && \ + micromamba clean -ay + +ENV PATH=/opt/conda/bin:$PATH \ + SPARK_HOME=/opt/conda/lib/python3.9/site-packages/pyspark + +# Allow to modify conda packages. 
This allows malicious code to be injected into other interpreter sessions, therefore it is disabled by default +# chmod -R ug+rwX /opt/conda RUN mkdir -p "${ZEPPELIN_HOME}/logs" "${ZEPPELIN_HOME}/run" "${ZEPPELIN_HOME}/local-repo" && \ # Allow process to edit /etc/passwd, to create a user entry for zeppelin diff --git a/scripts/docker/zeppelin-interpreter/conda_packages.txt b/scripts/docker/zeppelin-interpreter/conda_packages.txt deleted file mode 100644 index 3be8519342a..00000000000 --- a/scripts/docker/zeppelin-interpreter/conda_packages.txt +++ /dev/null @@ -1,22 +0,0 @@ -# python packages -pycodestyle -numpy -pandas -scipy -grpcio -hvplot -protobuf -pandasql -ipython -matplotlib -ipykernel -jupyter_client -bokeh - -# R packages -r-evaluate -r-base64enc -r-knitr -r-ggplot2 -r-shiny -r-googlevis diff --git a/scripts/docker/zeppelin-interpreter/condarc b/scripts/docker/zeppelin-interpreter/condarc new file mode 100644 index 00000000000..6992fdf59e6 --- /dev/null +++ b/scripts/docker/zeppelin-interpreter/condarc @@ -0,0 +1,5 @@ +# Example of using a proxy for conda +channel_alias: https://proxy.mycompany.com/repository/anaconda-proxy +default_channels: + - https://proxy.mycompany.com/repository/anaconda-proxy/main + - https://proxy.mycompany.com/repository/anaconda-proxy/r \ No newline at end of file diff --git a/scripts/docker/zeppelin-interpreter/env_python_3_with_R.yml b/scripts/docker/zeppelin-interpreter/env_python_3_with_R.yml new file mode 100644 index 00000000000..09ed9a39013 --- /dev/null +++ b/scripts/docker/zeppelin-interpreter/env_python_3_with_R.yml @@ -0,0 +1,38 @@ +name: python_3_with_R +channels: + - conda-forge + - defaults +dependencies: + - python >=3.9,<3.10 + - pyspark=3.3.2 + - pycodestyle + - scipy + - numpy + - grpcio + - protobuf + - pandasql + - ipython + - ipykernel + - jupyter_client + - hvplot + - plotnine + - seaborn + - intake + - intake-parquet + - intake-xarray + - altair + - vega_datasets + - plotly + - pip + - pip: + # works for regular 
pip packages + - bkzep==0.6.1 + - r-base=3 + - r-data.table + - r-evaluate + - r-base64enc + - r-knitr + - r-ggplot2 + - r-irkernel + - r-shiny + - r-googlevis diff --git a/scripts/docker/zeppelin-interpreter/pip_packages.txt b/scripts/docker/zeppelin-interpreter/pip_packages.txt deleted file mode 100644 index 9123189b05e..00000000000 --- a/scripts/docker/zeppelin-interpreter/pip_packages.txt +++ /dev/null @@ -1 +0,0 @@ -bkzep==0.6.1 \ No newline at end of file diff --git a/scripts/docker/zeppelin-server/Dockerfile b/scripts/docker/zeppelin-server/Dockerfile index 1e1c9c374e9..6a560620675 100644 --- a/scripts/docker/zeppelin-server/Dockerfile +++ b/scripts/docker/zeppelin-server/Dockerfile @@ -18,19 +18,19 @@ FROM $ZEPPELIN_DISTRIBUTION_IMAGE AS zeppelin-distribution # Prepare all interpreter settings for Zeppelin server # This steps are not needed, if you you add only specific interpreters settings to your image -FROM alpine:3.11 AS interpreter-settings +FROM alpine:3.13 AS interpreter-settings COPY --from=zeppelin-distribution /opt/zeppelin/interpreter /tmp/interpreter RUN mkdir -p /opt/zeppelin/interpreter && \ cd /tmp/interpreter && \ find . -name 'interpreter-setting.json' -exec cp --parents \{\} /opt/zeppelin/interpreter \; -FROM ubuntu:22.04 LABEL maintainer="Apache Software Foundation " RUN set -ex && \ apt-get -y update && \ # Install language and other base packages - DEBIAN_FRONTEND=noninteractive apt-get install -y language-pack-en openjdk-8-jre-headless tini wget && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y language-pack-en openjdk-11-jre-headless tini wget && \ # Cleanup rm -rf /var/lib/apt/lists/* && \ apt-get autoclean && \