Skip to content

[ZEPPELIN-6113] Write a Dockerfile for mongodb interpreter image build #3959

[ZEPPELIN-6113] Write a Dockerfile for mongodb interpreter image build

[ZEPPELIN-6113] Write a Dockerfile for mongodb interpreter image build #3959

Workflow file for this run

name: core
on:
push:
branches-ignore:
- 'dependabot/**'
pull_request:
branches:
- master
- 'branch-*'
env:
# Disable keepAlive and pool
# https://github.com/actions/virtual-environments/issues/1499#issuecomment-689467080
MAVEN_OPTS: >-
-Xms1024M -Xmx2048M -XX:MaxMetaspaceSize=1024m -XX:-UseGCOverheadLimit
-Dhttp.keepAlive=false
-Dmaven.wagon.http.pool=false
-Dmaven.wagon.http.retryHandler.count=3
MAVEN_ARGS: >-
-B --no-transfer-progress
ZEPPELIN_HELIUM_REGISTRY: helium
SPARK_PRINT_LAUNCH_COMMAND: "true"
SPARK_LOCAL_IP: 127.0.0.1
ZEPPELIN_LOCAL_IP: 127.0.0.1
# Use the bash login, because we are using miniconda
defaults:
run:
shell: bash -l {0}
permissions:
contents: read # to fetch code (actions/checkout)
jobs:
# test on core-modules (zeppelin-interpreter,zeppelin-zengine,zeppelin-server),
# some interpreters are included, because zeppelin-server test depends on them: spark, shell & markdown
core-modules:
runs-on: ubuntu-20.04
strategy:
fail-fast: false
matrix:
hadoop: [hadoop3]
java: [ 8, 11 ]
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Tune Runner VM
uses: ./.github/actions/tune-runner-vm
- name: Set up JDK ${{ matrix.java }}
uses: actions/setup-java@v4
with:
distribution: 'temurin'
java-version: ${{ matrix.java }}
- name: Cache local Maven repository
uses: actions/cache@v4
with:
path: |
~/.m2/repository
!~/.m2/repository/org/apache/zeppelin/
~/.spark-dist
~/.cache
~/conda_pkgs_dir
key: ${{ runner.os }}-zeppelin-${{ hashFiles('**/pom.xml') }}
restore-keys: |
${{ runner.os }}-zeppelin-
- name: install application with some interpreter
run: ./mvnw install -Pbuild-distr -DskipTests -pl zeppelin-server,zeppelin-web,spark-submit,spark/scala-2.12,spark/scala-2.13,markdown,angular,shell -am -Pweb-classic -Phelium-dev -Pexamples -P${{ matrix.hadoop }} ${MAVEN_ARGS}
- name: install and test plugins
run: ./mvnw package -pl zeppelin-plugins -amd ${MAVEN_ARGS}
- name: Setup conda environment with python 3.9 and R
uses: conda-incubator/setup-miniconda@v3
with:
activate-environment: python_3_with_R
environment-file: testing/env_python_3.9_with_R.yml
python-version: 3.9
miniforge-variant: Mambaforge
channels: conda-forge,defaults
channel-priority: true
auto-activate-base: false
use-mamba: true
- name: Make IRkernel available to Jupyter
run: |
R -e "IRkernel::installspec()"
conda list
conda info
- name: run tests with ${{ matrix.hadoop }} # skip spark test because we would run them in other CI
run: ./mvnw verify -Pusing-packaged-distr -pl zeppelin-server,zeppelin-web,spark-submit,spark/scala-2.12,spark/scala-2.13,markdown,angular,shell -am -Pweb-classic -Phelium-dev -Pexamples -P${{ matrix.hadoop }} -Dtests.to.exclude=**/org/apache/zeppelin/spark/* -DfailIfNoTests=false
# test interpreter modules except spark, flink, python, rlang, jupyter
interpreter-test-non-core:
runs-on: ubuntu-20.04
strategy:
fail-fast: false
matrix:
java: [ 8, 11 ]
env:
INTERPRETERS: 'hbase,jdbc,file,flink-cmd,cassandra,elasticsearch,bigquery,alluxio,livy,groovy,java,neo4j,sparql,mongodb,influxdb,shell'
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Tune Runner VM
uses: ./.github/actions/tune-runner-vm
- name: Set up JDK ${{ matrix.java }}
uses: actions/setup-java@v4
with:
distribution: 'temurin'
java-version: ${{ matrix.java }}
- name: Cache local Maven repository
uses: actions/cache@v4
with:
path: |
~/.m2/repository
!~/.m2/repository/org/apache/zeppelin/
~/.spark-dist
~/.cache
key: ${{ runner.os }}-zeppelin-${{ hashFiles('**/pom.xml') }}
restore-keys: |
${{ runner.os }}-zeppelin-
- name: install environment
run: ./mvnw install -DskipTests -am -pl ${INTERPRETERS} ${MAVEN_ARGS}
- name: Setup conda environment with python 3.9 and R
uses: conda-incubator/setup-miniconda@v3
with:
activate-environment: python_3_with_R_and_tensorflow
environment-file: testing/env_python_3_with_R_and_tensorflow.yml
python-version: 3.9
miniforge-variant: Mambaforge
channels: conda-forge,defaults
channel-priority: true
auto-activate-base: false
use-mamba: true
- name: verify interpreter
run: ./mvnw verify -am -pl ${INTERPRETERS} ${MAVEN_ARGS}
# test interpreter modules for jupyter, python, rlang
interpreter-test-jupyter-python-rlang:
runs-on: ubuntu-20.04
strategy:
fail-fast: false
matrix:
python: [ 3.9 ]
java: [ 8, 11 ]
include:
- python: 3.7
java: 8
- python: 3.8
java: 8
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Tune Runner VM
uses: ./.github/actions/tune-runner-vm
- name: Set up JDK ${{ matrix.java }}
uses: actions/setup-java@v4
with:
distribution: 'temurin'
java-version: ${{ matrix.java }}
- name: Cache local Maven repository
uses: actions/cache@v4
with:
path: |
~/.m2/repository
!~/.m2/repository/org/apache/zeppelin/
~/.spark-dist
~/.cache
key: ${{ runner.os }}-zeppelin-${{ hashFiles('**/pom.xml') }}
restore-keys: |
${{ runner.os }}-zeppelin-
- name: Setup conda environment with python ${{ matrix.python }} and R
uses: conda-incubator/setup-miniconda@v3
with:
activate-environment: python_3_with_R
environment-file: testing/env_python_${{ matrix.python }}_with_R.yml
python-version: ${{ matrix.python }}
miniforge-variant: Mambaforge
channels: conda-forge,defaults
channel-priority: true
auto-activate-base: false
use-mamba: true
- name: Make IRkernel available to Jupyter
run: |
R -e "IRkernel::installspec()"
- name: install environment
run: |
./mvnw install -DskipTests -pl python,rlang,zeppelin-jupyter-interpreter -am -Phadoop3 ${MAVEN_ARGS}
- name: run tests with ${{ matrix.python }}
run: |
./mvnw test -pl python,rlang,zeppelin-jupyter-interpreter -DfailIfNoTests=false ${MAVEN_ARGS}
# zeppelin integration test except Spark & Flink
zeppelin-integration-test:
runs-on: ubuntu-20.04
strategy:
fail-fast: false
matrix:
java: [ 8, 11 ]
steps:
# user/password => root/root
- name: Start mysql
run: sudo systemctl start mysql.service
- name: Checkout
uses: actions/checkout@v4
- name: Tune Runner VM
uses: ./.github/actions/tune-runner-vm
- name: Set up JDK ${{ matrix.java }}
uses: actions/setup-java@v4
with:
distribution: 'temurin'
java-version: ${{ matrix.java }}
- name: Cache local Maven repository
uses: actions/cache@v4
with:
path: |
~/.m2/repository
!~/.m2/repository/org/apache/zeppelin/
~/.spark-dist
~/.cache
key: ${{ runner.os }}-zeppelin-${{ hashFiles('**/pom.xml') }}
restore-keys: |
${{ runner.os }}-zeppelin-
- name: install environment
run: |
./mvnw install -DskipTests -Phadoop3 -Pintegration -pl zeppelin-interpreter-integration,zeppelin-web,spark-submit,spark/scala-2.12,spark/scala-2.13,markdown,flink-cmd,flink/flink-scala-2.12,jdbc,shell -am -Pweb-classic -Pflink-117 ${MAVEN_ARGS}
./mvnw package -pl zeppelin-plugins -amd -DskipTests ${MAVEN_ARGS}
- name: Setup conda environment with python 3.9 and R
uses: conda-incubator/setup-miniconda@v3
with:
activate-environment: python_3_with_R
environment-file: testing/env_python_3_with_R.yml
python-version: 3.9
miniforge-variant: Mambaforge
channels: conda-forge,defaults
channel-priority: true
auto-activate-base: false
use-mamba: true
- name: Make IRkernel available to Jupyter
run: |
R -e "IRkernel::installspec()"
- name: run tests
run: ./mvnw test -pl zeppelin-interpreter-integration -Phadoop3 -Pintegration -DfailIfNoTests=false -Dtest=ZeppelinClientIntegrationTest,ZeppelinClientWithAuthIntegrationTest,ZSessionIntegrationTest,ShellIntegrationTest,JdbcIntegrationTest
- name: Print zeppelin logs
if: always()
run: if [ -d "logs" ]; then cat logs/*; fi
flink-test-and-flink-integration-test:
runs-on: ubuntu-20.04
strategy:
fail-fast: false
matrix:
python: [ 3.9 ]
flink: [116, 117]
include:
# Flink 1.15 supports Python 3.6, 3.7, and 3.8
# https://nightlies.apache.org/flink/flink-docs-release-1.15/docs/dev/python/installation/
- python: 3.8
flink: 115
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Tune Runner VM
uses: ./.github/actions/tune-runner-vm
- name: Set up JDK 8
uses: actions/setup-java@v4
with:
distribution: 'temurin'
java-version: 8
- name: Cache local Maven repository
uses: actions/cache@v4
with:
path: |
~/.m2/repository
!~/.m2/repository/org/apache/zeppelin/
~/.spark-dist
~/.cache
key: ${{ runner.os }}-zeppelin-${{ hashFiles('**/pom.xml') }}
restore-keys: |
${{ runner.os }}-zeppelin-
- name: install environment for flink
run: |
./mvnw install -DskipTests -am -pl flink/flink-scala-2.12,flink-cmd,zeppelin-interpreter-integration -Pflink-${{ matrix.flink }} -Phadoop3 -Pintegration ${MAVEN_ARGS}
./mvnw clean package -pl zeppelin-plugins -amd -DskipTests ${MAVEN_ARGS}
- name: Setup conda environment with python ${{ matrix.python }} and R
uses: conda-incubator/setup-miniconda@v3
with:
activate-environment: python_3_with_flink
environment-file: testing/env_python_3_with_flink_${{ matrix.flink }}.yml
python-version: ${{ matrix.python }}
miniforge-variant: Mambaforge
channels: conda-forge,defaults
channel-priority: true
auto-activate-base: false
use-mamba: true
- name: run tests for flink
run: ./mvnw verify -pl flink/flink-scala-2.12,flink-cmd,zeppelin-interpreter-integration -Pflink-${{ matrix.flink }} -am -Phadoop3 -Pintegration -DfailIfNoTests=false -Dtest=org.apache.zeppelin.flink.*Test,FlinkIntegrationTest${{ matrix.flink }} ${MAVEN_ARGS}
- name: Print zeppelin logs
if: always()
run: if [ -d "logs" ]; then cat logs/*; fi
spark-integration-test:
runs-on: ubuntu-20.04
strategy:
fail-fast: false
matrix:
hadoop: [ 3 ]
java: [ 8, 11 ]
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Tune Runner VM
uses: ./.github/actions/tune-runner-vm
- name: Set up JDK ${{ matrix.java }}
uses: actions/setup-java@v4
with:
distribution: 'temurin'
java-version: ${{ matrix.java }}
- name: Cache local Maven repository
uses: actions/cache@v4
with:
path: |
~/.m2/repository
!~/.m2/repository/org/apache/zeppelin/
~/.spark-dist
~/.cache
key: ${{ runner.os }}-zeppelin-${{ hashFiles('**/pom.xml') }}
restore-keys: |
${{ runner.os }}-zeppelin-
- name: install environment
run: |
./mvnw install -DskipTests -pl zeppelin-interpreter-integration,zeppelin-web,spark-submit,spark/scala-2.12,spark/scala-2.13,markdown -am -Pweb-classic -Phadoop3 -Pintegration ${MAVEN_ARGS}
./mvnw clean package -pl zeppelin-plugins -amd -DskipTests ${MAVEN_ARGS}
- name: Setup conda environment with python 3.9 and R
uses: conda-incubator/setup-miniconda@v3
with:
activate-environment: python_3_with_R
environment-file: testing/env_python_3_with_R.yml
python-version: 3.9
miniforge-variant: Mambaforge
channels: conda-forge,defaults
channel-priority: true
auto-activate-base: false
use-mamba: true
- name: Make IRkernel available to Jupyter
run: |
R -e "IRkernel::installspec()"
- name: run tests on hadoop${{ matrix.hadoop }}
run: ./mvnw test -pl zeppelin-interpreter-integration -Phadoop${{ matrix.hadoop }} -Pintegration -Dtest=SparkSubmitIntegrationTest,ZeppelinSparkClusterTest32,SparkIntegrationTest32,ZeppelinSparkClusterTest33,SparkIntegrationTest33 -DfailIfNoTests=false ${MAVEN_ARGS}
# test on spark for each spark version & scala version
spark-test:
runs-on: ubuntu-20.04
strategy:
fail-fast: false
matrix:
python: [ 3.9 ]
java: [ 8, 11 ]
include:
- python: 3.7
java: 8
- python: 3.8
java: 8
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Tune Runner VM
uses: ./.github/actions/tune-runner-vm
- name: Set up JDK ${{ matrix.java }}
uses: actions/setup-java@v4
with:
distribution: 'temurin'
java-version: ${{ matrix.java }}
- name: Cache local Maven repository
uses: actions/cache@v4
with:
path: |
~/.m2/repository
!~/.m2/repository/org/apache/zeppelin/
~/.spark-dist
~/.cache
key: ${{ runner.os }}-zeppelin-${{ hashFiles('**/pom.xml') }}
restore-keys: |
${{ runner.os }}-zeppelin-
- name: install environment
run: ./mvnw install -DskipTests -pl spark-submit,spark/scala-2.12,spark/scala-2.13 -am -Phadoop3 ${MAVEN_ARGS}
- name: Setup conda environment with python ${{ matrix.python }} and R
uses: conda-incubator/setup-miniconda@v3
with:
activate-environment: python_3_with_R
environment-file: testing/env_python_${{ matrix.python }}_with_R.yml
python-version: ${{ matrix.python }}
miniforge-variant: Mambaforge
channels: conda-forge,defaults
channel-priority: true
auto-activate-base: false
use-mamba: true
- name: Make IRkernel available to Jupyter
run: |
R -e "IRkernel::installspec()"
- name: run spark-3.3 tests with scala-2.12 and python-${{ matrix.python }}
run: |
rm -rf spark/interpreter/metastore_db
./mvnw verify -pl spark-submit,spark/interpreter -am -Dtest=org/apache/zeppelin/spark/* -Pspark-3.3 -Pspark-scala-2.12 -Phadoop3 -Pintegration -DfailIfNoTests=false ${MAVEN_ARGS}
- name: run spark-3.3 tests with scala-2.13 and python-${{ matrix.python }}
run: |
rm -rf spark/interpreter/metastore_db
./mvnw verify -pl spark-submit,spark/interpreter -am -Dtest=org/apache/zeppelin/spark/* -Pspark-3.3 -Pspark-scala-2.13 -Phadoop3 -Pintegration -DfailIfNoTests=false ${MAVEN_ARGS}
- name: run spark-3.4 tests with scala-2.13 and python-${{ matrix.python }}
run: |
rm -rf spark/interpreter/metastore_db
./mvnw verify -pl spark-submit,spark/interpreter -am -Dtest=org/apache/zeppelin/spark/* -Pspark-3.4 -Pspark-scala-2.13 -Phadoop3 -Pintegration -DfailIfNoTests=false ${MAVEN_ARGS}
- name: run spark-3.5 tests with scala-2.13 and python-${{ matrix.python }}
if: matrix.python >= '3.8'
run: |
rm -rf spark/interpreter/metastore_db
./mvnw verify -pl spark-submit,spark/interpreter -am -Dtest=org/apache/zeppelin/spark/* -Pspark-3.5 -Pspark-scala-2.13 -Phadoop3 -Pintegration -DfailIfNoTests=false ${MAVEN_ARGS}
# The version combination is based on the facts:
# 1. official Livy 0.8 binary tarball is built against Spark 2.4
# 2. official Spark 2.4 binary tarball is built against Scala 2.11
# 3. Spark 2.4 support Python 2.7, 3.4 to 3.7
livy-0-8-with-spark-2-4-under-python37:
runs-on: ubuntu-20.04
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Tune Runner VM
uses: ./.github/actions/tune-runner-vm
- name: Set up JDK 8
uses: actions/setup-java@v4
with:
distribution: 'temurin'
java-version: 8
- name: Cache local Maven repository
uses: actions/cache@v4
with:
path: |
~/.m2/repository
!~/.m2/repository/org/apache/zeppelin/
~/.spark-dist
~/.cache
key: ${{ runner.os }}-zeppelin-${{ hashFiles('**/pom.xml') }}
restore-keys: |
${{ runner.os }}-zeppelin-
- name: install environment
run: |
./mvnw install -DskipTests -pl livy -am ${MAVEN_ARGS}
./testing/downloadSpark.sh "2.4.8" "2.7"
./testing/downloadLivy.sh "0.8.0-incubating" "2.11"
- name: Setup conda environment with python 3.7 and R
uses: conda-incubator/setup-miniconda@v3
with:
activate-environment: python_37_with_R
environment-file: testing/env_python_3.7_with_R.yml
python-version: 3.7
miniforge-variant: Mambaforge
channels: conda-forge,defaults
channel-priority: true
auto-activate-base: false
use-mamba: true
- name: Make IRkernel available to Jupyter
run: |
R -e "IRkernel::installspec()"
- name: run tests
run: |
export SPARK_HOME=$PWD/spark-2.4.8-bin-hadoop2.7
export LIVY_HOME=$PWD/apache-livy-0.8.0-incubating_2.11-bin
./mvnw verify -pl livy -am ${MAVEN_ARGS}
default-build:
runs-on: ubuntu-20.04
strategy:
fail-fast: false
matrix:
java: [ 8, 11 ]
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Tune Runner VM
uses: ./.github/actions/tune-runner-vm
- name: Set up JDK ${{ matrix.java }}
uses: actions/setup-java@v4
with:
distribution: 'temurin'
java-version: ${{ matrix.java }}
- name: Cache local Maven repository
uses: actions/cache@v4
with:
path: |
~/.m2/repository
!~/.m2/repository/org/apache/zeppelin/
~/.spark-dist
~/.cache
key: ${{ runner.os }}-zeppelin-${{ hashFiles('**/pom.xml') }}
restore-keys: |
${{ runner.os }}-zeppelin-
- name: build without any profiles
run: ./mvnw clean verify -DskipTests ${MAVEN_ARGS}