142 changes: 129 additions & 13 deletions .github/workflows/main.yml
@@ -41,6 +41,15 @@ on:
jobs:
main:
runs-on: ubuntu-latest
# Multi-platform images cannot be exported with the `docker` export type
# (https://github.com/docker/buildx/issues/59), so a local registry (push)
# is used here rather than a local build (load):
# https://github.com/docker/build-push-action/blob/master/docs/advanced/local-registry.md
services:
registry:
image: registry:2
ports:
- 5000:5000
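As a minimal local sketch of this workaround (the image name and build context below are placeholders, not part of this workflow):

    # Multi-arch builds cannot use `--load`, so push to a throwaway local registry.
    docker run -d -p 5000:5000 --name registry registry:2
    # The builder container must be able to reach localhost:5000, hence network=host.
    docker buildx create --driver-opt network=host --use
    # Build both platforms and push to the local registry.
    docker buildx build --platform linux/amd64,linux/arm64 \
      -t localhost:5000/spark:test --push .
    # Inspect the resulting multi-arch manifest list.
    docker buildx imagetools inspect localhost:5000/spark:test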
strategy:
matrix:
spark_version:
@@ -55,50 +64,157 @@ jobs:
uses: actions/checkout@v2

      - name: Set up QEMU
-       uses: docker/setup-qemu-action@v1
+       uses: docker/setup-qemu-action@v2

      - name: Set up Docker Buildx
-       uses: docker/setup-buildx-action@v1
-
-     - name: Login to GHCR
-       uses: docker/login-action@v2
+       uses: docker/setup-buildx-action@v2
        with:
-         registry: ghcr.io
-         username: ${{ github.actor }}
-         password: ${{ secrets.GITHUB_TOKEN }}
+         # This is required by the local registry
+         driver-opts: network=host

- name: Generate tags
run: |
TAG=scala${{ matrix.scala_version }}-java${{ matrix.java_version }}-${{ matrix.image_suffix }}

REPO_OWNER=$(echo "${{ github.repository_owner }}" | tr '[:upper:]' '[:lower:]')
-         TEST_REPO=ghcr.io/$REPO_OWNER/spark-docker
+         TEST_REPO=localhost:5000/$REPO_OWNER/spark-docker
IMAGE_NAME=spark
IMAGE_PATH=${{ matrix.spark_version }}/$TAG
UNIQUE_IMAGE_TAG=${{ matrix.spark_version }}-$TAG
IMAGE_URL=$TEST_REPO/$IMAGE_NAME:$UNIQUE_IMAGE_TAG

-         # Unique image tag in each version: scala2.12-java11-python3-ubuntu
+         # Unique image tag in each version: 3.3.0-scala2.12-java11-python3-ubuntu
echo "UNIQUE_IMAGE_TAG=${UNIQUE_IMAGE_TAG}" >> $GITHUB_ENV
# Test repo: localhost:5000/apache/spark-docker
echo "TEST_REPO=${TEST_REPO}" >> $GITHUB_ENV
# Image name: spark
echo "IMAGE_NAME=${IMAGE_NAME}" >> $GITHUB_ENV
# Image dockerfile path: 3.3.0/scala2.12-java11-python3-ubuntu
echo "IMAGE_PATH=${IMAGE_PATH}" >> $GITHUB_ENV
# Image URL: localhost:5000/apache/spark-docker/spark:3.3.0-scala2.12-java11-python3-ubuntu
echo "IMAGE_URL=${IMAGE_URL}" >> $GITHUB_ENV

- name: Print Image tags
run: |
echo "UNIQUE_IMAGE_TAG: "${UNIQUE_IMAGE_TAG}
echo "TEST_REPO: "${TEST_REPO}
echo "IMAGE_NAME: "${IMAGE_NAME}
echo "IMAGE_PATH: "${IMAGE_PATH}
echo "IMAGE_URL: "${IMAGE_URL}

- name: Build and push test image
uses: docker/build-push-action@v2
with:
context: ${{ env.IMAGE_PATH }}
-         tags: ${{ env.TEST_REPO }}/${{ env.IMAGE_NAME }}:${{ env.UNIQUE_IMAGE_TAG }}
+         tags: ${{ env.IMAGE_URL }}
platforms: linux/amd64,linux/arm64
push: true
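For reference, this step is roughly equivalent to the following buildx invocation (a sketch of what build-push-action drives, not its exact internals):

    docker buildx build \
      --platform linux/amd64,linux/arm64 \
      --tag "$IMAGE_URL" \
      --push \
      "$IMAGE_PATH"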

- name: Test - Checkout Spark repository
uses: actions/checkout@v3
with:
fetch-depth: 0
repository: apache/spark
ref: v${{ matrix.spark_version }}
path: ${{ github.workspace }}/spark

- name: Test - Cherry pick commits
# Apache Spark has enabled resource-limited K8s integration tests since v3.3.1; cherry-pick the patches for this older release
# https://github.com/apache/spark/pull/36087#issuecomment-1251756266
if: matrix.spark_version == '3.3.0'
working-directory: ${{ github.workspace }}/spark
run: |
# SPARK-38802: Add driverRequestCores/executorRequestCores support
# https://github.com/apache/spark/commit/83963828b54bffe99527a004057272bc584cbc26
git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' cherry-pick 83963828b54bffe99527a004057272bc584cbc26
# SPARK-38803: Lower minio cpu to 250m
# https://github.com/apache/spark/commit/5ea2b386eb866e20540660cdb6ed43792cb29969
git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' cherry-pick 5ea2b386eb866e20540660cdb6ed43792cb29969

- name: Test - Install Java ${{ matrix.java_version }}
uses: actions/setup-java@v3
with:
# 'distribution' is required since v2; keep the same distribution as v1 used
# https://github.com/actions/setup-java/releases/tag/v2.0.0
distribution: 'zulu'
java-version: ${{ matrix.java_version }}

- name: Test - Cache Scala, SBT and Maven
uses: actions/cache@v3
with:
path: |
build/apache-maven-*
build/scala-*
build/*.jar
~/.sbt
key: build-${{ matrix.spark_version }}-scala${{ matrix.scala_version }}-java${{ matrix.java_version }}

- name: Test - Cache Coursier local repository
uses: actions/cache@v3
with:
path: ~/.cache/coursier
key: build-${{ matrix.spark_version }}-scala${{ matrix.scala_version }}-java${{ matrix.java_version }}-coursier

-     - name: Image digest
-       run: echo ${{ steps.docker_build.outputs.digest }}
- name: Test - Start minikube
run: |
# See more in "Installation" https://minikube.sigs.k8s.io/docs/start/
curl -LO https://storage.googleapis.com/minikube/releases/latest/minikube-linux-amd64
sudo install minikube-linux-amd64 /usr/local/bin/minikube
# GitHub Actions runners are limited to 2 CPUs and 6947 MB of memory; cap minikube at 2 CPUs / 6 GB for more predictable resource accounting
minikube start --cpus 2 --memory 6144

- name: Test - Print K8S pods and nodes info
run: |
kubectl get pods -A
kubectl describe node

- name: Test - Run Spark on K8S integration test (with driver CPU limited to 0.5, executor CPU to 0.2)
working-directory: ${{ github.workspace }}/spark
run: |
kubectl create clusterrolebinding serviceaccounts-cluster-admin --clusterrole=cluster-admin --group=system:serviceaccounts || true
minikube image load ${{ env.IMAGE_URL }}

eval $(minikube docker-env)
OPTS="-Pkubernetes -Pkubernetes-integration-tests "
OPTS+="-Dspark.kubernetes.test.driverRequestCores=0.5 -Dspark.kubernetes.test.executorRequestCores=0.2 "
OPTS+="-Dspark.kubernetes.test.deployMode=minikube "
OPTS+="-Dspark.kubernetes.test.imageRepo=${TEST_REPO} -Dspark.kubernetes.test.imageTag=${UNIQUE_IMAGE_TAG} "
OPTS+="-Dspark.kubernetes.test.jvmImage=${IMAGE_NAME} "
OPTS+="-Dspark.kubernetes.test.pythonImage=${IMAGE_NAME} "
OPTS+="-Dspark.kubernetes.test.rImage=${IMAGE_NAME} "

if echo ${{ matrix.image_suffix }} | grep -q "python3-r-ubuntu"; then
# Prepare test jar for client tests
CONTAINER_TMP_NAME=spark-example-image
docker create -ti --name $CONTAINER_TMP_NAME ${{ env.IMAGE_URL }} bash
docker cp $CONTAINER_TMP_NAME:/opt/spark/examples/jars/spark-examples_${{ matrix.scala_version }}-${{ matrix.spark_version }}.jar .
docker rm -f $CONTAINER_TMP_NAME
# Prepare PV test
PVC_TMP_DIR=$(mktemp -d)
export PVC_TESTS_HOST_PATH=$PVC_TMP_DIR
export PVC_TESTS_VM_PATH=$PVC_TMP_DIR
minikube mount ${PVC_TESTS_HOST_PATH}:${PVC_TESTS_VM_PATH} --gid=0 --uid=185 &
# Run all K8s tests for the all-in-one image
build/sbt $OPTS 'kubernetes-integration-tests/testOnly'
else
# Run basic test for Scala/PySpark/SparkR image
build/sbt $OPTS 'kubernetes-integration-tests/testOnly -- -z "Run SparkPi"'

# Run basic test for PySpark image
if echo ${{ matrix.image_suffix }} | grep -q "python"; then
build/sbt $OPTS 'kubernetes-integration-tests/testOnly -- -z "Run PySpark"'
fi

# Run basic test for SparkR image
if echo ${{ matrix.image_suffix }} | grep -q "r-"; then
OPTS+="-Psparkr -Dtest.include.tags=r "
build/sbt $OPTS 'kubernetes-integration-tests/testOnly'
fi
fi
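With OPTS expanded for the example matrix entry above, the basic SparkPi run amounts to roughly:

    build/sbt -Pkubernetes -Pkubernetes-integration-tests \
      -Dspark.kubernetes.test.driverRequestCores=0.5 \
      -Dspark.kubernetes.test.executorRequestCores=0.2 \
      -Dspark.kubernetes.test.deployMode=minikube \
      -Dspark.kubernetes.test.imageRepo=localhost:5000/apache/spark-docker \
      -Dspark.kubernetes.test.imageTag=3.3.0-scala2.12-java11-python3-ubuntu \
      -Dspark.kubernetes.test.jvmImage=spark \
      -Dspark.kubernetes.test.pythonImage=spark \
      -Dspark.kubernetes.test.rImage=spark \
      'kubernetes-integration-tests/testOnly -- -z "Run SparkPi"'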

- name: Test - Upload Spark on K8S integration tests log files
if: failure()
uses: actions/upload-artifact@v3
with:
name: spark-on-kubernetes-it-log
path: "**/target/integration-tests.log"