From 29bd37b29351ac163b1c8d2d4c5fb3b73f5c3116 Mon Sep 17 00:00:00 2001 From: Yikun Jiang Date: Wed, 12 Oct 2022 11:45:50 +0800 Subject: [PATCH 1/5] Add K8s test --- .github/workflows/main.yml | 109 +++++++++++++++++++++++++++++++++---- 1 file changed, 98 insertions(+), 11 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 7972703..8a49b64 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -50,36 +50,36 @@ jobs: java_version: - ${{ inputs.java }} image_suffix: [python3-ubuntu, ubuntu, r-ubuntu, python3-r-ubuntu] + services: + registry: + image: registry:2 + ports: + - 5000:5000 steps: - name: Checkout Spark repository uses: actions/checkout@v2 - name: Set up QEMU - uses: docker/setup-qemu-action@v1 + uses: docker/setup-qemu-action@v2 - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v1 - - - name: Login to GHCR - uses: docker/login-action@v2 + uses: docker/setup-buildx-action@v2 with: - registry: ghcr.io - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} + driver-opts: network=host - name: Generate tags run: | TAG=scala${{ matrix.scala_version }}-java${{ matrix.java_version }}-${{ matrix.image_suffix }} REPO_OWNER=$(echo "${{ github.repository_owner }}" | tr '[:upper:]' '[:lower:]') - TEST_REPO=ghcr.io/$REPO_OWNER/spark-docker + TEST_REPO=localhost:5000/$REPO_OWNER/spark-docker IMAGE_NAME=spark IMAGE_PATH=${{ matrix.spark_version }}/$TAG UNIQUE_IMAGE_TAG=${{ matrix.spark_version }}-$TAG # Unique image tag in each version: scala2.12-java11-python3-ubuntu echo "UNIQUE_IMAGE_TAG=${UNIQUE_IMAGE_TAG}" >> $GITHUB_ENV - # Test repo: ghcr.io/apache/spark-docker + # Test repo: localhost:5000/apache/spark-docker echo "TEST_REPO=${TEST_REPO}" >> $GITHUB_ENV # Image name: spark echo "IMAGE_NAME=${IMAGE_NAME}" >> $GITHUB_ENV @@ -94,11 +94,98 @@ jobs: echo "IMAGE_PATH: "${IMAGE_PATH} - name: Build and push test image - uses: docker/build-push-action@v2 + uses: 
docker/build-push-action@v3 with: context: ${{ env.IMAGE_PATH }} tags: ${{ env.TEST_REPO }}/${{ env.IMAGE_NAME }}:${{ env.UNIQUE_IMAGE_TAG }} platforms: linux/amd64,linux/arm64 + push: true + + - name: Test - Checkout Spark repository + uses: actions/checkout@v2 + with: + fetch-depth: 0 + repository: apache/spark + ref: v${{ matrix.spark_version }} + path: ${{ github.workspace }}/spark + + - name: Test - Cherry pick commits + # Apache Spark enable resource limited k8s IT since v3.3.1, cherrpick patches for old release + # https://github.com/apache/spark/pull/36087#issuecomment-1251756266 + if: matrix.spark_version == '3.3.0' + working-directory: ${{ github.workspace }}/spark + run: | + # SPARK-38802: Add driverRequestCores/executorRequestCores supported + # https://github.com/apache/spark/commit/83963828b54bffe99527a004057272bc584cbc26 + git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' cherry-pick 83963828b54bffe99527a004057272bc584cbc26 + # SPARK-38803: Lower minio cpu to 250m + # https://github.com/apache/spark/commit/5ea2b386eb866e20540660cdb6ed43792cb29969 + git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' cherry-pick 5ea2b386eb866e20540660cdb6ed43792cb29969 + + - name: Test - Install Java ${{ inputs.java }} + uses: actions/setup-java@v1 + with: + java-version: ${{ matrix.java_version }} + + - name: Test - Cache Scala, SBT and Maven + uses: actions/cache@v2 + with: + path: | + build/apache-maven-* + build/scala-* + build/*.jar + ~/.sbt + key: build-${{ matrix.spark_version }}-scala${{ matrix.scala_version }}-java${{ matrix.java_version }} + + - name: Test - Cache Coursier local repository + uses: actions/cache@v2 + with: + path: ~/.cache/coursier + key: build-${{ matrix.spark_version }}-scala${{ matrix.scala_version }}-java${{ matrix.java_version }} + + - name: Test - Start minikube + run: | + # See more in "Installation" https://minikube.sigs.k8s.io/docs/start/ + curl -LO 
https://storage.googleapis.com/minikube/releases/latest/minikube-linux-amd64 + sudo install minikube-linux-amd64 /usr/local/bin/minikube + # GitHub Actions runners provide 2 CPUs and 6947MB of memory; limit minikube to 2 CPUs / 6GB for better resource statistics + minikube start --cpus 2 --memory 6144 + + - name: Test - Print K8S pods and nodes info + run: | + kubectl get pods -A + kubectl describe node + + - name: Test - Run Spark on K8S integration test (With driver cpu 0.5, executor cpu 0.2 limited) + working-directory: ${{ github.workspace }}/spark + run: | + kubectl create clusterrolebinding serviceaccounts-cluster-admin --clusterrole=cluster-admin --group=system:serviceaccounts || true + minikube image load ${{ env.TEST_REPO }}/${{ env.IMAGE_NAME }}:${{ env.UNIQUE_IMAGE_TAG }} + eval $(minikube docker-env) + OPTS="-Pkubernetes -Pkubernetes-integration-tests " + OPTS+="-Dspark.kubernetes.test.driverRequestCores=0.5 -Dspark.kubernetes.test.executorRequestCores=0.2 " + OPTS+="-Dspark.kubernetes.test.deployMode=minikube " + OPTS+="-Dspark.kubernetes.test.imageRepo=${TEST_REPO} -Dspark.kubernetes.test.imageTag=${UNIQUE_IMAGE_TAG} " + OPTS+="-Dspark.kubernetes.test.jvmImage=${IMAGE_NAME} " + + build/sbt $OPTS 'kubernetes-integration-tests/testOnly -- -z "Run SparkPi"' + + if echo ${{ matrix.image_suffix }} | grep -q "python"; then + OPTS+="-Dspark.kubernetes.test.pythonImage=${IMAGE_NAME} " + build/sbt $OPTS 'kubernetes-integration-tests/testOnly -- -z "Run PySpark"' + fi + + if echo ${{ matrix.image_suffix }} | grep -q "r-"; then + OPTS+="-Psparkr -Dtest.include.tags=r -Dspark.kubernetes.test.rImage=${IMAGE_NAME} " + build/sbt $OPTS 'kubernetes-integration-tests/testOnly' + fi + + - name: Test - Upload Spark on K8S integration tests log files + if: failure() + uses: actions/upload-artifact@v2 + with: + name: spark-on-kubernetes-it-log + path: "**/target/integration-tests.log" - name: Image digest run: echo ${{ steps.docker_build.outputs.digest }} From fb057a1abfd2d3e9b9b1cf72f607564ac20abd18 Mon
Sep 17 00:00:00 2001 From: Yikun Jiang Date: Wed, 12 Oct 2022 14:56:10 +0800 Subject: [PATCH 2/5] Use local build --- .github/workflows/main.yml | 54 +++++++++++++++++++++----------------- 1 file changed, 30 insertions(+), 24 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 8a49b64..86ef00b 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -50,36 +50,29 @@ jobs: java_version: - ${{ inputs.java }} image_suffix: [python3-ubuntu, ubuntu, r-ubuntu, python3-r-ubuntu] - services: - registry: - image: registry:2 - ports: - - 5000:5000 steps: - name: Checkout Spark repository uses: actions/checkout@v2 - name: Set up QEMU - uses: docker/setup-qemu-action@v2 + uses: docker/setup-qemu-action@v1 - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v2 - with: - driver-opts: network=host + uses: docker/setup-buildx-action@v1 - name: Generate tags run: | TAG=scala${{ matrix.scala_version }}-java${{ matrix.java_version }}-${{ matrix.image_suffix }} REPO_OWNER=$(echo "${{ github.repository_owner }}" | tr '[:upper:]' '[:lower:]') - TEST_REPO=localhost:5000/$REPO_OWNER/spark-docker + TEST_REPO=ghcr.io/$REPO_OWNER/spark-docker IMAGE_NAME=spark IMAGE_PATH=${{ matrix.spark_version }}/$TAG UNIQUE_IMAGE_TAG=${{ matrix.spark_version }}-$TAG # Unique image tag in each version: scala2.12-java11-python3-ubuntu echo "UNIQUE_IMAGE_TAG=${UNIQUE_IMAGE_TAG}" >> $GITHUB_ENV - # Test repo: localhost:5000/apache/spark-docker + # Test repo: ghcr.io/apache/spark-docker echo "TEST_REPO=${TEST_REPO}" >> $GITHUB_ENV # Image name: spark echo "IMAGE_NAME=${IMAGE_NAME}" >> $GITHUB_ENV @@ -93,13 +86,12 @@ jobs: echo "IMAGE_NAME: "${IMAGE_NAME} echo "IMAGE_PATH: "${IMAGE_PATH} - - name: Build and push test image - uses: docker/build-push-action@v3 + - name: Build image + uses: docker/build-push-action@v2 with: context: ${{ env.IMAGE_PATH }} tags: ${{ env.TEST_REPO }}/${{ env.IMAGE_NAME }}:${{ env.UNIQUE_IMAGE_TAG }} platforms: 
linux/amd64,linux/arm64 - push: true - name: Test - Checkout Spark repository uses: actions/checkout@v2 @@ -167,17 +159,31 @@ jobs: OPTS+="-Dspark.kubernetes.test.deployMode=minikube " OPTS+="-Dspark.kubernetes.test.imageRepo=${TEST_REPO} -Dspark.kubernetes.test.imageTag=${UNIQUE_IMAGE_TAG} " OPTS+="-Dspark.kubernetes.test.jvmImage=${IMAGE_NAME} " - - build/sbt $OPTS 'kubernetes-integration-tests/testOnly -- -z "Run SparkPi"' - - if echo ${{ matrix.image_suffix }} | grep -q "python"; then - OPTS+="-Dspark.kubernetes.test.pythonImage=${IMAGE_NAME} " - build/sbt $OPTS 'kubernetes-integration-tests/testOnly -- -z "Run PySpark"' - fi - - if echo ${{ matrix.image_suffix }} | grep -q "r-"; then - OPTS+="-Psparkr -Dtest.include.tags=r -Dspark.kubernetes.test.rImage=${IMAGE_NAME} " + OPTS+="-Dspark.kubernetes.test.pythonImage=${IMAGE_NAME} " + OPTS+="-Dspark.kubernetes.test.rImage=${IMAGE_NAME} " + + if echo ${{ matrix.image_suffix }} | grep -q "python3-r-ubuntu"; then + # Prepare PV test + PVC_TMP_DIR=$(mktemp -d) + export PVC_TESTS_HOST_PATH=$PVC_TMP_DIR + export PVC_TESTS_VM_PATH=$PVC_TMP_DIR + minikube mount ${PVC_TESTS_HOST_PATH}:${PVC_TESTS_VM_PATH} --gid=0 --uid=185 & + # Run all K8s test for all in one image build/sbt $OPTS 'kubernetes-integration-tests/testOnly' + else + # Run basic test for Scala/PySpark/SparkR image + build/sbt $OPTS 'kubernetes-integration-tests/testOnly -- -z "Run SparkPi"' + + # Run basic test for PySpark image + if echo ${{ matrix.image_suffix }} | grep -q "python"; then + build/sbt $OPTS 'kubernetes-integration-tests/testOnly -- -z "Run PySpark"' + fi + + # Run basic test for SparkR image + if echo ${{ matrix.image_suffix }} | grep -q "r-"; then + OPTS+="-Psparkr -Dtest.include.tags=r " + build/sbt $OPTS 'kubernetes-integration-tests/testOnly' + fi fi - name: Test - Upload Spark on K8S integration tests log files From b559b18bb8d457998abf5a4d4d647525ca1ba2e4 Mon Sep 17 00:00:00 2001 From: Yikun Jiang Date: Thu, 13 Oct 2022 11:52:01 +0800 
Subject: [PATCH 3/5] Fix deps test --- .github/workflows/main.yml | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 86ef00b..d65c289 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -69,8 +69,9 @@ jobs: IMAGE_NAME=spark IMAGE_PATH=${{ matrix.spark_version }}/$TAG UNIQUE_IMAGE_TAG=${{ matrix.spark_version }}-$TAG + IMAGE_URL=$TEST_REPO/$IMAGE_NAME:$UNIQUE_IMAGE_TAG - # Unique image tag in each version: scala2.12-java11-python3-ubuntu + # Unique image tag in each version: 3.3.0-scala2.12-java11-python3-ubuntu echo "UNIQUE_IMAGE_TAG=${UNIQUE_IMAGE_TAG}" >> $GITHUB_ENV # Test repo: ghcr.io/apache/spark-docker echo "TEST_REPO=${TEST_REPO}" >> $GITHUB_ENV @@ -78,6 +79,8 @@ jobs: echo "IMAGE_NAME=${IMAGE_NAME}" >> $GITHUB_ENV # Image dockerfile path: 3.3.0/scala2.12-java11-python3-ubuntu echo "IMAGE_PATH=${IMAGE_PATH}" >> $GITHUB_ENV + # Image URL: ghcr.io/apache/spark-docker/spark:3.3.0-scala2.12-java11-python3-ubuntu + echo "IMAGE_URL=${IMAGE_URL}" >> $GITHUB_ENV - name: Print Image tags run: | @@ -85,12 +88,13 @@ jobs: echo "TEST_REPO: "${TEST_REPO} echo "IMAGE_NAME: "${IMAGE_NAME} echo "IMAGE_PATH: "${IMAGE_PATH} + echo "IMAGE_URL: "${IMAGE_URL} - name: Build image uses: docker/build-push-action@v2 with: context: ${{ env.IMAGE_PATH }} - tags: ${{ env.TEST_REPO }}/${{ env.IMAGE_NAME }}:${{ env.UNIQUE_IMAGE_TAG }} + tags: ${{ env.IMAGE_URL }} platforms: linux/amd64,linux/arm64 - name: Test - Checkout Spark repository @@ -133,7 +137,7 @@ jobs: uses: actions/cache@v2 with: path: ~/.cache/coursier - key: build-${{ matrix.spark_version }}-scala${{ matrix.scala_version }}-java${{ matrix.java_version }} + key: build-${{ matrix.spark_version }}-scala${{ matrix.scala_version }}-java${{ matrix.java_version }}-coursier - name: Test - Start minikube run: | @@ -152,7 +156,8 @@ jobs: working-directory: ${{ github.workspace }}/spark run: | kubectl create 
clusterrolebinding serviceaccounts-cluster-admin --clusterrole=cluster-admin --group=system:serviceaccounts || true - minikube image load ${{ env.TEST_REPO }}/${{ env.IMAGE_NAME }}:${{ env.UNIQUE_IMAGE_TAG }} + minikube image load ${{ env.IMAGE_URL }} + eval $(minikube docker-env) OPTS="-Pkubernetes -Pkubernetes-integration-tests " OPTS+="-Dspark.kubernetes.test.driverRequestCores=0.5 -Dspark.kubernetes.test.executorRequestCores=0.2 " @@ -163,6 +168,11 @@ jobs: OPTS+="-Dspark.kubernetes.test.rImage=${IMAGE_NAME} " if echo ${{ matrix.image_suffix }} | grep -q "python3-r-ubuntu"; then + # Prepare test jar for client tests + CONTAINER_TMP_NAME=spark-example-image + docker create -ti --name $CONTAINER_TMP_NAME ${{ env.IMAGE_URL }} bash + docker cp $CONTAINER_TMP_NAME:/opt/spark/examples/jars/spark-examples_${{ matrix.scala_version }}-${{ matrix.spark_version }}.jar . + docker rm -f $CONTAINER_TMP_NAME # Prepare PV test PVC_TMP_DIR=$(mktemp -d) export PVC_TESTS_HOST_PATH=$PVC_TMP_DIR @@ -192,6 +202,3 @@ jobs: with: name: spark-on-kubernetes-it-log path: "**/target/integration-tests.log" - - - name: Image digest - run: echo ${{ steps.docker_build.outputs.digest }} From 7f00ff09b5bc32c500df04dc7700f7370c5ec8df Mon Sep 17 00:00:00 2001 From: Yikun Jiang Date: Thu, 13 Oct 2022 18:17:32 +0800 Subject: [PATCH 4/5] Add load and address comments --- .github/workflows/main.yml | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index d65c289..23d3ee6 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -55,10 +55,10 @@ jobs: uses: actions/checkout@v2 - name: Set up QEMU - uses: docker/setup-qemu-action@v1 + uses: docker/setup-qemu-action@v2 - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v1 + uses: docker/setup-buildx-action@v2 - name: Generate tags run: | @@ -96,9 +96,10 @@ jobs: context: ${{ env.IMAGE_PATH }} tags: ${{ env.IMAGE_URL }} platforms: 
linux/amd64,linux/arm64 + load: true - name: Test - Checkout Spark repository - uses: actions/checkout@v2 + uses: actions/checkout@v3 with: fetch-depth: 0 repository: apache/spark @@ -106,7 +107,7 @@ jobs: path: ${{ github.workspace }}/spark - name: Test - Cherry pick commits - # Apache Spark enable resource limited k8s IT since v3.3.1, cherrpick patches for old release + # Apache Spark enable resource limited k8s IT since v3.3.1, cherry-pick patches for old release # https://github.com/apache/spark/pull/36087#issuecomment-1251756266 if: matrix.spark_version == '3.3.0' working-directory: ${{ github.workspace }}/spark @@ -119,12 +120,12 @@ jobs: git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' cherry-pick 5ea2b386eb866e20540660cdb6ed43792cb29969 - name: Test - Install Java ${{ inputs.java }} - uses: actions/setup-java@v1 + uses: actions/setup-java@v3 with: java-version: ${{ matrix.java_version }} - name: Test - Cache Scala, SBT and Maven - uses: actions/cache@v2 + uses: actions/cache@v3 with: path: | build/apache-maven-* @@ -134,7 +135,7 @@ jobs: key: build-${{ matrix.spark_version }}-scala${{ matrix.scala_version }}-java${{ matrix.java_version }} - name: Test - Cache Coursier local repository - uses: actions/cache@v2 + uses: actions/cache@v3 with: path: ~/.cache/coursier key: build-${{ matrix.spark_version }}-scala${{ matrix.scala_version }}-java${{ matrix.java_version }}-coursier @@ -198,7 +199,7 @@ jobs: - name: Test - Upload Spark on K8S integration tests log files if: failure() - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: spark-on-kubernetes-it-log path: "**/target/integration-tests.log" From 4d81af14cf45f7e8e7ead6b293d7a479720722d2 Mon Sep 17 00:00:00 2001 From: Yikun Jiang Date: Thu, 13 Oct 2022 18:25:39 +0800 Subject: [PATCH 5/5] Add local registry --- .github/workflows/main.yml | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git 
a/.github/workflows/main.yml b/.github/workflows/main.yml index 23d3ee6..b47245b 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -41,6 +41,15 @@ on: jobs: main: runs-on: ubuntu-latest + # Multi-platform images cannot be exported with the `docker` export type, see + # https://github.com/docker/buildx/issues/59 + # so a local registry (push) is used here rather than a local build (load): + # https://github.com/docker/build-push-action/blob/master/docs/advanced/local-registry.md + services: + registry: + image: registry:2 + ports: + - 5000:5000 strategy: matrix: spark_version: @@ -59,13 +68,16 @@ jobs: - name: Set up Docker Buildx uses: docker/setup-buildx-action@v2 + with: + # This is required by the local registry + driver-opts: network=host - name: Generate tags run: | TAG=scala${{ matrix.scala_version }}-java${{ matrix.java_version }}-${{ matrix.image_suffix }} REPO_OWNER=$(echo "${{ github.repository_owner }}" | tr '[:upper:]' '[:lower:]') - TEST_REPO=ghcr.io/$REPO_OWNER/spark-docker + TEST_REPO=localhost:5000/$REPO_OWNER/spark-docker IMAGE_NAME=spark IMAGE_PATH=${{ matrix.spark_version }}/$TAG UNIQUE_IMAGE_TAG=${{ matrix.spark_version }}-$TAG @@ -90,13 +102,13 @@ jobs: echo "IMAGE_PATH: "${IMAGE_PATH} echo "IMAGE_URL: "${IMAGE_URL} - - name: Build image + - name: Build and push test image uses: docker/build-push-action@v2 with: context: ${{ env.IMAGE_PATH }} tags: ${{ env.IMAGE_URL }} platforms: linux/amd64,linux/arm64 - load: true + push: true - name: Test - Checkout Spark repository uses: actions/checkout@v3 @@ -122,6 +134,9 @@ jobs: - name: Test - Install Java ${{ inputs.java }} uses: actions/setup-java@v3 with: + # `distribution` is required since v2; keep the same distribution as v1 + # https://github.com/actions/setup-java/releases/tag/v2.0.0 + distribution: 'zulu' java-version: ${{ matrix.java_version }} - name: Test - Cache Scala, SBT and Maven