diff --git a/.github/workflows/velox_be.yml b/.github/workflows/velox_be.yml index 18cf02db5520b..d0da4451caa4b 100644 --- a/.github/workflows/velox_be.yml +++ b/.github/workflows/velox_be.yml @@ -111,312 +111,6 @@ jobs: run: | $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/clean.sh - ubuntu2004-test-spark32-slow: - runs-on: velox-self-hosted - env: - OS_IMAGE_NAME: ubuntu - OS_IMAGE_TAG: 20.04 - steps: - - uses: actions/checkout@v4 - - name: Setup docker container - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/checkout.sh - - name: Build Gluten velox third party - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh ' - cd /opt/gluten/ep/build-velox/src && \ - ./get_velox.sh && \ - ./build_velox.sh --run_setup_script=ON --enable_ep_cache=OFF' - - name: Build Gluten CPP library - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh ' - cd /opt/gluten/cpp && \ - ./compile.sh --build_velox_backend=ON' - - name: Build and run unit test for Spark 3.2.2 (slow tests) - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh ' - cd /opt/gluten && \ - mvn clean install -Pspark-3.2 -Pspark-ut -Pbackends-velox -Prss -Piceberg -Pdelta -DargLine="-Dspark.test.home=/opt/spark322" -DtagsToInclude=org.apache.spark.tags.ExtendedSQLTest' - - name: TPC-H SF1.0 && TPC-DS SF1.0 Parquet local spark3.2 - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh 'cd /opt/gluten/tools/gluten-it && \ - mvn clean install -Pspark-3.2 \ - && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \ - --local --preset=velox --benchmark-type=h --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \ - && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \ - --local --preset=velox --benchmark-type=ds --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1' - - name: Exit docker container - if: ${{ always() }} - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/clean.sh - - ubuntu2004-test-spark33-slow: - runs-on: velox-self-hosted - env: - OS_IMAGE_NAME: ubuntu - OS_IMAGE_TAG: 20.04 - steps: - - uses: actions/checkout@v4 - - name: Setup docker container - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/checkout.sh - - name: Build Gluten velox third party - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh ' - cd /opt/gluten/ep/build-velox/src && \ - ./get_velox.sh && \ - ./build_velox.sh --run_setup_script=ON --enable_ep_cache=OFF' - - name: Build Gluten CPP library - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh ' - cd /opt/gluten/cpp && \ - ./compile.sh --build_velox_backend=ON' - - name: Build and Run unit test for Spark 3.3.1 (slow tests) - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh 'cd /opt/gluten && \ - mvn clean install -Pspark-3.3 -Pbackends-velox -Prss -Piceberg -Pdelta -Pspark-ut -DargLine="-Dspark.test.home=/opt/spark331" -DtagsToInclude=org.apache.spark.tags.ExtendedSQLTest' - - name: TPC-H SF1.0 && TPC-DS SF1.0 Parquet local spark3.3 - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh 'cd /opt/gluten/tools/gluten-it && \ - mvn clean install -Pspark-3.3 \ - && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \ - --local --preset=velox --benchmark-type=h --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \ - && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \ - --local --preset=velox --benchmark-type=ds --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1' - - name: TPC-H SF1.0 && TPC-DS SF1.0 Parquet local spark3.3 Q38 flush - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh 'cd /opt/gluten/tools/gluten-it \ - && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \ - --local --preset=velox --benchmark-type=ds --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 --queries=q38 \ - --disable-bhj \ - --extra-conf=spark.gluten.sql.columnar.backend.velox.maxPartialAggregationMemoryRatio=0.1 \ - --extra-conf=spark.gluten.sql.columnar.backend.velox.maxExtendedPartialAggregationMemoryRatio=0.2 \ - --extra-conf=spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinPct=100 \ - --extra-conf=spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinRows=0' - - name: Exit docker container - if: ${{ always() }} - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/clean.sh - - ubuntu2004-test-spark33: - runs-on: velox-self-hosted - env: - OS_IMAGE_NAME: ubuntu - OS_IMAGE_TAG: 20.04 - steps: - - uses: actions/checkout@v4 - - name: Setup docker container - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/checkout.sh - - name: Build Gluten velox third party - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh ' - cd /opt/gluten/ep/build-velox/src && \ - ./get_velox.sh && \ - ./build_velox.sh --run_setup_script=ON --enable_ep_cache=OFF' - - name: Build Gluten CPP library - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh ' - cd /opt/gluten/cpp && \ - ./compile.sh --build_velox_backend=ON --build_examples=ON' - - name: Build and Run unit test for Spark 3.3.1 (other tests) - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh 'cd /opt/gluten && \ - mvn clean install -Pspark-3.3 -Pbackends-velox -Prss -Piceberg -Pdelta -Pspark-ut -DargLine="-Dspark.test.home=/opt/spark331" -DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,io.glutenproject.tags.UDFTest,io.glutenproject.tags.SkipTestTags && \ - mvn test -Pspark-3.3 -Pbackends-velox -DtagsToExclude=None -DtagsToInclude=io.glutenproject.tags.UDFTest' - - name: Copy golden files from container to host - if: failure() - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/cp.sh /tmp/tpch-approved-plan/ /tmp/$GITHUB_RUN_ID/spark33/tpch-approved-plan - - name: Upload golden files - if: failure() - uses: actions/upload-artifact@v4 - with: - name: golden-files-spark33 - path: | - /tmp/${{ github.run_id }}/spark33/tpch-approved-plan/** - - name: Clean temp golden files - run: | - rm -rf /tmp/$GITHUB_RUN_ID/spark33 - - name: Exit docker container - if: ${{ always() }} - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/clean.sh - - ubuntu2004-test-spark34-slow: - runs-on: velox-self-hosted - env: - OS_IMAGE_NAME: ubuntu - OS_IMAGE_TAG: 20.04 - steps: - - uses: actions/checkout@v4 - - name: Setup docker container - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/checkout.sh - - name: Build Gluten velox third party - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh ' - cd /opt/gluten/ep/build-velox/src && \ - ./get_velox.sh && \ - ./build_velox.sh --run_setup_script=ON --enable_ep_cache=OFF' - - name: Build Gluten CPP library - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh ' - cd /opt/gluten/cpp && \ - ./compile.sh --build_velox_backend=ON ' - - name: Build and Run unit test for Spark 3.4.2 (slow tests) - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh 'cd /opt/gluten && \ - mvn clean install -Pspark-3.4 -Pbackends-velox -Prss -Piceberg -Pdelta -Pspark-ut -DargLine="-Dspark.test.home=/opt/spark342" -DtagsToInclude=org.apache.spark.tags.ExtendedSQLTest' - - name: TPC-H SF1.0 && TPC-DS SF1.0 Parquet local spark3.4 - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh 'cd /opt/gluten/tools/gluten-it && \ - mvn clean install -Pspark-3.4 \ - && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \ - --local --preset=velox --benchmark-type=h --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \ - && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \ - --local --preset=velox --benchmark-type=ds --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1' - - name: Exit docker container - if: ${{ always() }} - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/clean.sh - - ubuntu2004-test-spark34: - runs-on: velox-self-hosted - env: - OS_IMAGE_NAME: ubuntu - OS_IMAGE_TAG: 20.04 - steps: - - uses: actions/checkout@v4 - - name: Setup docker container - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/checkout.sh - - name: Build Gluten velox third party - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh ' - cd /opt/gluten/ep/build-velox/src && \ - ./get_velox.sh && \ - ./build_velox.sh --run_setup_script=ON --enable_ep_cache=OFF' - - name: Build Gluten CPP library - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh ' - cd /opt/gluten/cpp && \ - ./compile.sh --build_velox_backend=ON --build_examples=ON' - - name: Build and Run unit test for Spark 3.4.2 (other tests) - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh 'cd /opt/gluten && \ - mvn clean install -Pspark-3.4 -Pbackends-velox -Prss -Piceberg -Pdelta -Pspark-ut -DargLine="-Dspark.test.home=/opt/spark342" -DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,io.glutenproject.tags.UDFTest,io.glutenproject.tags.SkipTestTags && \ - mvn test -Pspark-3.4 -Pbackends-velox -DtagsToExclude=None -DtagsToInclude=io.glutenproject.tags.UDFTest' - - name: Copy golden files from container to host - if: failure() - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/cp.sh /tmp/tpch-approved-plan/ /tmp/$GITHUB_RUN_ID/spark34/tpch-approved-plan - - name: Upload golden files - if: failure() - uses: actions/upload-artifact@v4 - with: - name: golden-files-spark34 - path: | - /tmp/${{ github.run_id }}/spark34/tpch-approved-plan/** - - name: Clean temp golden files - run: | - rm -rf /tmp/$GITHUB_RUN_ID/spark34 - - name: Exit docker container - if: ${{ always() }} - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/clean.sh - - ubuntu2004-test-spark35: - runs-on: velox-self-hosted - env: - OS_IMAGE_NAME: ubuntu - OS_IMAGE_TAG: 20.04 - steps: - - uses: actions/checkout@v4 - - name: Setup docker container - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/checkout.sh - - name: Build Gluten velox third party - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh ' - cd /opt/gluten/ep/build-velox/src && \ - ./get_velox.sh && \ - ./build_velox.sh --run_setup_script=ON --enable_ep_cache=OFF' - - name: Build Gluten CPP library - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh ' - cd /opt/gluten/cpp && \ - ./compile.sh --build_velox_backend=ON ' - - name: Build for Spark 3.5 - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh ' - cd /opt/gluten && \ - mvn clean install -Pspark-3.5 -Pbackends-velox -Prss -DskipTests' - - name: TPC-H SF1.0 && TPC-DS SF1.0 Parquet local spark3.5 - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh 'cd /opt/gluten/tools/gluten-it && \ - mvn clean install -Pspark-3.5 \ - && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \ - --local --preset=velox --benchmark-type=h --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \ - && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \ - --local --preset=velox --benchmark-type=ds --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1' - - name: Exit docker container - if: ${{ always() }} - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/clean.sh - - ubuntu2204-test-spark33-spark34: - runs-on: velox-self-hosted - env: - OS_IMAGE_NAME: ubuntu - OS_IMAGE_TAG: 22.04 - steps: - - uses: actions/checkout@v4 - - name: Setup docker container - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/checkout.sh - - name: Build Gluten velox third party - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh ' - cd /opt/gluten/ep/build-velox/src && \ - ./get_velox.sh --enable_hdfs=ON --enable_s3=ON --enable_gcs=ON --enable_abfs=ON && \ - ./build_velox.sh --run_setup_script=ON --enable_ep_cache=OFF --enable_hdfs=ON --enable_s3=ON --enable_gcs=ON --enable_abfs=ON' - - name: Build Gluten CPP library - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh ' - cd /opt/gluten/cpp && \ - ./compile.sh --build_velox_backend=ON --enable_hdfs=ON --enable_s3=ON --enable_gcs=ON --enable_abfs=ON' - - name: Build for Spark 3.3.1 - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh ' - cd /opt/gluten && \ - mvn clean install -Pspark-3.3 -Pbackends-velox -Prss -Piceberg -Pdelta -DskipTests' - - name: TPC-H SF1.0 && TPC-DS SF10.0 Parquet local spark3.3 - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh 'cd /opt/gluten/tools/gluten-it && \ - mvn clean install -Pspark-3.3 \ - && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \ - --local --preset=velox --benchmark-type=h --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \ - && GLUTEN_IT_JVM_ARGS=-Xmx20G sbin/gluten-it.sh queries-compare \ - --local --preset=velox --benchmark-type=ds --error-on-memleak --off-heap-size=30g -s=10.0 --threads=32 --iterations=1' - - name: Build for Spark 3.4.2 - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh ' - cd /opt/gluten && \ - mvn clean install -Pspark-3.4 -Pbackends-velox -Prss -Piceberg -Pdelta -DskipTests' - - name: TPC-H SF1.0 && TPC-DS SF10.0 Parquet local spark3.4 - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh 'cd /opt/gluten/tools/gluten-it && \ - mvn clean install -Pspark-3.4 \ - && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \ - --local --preset=velox --benchmark-type=h --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \ - && GLUTEN_IT_JVM_ARGS=-Xmx20G sbin/gluten-it.sh queries-compare \ - --local --preset=velox --benchmark-type=ds --error-on-memleak --off-heap-size=30g -s=10.0 --threads=32 --iterations=1' - - name: Exit docker container - if: ${{ always() }} - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/clean.sh - ubuntu2204-test: runs-on: velox-self-hosted env: @@ -514,216 +208,4 @@ jobs: - name: Exit docker container if: ${{ always() }} run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/clean.sh - - centos8-test: - runs-on: velox-self-hosted - env: - OS_IMAGE_NAME: centos - OS_IMAGE_TAG: 8 - steps: - - uses: actions/checkout@v4 - - name: Setup docker container - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/checkout.sh - - name: Build Gluten velox third party - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh ' - source /env.sh && \ - sudo yum -y install patch && \ - cd /opt/gluten/ep/build-velox/src && \ - ./get_velox.sh --enable_hdfs=ON --enable_s3=ON --enable_gcs=ON --enable_abfs=ON && \ - ./build_velox.sh --run_setup_script=ON --enable_ep_cache=OFF --enable_hdfs=ON --enable_s3=ON --enable_gcs=ON --enable_abfs=ON' - - name: Build Gluten CPP library - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh ' - source /env.sh && \ - cd /opt/gluten/cpp && \ - ./compile.sh --build_velox_backend=ON --enable_hdfs=ON --enable_s3=ON --enable_gcs=ON --enable_abfs=ON' - - name: Build for Spark 3.2.2 - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh ' - cd /opt/gluten && \ - mvn clean install -Pspark-3.2 -Pbackends-velox -Prss -Piceberg -Pdelta -DskipTests' - - name: TPC-H SF1.0 && TPC-DS SF30.0 Parquet local spark3.2 - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh 'cd /opt/gluten/tools/gluten-it && \ - mvn clean install -Pspark-3.2 \ - && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \ - --local --preset=velox --benchmark-type=h --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \ - && GLUTEN_IT_JVM_ARGS=-Xmx50G sbin/gluten-it.sh queries-compare \ - --local --preset=velox --benchmark-type=ds --error-on-memleak --off-heap-size=50g -s=30.0 --threads=32 --iterations=1' - - name: TPC-H SF1.0 && TPC-DS SF30.0 Parquet local spark3.2 random kill tasks - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh 'cd /opt/gluten/tools/gluten-it && \ - mvn clean install -Pspark-3.2 \ - && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries \ - --local --preset=velox --benchmark-type=h --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 --skip-data-gen --random-kill-tasks \ - && GLUTEN_IT_JVM_ARGS=-Xmx50G sbin/gluten-it.sh queries \ - --local --preset=velox --benchmark-type=ds --error-on-memleak --off-heap-size=50g -s=30.0 --threads=32 --iterations=1 --skip-data-gen --random-kill-tasks' - - name: Exit docker container - if: ${{ always() }} - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/clean.sh - - centos7-test: - runs-on: velox-self-hosted - env: - OS_IMAGE_NAME: centos - OS_IMAGE_TAG: 7 - steps: - - uses: actions/checkout@v4 - - name: Setup docker container - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/checkout.sh - - name: Build Gluten velox third party - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh ' - yum -y install epel-release centos-release-scl patch sudo && \ - cd /opt/gluten/ep/build-velox/src && \ - source /opt/rh/devtoolset-9/enable && \ - ./get_velox.sh --enable_hdfs=ON --enable_s3=ON --enable_gcs=ON --enable_abfs=ON && \ - ./build_velox.sh --run_setup_script=ON --enable_ep_cache=OFF --enable_s3=ON --enable_gcs=ON --enable_abfs=ON --enable_hdfs=ON' - - name: Build Gluten CPP library - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh ' - cd /opt/gluten/cpp && \ - source /opt/rh/devtoolset-9/enable && \ - ./compile.sh --build_velox_backend=ON --enable_hdfs=ON --enable_s3=ON --enable_gcs=ON --enable_abfs=ON' - - name: Build for Spark 3.2.2 - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh ' - cd /opt/gluten && \ - mvn clean install -Pspark-3.2 -Pbackends-velox -Prss -Piceberg -Pdelta -DskipTests' - - name: TPC-H SF1.0 && TPC-DS SF30.0 Parquet local spark3.2 - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh 'cd /opt/gluten/tools/gluten-it && \ - mvn clean install -Pspark-3.2 \ - && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \ - --local --preset=velox --benchmark-type=h --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \ - && GLUTEN_IT_JVM_ARGS=-Xmx50G sbin/gluten-it.sh queries-compare \ - --local --preset=velox --benchmark-type=ds --error-on-memleak --off-heap-size=50g -s=30.0 --threads=32 --iterations=1' - - name: TPC-DS SF30.0 Parquet local spark3.2 Q67/Q95 low memory, memory isolation off - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh 'cd /opt/gluten/tools/gluten-it && \ - mvn clean install -Pspark-3.2 \ - && GLUTEN_IT_JVM_ARGS=-Xmx50G sbin/gluten-it.sh parameterized \ - --local --preset=velox --benchmark-type=ds --error-on-memleak --queries=q67,q95 -s=30.0 --threads=12 --shuffle-partitions=72 --iterations=1 \ - --skip-data-gen -m=OffHeapExecutionMemory \ - -d=ISOLATION:OFF,spark.gluten.memory.isolation=false \ - -d=OFFHEAP_SIZE:5g,spark.memory.offHeap.size=5g \ - -d=OFFHEAP_SIZE:3g,spark.memory.offHeap.size=3g \ - -d=OVER_ACQUIRE:0.3,spark.gluten.memory.overAcquiredMemoryRatio=0.3 \ - -d=OVER_ACQUIRE:0.5,spark.gluten.memory.overAcquiredMemoryRatio=0.5' - - name: (To be fixed) TPC-DS SF30.0 Parquet local spark3.2 Q67/Q95 low memory, memory isolation on - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh 'cd /opt/gluten/tools/gluten-it && \ - mvn clean install -Pspark-3.2 \ - && GLUTEN_IT_JVM_ARGS=-Xmx50G sbin/gluten-it.sh parameterized \ - --local --preset=velox --benchmark-type=ds --error-on-memleak --queries=q67,q95 -s=30.0 --threads=12 --shuffle-partitions=72 --iterations=1 \ - --skip-data-gen -m=OffHeapExecutionMemory \ - -d=ISOLATION:ON,spark.gluten.memory.isolation=true,spark.memory.storageFraction=0.1 \ - -d=OFFHEAP_SIZE:5g,spark.memory.offHeap.size=5g \ - -d=OFFHEAP_SIZE:3g,spark.memory.offHeap.size=3g \ - -d=OVER_ACQUIRE:0.3,spark.gluten.memory.overAcquiredMemoryRatio=0.3 \ - -d=OVER_ACQUIRE:0.5,spark.gluten.memory.overAcquiredMemoryRatio=0.5' || true - - name: TPC-DS SF30.0 Parquet local spark3.2 Q23A/Q23B low memory - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh 'cd /opt/gluten/tools/gluten-it && \ - GLUTEN_IT_JVM_ARGS=-Xmx50G sbin/gluten-it.sh parameterized \ - --local --preset=velox --benchmark-type=ds --error-on-memleak --queries=q23a,q23b -s=30.0 --threads=12 --shuffle-partitions=72 --iterations=1 \ - --skip-data-gen -m=OffHeapExecutionMemory \ - -d=ISOLATION:OFF,spark.gluten.memory.isolation=false \ - -d=ISOLATION:ON,spark.gluten.memory.isolation=true,spark.memory.storageFraction=0.1 \ - -d=OFFHEAP_SIZE:2g,spark.memory.offHeap.size=2g \ - -d=FLUSH_MODE:DISABLED,spark.gluten.sql.columnar.backend.velox.flushablePartialAggregation=false,spark.gluten.sql.columnar.backend.velox.maxPartialAggregationMemoryRatio=1.0,spark.gluten.sql.columnar.backend.velox.maxExtendedPartialAggregationMemoryRatio=1.0,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinPct=100,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinRows=0 \ - -d=FLUSH_MODE:ABANDONED,spark.gluten.sql.columnar.backend.velox.maxPartialAggregationMemoryRatio=1.0,spark.gluten.sql.columnar.backend.velox.maxExtendedPartialAggregationMemoryRatio=1.0,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinPct=0,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinRows=0 \ - -d=FLUSH_MODE:FLUSHED,spark.gluten.sql.columnar.backend.velox.maxPartialAggregationMemoryRatio=0.05,spark.gluten.sql.columnar.backend.velox.maxExtendedPartialAggregationMemoryRatio=0.1,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinPct=100,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinRows=0' - - name: (To be fixed) TPC-DS SF30.0 Parquet local spark3.2 Q97 low memory # The case currently causes crash with "free: invalid size". - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh 'cd /opt/gluten/tools/gluten-it && \ - GLUTEN_IT_JVM_ARGS=-Xmx50G sbin/gluten-it.sh parameterized \ - --local --preset=velox --benchmark-type=ds --error-on-memleak --queries=q97 -s=30.0 --threads=12 --shuffle-partitions=72 --iterations=1 \ - --skip-data-gen -m=OffHeapExecutionMemory \ - -d=ISOLATION:OFF,spark.gluten.memory.isolation=false \ - -d=ISOLATION:ON,spark.gluten.memory.isolation=true,spark.memory.storageFraction=0.1 \ - -d=OFFHEAP_SIZE:2g,spark.memory.offHeap.size=2g \ - -d=OFFHEAP_SIZE:1g,spark.memory.offHeap.size=1g' || true - - name: Exit docker container - if: ${{ always() }} - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/clean.sh - - - static-build-centos7-test: - runs-on: velox-self-hosted - steps: - - uses: actions/checkout@v4 - - name: Setup docker container - run: | - docker run --rm --init --privileged --ulimit nofile=65536:65536 --ulimit core=-1 --security-opt seccomp=unconfined \ - -v $PWD:/opt/gluten --name static-build-test-$GITHUB_RUN_ID -e NUM_THREADS=30 -detach $DOCKER_PULL_REGISTRY/gluten-te/gluten-buildenv-centos:7 \ - bash -c 'cd /opt/gluten && sleep 14400' - - name: Build Gluten CPP library - run: | - docker exec -i static-build-test-$GITHUB_RUN_ID bash -c ' - source /env.sh && \ - sudo yum -y install patch && \ - cd /opt/gluten && \ - sudo -E ./dev/vcpkg/setup-build-depends.sh && \ - source ./dev/vcpkg/env.sh && \ - ./dev/builddeps-veloxbe.sh --enable_vcpkg=ON --build_tests=OFF --build_benchmarks=OFF --enable_s3=ON \ - --enable_gcs=ON --enable_hdfs=ON --enable_abfs=ON --build_type=Debug' - - name: Build for Spark 3.2.2 - run: | - docker exec static-build-test-$GITHUB_RUN_ID bash -c ' - cd /opt/gluten && \ - mvn clean install -Pspark-3.2 -Pbackends-velox -Prss -Piceberg -Pdelta -DskipTests && \ - cd /opt/gluten/tools/gluten-it && \ - mvn clean install -Pspark-3.2' - - name: TPC-H SF1.0 && TPC-DS SF1.0 Parquet local spark3.2 (ubuntu 20.04) - run: | - docker run --rm --init --privileged --ulimit nofile=65536:65536 --ulimit core=-1 --security-opt seccomp=unconfined \ - -v $PWD:/opt/gluten --name static-build-test-$GITHUB_RUN_ID-tpc -e NUM_THREADS=30 ubuntu:20.04 \ - bash -c 'apt-get update -y && DEBIAN_FRONTEND=noninteractive apt-get install openjdk-8-jre -y \ - && cd /opt/gluten/tools/gluten-it \ - && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \ - --local --preset=velox --benchmark-type=h --error-on-memleak --disable-aqe --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \ - && GLUTEN_IT_JVM_ARGS=-Xmx10G sbin/gluten-it.sh queries-compare \ - --local --preset=velox --benchmark-type=ds --error-on-memleak --off-heap-size=20g -s=1.0 --threads=32 --iterations=1' - - name: TPC-H SF1.0 && TPC-DS SF1.0 Parquet local spark3.2 (ubuntu 22.04) - run: | - docker run --rm --init --privileged --ulimit nofile=65536:65536 --ulimit core=-1 --security-opt seccomp=unconfined \ - -v $PWD:/opt/gluten --name static-build-test-$GITHUB_RUN_ID-tpc -e NUM_THREADS=30 ubuntu:22.04 \ - bash -c 'apt-get update -y && DEBIAN_FRONTEND=noninteractive apt-get install openjdk-8-jre -y \ - && cd /opt/gluten/tools/gluten-it \ - && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \ - --local --preset=velox --benchmark-type=h --error-on-memleak --disable-aqe --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \ - && GLUTEN_IT_JVM_ARGS=-Xmx10G sbin/gluten-it.sh queries-compare \ - --local --preset=velox --benchmark-type=ds --error-on-memleak --off-heap-size=20g -s=1.0 --threads=32 --iterations=1' - - name: Exit docker container - if: ${{ always() }} - run: | - docker stop static-build-test-$GITHUB_RUN_ID || true - - build-script-test: - runs-on: velox-self-hosted - env: - OS_IMAGE_NAME: centos - OS_IMAGE_TAG: 8 - steps: - - uses: actions/checkout@v4 - - name: Setup docker container - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/checkout.sh - - name: Build Script Test - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh ' - source /env.sh && \ - sudo yum -y install patch && \ - cd /opt/gluten/ && \ - ./dev/package.sh' - - name: Exit docker container - if: ${{ always() }} - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/clean.sh + $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/clean.sh \ No newline at end of file