Skip to content
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
131 changes: 63 additions & 68 deletions .github/workflows/build_and_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,28 +14,6 @@ on:
required: true

jobs:
# This is on the top to give the most visibility in case of failures
# Compile-only smoke build against the Hadoop 2.7 profile (no tests are run).
# NOTE(review): indentation was lost in this capture — the live workflow file
# nests this job under `jobs:`; confirm against the repository before reuse.
hadoop-2:
name: Hadoop 2 build
runs-on: ubuntu-20.04
steps:
- name: Checkout Spark repository
uses: actions/checkout@v2
# Restore/save the Coursier artifact cache so SBT dependency resolution is fast;
# the key changes whenever any pom.xml or plugins.sbt changes.
- name: Cache Coursier local repository
uses: actions/cache@v2
with:
path: ~/.cache/coursier
key: hadoop-2-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
restore-keys: |
hadoop-2-coursier-
# setup-java@v1 uses the legacy "1.8" version spec for Java 8.
- name: Install Java 8
uses: actions/setup-java@v1
with:
java-version: 1.8
# compile + test:compile only — verifies the Hadoop 2.7 profile still builds.
- name: Build with SBT
run: |
./build/sbt -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Pkinesis-asl -Phadoop-2.7 compile test:compile

# Build: build Spark and run the tests for specified modules.
build:
name: "Build modules: ${{ matrix.modules }} ${{ matrix.comment }} (JDK ${{ matrix.java }}, ${{ matrix.hadoop }}, ${{ matrix.hive }})"
Expand All @@ -45,7 +23,7 @@ jobs:
fail-fast: false
matrix:
java:
- 1.8
- 8
hadoop:
- hadoop3.2
hive:
Expand All @@ -71,26 +49,26 @@ jobs:
include:
# Hive tests
- modules: hive
java: 1.8
java: 8
hadoop: hadoop3.2
hive: hive2.3
included-tags: org.apache.spark.tags.SlowHiveTest
comment: "- slow tests"
- modules: hive
java: 1.8
java: 8
hadoop: hadoop3.2
hive: hive2.3
excluded-tags: org.apache.spark.tags.SlowHiveTest
comment: "- other tests"
# SQL tests
- modules: sql
java: 1.8
java: 8
hadoop: hadoop3.2
hive: hive2.3
included-tags: org.apache.spark.tags.ExtendedSQLTest
comment: "- slow tests"
- modules: sql
java: 1.8
java: 8
hadoop: hadoop3.2
hive: hive2.3
excluded-tags: org.apache.spark.tags.ExtendedSQLTest
Expand Down Expand Up @@ -123,24 +101,18 @@ jobs:
build/zinc-*
build/scala-*
build/*.jar
~/.sbt
key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }}
restore-keys: |
build-
- name: Cache Maven local repository
uses: actions/cache@v2
with:
path: ~/.m2/repository
key: ${{ matrix.java }}-${{ matrix.hadoop }}-maven-${{ hashFiles('**/pom.xml') }}
restore-keys: |
${{ matrix.java }}-${{ matrix.hadoop }}-maven-
- name: Cache Coursier local repository
uses: actions/cache@v2
with:
path: ~/.cache/coursier
key: ${{ matrix.java }}-${{ matrix.hadoop }}-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
restore-keys: |
${{ matrix.java }}-${{ matrix.hadoop }}-coursier-
- name: Install JDK ${{ matrix.java }}
- name: Install Java ${{ matrix.java }}
uses: actions/setup-java@v1
with:
java-version: ${{ matrix.java }}
Expand All @@ -163,9 +135,7 @@ jobs:
run: |
# Hive and SQL tests become flaky when running in parallel as it's too intensive.
if [[ "$MODULES_TO_TEST" == "hive" ]] || [[ "$MODULES_TO_TEST" == "sql" ]]; then export SERIAL_SBT_TESTS=1; fi
mkdir -p ~/.m2
./dev/run-tests --parallelism 2 --modules "$MODULES_TO_TEST" --included-tags "$INCLUDED_TAGS" --excluded-tags "$EXCLUDED_TAGS"
rm -rf ~/.m2/repository/org/apache/spark
- name: Upload test results to report
if: always()
uses: actions/upload-artifact@v2
Expand Down Expand Up @@ -218,16 +188,10 @@ jobs:
build/zinc-*
build/scala-*
build/*.jar
~/.sbt
key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }}
restore-keys: |
build-
- name: Cache Maven local repository
uses: actions/cache@v2
with:
path: ~/.m2/repository
key: pyspark-maven-${{ hashFiles('**/pom.xml') }}
restore-keys: |
pyspark-maven-
- name: Cache Coursier local repository
uses: actions/cache@v2
with:
Expand All @@ -250,24 +214,22 @@ jobs:
# Run the tests.
- name: Run tests
run: |
mkdir -p ~/.m2
./dev/run-tests --parallelism 2 --modules "$MODULES_TO_TEST"
rm -rf ~/.m2/repository/org/apache/spark
- name: Upload test results to report
if: always()
uses: actions/upload-artifact@v2
with:
name: test-results-${{ matrix.modules }}--1.8-hadoop3.2-hive2.3
name: test-results-${{ matrix.modules }}--8-hadoop3.2-hive2.3
path: "**/target/test-reports/*.xml"
- name: Upload unit tests log files
if: failure()
uses: actions/upload-artifact@v2
with:
name: unit-tests-log-${{ matrix.modules }}--1.8-hadoop3.2-hive2.3
name: unit-tests-log-${{ matrix.modules }}--8-hadoop3.2-hive2.3
path: "**/target/unit-tests.log"

sparkr:
name: Build modules - sparkr
name: "Build modules: sparkr"
runs-on: ubuntu-20.04
container:
image: dongjoon/apache-spark-github-action-image:20201025
Expand All @@ -294,16 +256,10 @@ jobs:
build/zinc-*
build/scala-*
build/*.jar
~/.sbt
key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }}
restore-keys: |
build-
- name: Cache Maven local repository
uses: actions/cache@v2
with:
path: ~/.m2/repository
key: sparkr-maven-${{ hashFiles('**/pom.xml') }}
restore-keys: |
sparkr-maven-
- name: Cache Coursier local repository
uses: actions/cache@v2
with:
Expand All @@ -313,18 +269,16 @@ jobs:
sparkr-coursier-
- name: Run tests
run: |
mkdir -p ~/.m2
# The followings are also used by `r-lib/actions/setup-r` to avoid
# R issues at docker environment
export TZ=UTC
export _R_CHECK_SYSTEM_CLOCK_=FALSE
./dev/run-tests --parallelism 2 --modules sparkr
rm -rf ~/.m2/repository/org/apache/spark
- name: Upload test results to report
if: always()
uses: actions/upload-artifact@v2
with:
name: test-results-sparkr--1.8-hadoop3.2-hive2.3
name: test-results-sparkr--8-hadoop3.2-hive2.3
path: "**/target/test-reports/*.xml"

# Static analysis, and documentation build
Expand All @@ -334,17 +288,37 @@ jobs:
steps:
- name: Checkout Spark repository
uses: actions/checkout@v2
# Cache local repositories. Note that GitHub Actions cache has a 2G limit.
- name: Cache Scala, SBT, Maven and Zinc
uses: actions/cache@v2
with:
path: |
build/apache-maven-*
build/zinc-*
build/scala-*
build/*.jar
~/.sbt
key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }}
restore-keys: |
build-
- name: Cache Coursier local repository
uses: actions/cache@v2
with:
path: ~/.cache/coursier
key: docs-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
restore-keys: |
docs-coursier-
- name: Cache Maven local repository
uses: actions/cache@v2
with:
path: ~/.m2/repository
key: docs-maven-repo-${{ hashFiles('**/pom.xml') }}
key: docs-maven-${{ hashFiles('**/pom.xml') }}
restore-keys: |
docs-maven-
- name: Install JDK 1.8
- name: Install Java 8
uses: actions/setup-java@v1
with:
java-version: 1.8
java-version: 8
- name: Install Python 3.6
uses: actions/setup-python@v2
with:
Expand Down Expand Up @@ -395,8 +369,8 @@ jobs:
cd docs
jekyll build

java11:
name: Java 11 build
java-11:
name: Java 11 build with Maven
runs-on: ubuntu-20.04
steps:
- name: Checkout Spark repository
Expand All @@ -416,12 +390,12 @@ jobs:
run: |
export MAVEN_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=1g -Dorg.slf4j.simpleLogger.defaultLogLevel=WARN"
export MAVEN_CLI_OPTS="--no-transfer-progress"
mkdir -p ~/.m2
# It uses Maven's 'install' intentionally, see https://github.com/apache/spark/pull/26414.
./build/mvn $MAVEN_CLI_OPTS -DskipTests -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Djava.version=11 install
rm -rf ~/.m2/repository/org/apache/spark

scala-213:
name: Scala 2.13 build
name: Scala 2.13 build with SBT
runs-on: ubuntu-20.04
steps:
- name: Checkout Spark repository
Expand All @@ -433,11 +407,32 @@ jobs:
key: scala-213-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
restore-keys: |
scala-213-coursier-
- name: Install Java 11
- name: Install Java 8
uses: actions/setup-java@v1
with:
java-version: 11
java-version: 8
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No problem.

- name: Build with SBT
run: |
./dev/change-scala-version.sh 2.13
./build/sbt -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Pkinesis-asl -Djava.version=11 -Pscala-2.13 compile test:compile

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@dongjoon-hyun, I just moved it back below. It seems to cause conflicts when things are ported back (the Hadoop 2 build is only in the master branch). I think it will still clearly show the red X if one of the builds fails anyway, so it might be fine to move it down here together with the Java 11 and Scala 2.13 builds.

Copy link
Member

@dongjoon-hyun dongjoon-hyun Nov 17, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@HyukjinKwon . This was added at the end but was moved to the first position due @mridulm 's direct request. (#30378 (comment)).

I'm okay with all positions.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@mridulm, would you mind moving it below here? It will show an X obviously if one of builds fails anyway. It seems like it can cause some conflicts easily when we backport something to other branches for GitHub Actions. Currently, we're running the builds in all active branches, and porting back things.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@HyukjinKwon Currently, we merge if either of the builds passes (Jenkins or GitHub).
The reason I asked @dongjoon-hyun to move it to the top was that if Jenkins reports a positive build (since it does not test Hadoop 2), the only way to identify a Hadoop 2.x failure is by GitHub Actions failing on the Hadoop 2 build.

If we can get Jenkins to also build hadoop-2, then we don't need to move this to the top.
Otherwise, it is common for reviewers not to notice the last entry in the GitHub build (when Jenkins is green).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@HyukjinKwon Is there a better solution currently ? I am not an expert on github or jenkins :-)

Copy link
Member Author

@HyukjinKwon HyukjinKwon Nov 18, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@mridulm, oh but if there's any of them fails, it shows a clear X like

Screen Shot 2020-11-18 at 9 07 02 PM

So it should be fine. I do believe reviewers check for the X or test failures from Jenkins before merging :-). Actually, the same applies to the Java 11 and Scala 2.13 builds — the Jenkins PR builder does not run them either.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That sounds good to me. Thanks for the details @HyukjinKwon !
@dongjoon-hyun given details @HyukjinKwon provided, I am fine with the changes he proposed. Thanks

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks!

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Given the agreement, shall we merge this?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please go ahead @dongjoon-hyun , thanks !

# Compile-only smoke build against the Hadoop 2.7 profile (no tests are run).
# Moved to the bottom of the workflow alongside the Java 11 / Scala 2.13 builds;
# a failure still surfaces as a red X on the check run.
# NOTE(review): indentation was lost in this capture — the live workflow file
# nests this job under `jobs:`; confirm against the repository before reuse.
hadoop-2:
name: Hadoop 2 build with SBT
runs-on: ubuntu-20.04
steps:
- name: Checkout Spark repository
uses: actions/checkout@v2
# Restore/save the Coursier artifact cache so SBT dependency resolution is fast;
# the key changes whenever any pom.xml or plugins.sbt changes.
- name: Cache Coursier local repository
uses: actions/cache@v2
with:
path: ~/.cache/coursier
key: hadoop-2-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
restore-keys: |
hadoop-2-coursier-
# This PR switches setup-java version specs from "1.8" to the plain "8" form.
- name: Install Java 8
uses: actions/setup-java@v1
with:
java-version: 8
# compile + test:compile only — verifies the Hadoop 2.7 profile still builds.
- name: Build with SBT
run: |
./build/sbt -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Pkinesis-asl -Phadoop-2.7 compile test:compile