16 commits
b1fd03a
[SPARK-32245][INFRA] Run Spark tests in Github Actions
HyukjinKwon Jul 11, 2020
f4924a2
[SPARK-32245][INFRA][FOLLOWUP] Reenable Github Actions on commit
dongjoon-hyun Jul 12, 2020
c074e06
[SPARK-32292][SPARK-32252][INFRA] Run the relevant tests only in GitH…
HyukjinKwon Jul 13, 2020
94491d5
[SPARK-32316][TESTS][INFRA] Test PySpark with Python 3.8 in Github Ac…
HyukjinKwon Jul 15, 2020
94988d6
[SPARK-32408][BUILD] Enable crossPaths back to prevent side effects
HyukjinKwon Jul 24, 2020
25deb17
[SPARK-32303][PYTHON][TESTS] Remove leftover from editable mode insta…
HyukjinKwon Jul 14, 2020
d2f5d32
[SPARK-32363][PYTHON][BUILD] Fix flakiness in pip package testing in …
HyukjinKwon Jul 21, 2020
04f6289
[SPARK-32419][PYTHON][BUILD] Avoid using subshell for Conda env (de)a…
HyukjinKwon Jul 25, 2020
d8129c0
[SPARK-32491][INFRA] Do not install SparkR in test-only mode in testi…
HyukjinKwon Jul 30, 2020
2184fd0
[SPARK-32493][INFRA] Manually install R instead of using setup-r in G…
HyukjinKwon Jul 30, 2020
f20ba18
[SPARK-32496][INFRA] Include GitHub Action file as the changes in tes…
HyukjinKwon Jul 30, 2020
06a30cf
[SPARK-32497][INFRA] Installs qpdf package for CRAN check in GitHub A…
HyukjinKwon Jul 30, 2020
12e4c9b
[SPARK-32357][INFRA] Publish failed and succeeded test reports in Git…
HyukjinKwon Aug 14, 2020
617619d
[SPARK-32606][SPARK-32605][INFRA] Remove the forks of action-surefire…
HyukjinKwon Aug 17, 2020
dda30c5
[SPARK-32645][INFRA] Upload unit-tests.log as an artifact
HyukjinKwon Aug 19, 2020
84846a8
[MINOR][INFRA] Rename master.yml to build_and_test.yml
HyukjinKwon Aug 18, 2020
104 changes: 0 additions & 104 deletions .github/workflows/branch-2.4.yml

This file was deleted.

241 changes: 241 additions & 0 deletions .github/workflows/build_and_test.yml
@@ -0,0 +1,241 @@
name: Build and test

on:
push:
branches:
- branch-2.4
pull_request:
branches:
- branch-2.4

jobs:
  # Build: build Spark and run the tests for the specified modules.
build:
name: "Build modules: ${{ matrix.modules }} ${{ matrix.comment }} (JDK ${{ matrix.java }}, ${{ matrix.hadoop }})"
runs-on: ubuntu-latest
strategy:
fail-fast: false
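      # fail-fast is disabled so that one failing matrix job does not
      # cancel the remaining jobs in the matrix.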
matrix:
java:
- 1.8
hadoop:
- hadoop2.6
# TODO(SPARK-32246): We don't test 'streaming-kinesis-asl' for now.
        # Kinesis tests depend on the external Amazon Kinesis service.
# Note that the modules below are from sparktestsupport/modules.py.
modules:
- >-
core, unsafe, kvstore, avro,
network-common, network-shuffle, repl, launcher,
examples, sketch, graphx
- >-
catalyst, hive-thriftserver
- >-
streaming, sql-kafka-0-10, streaming-kafka-0-10,
mllib-local, mllib,
yarn, mesos, kubernetes, hadoop-cloud, spark-ganglia-lgpl,
streaming-flume, streaming-flume-sink, streaming-kafka-0-8
- >-
pyspark-sql, pyspark-mllib
- >-
pyspark-core, pyspark-streaming, pyspark-ml
- >-
sparkr
- >-
sql
        # Here, we split the Hive tests into the slow ones and the rest of them.
included-tags: [""]
excluded-tags: [""]
comment: [""]
include:
# Hive tests
- modules: hive
java: 1.8
hadoop: hadoop2.6
included-tags: org.apache.spark.tags.SlowHiveTest
comment: "- slow tests"
- modules: hive
java: 1.8
hadoop: hadoop2.6
excluded-tags: org.apache.spark.tags.SlowHiveTest
comment: "- other tests"
env:
MODULES_TO_TEST: ${{ matrix.modules }}
EXCLUDED_TAGS: ${{ matrix.excluded-tags }}
INCLUDED_TAGS: ${{ matrix.included-tags }}
HADOOP_PROFILE: ${{ matrix.hadoop }}
      # GitHub Actions' default miniconda, used in the pip packaging test.
CONDA_PREFIX: /usr/share/miniconda
GITHUB_PREV_SHA: ${{ github.event.before }}
ARROW_PRE_0_15_IPC_FORMAT: 1
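      # Spark 2.4 predates the Arrow 0.15 IPC format change; this flag makes
      # newer PyArrow keep producing the legacy format (see SPARK-29367).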
steps:
- name: Checkout Spark repository
uses: actions/checkout@v2
# In order to fetch changed files
with:
fetch-depth: 0
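        # A fetch depth of 0 clones the full history, so the changed files
        # between GITHUB_PREV_SHA and HEAD can be computed to run only the
        # relevant tests (SPARK-32292).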
# Cache local repositories. Note that GitHub Actions cache has a 2G limit.
- name: Cache Scala, SBT, Maven and Zinc
uses: actions/cache@v1
with:
path: build
key: build-${{ hashFiles('**/pom.xml') }}
restore-keys: |
build-
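    # `restore-keys` gives a prefix fallback: on a miss for the exact key,
    # the most recently saved cache whose key matches the prefix is restored.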
- name: Cache Maven local repository
uses: actions/cache@v2
with:
path: ~/.m2/repository
key: ${{ matrix.java }}-${{ matrix.hadoop }}-maven-${{ hashFiles('**/pom.xml') }}
restore-keys: |
${{ matrix.java }}-${{ matrix.hadoop }}-maven-
- name: Cache Ivy local repository
uses: actions/cache@v2
with:
path: ~/.ivy2/cache
key: ${{ matrix.java }}-${{ matrix.hadoop }}-ivy-${{ hashFiles('**/pom.xml') }}-${{ hashFiles('**/plugins.sbt') }}
restore-keys: |
${{ matrix.java }}-${{ matrix.hadoop }}-ivy-
- name: Install JDK ${{ matrix.java }}
uses: actions/setup-java@v1
with:
java-version: ${{ matrix.java }}
# PySpark
- name: Install PyPy3
      # Note that the order of the Python installations here matters because
      # the default python is overridden.
uses: actions/setup-python@v2
if: contains(matrix.modules, 'pyspark')
with:
python-version: pypy3
architecture: x64
- name: Install Python 3.6
uses: actions/setup-python@v2
if: contains(matrix.modules, 'pyspark')
with:
python-version: 3.6
architecture: x64
- name: Install Python 2.7
uses: actions/setup-python@v2
      # Yarn has a Python-specific test too, for example, YarnClusterSuite.
if: contains(matrix.modules, 'yarn') || contains(matrix.modules, 'pyspark') || (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-'))
with:
python-version: 2.7
architecture: x64
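      # Each setup-python call prepends its interpreter to PATH, so the
      # one installed last resolves first as the default `python`.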
- name: Install Python packages (Python 3.6 and PyPy3)
if: contains(matrix.modules, 'pyspark')
# PyArrow is not supported in PyPy yet, see ARROW-2651.
# TODO(SPARK-32247): scipy installation with PyPy fails for an unknown reason.
run: |
python3.6 -m pip install numpy pyarrow pandas scipy xmlrunner
python3.6 -m pip list
# PyPy does not have xmlrunner
pypy3 -m pip install numpy pandas
pypy3 -m pip list
- name: Install Python packages (Python 2.7)
if: contains(matrix.modules, 'pyspark') || (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-'))
run: |
# Some tests do not pass in PySpark with PyArrow, for example, pyspark.sql.tests.ArrowTests.
Member Author: Some tests do not pass in PySpark with PyArrow, for example, pyspark.sql.tests.ArrowTests with Python 2.

Member Author: It's not GitHub Actions specific. Jenkins does not test Python 2 with PyArrow, and I can reproduce it locally as well.

python2.7 -m pip install numpy pandas scipy xmlrunner
python2.7 -m pip list
# SparkR
- name: Install R 4.0
if: contains(matrix.modules, 'sparkr')
run: |
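        # SPARK-32493: R 4.0 is installed manually from the CRAN apt
        # repository (after trusting its signing key) rather than via the
        # setup-r action.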
sudo sh -c "echo 'deb https://cloud.r-project.org/bin/linux/ubuntu bionic-cran40/' >> /etc/apt/sources.list"
        curl -sL "https://keyserver.ubuntu.com/pks/lookup?op=get&search=0xE298A3A825C0D65DFD57CBB651716619E084DAB9" | sudo apt-key add -
sudo apt-get update
sudo apt-get install -y r-base r-base-dev libcurl4-openssl-dev
- name: Install R packages
if: contains(matrix.modules, 'sparkr')
run: |
# qpdf is required to reduce the size of PDFs to make CRAN check pass. See SPARK-32497.
sudo apt-get install -y libcurl4-openssl-dev qpdf
sudo Rscript -e "install.packages(c('knitr', 'rmarkdown', 'testthat', 'devtools', 'e1071', 'survival', 'arrow', 'roxygen2'), repos='https://cloud.r-project.org/')"
# Show installed packages in R.
sudo Rscript -e 'pkg_list <- as.data.frame(installed.packages()[, c(1,3:4)]); pkg_list[is.na(pkg_list$Priority), 1:2, drop = FALSE]'
# Run the tests.
- name: Run tests
run: |
        # Hive tests become flaky when run in parallel, as they are too intensive.
if [[ "$MODULES_TO_TEST" == "hive" ]]; then export SERIAL_SBT_TESTS=1; fi
mkdir -p ~/.m2
./dev/run-tests --parallelism 2 --modules "$MODULES_TO_TEST" --included-tags "$INCLUDED_TAGS" --excluded-tags "$EXCLUDED_TAGS"
rm -rf ~/.m2/repository/org/apache/spark
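        # For the "hive - slow tests" matrix job above, this roughly expands to:
        #   ./dev/run-tests --parallelism 2 --modules "hive" \
        #     --included-tags "org.apache.spark.tags.SlowHiveTest" --excluded-tags ""
        # Spark's own artifacts are removed so they do not get persisted
        # into the Maven cache saved by actions/cache.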
- name: Upload test results to report
if: always()
uses: actions/upload-artifact@v2
with:
name: test-results-${{ matrix.modules }}-${{ matrix.comment }}-${{ matrix.java }}-${{ matrix.hadoop }}
path: "**/target/test-reports/*.xml"
- name: Upload unit tests log files
if: failure()
uses: actions/upload-artifact@v2
with:
name: unit-tests-log-${{ matrix.modules }}-${{ matrix.comment }}-${{ matrix.java }}-${{ matrix.hadoop }}-${{ matrix.hive }}
path: "**/target/unit-tests.log"

# Static analysis, and documentation build
lint:
name: Linters, licenses, dependencies and documentation generation
runs-on: ubuntu-latest
steps:
- name: Checkout Spark repository
uses: actions/checkout@v2
- name: Cache Maven local repository
uses: actions/cache@v2
with:
path: ~/.m2/repository
key: docs-maven-repo-${{ hashFiles('**/pom.xml') }}
restore-keys: |
docs-maven-
- name: Install JDK 1.8
uses: actions/setup-java@v1
with:
java-version: 1.8
- name: Install Python 3.6
uses: actions/setup-python@v2
with:
python-version: 3.6
architecture: x64
- name: Install Python linter dependencies
run: |
pip3 install flake8 sphinx numpy
- name: Install R 4.0
run: |
sudo sh -c "echo 'deb https://cloud.r-project.org/bin/linux/ubuntu bionic-cran40/' >> /etc/apt/sources.list"
        curl -sL "https://keyserver.ubuntu.com/pks/lookup?op=get&search=0xE298A3A825C0D65DFD57CBB651716619E084DAB9" | sudo apt-key add -
sudo apt-get update
sudo apt-get install -y r-base r-base-dev libcurl4-openssl-dev
- name: Install R linter dependencies and SparkR
run: |
sudo apt-get install -y libcurl4-openssl-dev
sudo Rscript -e "install.packages(c('devtools'), repos='https://cloud.r-project.org/')"
sudo Rscript -e "devtools::install_github('jimhester/lintr@v2.0.0')"
./R/install-dev.sh
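        # lintr is pinned at v2.0.0, presumably for reproducible lint
        # results, and SparkR is built locally so ./dev/lint-r can run
        # against the installed package.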
- name: Install Ruby 2.7 for documentation generation
uses: actions/setup-ruby@v1
with:
ruby-version: 2.7
- name: Install dependencies for documentation generation
run: |
sudo apt-get install -y libcurl4-openssl-dev pandoc
pip install sphinx mkdocs numpy
gem install jekyll jekyll-redirect-from pygments.rb
sudo Rscript -e "install.packages(c('devtools', 'testthat', 'knitr', 'rmarkdown', 'roxygen2'), repos='https://cloud.r-project.org/')"
- name: Scala linter
run: ./dev/lint-scala
- name: Java linter
run: ./dev/lint-java
- name: Python linter
run: ./dev/lint-python
- name: R linter
run: ./dev/lint-r
- name: License test
run: ./dev/check-license
- name: Dependencies test
run: ./dev/test-dependencies.sh
- name: Run documentation build
run: |
cd docs
jekyll build
24 changes: 24 additions & 0 deletions .github/workflows/test_report.yml
@@ -0,0 +1,24 @@
name: Report test results
on:
workflow_run:
workflows: ["Build and test"]
types:
- completed
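# `workflow_run` fires after the "Build and test" workflow completes.
# Unlike a workflow triggered directly by a fork's pull request, it runs
# in the base repository with a write-scoped GITHUB_TOKEN, which is what
# allows it to publish check results (SPARK-32357).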

jobs:
test_report:
runs-on: ubuntu-latest
steps:
- name: Download test results to report
uses: dawidd6/action-download-artifact@v2
with:
github_token: ${{ secrets.GITHUB_TOKEN }}
workflow: ${{ github.event.workflow_run.workflow_id }}
commit: ${{ github.event.workflow_run.head_commit.id }}
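      # actions/download-artifact can only see artifacts of the current
      # workflow run, so this third-party action is used to fetch the
      # report artifacts uploaded by the completed build workflow.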
- name: Publish test report
uses: scacap/action-surefire-report@v1
with:
check_name: Report test results
github_token: ${{ secrets.GITHUB_TOKEN }}
report_paths: "**/target/test-reports/*.xml"
commit: ${{ github.event.workflow_run.head_commit.id }}