From 7b47107fe616ca5882c5e817ac91b4c50593911c Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Wed, 15 Jul 2020 10:31:26 +0900 Subject: [PATCH] Test PySpark with Python 3.8 in Github Actions --- .github/workflows/master.yml | 32 ++++++++++++++++++-------------- python/run-tests.py | 1 - 2 files changed, 18 insertions(+), 15 deletions(-) diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index 5cf00c6ed9e67..fe01b92036377 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -117,38 +117,42 @@ jobs: java-version: ${{ matrix.java }} # PySpark - name: Install PyPy3 - # SQL component also has Python related tests, for example, IntegratedUDFTestUtils. # Note that order of Python installations here matters because default python3 is # overridden by pypy3. uses: actions/setup-python@v2 - if: contains(matrix.modules, 'pyspark') || (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-')) + if: contains(matrix.modules, 'pyspark') with: python-version: pypy3 architecture: x64 - - name: Install Python 2.7 + - name: Install Python 3.6 uses: actions/setup-python@v2 - if: contains(matrix.modules, 'pyspark') || (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-')) + if: contains(matrix.modules, 'pyspark') with: - python-version: 2.7 + python-version: 3.6 architecture: x64 - - name: Install Python 3.6 + - name: Install Python 3.8 uses: actions/setup-python@v2 - # Yarn has a Python specific test too, for example, YarnClusterSuite. + # We should install one Python that is higher than 3+ for SQL and Yarn because: + # - SQL component also has Python related tests, for example, IntegratedUDFTestUtils. + # - Yarn has a Python specific test too, for example, YarnClusterSuite. 
if: contains(matrix.modules, 'yarn') || contains(matrix.modules, 'pyspark') || (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-')) with: - python-version: 3.6 + python-version: 3.8 architecture: x64 - - name: Install Python packages - if: contains(matrix.modules, 'pyspark') || (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-')) + - name: Install Python packages (Python 3.6 and PyPy3) + if: contains(matrix.modules, 'pyspark') # PyArrow is not supported in PyPy yet, see ARROW-2651. # TODO(SPARK-32247): scipy installation with PyPy fails for an unknown reason. run: | - python3 -m pip install numpy pyarrow pandas scipy - python3 -m pip list - python2 -m pip install numpy pyarrow pandas scipy - python2 -m pip list + python3.6 -m pip install numpy pyarrow pandas scipy + python3.6 -m pip list pypy3 -m pip install numpy pandas pypy3 -m pip list + - name: Install Python packages (Python 3.8) + if: contains(matrix.modules, 'pyspark') || (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-')) + run: | + python3.8 -m pip install numpy pyarrow pandas scipy + python3.8 -m pip list # SparkR - name: Install R 3.6 uses: r-lib/actions/setup-r@v1 diff --git a/python/run-tests.py b/python/run-tests.py index 23076eab1c3e4..357eb8f449beb 100755 --- a/python/run-tests.py +++ b/python/run-tests.py @@ -157,7 +157,6 @@ def run_individual_python_test(target_dir, test_name, pyspark_python): def get_default_python_executables(): - # TODO(SPARK-32278): install PyPy3 in Jenkins to test python_execs = [x for x in ["python3.6", "python3.8", "pypy3"] if which(x)] if "python3.6" not in python_execs: