From 92f7268eac3efb2c2ff49239807eaea693d29027 Mon Sep 17 00:00:00 2001 From: shane knapp Date: Wed, 30 Oct 2019 13:46:42 -0700 Subject: [PATCH 01/31] remove py27 support for tests --- python/run-tests.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/python/run-tests.py b/python/run-tests.py index b1119b044d71..21d25cc8b6a9 100755 --- a/python/run-tests.py +++ b/python/run-tests.py @@ -160,11 +160,15 @@ def run_individual_python_test(target_dir, test_name, pyspark_python): def get_default_python_executables(): - python_execs = [x for x in ["python2.7", "python3.6", "pypy"] if which(x)] - if "python2.7" not in python_execs: - LOGGER.warning("Not testing against `python2.7` because it could not be found; falling" - " back to `python` instead") - python_execs.insert(0, "python") + python_execs = [x for x in ["python3.6", "pypy"] if which(x)] + if "python3.6" not in python_execs: + if which('python'): + LOGGER.warning("Not testing against `python3.6` because it could not be found; falling" + " back to `python` instead") + python_execs.insert(0, "python") + else: + LOGGER.error("No python executable found! Exiting!") + os._exit(1) return python_execs From 7c0fe469175d42867e87870a95e2a930acd58e8f Mon Sep 17 00:00:00 2001 From: shane knapp Date: Wed, 30 Oct 2019 14:14:58 -0700 Subject: [PATCH 02/31] follow double quoted convention --- python/run-tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/run-tests.py b/python/run-tests.py index 21d25cc8b6a9..c7241671efde 100755 --- a/python/run-tests.py +++ b/python/run-tests.py @@ -162,7 +162,7 @@ def run_individual_python_test(target_dir, test_name, pyspark_python): def get_default_python_executables(): python_execs = [x for x in ["python3.6", "pypy"] if which(x)] if "python3.6" not in python_execs: - if which('python'): + if which("python"): LOGGER.warning("Not testing against `python3.6` because it could not be found; falling" " back to `python` instead") python_execs.insert(0, "python") From 226c1fa2d0aae8b7af64caac3ff671ec0c57de3d Mon Sep 17 00:00:00 2001 From: shane knapp Date: Wed, 30 Oct 2019 15:14:52 -0700 Subject: [PATCH 03/31] have pip packaging tests run with py36 --- dev/run-pip-tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/run-pip-tests b/dev/run-pip-tests index 60cf4d820941..3ddd8cdae262 100755 --- a/dev/run-pip-tests +++ b/dev/run-pip-tests @@ -53,7 +53,7 @@ if hash virtualenv 2>/dev/null && [ ! -n "$USE_CONDA" ]; then fi elif hash conda 2>/dev/null; then echo "Using conda virtual environments" - PYTHON_EXECS=('3.5') + PYTHON_EXECS=('3.6') USE_CONDA=1 else echo "Missing virtualenv & conda, skipping pip installability tests" From f25c4b3e10340ac713cf8b8c3cc82ee65d9bdd76 Mon Sep 17 00:00:00 2001 From: shane knapp Date: Wed, 30 Oct 2019 15:44:52 -0700 Subject: [PATCH 04/31] remove py27 altogether --- dev/run-pip-tests | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/dev/run-pip-tests b/dev/run-pip-tests index 3ddd8cdae262..c25ee7839a80 100755 --- a/dev/run-pip-tests +++ b/dev/run-pip-tests @@ -39,17 +39,12 @@ PYTHON_EXECS=() # Some systems don't have pip or virtualenv - in those cases our tests won't work. if hash virtualenv 2>/dev/null && [ ! -n "$USE_CONDA" ]; then echo "virtualenv installed - using. 
Note if this is a conda virtual env you may wish to set USE_CONDA" - # Figure out which Python execs we should test pip installation with - if hash python2 2>/dev/null; then - # We do this since we are testing with virtualenv and the default virtual env python - # is in /usr/bin/python - PYTHON_EXECS+=('python2') - elif hash python 2>/dev/null; then - # If python2 isn't installed fallback to python if available - PYTHON_EXECS+=('python') - fi + # test only against python3 if hash python3 2>/dev/null; then - PYTHON_EXECS+=('python3') + PYTHON_EXECS=('python3') + else + echo "Python3 not installed on system, skipping pip installability tests" + exit 0 fi elif hash conda 2>/dev/null; then echo "Using conda virtual environments" From b57acd9b5fe0d8df7e782b12ff92c3f01dbc9bd5 Mon Sep 17 00:00:00 2001 From: shane knapp Date: Thu, 31 Oct 2019 12:49:26 -0700 Subject: [PATCH 05/31] testing pypy3 --- python/run-tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/run-tests.py b/python/run-tests.py index c7241671efde..ebb407a84457 100755 --- a/python/run-tests.py +++ b/python/run-tests.py @@ -160,7 +160,7 @@ def run_individual_python_test(target_dir, test_name, pyspark_python): def get_default_python_executables(): - python_execs = [x for x in ["python3.6", "pypy"] if which(x)] + python_execs = [x for x in ["python3.6", "pypy3"] if which(x)] if "python3.6" not in python_execs: if which("python"): LOGGER.warning("Not testing against `python3.6` because it could not be found; falling" From 0b37b71167c192f726600fecd4a29b33e928c245 Mon Sep 17 00:00:00 2001 From: shane knapp Date: Thu, 31 Oct 2019 19:06:30 -0700 Subject: [PATCH 06/31] Revert "testing pypy3" This reverts commit b57acd9b5fe0d8df7e782b12ff92c3f01dbc9bd5. --- python/run-tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/run-tests.py b/python/run-tests.py index ebb407a84457..c7241671efde 100755 --- a/python/run-tests.py +++ b/python/run-tests.py @@ -160,7 +160,7 @@ def run_individual_python_test(target_dir, test_name, pyspark_python): def get_default_python_executables(): - python_execs = [x for x in ["python3.6", "pypy3"] if which(x)] + python_execs = [x for x in ["python3.6", "pypy"] if which(x)] if "python3.6" not in python_execs: if which("python"): LOGGER.warning("Not testing against `python3.6` because it could not be found; falling" From d0e49bbae5b350aa674286fbba3816b10867ec79 Mon Sep 17 00:00:00 2001 From: shane knapp Date: Fri, 1 Nov 2019 10:07:21 -0700 Subject: [PATCH 07/31] fallback to python3, check if version to test against is compatible --- python/run-tests.py | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/python/run-tests.py b/python/run-tests.py index c7241671efde..ecd2ff646535 100755 --- a/python/run-tests.py +++ b/python/run-tests.py @@ -57,6 +57,10 @@ def print_red(text): FAILURE_REPORTING_LOCK = Lock() LOGGER = logging.getLogger() +PYTHON_MAJOR_VERSION=3 +PYTHON_MINOR_VERSION=6 +PYTHON_MICRO_VERSION=0 + # Find out where the assembly jars are located. 
# TODO: revisit for Scala 2.13 for scala in ["2.12"]: @@ -161,13 +165,31 @@ def run_individual_python_test(target_dir, test_name, pyspark_python): def get_default_python_executables(): python_execs = [x for x in ["python3.6", "pypy"] if which(x)] + if "python3.6" not in python_execs: - if which("python"): - LOGGER.warning("Not testing against `python3.6` because it could not be found; falling" - " back to `python` instead") - python_execs.insert(0, "python") + p = which("python3") + + if p: + py_out = subprocess.run([p, "--version"], stderr=subprocess.PIPE) + py_out = py_out.stderr.decode("utf-8") + python_version = tuple([int(i) for i in re.findall("\d", py_out)]) + + if python_version < (PYTHON_MAJOR_VERSION, PYTHON_MINOR_VERSION, PYTHON_MICRO_VERSION): + LOGGER.error("Python version %s.%s.%s is lower than minimum " + "required: %s.%s.%s" % (python_version[0], + python_version[1], + python_version[2], + PYTHON_MAJOR_VERSION, + PYTHON_MINOR_VERSION, + PYTHON_MICRO_VERSION)) + LOGGER.error("Exiting!") + os._exit(1) + else: + LOGGER.warning("Not testing against `python3.6` because it could not be found; falling" + " back to %s instead" % p) + python_execs.insert(0, "python3") else: - LOGGER.error("No python executable found! Exiting!") + LOGGER.error("No python3 executable found. Exiting!") os._exit(1) return python_execs From be57b53b922f93275796a70ca5741a5f9f7f8e4b Mon Sep 17 00:00:00 2001 From: shane knapp Date: Fri, 1 Nov 2019 10:09:40 -0700 Subject: [PATCH 08/31] use the found python3 with full path --- python/run-tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/run-tests.py b/python/run-tests.py index ecd2ff646535..ac8ab38fe43e 100755 --- a/python/run-tests.py +++ b/python/run-tests.py @@ -187,7 +187,7 @@ def get_default_python_executables(): else: LOGGER.warning("Not testing against `python3.6` because it could not be found; falling" " back to %s instead" % p) - python_execs.insert(0, "python3") + python_execs.insert(0, p) else: LOGGER.error("No python3 executable found. Exiting!") os._exit(1) From 425976f7c3c83b1f8765916a2425165aec5a952f Mon Sep 17 00:00:00 2001 From: shane knapp Date: Fri, 1 Nov 2019 10:43:54 -0700 Subject: [PATCH 09/31] python style --- python/flycheck_run-tests.py | 346 +++++++++++++++++++++++++++++++++++ python/run-tests.py | 12 +- 2 files changed, 352 insertions(+), 6 deletions(-) create mode 100644 python/flycheck_run-tests.py diff --git a/python/flycheck_run-tests.py b/python/flycheck_run-tests.py new file mode 100644 index 000000000000..f45e5787b05b --- /dev/null +++ b/python/flycheck_run-tests.py @@ -0,0 +1,346 @@ +#!/usr/bin/env python + +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from __future__ import print_function +import logging +from argparse import ArgumentParser +import os +import re +import shutil +import subprocess +import sys +import tempfile +from threading import Thread, Lock +import time +import uuid +if sys.version < '3': + import Queue +else: + import queue as Queue +from multiprocessing import Manager + + +# Append `SPARK_HOME/dev` to the Python path so that we can import the sparktestsupport module +sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), "../dev/")) + + +from sparktestsupport import SPARK_HOME # noqa (suppress pep8 warnings) +from sparktestsupport.shellutils import which, subprocess_check_output # noqa +from sparktestsupport.modules import all_modules, pyspark_sql # noqa + + +python_modules = dict((m.name, m) for m in all_modules if m.python_test_goals if m.name != 'root') + + +def print_red(text): + print('\033[31m' + text + '\033[0m') + + +SKIPPED_TESTS = Manager().dict() +LOG_FILE = os.path.join(SPARK_HOME, "python/unit-tests.log") +FAILURE_REPORTING_LOCK = Lock() +LOGGER = logging.getLogger() + +PYTHON_MAJOR_VERSION = 3 +PYTHON_MINOR_VERSION = 6 +PYTHON_MICRO_VERSION = 0 + +# Find out where the assembly jars are located. +# TODO: revisit for Scala 2.13 +for scala in ["2.12"]: + build_dir = os.path.join(SPARK_HOME, "assembly", "target", "scala-" + scala) + if os.path.isdir(build_dir): + SPARK_DIST_CLASSPATH = os.path.join(build_dir, "jars", "*") + break +else: + raise Exception("Cannot find assembly build directory, please build Spark first.") + + +def run_individual_python_test(target_dir, test_name, pyspark_python): + env = dict(os.environ) + env.update({ + 'SPARK_DIST_CLASSPATH': SPARK_DIST_CLASSPATH, + 'SPARK_TESTING': '1', + 'SPARK_PREPEND_CLASSES': '1', + 'PYSPARK_PYTHON': which(pyspark_python), + 'PYSPARK_DRIVER_PYTHON': which(pyspark_python) + }) + + # Create a unique temp directory under 'target/' for each run. The TMPDIR variable is + # recognized by the tempfile module to override the default system temp directory. + tmp_dir = os.path.join(target_dir, str(uuid.uuid4())) + while os.path.isdir(tmp_dir): + tmp_dir = os.path.join(target_dir, str(uuid.uuid4())) + os.mkdir(tmp_dir) + env["TMPDIR"] = tmp_dir + + # Also override the JVM's temp directory by setting driver and executor options. + spark_args = [ + "--conf", "spark.driver.extraJavaOptions=-Djava.io.tmpdir={0}".format(tmp_dir), + "--conf", "spark.executor.extraJavaOptions=-Djava.io.tmpdir={0}".format(tmp_dir), + "pyspark-shell" + ] + env["PYSPARK_SUBMIT_ARGS"] = " ".join(spark_args) + + LOGGER.info("Starting test(%s): %s", pyspark_python, test_name) + start_time = time.time() + try: + per_test_output = tempfile.TemporaryFile() + retcode = subprocess.Popen( + [os.path.join(SPARK_HOME, "bin/pyspark")] + test_name.split(), + stderr=per_test_output, stdout=per_test_output, env=env).wait() + shutil.rmtree(tmp_dir, ignore_errors=True) + except: + LOGGER.exception("Got exception while running %s with %s", test_name, pyspark_python) + # Here, we use os._exit() instead of sys.exit() in order to force Python to exit even if + # this code is invoked from a thread other than the main thread. + os._exit(1) + duration = time.time() - start_time + # Exit on the first failure. 
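+    # retcode is the exit status of the pyspark subprocess launched above;
+    # anything non-zero means at least one test in this goal failed, so the
+    # captured output is appended to LOG_FILE before the whole run aborts.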
+ if retcode != 0: + try: + with FAILURE_REPORTING_LOCK: + with open(LOG_FILE, 'ab') as log_file: + per_test_output.seek(0) + log_file.writelines(per_test_output) + per_test_output.seek(0) + for line in per_test_output: + decoded_line = line.decode("utf-8", "replace") + if not re.match('[0-9]+', decoded_line): + print(decoded_line, end='') + per_test_output.close() + except: + LOGGER.exception("Got an exception while trying to print failed test output") + finally: + print_red("\nHad test failures in %s with %s; see logs." % (test_name, pyspark_python)) + # Here, we use os._exit() instead of sys.exit() in order to force Python to exit even if + # this code is invoked from a thread other than the main thread. + os._exit(-1) + else: + skipped_counts = 0 + try: + per_test_output.seek(0) + # Here expects skipped test output from unittest when verbosity level is + # 2 (or --verbose option is enabled). + decoded_lines = map(lambda line: line.decode("utf-8", "replace"), iter(per_test_output)) + skipped_tests = list(filter( + lambda line: re.search(r'test_.* \(pyspark\..*\) ... (skip|SKIP)', line), + decoded_lines)) + skipped_counts = len(skipped_tests) + if skipped_counts > 0: + key = (pyspark_python, test_name) + SKIPPED_TESTS[key] = skipped_tests + per_test_output.close() + except: + import traceback + print_red("\nGot an exception while trying to store " + "skipped test output:\n%s" % traceback.format_exc()) + # Here, we use os._exit() instead of sys.exit() in order to force Python to exit even if + # this code is invoked from a thread other than the main thread. + os._exit(-1) + if skipped_counts != 0: + LOGGER.info( + "Finished test(%s): %s (%is) ... %s tests were skipped", pyspark_python, test_name, + duration, skipped_counts) + else: + LOGGER.info( + "Finished test(%s): %s (%is)", pyspark_python, test_name, duration) + + +def get_default_python_executables(): + python_execs = [x for x in ["python3.6", "pypy"] if which(x)] + + if "python3.6" not in python_execs: + p = which("python3") + + if p: + py_out = subprocess.run([p, "--version"], stderr=subprocess.PIPE) + py_out = py_out.stderr.decode("utf-8") + python_version = tuple([int(i) for i in re.findall(r"\d", py_out)]) + + if python_version < (PYTHON_MAJOR_VERSION, PYTHON_MINOR_VERSION, PYTHON_MICRO_VERSION): + LOGGER.error("Python version %s.%s.%s is lower than minimum " + "required: %s.%s.%s" % (python_version[0], + python_version[1], + python_version[2], + PYTHON_MAJOR_VERSION, + PYTHON_MINOR_VERSION, + PYTHON_MICRO_VERSION)) + LOGGER.error("Exiting!") + os._exit(1) + else: + LOGGER.warning("Not testing against `python3.6` because it could not be found; " + "falling back to %s instead" % p) + python_execs.insert(0, p) + else: + LOGGER.error("No python3 executable found. 
Exiting!") + os._exit(1) + return python_execs + + +def parse_opts(): + parser = ArgumentParser( + prog="run-tests" + ) + parser.add_argument( + "--python-executables", type=str, default=','.join(get_default_python_executables()), + help="A comma-separated list of Python executables to test against (default: %(default)s)" + ) + parser.add_argument( + "--modules", type=str, + default=",".join(sorted(python_modules.keys())), + help="A comma-separated list of Python modules to test (default: %(default)s)" + ) + parser.add_argument( + "-p", "--parallelism", type=int, default=4, + help="The number of suites to test in parallel (default %(default)d)" + ) + parser.add_argument( + "--verbose", action="store_true", + help="Enable additional debug logging" + ) + + group = parser.add_argument_group("Developer Options") + group.add_argument( + "--testnames", type=str, + default=None, + help=( + "A comma-separated list of specific modules, classes and functions of doctest " + "or unittest to test. " + "For example, 'pyspark.sql.foo' to run the module as unittests or doctests, " + "'pyspark.sql.tests FooTests' to run the specific class of unittests, " + "'pyspark.sql.tests FooTests.test_foo' to run the specific unittest in the class. " + "'--modules' option is ignored if they are given.") + ) + + args, unknown = parser.parse_known_args() + if unknown: + parser.error("Unsupported arguments: %s" % ' '.join(unknown)) + if args.parallelism < 1: + parser.error("Parallelism cannot be less than 1") + return args + + +def _check_coverage(python_exec): + # Make sure if coverage is installed. + try: + subprocess_check_output( + [python_exec, "-c", "import coverage"], + stderr=open(os.devnull, 'w')) + except: + print_red("Coverage is not installed in Python executable '%s' " + "but 'COVERAGE_PROCESS_START' environment variable is set, " + "exiting." % python_exec) + sys.exit(-1) + + +def main(): + opts = parse_opts() + if opts.verbose: + log_level = logging.DEBUG + else: + log_level = logging.INFO + should_test_modules = opts.testnames is None + logging.basicConfig(stream=sys.stdout, level=log_level, format="%(message)s") + LOGGER.info("Running PySpark tests. Output is in %s", LOG_FILE) + if os.path.exists(LOG_FILE): + os.remove(LOG_FILE) + python_execs = opts.python_executables.split(',') + LOGGER.info("Will test against the following Python executables: %s", python_execs) + + if should_test_modules: + modules_to_test = [] + for module_name in opts.modules.split(','): + if module_name in python_modules: + modules_to_test.append(python_modules[module_name]) + else: + print("Error: unrecognized module '%s'. Supported modules: %s" % + (module_name, ", ".join(python_modules))) + sys.exit(-1) + LOGGER.info("Will test the following Python modules: %s", [x.name for x in modules_to_test]) + else: + testnames_to_test = opts.testnames.split(',') + LOGGER.info("Will test the following Python tests: %s", testnames_to_test) + + task_queue = Queue.PriorityQueue() + for python_exec in python_execs: + # Check if the python executable has coverage installed when 'COVERAGE_PROCESS_START' + # environmental variable is set. 
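+        # (_check_coverage() below exits the run if the 'coverage' module
+        # cannot be imported by that executable.)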
+ if "COVERAGE_PROCESS_START" in os.environ: + _check_coverage(python_exec) + + python_implementation = subprocess_check_output( + [python_exec, "-c", "import platform; print(platform.python_implementation())"], + universal_newlines=True).strip() + LOGGER.debug("%s python_implementation is %s", python_exec, python_implementation) + LOGGER.debug("%s version is: %s", python_exec, subprocess_check_output( + [python_exec, "--version"], stderr=subprocess.STDOUT, universal_newlines=True).strip()) + if should_test_modules: + for module in modules_to_test: + if python_implementation not in module.blacklisted_python_implementations: + for test_goal in module.python_test_goals: + heavy_tests = ['pyspark.streaming.tests', 'pyspark.mllib.tests', + 'pyspark.tests', 'pyspark.sql.tests', 'pyspark.ml.tests'] + if any(map(lambda prefix: test_goal.startswith(prefix), heavy_tests)): + priority = 0 + else: + priority = 100 + task_queue.put((priority, (python_exec, test_goal))) + else: + for test_goal in testnames_to_test: + task_queue.put((0, (python_exec, test_goal))) + + # Create the target directory before starting tasks to avoid races. + target_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), 'target')) + if not os.path.isdir(target_dir): + os.mkdir(target_dir) + + def process_queue(task_queue): + while True: + try: + (priority, (python_exec, test_goal)) = task_queue.get_nowait() + except Queue.Empty: + break + try: + run_individual_python_test(target_dir, test_goal, python_exec) + finally: + task_queue.task_done() + + start_time = time.time() + for _ in range(opts.parallelism): + worker = Thread(target=process_queue, args=(task_queue,)) + worker.daemon = True + worker.start() + try: + task_queue.join() + except (KeyboardInterrupt, SystemExit): + print_red("Exiting due to interrupt") + sys.exit(-1) + total_duration = time.time() - start_time + LOGGER.info("Tests passed in %i seconds", total_duration) + + for key, lines in sorted(SKIPPED_TESTS.items()): + pyspark_python, test_name = key + LOGGER.info("\nSkipped tests in %s with %s:" % (test_name, pyspark_python)) + for line in lines: + LOGGER.info(" %s" % line.rstrip()) + + +if __name__ == "__main__": + main() diff --git a/python/run-tests.py b/python/run-tests.py index ac8ab38fe43e..f45e5787b05b 100755 --- a/python/run-tests.py +++ b/python/run-tests.py @@ -57,9 +57,9 @@ def print_red(text): FAILURE_REPORTING_LOCK = Lock() LOGGER = logging.getLogger() -PYTHON_MAJOR_VERSION=3 -PYTHON_MINOR_VERSION=6 -PYTHON_MICRO_VERSION=0 +PYTHON_MAJOR_VERSION = 3 +PYTHON_MINOR_VERSION = 6 +PYTHON_MICRO_VERSION = 0 # Find out where the assembly jars are located. # TODO: revisit for Scala 2.13 @@ -172,7 +172,7 @@ def get_default_python_executables(): if p: py_out = subprocess.run([p, "--version"], stderr=subprocess.PIPE) py_out = py_out.stderr.decode("utf-8") - python_version = tuple([int(i) for i in re.findall("\d", py_out)]) + python_version = tuple([int(i) for i in re.findall(r"\d", py_out)]) if python_version < (PYTHON_MAJOR_VERSION, PYTHON_MINOR_VERSION, PYTHON_MICRO_VERSION): LOGGER.error("Python version %s.%s.%s is lower than minimum " @@ -185,8 +185,8 @@ def get_default_python_executables(): LOGGER.error("Exiting!") os._exit(1) else: - LOGGER.warning("Not testing against `python3.6` because it could not be found; falling" - " back to %s instead" % p) + LOGGER.warning("Not testing against `python3.6` because it could not be found; " + "falling back to %s instead" % p) python_execs.insert(0, p) else: LOGGER.error("No python3 executable found. 
Exiting!") From 074947b274fb4e7ace9a405bcd1dbc9b95ee89d4 Mon Sep 17 00:00:00 2001 From: shane Date: Fri, 1 Nov 2019 10:46:16 -0700 Subject: [PATCH 10/31] Delete flycheck_run-tests.py no clue how this got here --- python/flycheck_run-tests.py | 346 ----------------------------------- 1 file changed, 346 deletions(-) delete mode 100644 python/flycheck_run-tests.py diff --git a/python/flycheck_run-tests.py b/python/flycheck_run-tests.py deleted file mode 100644 index f45e5787b05b..000000000000 --- a/python/flycheck_run-tests.py +++ /dev/null @@ -1,346 +0,0 @@ -#!/usr/bin/env python - -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -from __future__ import print_function -import logging -from argparse import ArgumentParser -import os -import re -import shutil -import subprocess -import sys -import tempfile -from threading import Thread, Lock -import time -import uuid -if sys.version < '3': - import Queue -else: - import queue as Queue -from multiprocessing import Manager - - -# Append `SPARK_HOME/dev` to the Python path so that we can import the sparktestsupport module -sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), "../dev/")) - - -from sparktestsupport import SPARK_HOME # noqa (suppress pep8 warnings) -from sparktestsupport.shellutils import which, subprocess_check_output # noqa -from sparktestsupport.modules import all_modules, pyspark_sql # noqa - - -python_modules = dict((m.name, m) for m in all_modules if m.python_test_goals if m.name != 'root') - - -def print_red(text): - print('\033[31m' + text + '\033[0m') - - -SKIPPED_TESTS = Manager().dict() -LOG_FILE = os.path.join(SPARK_HOME, "python/unit-tests.log") -FAILURE_REPORTING_LOCK = Lock() -LOGGER = logging.getLogger() - -PYTHON_MAJOR_VERSION = 3 -PYTHON_MINOR_VERSION = 6 -PYTHON_MICRO_VERSION = 0 - -# Find out where the assembly jars are located. -# TODO: revisit for Scala 2.13 -for scala in ["2.12"]: - build_dir = os.path.join(SPARK_HOME, "assembly", "target", "scala-" + scala) - if os.path.isdir(build_dir): - SPARK_DIST_CLASSPATH = os.path.join(build_dir, "jars", "*") - break -else: - raise Exception("Cannot find assembly build directory, please build Spark first.") - - -def run_individual_python_test(target_dir, test_name, pyspark_python): - env = dict(os.environ) - env.update({ - 'SPARK_DIST_CLASSPATH': SPARK_DIST_CLASSPATH, - 'SPARK_TESTING': '1', - 'SPARK_PREPEND_CLASSES': '1', - 'PYSPARK_PYTHON': which(pyspark_python), - 'PYSPARK_DRIVER_PYTHON': which(pyspark_python) - }) - - # Create a unique temp directory under 'target/' for each run. The TMPDIR variable is - # recognized by the tempfile module to override the default system temp directory. 
- tmp_dir = os.path.join(target_dir, str(uuid.uuid4())) - while os.path.isdir(tmp_dir): - tmp_dir = os.path.join(target_dir, str(uuid.uuid4())) - os.mkdir(tmp_dir) - env["TMPDIR"] = tmp_dir - - # Also override the JVM's temp directory by setting driver and executor options. - spark_args = [ - "--conf", "spark.driver.extraJavaOptions=-Djava.io.tmpdir={0}".format(tmp_dir), - "--conf", "spark.executor.extraJavaOptions=-Djava.io.tmpdir={0}".format(tmp_dir), - "pyspark-shell" - ] - env["PYSPARK_SUBMIT_ARGS"] = " ".join(spark_args) - - LOGGER.info("Starting test(%s): %s", pyspark_python, test_name) - start_time = time.time() - try: - per_test_output = tempfile.TemporaryFile() - retcode = subprocess.Popen( - [os.path.join(SPARK_HOME, "bin/pyspark")] + test_name.split(), - stderr=per_test_output, stdout=per_test_output, env=env).wait() - shutil.rmtree(tmp_dir, ignore_errors=True) - except: - LOGGER.exception("Got exception while running %s with %s", test_name, pyspark_python) - # Here, we use os._exit() instead of sys.exit() in order to force Python to exit even if - # this code is invoked from a thread other than the main thread. - os._exit(1) - duration = time.time() - start_time - # Exit on the first failure. - if retcode != 0: - try: - with FAILURE_REPORTING_LOCK: - with open(LOG_FILE, 'ab') as log_file: - per_test_output.seek(0) - log_file.writelines(per_test_output) - per_test_output.seek(0) - for line in per_test_output: - decoded_line = line.decode("utf-8", "replace") - if not re.match('[0-9]+', decoded_line): - print(decoded_line, end='') - per_test_output.close() - except: - LOGGER.exception("Got an exception while trying to print failed test output") - finally: - print_red("\nHad test failures in %s with %s; see logs." % (test_name, pyspark_python)) - # Here, we use os._exit() instead of sys.exit() in order to force Python to exit even if - # this code is invoked from a thread other than the main thread. - os._exit(-1) - else: - skipped_counts = 0 - try: - per_test_output.seek(0) - # Here expects skipped test output from unittest when verbosity level is - # 2 (or --verbose option is enabled). - decoded_lines = map(lambda line: line.decode("utf-8", "replace"), iter(per_test_output)) - skipped_tests = list(filter( - lambda line: re.search(r'test_.* \(pyspark\..*\) ... (skip|SKIP)', line), - decoded_lines)) - skipped_counts = len(skipped_tests) - if skipped_counts > 0: - key = (pyspark_python, test_name) - SKIPPED_TESTS[key] = skipped_tests - per_test_output.close() - except: - import traceback - print_red("\nGot an exception while trying to store " - "skipped test output:\n%s" % traceback.format_exc()) - # Here, we use os._exit() instead of sys.exit() in order to force Python to exit even if - # this code is invoked from a thread other than the main thread. - os._exit(-1) - if skipped_counts != 0: - LOGGER.info( - "Finished test(%s): %s (%is) ... 
%s tests were skipped", pyspark_python, test_name, - duration, skipped_counts) - else: - LOGGER.info( - "Finished test(%s): %s (%is)", pyspark_python, test_name, duration) - - -def get_default_python_executables(): - python_execs = [x for x in ["python3.6", "pypy"] if which(x)] - - if "python3.6" not in python_execs: - p = which("python3") - - if p: - py_out = subprocess.run([p, "--version"], stderr=subprocess.PIPE) - py_out = py_out.stderr.decode("utf-8") - python_version = tuple([int(i) for i in re.findall(r"\d", py_out)]) - - if python_version < (PYTHON_MAJOR_VERSION, PYTHON_MINOR_VERSION, PYTHON_MICRO_VERSION): - LOGGER.error("Python version %s.%s.%s is lower than minimum " - "required: %s.%s.%s" % (python_version[0], - python_version[1], - python_version[2], - PYTHON_MAJOR_VERSION, - PYTHON_MINOR_VERSION, - PYTHON_MICRO_VERSION)) - LOGGER.error("Exiting!") - os._exit(1) - else: - LOGGER.warning("Not testing against `python3.6` because it could not be found; " - "falling back to %s instead" % p) - python_execs.insert(0, p) - else: - LOGGER.error("No python3 executable found. Exiting!") - os._exit(1) - return python_execs - - -def parse_opts(): - parser = ArgumentParser( - prog="run-tests" - ) - parser.add_argument( - "--python-executables", type=str, default=','.join(get_default_python_executables()), - help="A comma-separated list of Python executables to test against (default: %(default)s)" - ) - parser.add_argument( - "--modules", type=str, - default=",".join(sorted(python_modules.keys())), - help="A comma-separated list of Python modules to test (default: %(default)s)" - ) - parser.add_argument( - "-p", "--parallelism", type=int, default=4, - help="The number of suites to test in parallel (default %(default)d)" - ) - parser.add_argument( - "--verbose", action="store_true", - help="Enable additional debug logging" - ) - - group = parser.add_argument_group("Developer Options") - group.add_argument( - "--testnames", type=str, - default=None, - help=( - "A comma-separated list of specific modules, classes and functions of doctest " - "or unittest to test. " - "For example, 'pyspark.sql.foo' to run the module as unittests or doctests, " - "'pyspark.sql.tests FooTests' to run the specific class of unittests, " - "'pyspark.sql.tests FooTests.test_foo' to run the specific unittest in the class. " - "'--modules' option is ignored if they are given.") - ) - - args, unknown = parser.parse_known_args() - if unknown: - parser.error("Unsupported arguments: %s" % ' '.join(unknown)) - if args.parallelism < 1: - parser.error("Parallelism cannot be less than 1") - return args - - -def _check_coverage(python_exec): - # Make sure if coverage is installed. - try: - subprocess_check_output( - [python_exec, "-c", "import coverage"], - stderr=open(os.devnull, 'w')) - except: - print_red("Coverage is not installed in Python executable '%s' " - "but 'COVERAGE_PROCESS_START' environment variable is set, " - "exiting." % python_exec) - sys.exit(-1) - - -def main(): - opts = parse_opts() - if opts.verbose: - log_level = logging.DEBUG - else: - log_level = logging.INFO - should_test_modules = opts.testnames is None - logging.basicConfig(stream=sys.stdout, level=log_level, format="%(message)s") - LOGGER.info("Running PySpark tests. 
Output is in %s", LOG_FILE) - if os.path.exists(LOG_FILE): - os.remove(LOG_FILE) - python_execs = opts.python_executables.split(',') - LOGGER.info("Will test against the following Python executables: %s", python_execs) - - if should_test_modules: - modules_to_test = [] - for module_name in opts.modules.split(','): - if module_name in python_modules: - modules_to_test.append(python_modules[module_name]) - else: - print("Error: unrecognized module '%s'. Supported modules: %s" % - (module_name, ", ".join(python_modules))) - sys.exit(-1) - LOGGER.info("Will test the following Python modules: %s", [x.name for x in modules_to_test]) - else: - testnames_to_test = opts.testnames.split(',') - LOGGER.info("Will test the following Python tests: %s", testnames_to_test) - - task_queue = Queue.PriorityQueue() - for python_exec in python_execs: - # Check if the python executable has coverage installed when 'COVERAGE_PROCESS_START' - # environmental variable is set. - if "COVERAGE_PROCESS_START" in os.environ: - _check_coverage(python_exec) - - python_implementation = subprocess_check_output( - [python_exec, "-c", "import platform; print(platform.python_implementation())"], - universal_newlines=True).strip() - LOGGER.debug("%s python_implementation is %s", python_exec, python_implementation) - LOGGER.debug("%s version is: %s", python_exec, subprocess_check_output( - [python_exec, "--version"], stderr=subprocess.STDOUT, universal_newlines=True).strip()) - if should_test_modules: - for module in modules_to_test: - if python_implementation not in module.blacklisted_python_implementations: - for test_goal in module.python_test_goals: - heavy_tests = ['pyspark.streaming.tests', 'pyspark.mllib.tests', - 'pyspark.tests', 'pyspark.sql.tests', 'pyspark.ml.tests'] - if any(map(lambda prefix: test_goal.startswith(prefix), heavy_tests)): - priority = 0 - else: - priority = 100 - task_queue.put((priority, (python_exec, test_goal))) - else: - for test_goal in testnames_to_test: - task_queue.put((0, (python_exec, test_goal))) - - # Create the target directory before starting tasks to avoid races. 
- target_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), 'target')) - if not os.path.isdir(target_dir): - os.mkdir(target_dir) - - def process_queue(task_queue): - while True: - try: - (priority, (python_exec, test_goal)) = task_queue.get_nowait() - except Queue.Empty: - break - try: - run_individual_python_test(target_dir, test_goal, python_exec) - finally: - task_queue.task_done() - - start_time = time.time() - for _ in range(opts.parallelism): - worker = Thread(target=process_queue, args=(task_queue,)) - worker.daemon = True - worker.start() - try: - task_queue.join() - except (KeyboardInterrupt, SystemExit): - print_red("Exiting due to interrupt") - sys.exit(-1) - total_duration = time.time() - start_time - LOGGER.info("Tests passed in %i seconds", total_duration) - - for key, lines in sorted(SKIPPED_TESTS.items()): - pyspark_python, test_name = key - LOGGER.info("\nSkipped tests in %s with %s:" % (test_name, pyspark_python)) - for line in lines: - LOGGER.info(" %s" % line.rstrip()) - - -if __name__ == "__main__": - main() From 9b52677e178374c5e917a2f7006b55d205e10eb9 Mon Sep 17 00:00:00 2001 From: shane knapp Date: Tue, 5 Nov 2019 10:34:51 -0800 Subject: [PATCH 11/31] move python version check to bash wrapper --- python/run-tests | 8 +++++++- python/run-tests.py | 28 ++-------------------------- 2 files changed, 9 insertions(+), 27 deletions(-) diff --git a/python/run-tests b/python/run-tests index 24949657ed7a..b8c64d8a295a 100755 --- a/python/run-tests +++ b/python/run-tests @@ -21,4 +21,10 @@ FWDIR="$(cd "`dirname $0`"/..; pwd)" cd "$FWDIR" -exec python -u ./python/run-tests.py "$@" +PYTHON_VERSION_CHECK=$(python3 -c 'import sys; print(sys.version_info < (3, 6, 0))') +if [[ "$PYTHON_VERSION_CHECK" == "True" ]]; then + echo "Python versions prior to 3.6 are not supported." + exit -1 +fi + +exec python3 -u ./python/run-tests.py "$@" diff --git a/python/run-tests.py b/python/run-tests.py index f45e5787b05b..467469cbf6b2 100755 --- a/python/run-tests.py +++ b/python/run-tests.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # # Licensed to the Apache Software Foundation (ASF) under one or more @@ -57,10 +57,6 @@ def print_red(text): FAILURE_REPORTING_LOCK = Lock() LOGGER = logging.getLogger() -PYTHON_MAJOR_VERSION = 3 -PYTHON_MINOR_VERSION = 6 -PYTHON_MICRO_VERSION = 0 - # Find out where the assembly jars are located. # TODO: revisit for Scala 2.13 for scala in ["2.12"]: @@ -168,27 +164,7 @@ def get_default_python_executables(): if "python3.6" not in python_execs: p = which("python3") - - if p: - py_out = subprocess.run([p, "--version"], stderr=subprocess.PIPE) - py_out = py_out.stderr.decode("utf-8") - python_version = tuple([int(i) for i in re.findall(r"\d", py_out)]) - - if python_version < (PYTHON_MAJOR_VERSION, PYTHON_MINOR_VERSION, PYTHON_MICRO_VERSION): - LOGGER.error("Python version %s.%s.%s is lower than minimum " - "required: %s.%s.%s" % (python_version[0], - python_version[1], - python_version[2], - PYTHON_MAJOR_VERSION, - PYTHON_MINOR_VERSION, - PYTHON_MICRO_VERSION)) - LOGGER.error("Exiting!") - os._exit(1) - else: - LOGGER.warning("Not testing against `python3.6` because it could not be found; " - "falling back to %s instead" % p) - python_execs.insert(0, p) - else: + if not p: LOGGER.error("No python3 executable found. 
Exiting!") os._exit(1) return python_execs From d3226d24c18bcd49180f454cf56ae80a6701f901 Mon Sep 17 00:00:00 2001 From: shane knapp Date: Tue, 5 Nov 2019 10:36:30 -0800 Subject: [PATCH 12/31] bump test running infra to 3.6 min --- dev/run-tests | 6 +++--- dev/run-tests-jenkins | 6 +++--- dev/run-tests-jenkins.py | 2 +- dev/run-tests.py | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/dev/run-tests b/dev/run-tests index 9cf93d000d0e..bf4b46eb1e65 100755 --- a/dev/run-tests +++ b/dev/run-tests @@ -20,10 +20,10 @@ FWDIR="$(cd "`dirname $0`"/..; pwd)" cd "$FWDIR" -PYTHON_VERSION_CHECK=$(python -c 'import sys; print(sys.version_info < (2, 7, 0))') +PYTHON_VERSION_CHECK=$(python -c 'import sys; print(sys.version_info < (3, 6, 0))') if [[ "$PYTHON_VERSION_CHECK" == "True" ]]; then - echo "Python versions prior to 2.7 are not supported." + echo "Python versions prior to 3.6 are not supported." exit -1 fi -exec python -u ./dev/run-tests.py "$@" +exec python3 -u ./dev/run-tests.py "$@" diff --git a/dev/run-tests-jenkins b/dev/run-tests-jenkins index 5bc03e41d1f2..27beb0352212 100755 --- a/dev/run-tests-jenkins +++ b/dev/run-tests-jenkins @@ -25,10 +25,10 @@ FWDIR="$( cd "$( dirname "$0" )/.." && pwd )" cd "$FWDIR" -PYTHON_VERSION_CHECK=$(python -c 'import sys; print(sys.version_info < (2, 7, 0))') +PYTHON_VERSION_CHECK=$(python -c 'import sys; print(sys.version_info < (3, 6, 0))') if [[ "$PYTHON_VERSION_CHECK" == "True" ]]; then - echo "Python versions prior to 2.7 are not supported." + echo "Python versions prior to 3.6 are not supported." exit -1 fi -exec python -u ./dev/run-tests-jenkins.py "$@" +exec python3 -u ./dev/run-tests-jenkins.py "$@" diff --git a/dev/run-tests-jenkins.py b/dev/run-tests-jenkins.py index e9b0b327603b..6ffe7d8cd136 100755 --- a/dev/run-tests-jenkins.py +++ b/dev/run-tests-jenkins.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # # Licensed to the Apache Software Foundation (ASF) under one or more diff --git a/dev/run-tests.py b/dev/run-tests.py index ea515708124d..7a1729c66474 100755 --- a/dev/run-tests.py +++ b/dev/run-tests.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # # Licensed to the Apache Software Foundation (ASF) under one or more From a8a249e491b6e38d0f0dbe8b02b81b9574384ad4 Mon Sep 17 00:00:00 2001 From: shane knapp Date: Tue, 5 Nov 2019 10:36:51 -0800 Subject: [PATCH 13/31] pesky flycheck wtf --- python/flycheck_run-tests.py | 346 ----------------------------------- 1 file changed, 346 deletions(-) delete mode 100644 python/flycheck_run-tests.py diff --git a/python/flycheck_run-tests.py b/python/flycheck_run-tests.py deleted file mode 100644 index f45e5787b05b..000000000000 --- a/python/flycheck_run-tests.py +++ /dev/null @@ -1,346 +0,0 @@ -#!/usr/bin/env python - -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# - -from __future__ import print_function -import logging -from argparse import ArgumentParser -import os -import re -import shutil -import subprocess -import sys -import tempfile -from threading import Thread, Lock -import time -import uuid -if sys.version < '3': - import Queue -else: - import queue as Queue -from multiprocessing import Manager - - -# Append `SPARK_HOME/dev` to the Python path so that we can import the sparktestsupport module -sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), "../dev/")) - - -from sparktestsupport import SPARK_HOME # noqa (suppress pep8 warnings) -from sparktestsupport.shellutils import which, subprocess_check_output # noqa -from sparktestsupport.modules import all_modules, pyspark_sql # noqa - - -python_modules = dict((m.name, m) for m in all_modules if m.python_test_goals if m.name != 'root') - - -def print_red(text): - print('\033[31m' + text + '\033[0m') - - -SKIPPED_TESTS = Manager().dict() -LOG_FILE = os.path.join(SPARK_HOME, "python/unit-tests.log") -FAILURE_REPORTING_LOCK = Lock() -LOGGER = logging.getLogger() - -PYTHON_MAJOR_VERSION = 3 -PYTHON_MINOR_VERSION = 6 -PYTHON_MICRO_VERSION = 0 - -# Find out where the assembly jars are located. -# TODO: revisit for Scala 2.13 -for scala in ["2.12"]: - build_dir = os.path.join(SPARK_HOME, "assembly", "target", "scala-" + scala) - if os.path.isdir(build_dir): - SPARK_DIST_CLASSPATH = os.path.join(build_dir, "jars", "*") - break -else: - raise Exception("Cannot find assembly build directory, please build Spark first.") - - -def run_individual_python_test(target_dir, test_name, pyspark_python): - env = dict(os.environ) - env.update({ - 'SPARK_DIST_CLASSPATH': SPARK_DIST_CLASSPATH, - 'SPARK_TESTING': '1', - 'SPARK_PREPEND_CLASSES': '1', - 'PYSPARK_PYTHON': which(pyspark_python), - 'PYSPARK_DRIVER_PYTHON': which(pyspark_python) - }) - - # Create a unique temp directory under 'target/' for each run. The TMPDIR variable is - # recognized by the tempfile module to override the default system temp directory. - tmp_dir = os.path.join(target_dir, str(uuid.uuid4())) - while os.path.isdir(tmp_dir): - tmp_dir = os.path.join(target_dir, str(uuid.uuid4())) - os.mkdir(tmp_dir) - env["TMPDIR"] = tmp_dir - - # Also override the JVM's temp directory by setting driver and executor options. - spark_args = [ - "--conf", "spark.driver.extraJavaOptions=-Djava.io.tmpdir={0}".format(tmp_dir), - "--conf", "spark.executor.extraJavaOptions=-Djava.io.tmpdir={0}".format(tmp_dir), - "pyspark-shell" - ] - env["PYSPARK_SUBMIT_ARGS"] = " ".join(spark_args) - - LOGGER.info("Starting test(%s): %s", pyspark_python, test_name) - start_time = time.time() - try: - per_test_output = tempfile.TemporaryFile() - retcode = subprocess.Popen( - [os.path.join(SPARK_HOME, "bin/pyspark")] + test_name.split(), - stderr=per_test_output, stdout=per_test_output, env=env).wait() - shutil.rmtree(tmp_dir, ignore_errors=True) - except: - LOGGER.exception("Got exception while running %s with %s", test_name, pyspark_python) - # Here, we use os._exit() instead of sys.exit() in order to force Python to exit even if - # this code is invoked from a thread other than the main thread. - os._exit(1) - duration = time.time() - start_time - # Exit on the first failure. 
- if retcode != 0: - try: - with FAILURE_REPORTING_LOCK: - with open(LOG_FILE, 'ab') as log_file: - per_test_output.seek(0) - log_file.writelines(per_test_output) - per_test_output.seek(0) - for line in per_test_output: - decoded_line = line.decode("utf-8", "replace") - if not re.match('[0-9]+', decoded_line): - print(decoded_line, end='') - per_test_output.close() - except: - LOGGER.exception("Got an exception while trying to print failed test output") - finally: - print_red("\nHad test failures in %s with %s; see logs." % (test_name, pyspark_python)) - # Here, we use os._exit() instead of sys.exit() in order to force Python to exit even if - # this code is invoked from a thread other than the main thread. - os._exit(-1) - else: - skipped_counts = 0 - try: - per_test_output.seek(0) - # Here expects skipped test output from unittest when verbosity level is - # 2 (or --verbose option is enabled). - decoded_lines = map(lambda line: line.decode("utf-8", "replace"), iter(per_test_output)) - skipped_tests = list(filter( - lambda line: re.search(r'test_.* \(pyspark\..*\) ... (skip|SKIP)', line), - decoded_lines)) - skipped_counts = len(skipped_tests) - if skipped_counts > 0: - key = (pyspark_python, test_name) - SKIPPED_TESTS[key] = skipped_tests - per_test_output.close() - except: - import traceback - print_red("\nGot an exception while trying to store " - "skipped test output:\n%s" % traceback.format_exc()) - # Here, we use os._exit() instead of sys.exit() in order to force Python to exit even if - # this code is invoked from a thread other than the main thread. - os._exit(-1) - if skipped_counts != 0: - LOGGER.info( - "Finished test(%s): %s (%is) ... %s tests were skipped", pyspark_python, test_name, - duration, skipped_counts) - else: - LOGGER.info( - "Finished test(%s): %s (%is)", pyspark_python, test_name, duration) - - -def get_default_python_executables(): - python_execs = [x for x in ["python3.6", "pypy"] if which(x)] - - if "python3.6" not in python_execs: - p = which("python3") - - if p: - py_out = subprocess.run([p, "--version"], stderr=subprocess.PIPE) - py_out = py_out.stderr.decode("utf-8") - python_version = tuple([int(i) for i in re.findall(r"\d", py_out)]) - - if python_version < (PYTHON_MAJOR_VERSION, PYTHON_MINOR_VERSION, PYTHON_MICRO_VERSION): - LOGGER.error("Python version %s.%s.%s is lower than minimum " - "required: %s.%s.%s" % (python_version[0], - python_version[1], - python_version[2], - PYTHON_MAJOR_VERSION, - PYTHON_MINOR_VERSION, - PYTHON_MICRO_VERSION)) - LOGGER.error("Exiting!") - os._exit(1) - else: - LOGGER.warning("Not testing against `python3.6` because it could not be found; " - "falling back to %s instead" % p) - python_execs.insert(0, p) - else: - LOGGER.error("No python3 executable found. 
Exiting!") - os._exit(1) - return python_execs - - -def parse_opts(): - parser = ArgumentParser( - prog="run-tests" - ) - parser.add_argument( - "--python-executables", type=str, default=','.join(get_default_python_executables()), - help="A comma-separated list of Python executables to test against (default: %(default)s)" - ) - parser.add_argument( - "--modules", type=str, - default=",".join(sorted(python_modules.keys())), - help="A comma-separated list of Python modules to test (default: %(default)s)" - ) - parser.add_argument( - "-p", "--parallelism", type=int, default=4, - help="The number of suites to test in parallel (default %(default)d)" - ) - parser.add_argument( - "--verbose", action="store_true", - help="Enable additional debug logging" - ) - - group = parser.add_argument_group("Developer Options") - group.add_argument( - "--testnames", type=str, - default=None, - help=( - "A comma-separated list of specific modules, classes and functions of doctest " - "or unittest to test. " - "For example, 'pyspark.sql.foo' to run the module as unittests or doctests, " - "'pyspark.sql.tests FooTests' to run the specific class of unittests, " - "'pyspark.sql.tests FooTests.test_foo' to run the specific unittest in the class. " - "'--modules' option is ignored if they are given.") - ) - - args, unknown = parser.parse_known_args() - if unknown: - parser.error("Unsupported arguments: %s" % ' '.join(unknown)) - if args.parallelism < 1: - parser.error("Parallelism cannot be less than 1") - return args - - -def _check_coverage(python_exec): - # Make sure if coverage is installed. - try: - subprocess_check_output( - [python_exec, "-c", "import coverage"], - stderr=open(os.devnull, 'w')) - except: - print_red("Coverage is not installed in Python executable '%s' " - "but 'COVERAGE_PROCESS_START' environment variable is set, " - "exiting." % python_exec) - sys.exit(-1) - - -def main(): - opts = parse_opts() - if opts.verbose: - log_level = logging.DEBUG - else: - log_level = logging.INFO - should_test_modules = opts.testnames is None - logging.basicConfig(stream=sys.stdout, level=log_level, format="%(message)s") - LOGGER.info("Running PySpark tests. Output is in %s", LOG_FILE) - if os.path.exists(LOG_FILE): - os.remove(LOG_FILE) - python_execs = opts.python_executables.split(',') - LOGGER.info("Will test against the following Python executables: %s", python_execs) - - if should_test_modules: - modules_to_test = [] - for module_name in opts.modules.split(','): - if module_name in python_modules: - modules_to_test.append(python_modules[module_name]) - else: - print("Error: unrecognized module '%s'. Supported modules: %s" % - (module_name, ", ".join(python_modules))) - sys.exit(-1) - LOGGER.info("Will test the following Python modules: %s", [x.name for x in modules_to_test]) - else: - testnames_to_test = opts.testnames.split(',') - LOGGER.info("Will test the following Python tests: %s", testnames_to_test) - - task_queue = Queue.PriorityQueue() - for python_exec in python_execs: - # Check if the python executable has coverage installed when 'COVERAGE_PROCESS_START' - # environmental variable is set. 
- if "COVERAGE_PROCESS_START" in os.environ: - _check_coverage(python_exec) - - python_implementation = subprocess_check_output( - [python_exec, "-c", "import platform; print(platform.python_implementation())"], - universal_newlines=True).strip() - LOGGER.debug("%s python_implementation is %s", python_exec, python_implementation) - LOGGER.debug("%s version is: %s", python_exec, subprocess_check_output( - [python_exec, "--version"], stderr=subprocess.STDOUT, universal_newlines=True).strip()) - if should_test_modules: - for module in modules_to_test: - if python_implementation not in module.blacklisted_python_implementations: - for test_goal in module.python_test_goals: - heavy_tests = ['pyspark.streaming.tests', 'pyspark.mllib.tests', - 'pyspark.tests', 'pyspark.sql.tests', 'pyspark.ml.tests'] - if any(map(lambda prefix: test_goal.startswith(prefix), heavy_tests)): - priority = 0 - else: - priority = 100 - task_queue.put((priority, (python_exec, test_goal))) - else: - for test_goal in testnames_to_test: - task_queue.put((0, (python_exec, test_goal))) - - # Create the target directory before starting tasks to avoid races. - target_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), 'target')) - if not os.path.isdir(target_dir): - os.mkdir(target_dir) - - def process_queue(task_queue): - while True: - try: - (priority, (python_exec, test_goal)) = task_queue.get_nowait() - except Queue.Empty: - break - try: - run_individual_python_test(target_dir, test_goal, python_exec) - finally: - task_queue.task_done() - - start_time = time.time() - for _ in range(opts.parallelism): - worker = Thread(target=process_queue, args=(task_queue,)) - worker.daemon = True - worker.start() - try: - task_queue.join() - except (KeyboardInterrupt, SystemExit): - print_red("Exiting due to interrupt") - sys.exit(-1) - total_duration = time.time() - start_time - LOGGER.info("Tests passed in %i seconds", total_duration) - - for key, lines in sorted(SKIPPED_TESTS.items()): - pyspark_python, test_name = key - LOGGER.info("\nSkipped tests in %s with %s:" % (test_name, pyspark_python)) - for line in lines: - LOGGER.info(" %s" % line.rstrip()) - - -if __name__ == "__main__": - main() From 8441af0506d76c8351d8decfe37d49425aee04cf Mon Sep 17 00:00:00 2001 From: shane knapp Date: Tue, 5 Nov 2019 12:32:49 -0800 Subject: [PATCH 14/31] need to set PATH here otherwise it will fail the build --- dev/run-tests-jenkins | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dev/run-tests-jenkins b/dev/run-tests-jenkins index 27beb0352212..c3adc696a512 100755 --- a/dev/run-tests-jenkins +++ b/dev/run-tests-jenkins @@ -25,7 +25,9 @@ FWDIR="$( cd "$( dirname "$0" )/.." && pwd )" cd "$FWDIR" -PYTHON_VERSION_CHECK=$(python -c 'import sys; print(sys.version_info < (3, 6, 0))') +export PATH=/home/anaconda/envs/py36/bin:$PATH + +PYTHON_VERSION_CHECK=$(python3 -c 'import sys; print(sys.version_info < (3, 6, 0))') if [[ "$PYTHON_VERSION_CHECK" == "True" ]]; then echo "Python versions prior to 3.6 are not supported." 
exit -1 From cab1683df562ec716a33b3eafe274db2520bed35 Mon Sep 17 00:00:00 2001 From: shane knapp Date: Tue, 5 Nov 2019 12:33:13 -0800 Subject: [PATCH 15/31] s/python/python3 --- dev/run-tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/run-tests b/dev/run-tests index bf4b46eb1e65..143d78ec6373 100755 --- a/dev/run-tests +++ b/dev/run-tests @@ -20,7 +20,7 @@ FWDIR="$(cd "`dirname $0`"/..; pwd)" cd "$FWDIR" -PYTHON_VERSION_CHECK=$(python -c 'import sys; print(sys.version_info < (3, 6, 0))') +PYTHON_VERSION_CHECK=$(python3 -c 'import sys; print(sys.version_info < (3, 6, 0))') if [[ "$PYTHON_VERSION_CHECK" == "True" ]]; then echo "Python versions prior to 3.6 are not supported." exit -1 From a5b056b10b73dcf6c1aaafea65acca87ec03f1ea Mon Sep 17 00:00:00 2001 From: shane knapp Date: Thu, 7 Nov 2019 11:10:40 -0800 Subject: [PATCH 16/31] remove __future__.print_function import as it's not necessary and try to fix build output --- dev/pip-sanity-check.py | 2 -- dev/run-tests-jenkins.py | 1 - dev/run-tests.py | 1 - dev/sparktestsupport/shellutils.py | 1 - python/run-tests.py | 1 - 5 files changed, 6 deletions(-) diff --git a/dev/pip-sanity-check.py b/dev/pip-sanity-check.py index 4171f28684d5..e9f10233b12b 100644 --- a/dev/pip-sanity-check.py +++ b/dev/pip-sanity-check.py @@ -15,8 +15,6 @@ # limitations under the License. # -from __future__ import print_function - from pyspark.sql import SparkSession from pyspark.mllib.linalg import * import sys diff --git a/dev/run-tests-jenkins.py b/dev/run-tests-jenkins.py index 6ffe7d8cd136..5429aeba8ea1 100755 --- a/dev/run-tests-jenkins.py +++ b/dev/run-tests-jenkins.py @@ -17,7 +17,6 @@ # limitations under the License. # -from __future__ import print_function import os import sys import json diff --git a/dev/run-tests.py b/dev/run-tests.py index 7a1729c66474..e8f1bd0b371f 100755 --- a/dev/run-tests.py +++ b/dev/run-tests.py @@ -17,7 +17,6 @@ # limitations under the License. # -from __future__ import print_function import itertools from argparse import ArgumentParser import os diff --git a/dev/sparktestsupport/shellutils.py b/dev/sparktestsupport/shellutils.py index ec6ea86269f5..f9c800b77764 100644 --- a/dev/sparktestsupport/shellutils.py +++ b/dev/sparktestsupport/shellutils.py @@ -15,7 +15,6 @@ # limitations under the License. # -from __future__ import print_function import os import shutil import subprocess diff --git a/python/run-tests.py b/python/run-tests.py index 467469cbf6b2..bf752b111351 100755 --- a/python/run-tests.py +++ b/python/run-tests.py @@ -17,7 +17,6 @@ # limitations under the License. 
# -from __future__ import print_function import logging from argparse import ArgumentParser import os From aa893c60b2af080d887468042c8a2c403b9d4100 Mon Sep 17 00:00:00 2001 From: shane knapp Date: Thu, 7 Nov 2019 13:37:57 -0800 Subject: [PATCH 17/31] python 3.5+ should use subprocess.run, also might fix bytestring encoding --- dev/sparktestsupport/shellutils.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/dev/sparktestsupport/shellutils.py b/dev/sparktestsupport/shellutils.py index f9c800b77764..756137847a1c 100644 --- a/dev/sparktestsupport/shellutils.py +++ b/dev/sparktestsupport/shellutils.py @@ -21,8 +21,6 @@ import sys subprocess_check_output = subprocess.check_output -subprocess_check_call = subprocess.check_call - def exit_from_command_with_retcode(cmd, retcode): if retcode < 0: @@ -56,7 +54,7 @@ def run_cmd(cmd, return_output=False): if return_output: return subprocess_check_output(cmd).decode(sys.getdefaultencoding()) else: - return subprocess_check_call(cmd) + return subprocess.run(cmd, encoding=sys.getdefaultencoding()).returncode except subprocess.CalledProcessError as e: exit_from_command_with_retcode(e.cmd, e.returncode) From 3036fa66826bac32d28cd9b8f39a50a32872393a Mon Sep 17 00:00:00 2001 From: shane knapp Date: Thu, 7 Nov 2019 13:54:32 -0800 Subject: [PATCH 18/31] pycodestyle --- dev/sparktestsupport/shellutils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/dev/sparktestsupport/shellutils.py b/dev/sparktestsupport/shellutils.py index 756137847a1c..cb83f1d81b87 100644 --- a/dev/sparktestsupport/shellutils.py +++ b/dev/sparktestsupport/shellutils.py @@ -22,6 +22,7 @@ subprocess_check_output = subprocess.check_output + def exit_from_command_with_retcode(cmd, retcode): if retcode < 0: print("[error] running", ' '.join(cmd), "; process was terminated by signal", -retcode) From 99a945bc517c4f54d5a8d235a076e5e763ef13c1 Mon Sep 17 00:00:00 2001 From: shane knapp Date: Thu, 7 Nov 2019 14:45:23 -0800 Subject: [PATCH 19/31] check=True needed --- dev/sparktestsupport/shellutils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/sparktestsupport/shellutils.py b/dev/sparktestsupport/shellutils.py index cb83f1d81b87..7a67093755d5 100644 --- a/dev/sparktestsupport/shellutils.py +++ b/dev/sparktestsupport/shellutils.py @@ -55,7 +55,7 @@ def run_cmd(cmd, return_output=False): if return_output: return subprocess_check_output(cmd).decode(sys.getdefaultencoding()) else: - return subprocess.run(cmd, encoding=sys.getdefaultencoding()).returncode + return subprocess.run(cmd, check=True, encoding=sys.getdefaultencoding()).returncode except subprocess.CalledProcessError as e: exit_from_command_with_retcode(e.cmd, e.returncode) From 4986769ea094c97d06f2d328dc086982acd23229 Mon Sep 17 00:00:00 2001 From: shane knapp Date: Thu, 7 Nov 2019 15:25:21 -0800 Subject: [PATCH 20/31] think i found a way to decode the bytestring --- dev/sparktestsupport/shellutils.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/dev/sparktestsupport/shellutils.py b/dev/sparktestsupport/shellutils.py index 7a67093755d5..e28379224f91 100644 --- a/dev/sparktestsupport/shellutils.py +++ b/dev/sparktestsupport/shellutils.py @@ -55,7 +55,12 @@ def run_cmd(cmd, return_output=False): if return_output: return subprocess_check_output(cmd).decode(sys.getdefaultencoding()) else: - return subprocess.run(cmd, check=True, encoding=sys.getdefaultencoding()).returncode + popen = subprocess.Popen(cmd, stdout=subprocess.PIPE, universal_newlines=True) + for stdout_line 
From 4986769ea094c97d06f2d328dc086982acd23229 Mon Sep 17 00:00:00 2001
From: shane knapp
Date: Thu, 7 Nov 2019 15:25:21 -0800
Subject: [PATCH 20/31] think i found a way to decode the bytestring

---
 dev/sparktestsupport/shellutils.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/dev/sparktestsupport/shellutils.py b/dev/sparktestsupport/shellutils.py
index 7a67093755d5..e28379224f91 100644
--- a/dev/sparktestsupport/shellutils.py
+++ b/dev/sparktestsupport/shellutils.py
@@ -55,7 +55,12 @@ def run_cmd(cmd, return_output=False):
         if return_output:
             return subprocess_check_output(cmd).decode(sys.getdefaultencoding())
         else:
-            return subprocess.run(cmd, check=True, encoding=sys.getdefaultencoding()).returncode
+            popen = subprocess.Popen(cmd, stdout=subprocess.PIPE, universal_newlines=True)
+            for stdout_line in popen.stdout:
+                print(stdout_line.decode(sys.getdefaultencoding()), end='')
+            popen.stdout.close()
+            return_code = popen.wait()
+            return return_code
     except subprocess.CalledProcessError as e:
         exit_from_command_with_retcode(e.cmd, e.returncode)

From 465acf183b30cc971ae9fefdc40861cad40a2d34 Mon Sep 17 00:00:00 2001
From: shane knapp
Date: Thu, 7 Nov 2019 16:30:43 -0800
Subject: [PATCH 21/31] encode/recode

---
 dev/sparktestsupport/shellutils.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/dev/sparktestsupport/shellutils.py b/dev/sparktestsupport/shellutils.py
index e28379224f91..807b6cddb6f2 100644
--- a/dev/sparktestsupport/shellutils.py
+++ b/dev/sparktestsupport/shellutils.py
@@ -57,7 +57,8 @@ def run_cmd(cmd, return_output=False):
         else:
             popen = subprocess.Popen(cmd, stdout=subprocess.PIPE, universal_newlines=True)
             for stdout_line in popen.stdout:
-                print(stdout_line.decode(sys.getdefaultencoding()), end='')
+                line = stdout_line.encode(sys.getdefaultencoding())
+                print(line.decode(sys.getdefaultencoding()), end='')
             popen.stdout.close()
             return_code = popen.wait()
             return return_code

From b66b3894c9a0f47e55cbada7b13255cbbb96de8a Mon Sep 17 00:00:00 2001
From: shane knapp
Date: Thu, 7 Nov 2019 17:02:17 -0800
Subject: [PATCH 22/31] reading docs helps

---
 dev/sparktestsupport/shellutils.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/dev/sparktestsupport/shellutils.py b/dev/sparktestsupport/shellutils.py
index 807b6cddb6f2..29773b1bfe35 100644
--- a/dev/sparktestsupport/shellutils.py
+++ b/dev/sparktestsupport/shellutils.py
@@ -55,10 +55,9 @@ def run_cmd(cmd, return_output=False):
         if return_output:
             return subprocess_check_output(cmd).decode(sys.getdefaultencoding())
         else:
-            popen = subprocess.Popen(cmd, stdout=subprocess.PIPE, universal_newlines=True)
+            popen = subprocess.Popen(cmd, stdout=subprocess.PIPE, encoding=sys.getdefaultencoding())
             for stdout_line in popen.stdout:
-                line = stdout_line.encode(sys.getdefaultencoding())
-                print(line.decode(sys.getdefaultencoding()), end='')
+                print(stdout_line.decode(sys.getdefaultencoding()), end='')
             popen.stdout.close()
             return_code = popen.wait()
             return return_code

From 64ccb8102dff674ab0d359797f18edebe34b7ef9 Mon Sep 17 00:00:00 2001
From: shane knapp
Date: Thu, 7 Nov 2019 17:06:00 -0800
Subject: [PATCH 23/31] reading docs helps even more

---
 dev/sparktestsupport/shellutils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dev/sparktestsupport/shellutils.py b/dev/sparktestsupport/shellutils.py
index 29773b1bfe35..8ab784480c60 100644
--- a/dev/sparktestsupport/shellutils.py
+++ b/dev/sparktestsupport/shellutils.py
@@ -55,7 +55,7 @@ def run_cmd(cmd, return_output=False):
         if return_output:
             return subprocess_check_output(cmd).decode(sys.getdefaultencoding())
         else:
-            popen = subprocess.Popen(cmd, stdout=subprocess.PIPE, encoding=sys.getdefaultencoding())
+            popen = subprocess.Popen(cmd, stdout=subprocess.PIPE)
             for stdout_line in popen.stdout:
                 print(stdout_line.decode(sys.getdefaultencoding()), end='')
             popen.stdout.close()
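Note: patches 20-23 trip over the fact that Python 3 offers mutually exclusive ways to
read a child's stdout: leave the pipe in bytes mode and decode each line yourself, or
pass universal_newlines=True / encoding=... and receive str, on which .decode() raises
AttributeError. A short sketch of both working variants (echo is just a stand-in command):

    import subprocess

    # Bytes pipe: each line arrives as bytes and must be decoded by hand.
    p = subprocess.Popen(["echo", "bytes mode"], stdout=subprocess.PIPE)
    for raw_line in p.stdout:
        print(raw_line.decode("utf-8"), end="")
    p.wait()

    # Text pipe (Python 3.6+): Popen decodes, so lines arrive as str.
    p = subprocess.Popen(["echo", "text mode"], stdout=subprocess.PIPE,
                         encoding="utf-8")
    for line in p.stdout:
        print(line, end="")  # already str; no .decode() needed or possible
    p.wait()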
From 16ae236d0748abd5cbc9223480c7a3531a20e183 Mon Sep 17 00:00:00 2001
From: shane knapp
Date: Thu, 7 Nov 2019 17:45:23 -0800
Subject: [PATCH 24/31] whack-a-mole with bytestring decoding

---
 dev/run-tests.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dev/run-tests.py b/dev/run-tests.py
index e8f1bd0b371f..d08e30f8785a 100755
--- a/dev/run-tests.py
+++ b/dev/run-tests.py
@@ -264,7 +264,7 @@ def exec_sbt(sbt_args=()):
     echo_proc.wait()
     for line in iter(sbt_proc.stdout.readline, b''):
         if not sbt_output_filter.match(line):
-            print(line, end='')
+            print(line.decode(sys.getdefaultencoding()), end='')
     retcode = sbt_proc.wait()
 
     if retcode != 0:

From 5b61ade3266d7af3418edacdf004a19c06e3a5c1 Mon Sep 17 00:00:00 2001
From: shane knapp
Date: Thu, 7 Nov 2019 19:21:04 -0800
Subject: [PATCH 25/31] remove pypy2.5.1

---
 python/run-tests.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/run-tests.py b/python/run-tests.py
index bf752b111351..ebf6b63f64f4 100755
--- a/python/run-tests.py
+++ b/python/run-tests.py
@@ -159,7 +159,7 @@ def run_individual_python_test(target_dir, test_name, pyspark_python):
 
 
 def get_default_python_executables():
-    python_execs = [x for x in ["python3.6", "pypy"] if which(x)]
+    python_execs = [x for x in ["python3.6"] if which(x)]
     if "python3.6" not in python_execs:
         p = which("python3")

From 3bfd18c05ba6cc6265050cbdf32a2d33716edc30 Mon Sep 17 00:00:00 2001
From: shane knapp
Date: Thu, 7 Nov 2019 20:29:27 -0800
Subject: [PATCH 26/31] specify encoding rather than system default

---
 dev/run-tests.py                   | 2 +-
 dev/sparktestsupport/shellutils.py | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/dev/run-tests.py b/dev/run-tests.py
index d08e30f8785a..d82d15caba45 100755
--- a/dev/run-tests.py
+++ b/dev/run-tests.py
@@ -264,7 +264,7 @@ def exec_sbt(sbt_args=()):
     echo_proc.wait()
     for line in iter(sbt_proc.stdout.readline, b''):
         if not sbt_output_filter.match(line):
-            print(line.decode(sys.getdefaultencoding()), end='')
+            print(line.decode('utf-8', end='')
     retcode = sbt_proc.wait()
 
     if retcode != 0:

diff --git a/dev/sparktestsupport/shellutils.py b/dev/sparktestsupport/shellutils.py
index 8ab784480c60..b52a43db4e6c 100644
--- a/dev/sparktestsupport/shellutils.py
+++ b/dev/sparktestsupport/shellutils.py
@@ -53,11 +53,11 @@ def run_cmd(cmd, return_output=False):
         cmd = cmd.split()
     try:
         if return_output:
-            return subprocess_check_output(cmd).decode(sys.getdefaultencoding())
+            return subprocess_check_output(cmd).decode('utf-8')
         else:
             popen = subprocess.Popen(cmd, stdout=subprocess.PIPE)
             for stdout_line in popen.stdout:
-                print(stdout_line.decode(sys.getdefaultencoding()), end='')
+                print(stdout_line.decode('utf-8', end='')
             popen.stdout.close()
             return_code = popen.wait()
             return return_code

From 137371d434f3c536c331a01dbb00a7929f27f9f8 Mon Sep 17 00:00:00 2001
From: shane knapp
Date: Thu, 7 Nov 2019 20:39:51 -0800
Subject: [PATCH 27/31] parens ftw

---
 dev/run-tests.py                   | 2 +-
 dev/sparktestsupport/shellutils.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/dev/run-tests.py b/dev/run-tests.py
index d82d15caba45..82277720bb52 100755
--- a/dev/run-tests.py
+++ b/dev/run-tests.py
@@ -264,7 +264,7 @@ def exec_sbt(sbt_args=()):
     echo_proc.wait()
     for line in iter(sbt_proc.stdout.readline, b''):
         if not sbt_output_filter.match(line):
-            print(line.decode('utf-8', end='')
+            print(line.decode('utf-8'), end='')
     retcode = sbt_proc.wait()
 
     if retcode != 0:

diff --git a/dev/sparktestsupport/shellutils.py b/dev/sparktestsupport/shellutils.py
index b52a43db4e6c..cd2c160e0580 100644
--- a/dev/sparktestsupport/shellutils.py
+++ b/dev/sparktestsupport/shellutils.py
@@ -57,7 +57,7 @@ def run_cmd(cmd, return_output=False):
         else:
             popen = subprocess.Popen(cmd, stdout=subprocess.PIPE)
             for stdout_line in popen.stdout:
-                print(stdout_line.decode('utf-8', end='')
+                print(stdout_line.decode('utf-8'), end='')
             popen.stdout.close()
             return_code = popen.wait()
             return return_code
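Note: on CPython 3, sys.getdefaultencoding() is effectively always "utf-8", so patch 26
trades an indirection for an explicit codec rather than changing behavior. The
filter-then-decode loop it touches can be exercised standalone; sbt_output_filter here
is a hypothetical stand-in for the bytes regex dev/run-tests.py actually uses:

    import re
    import subprocess

    # Drop noisy resolver lines, print everything else decoded as UTF-8.
    sbt_output_filter = re.compile(br"^\[info\] Resolving")  # assumed pattern

    proc = subprocess.Popen(["echo", "[warn] something worth seeing"],
                            stdout=subprocess.PIPE)
    for line in iter(proc.stdout.readline, b""):
        if not sbt_output_filter.match(line):
            print(line.decode("utf-8"), end="")
    proc.wait()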
From 48e318b9aa41e90f1768e5b6a583215a0cbb513a Mon Sep 17 00:00:00 2001
From: shane knapp
Date: Fri, 8 Nov 2019 09:04:53 -0800
Subject: [PATCH 28/31] try subprocess.run()

---
 dev/sparktestsupport/shellutils.py | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/dev/sparktestsupport/shellutils.py b/dev/sparktestsupport/shellutils.py
index cd2c160e0580..d9cb8aa45c8d 100644
--- a/dev/sparktestsupport/shellutils.py
+++ b/dev/sparktestsupport/shellutils.py
@@ -55,12 +55,7 @@ def run_cmd(cmd, return_output=False):
         if return_output:
             return subprocess_check_output(cmd).decode('utf-8')
         else:
-            popen = subprocess.Popen(cmd, stdout=subprocess.PIPE)
-            for stdout_line in popen.stdout:
-                print(stdout_line.decode('utf-8'), end='')
-            popen.stdout.close()
-            return_code = popen.wait()
-            return return_code
+            return subprocess.run(cmd, universal_newlines=True, check=True)
     except subprocess.CalledProcessError as e:
         exit_from_command_with_retcode(e.cmd, e.returncode)

From a432afc74126a4376d893f2e2197b32af2ca8c7c Mon Sep 17 00:00:00 2001
From: shane knapp
Date: Fri, 8 Nov 2019 10:59:10 -0800
Subject: [PATCH 29/31] add python3 to executable list if found

---
 python/run-tests.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/python/run-tests.py b/python/run-tests.py
index ebf6b63f64f4..182868af9ea1 100755
--- a/python/run-tests.py
+++ b/python/run-tests.py
@@ -166,6 +166,8 @@ def get_default_python_executables():
         if not p:
             LOGGER.error("No python3 executable found. Exiting!")
             os._exit(1)
+        else:
+            python_execs.insert(0, p)
     return python_execs
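Note: with patch 29 the interpreter discovery in python/run-tests.py is complete:
prefer python3.6, fall back to whatever python3 resolves to, and abort when neither
exists. A self-contained sketch of that logic, substituting shutil.which for the
build's own which() helper (default_python_executables is a local rename):

    import logging
    import os
    from shutil import which

    LOGGER = logging.getLogger(__name__)

    def default_python_executables():
        execs = [x for x in ["python3.6"] if which(x)]
        if "python3.6" not in execs:
            p = which("python3")
            if not p:
                LOGGER.error("No python3 executable found. Exiting!")
                os._exit(1)
            # which() returns a concrete path such as /usr/bin/python3.
            execs.insert(0, p)
        return execs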
From 4ad08b03c0f1de956be2fdc79ab7bfcdcf1abf45 Mon Sep 17 00:00:00 2001
From: shane knapp
Date: Tue, 12 Nov 2019 10:14:09 -0800
Subject: [PATCH 30/31] last batch of run-pip-tests python3 ports

---
 dev/run-pip-tests         | 6 +++---
 python/pyspark/context.py | 2 --
 python/pyspark/version.py | 2 +-
 python/setup.py           | 7 +++----
 4 files changed, 7 insertions(+), 10 deletions(-)
 mode change 100644 => 100755 python/setup.py

diff --git a/dev/run-pip-tests b/dev/run-pip-tests
index c25ee7839a80..1294a9096fb9 100755
--- a/dev/run-pip-tests
+++ b/dev/run-pip-tests
@@ -92,7 +92,7 @@ for python in "${PYTHON_EXECS[@]}"; do
     cd "$FWDIR"/python
     # Delete the egg info file if it exists, this can cache the setup file.
     rm -rf pyspark.egg-info || echo "No existing egg info file, skipping deletion"
-    python setup.py sdist
+    python3 setup.py sdist
 
     echo "Installing dist into virtual env"
@@ -112,9 +112,9 @@
     echo "Run basic sanity check on pip installed version with spark-submit"
     spark-submit "$FWDIR"/dev/pip-sanity-check.py
     echo "Run basic sanity check with import based"
-    python "$FWDIR"/dev/pip-sanity-check.py
+    python3 "$FWDIR"/dev/pip-sanity-check.py
     echo "Run the tests for context.py"
-    python "$FWDIR"/python/pyspark/context.py
+    python3 "$FWDIR"/python/pyspark/context.py
 
     cd "$FWDIR"

diff --git a/python/pyspark/context.py b/python/pyspark/context.py
index 4d140f91f032..80aa41e59f76 100644
--- a/python/pyspark/context.py
+++ b/python/pyspark/context.py
@@ -15,8 +15,6 @@
 # limitations under the License.
 #
 
-from __future__ import print_function
-
 import os
 import shutil
 import signal

diff --git a/python/pyspark/version.py b/python/pyspark/version.py
index ba2a40cec01e..1abc41279ebe 100644
--- a/python/pyspark/version.py
+++ b/python/pyspark/version.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 #
 # Licensed to the Apache Software Foundation (ASF) under one or more

diff --git a/python/setup.py b/python/setup.py
old mode 100644
new mode 100755
index ea672309703b..092bdd3f9011
--- a/python/setup.py
+++ b/python/setup.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 #
 # Licensed to the Apache Software Foundation (ASF) under one or more
@@ -16,15 +16,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from __future__ import print_function
 import glob
 import os
 import sys
 from setuptools import setup
 from shutil import copyfile, copytree, rmtree
 
-if sys.version_info < (2, 7):
-    print("Python versions prior to 2.7 are not supported for pip installed PySpark.",
+if sys.version_info < (3, 6):
+    print("Python versions prior to 3.6 are not supported for pip installed PySpark.",
           file=sys.stderr)
     sys.exit(-1)

From b5bada3edaedb7b21d108e90047f5c9f77e6f6a9 Mon Sep 17 00:00:00 2001
From: shane knapp
Date: Wed, 13 Nov 2019 10:14:30 -0800
Subject: [PATCH 31/31] testing to see if pypy/py27 tests will work in py3 env

---
 python/run-tests.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/run-tests.py b/python/run-tests.py
index 182868af9ea1..5bcf8b066912 100755
--- a/python/run-tests.py
+++ b/python/run-tests.py
@@ -159,7 +159,7 @@ def run_individual_python_test(target_dir, test_name, pyspark_python):
 
 
 def get_default_python_executables():
-    python_execs = [x for x in ["python3.6"] if which(x)]
+    python_execs = [x for x in ["python3.6", "python2.7", "pypy"] if which(x)]
     if "python3.6" not in python_execs:
         p = which("python3")
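Note: the guard patch 30 adds to python/setup.py works because sys.version_info
compares elementwise against a plain tuple, so every 2.x and 3.0-3.5 interpreter
refuses to pip-install PySpark before setuptools even runs. Extracted as a
standalone check:

    import sys

    # Tuples compare lexicographically: (2, 7, 16) < (3, 6) and (3, 5, 9) < (3, 6).
    if sys.version_info < (3, 6):
        print("Python versions prior to 3.6 are not supported for pip installed PySpark.",
              file=sys.stderr)
        sys.exit(-1)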