From af4cef40fa48feee32389e2a7fa8f96c1f82fd91 Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Thu, 25 Jun 2015 19:54:33 -0700 Subject: [PATCH 01/15] Initial attempt at parallelizing Python test execution --- python/run-tests.py | 85 ++++++++++++++++++++++++++++++++------------- 1 file changed, 60 insertions(+), 25 deletions(-) diff --git a/python/run-tests.py b/python/run-tests.py index 7d485b500ee3..c1463f4ec606 100755 --- a/python/run-tests.py +++ b/python/run-tests.py @@ -18,12 +18,19 @@ # from __future__ import print_function +import logging from optparse import OptionParser import os import re import subprocess import sys +import tempfile +from threading import Thread, Lock import time +if sys.version < '3': + import Queue +else: + import queue as Queue # Append `SPARK_HOME/dev` to the Python path so that we can import the sparktestsupport module @@ -43,34 +50,43 @@ def print_red(text): LOG_FILE = os.path.join(SPARK_HOME, "python/unit-tests.log") +LOG_FILE_LOCK = Lock() +LOGGER = logging.getLogger() def run_individual_python_test(test_name, pyspark_python): env = {'SPARK_TESTING': '1', 'PYSPARK_PYTHON': which(pyspark_python)} - print(" Running test: %s ..." % test_name, end='') + LOGGER.info("Starting test(%s): %s" % (pyspark_python, test_name)) start_time = time.time() - with open(LOG_FILE, 'a') as log_file: - retcode = subprocess.call( - [os.path.join(SPARK_HOME, "bin/pyspark"), test_name], - stderr=log_file, stdout=log_file, env=env) + per_test_output = tempfile.TemporaryFile() + retcode = subprocess.Popen( + [os.path.join(SPARK_HOME, "bin/pyspark"), test_name], + stderr=per_test_output, stdout=per_test_output, env=env).wait() duration = time.time() - start_time + with LOG_FILE_LOCK: + with open(LOG_FILE, 'ab') as log_file: + per_test_output.seek(0) + log_file.writelines(per_test_output.readlines()) + per_test_output.close() # Exit on the first failure. if retcode != 0: with open(LOG_FILE, 'r') as log_file: for line in log_file: if not re.match('[0-9]+', line): print(line, end='') - print_red("\nHad test failures in %s; see logs." % test_name) - exit(-1) + print_red("\nHad test failures in %s with %s; see logs." % (test_name, pyspark_python)) + # Here, we use os._exit() instead of sys.exit() in order to force Python to exit even if + # this code is invoked from a thread other than the main thread. + os._exit(-1) else: - print("ok (%is)" % duration) + LOGGER.info("Finished test(%s): %s (%is)" % (pyspark_python, test_name, duration)) def get_default_python_executables(): python_execs = [x for x in ["python2.6", "python3.4", "pypy"] if which(x)] if "python2.6" not in python_execs: - print("WARNING: Not testing against `python2.6` because it could not be found; falling" - " back to `python` instead") + LOGGER.warning("Not testing against `python2.6` because it could not be found; falling" + " back to `python` instead") python_execs.insert(0, "python") return python_execs @@ -88,6 +104,10 @@ def parse_opts(): default=",".join(sorted(python_modules.keys())), help="A comma-separated list of Python modules to test (default: %default)" ) + parser.add_option( + "-p", "--parallelism", type="int", default=4, + help="The number of suites to test in parallel (default %default)" + ) (opts, args) = parser.parse_args() if args: @@ -96,8 +116,9 @@ def parse_opts(): def main(): + logging.basicConfig(stream=sys.stdout, level=logging.DEBUG, format="%(message)s") opts = parse_opts() - print("Running PySpark tests. Output is in python/%s" % LOG_FILE) + LOGGER.info("Running PySpark tests. Output is in python/%s" % LOG_FILE) if os.path.exists(LOG_FILE): os.remove(LOG_FILE) python_execs = opts.python_executables.split(',') @@ -108,24 +129,38 @@ def main(): else: print("Error: unrecognized module %s" % module_name) sys.exit(-1) - print("Will test against the following Python executables: %s" % python_execs) - print("Will test the following Python modules: %s" % [x.name for x in modules_to_test]) + LOGGER.info("Will test against the following Python executables: %s" % python_execs) + LOGGER.info("Will test the following Python modules: %s" % [x.name for x in modules_to_test]) - start_time = time.time() + task_queue = Queue.Queue() for python_exec in python_execs: - python_implementation = subprocess.check_output( - [python_exec, "-c", "import platform; print(platform.python_implementation())"], - universal_newlines=True).strip() - print("Testing with `%s`: " % python_exec, end='') - subprocess.call([python_exec, "--version"]) - for module in modules_to_test: - if python_implementation not in module.blacklisted_python_implementations: - print("Running %s tests ..." % module.name) - for test_goal in module.python_test_goals: - run_individual_python_test(test_goal, python_exec) + for test_goal in module.python_test_goals: + task_queue.put((python_exec, test_goal)) + + def process_queue(task_queue): + while True: + try: + (python_exec, test_goal) = task_queue.get_nowait() + except Queue.Empty: + break + try: + run_individual_python_test(test_goal, python_exec) + finally: + task_queue.task_done() + + start_time = time.time() + for _ in range(opts.parallelism): + worker = Thread(target=process_queue, args=(task_queue,)) + worker.daemon = True + worker.start() + try: + task_queue.join() + except (KeyboardInterrupt, SystemExit): + print_red("Exiting due to interrupt") + sys.exit(-1) total_duration = time.time() - start_time - print("Tests passed in %i seconds" % total_duration) + LOGGER.info("Tests passed in %i seconds" % total_duration) if __name__ == "__main__": From 037b6860a1a3caa8c5bab14447bad62b92a486a2 Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Thu, 25 Jun 2015 19:55:23 -0700 Subject: [PATCH 02/15] Temporarily disable JVM tests so we can test Python speedup in Jenkins. --- dev/run-tests.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dev/run-tests.py b/dev/run-tests.py index c51b0d3010a0..ce4f28b7c957 100755 --- a/dev/run-tests.py +++ b/dev/run-tests.py @@ -454,16 +454,16 @@ def main(): build_apache_spark(build_tool, hadoop_version) # backwards compatibility checks - detect_binary_inop_with_mima() + # detect_binary_inop_with_mima() # run the test suites - run_scala_tests(build_tool, hadoop_version, test_modules) + # run_scala_tests(build_tool, hadoop_version, test_modules) modules_with_python_tests = [m for m in test_modules if m.python_test_goals] if modules_with_python_tests: run_python_tests(modules_with_python_tests) - if any(m.should_run_r_tests for m in test_modules): - run_sparkr_tests() + # if any(m.should_run_r_tests for m in test_modules): + # run_sparkr_tests() def _test(): From 9129027ab0b0e21f87152ca2dcff31010df1f9cd Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Thu, 25 Jun 2015 22:21:05 -0700 Subject: [PATCH 03/15] Disable Spark UI in Python tests --- python/pyspark/java_gateway.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/python/pyspark/java_gateway.py b/python/pyspark/java_gateway.py index 3cee4ea6e3a3..90cd342a6cf7 100644 --- a/python/pyspark/java_gateway.py +++ b/python/pyspark/java_gateway.py @@ -51,6 +51,8 @@ def launch_gateway(): on_windows = platform.system() == "Windows" script = "./bin/spark-submit.cmd" if on_windows else "./bin/spark-submit" submit_args = os.environ.get("PYSPARK_SUBMIT_ARGS", "pyspark-shell") + if os.environ.get("SPARK_TESTING"): + submit_args = "--conf spark.ui.enabled=false " + submit_args command = [os.path.join(SPARK_HOME, script)] + shlex.split(submit_args) # Start a socket that will be used by PythonGatewayServer to communicate its port to us From 8309bfe46c0ed395f9288c772e2eedbfa28a3c14 Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Fri, 26 Jun 2015 00:18:40 -0700 Subject: [PATCH 04/15] Temporarily disable parallelism to debug a failure --- python/run-tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/run-tests.py b/python/run-tests.py index c1463f4ec606..9e156be3dcfb 100755 --- a/python/run-tests.py +++ b/python/run-tests.py @@ -105,7 +105,7 @@ def parse_opts(): help="A comma-separated list of Python modules to test (default: %default)" ) parser.add_option( - "-p", "--parallelism", type="int", default=4, + "-p", "--parallelism", type="int", default=1, help="The number of suites to test in parallel (default %default)" ) From 87cb988b2544a0a39dd6cf463609298feca8a2b0 Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Fri, 26 Jun 2015 09:47:33 -0700 Subject: [PATCH 05/15] Skip MLLib tests for PyPy --- python/run-tests.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/python/run-tests.py b/python/run-tests.py index 9e156be3dcfb..9090f494f76f 100755 --- a/python/run-tests.py +++ b/python/run-tests.py @@ -134,9 +134,12 @@ def main(): task_queue = Queue.Queue() for python_exec in python_execs: + python_implementation = subprocess.check_output( + [python_exec, "-c", "import platform; print platform.python_implementation()"]).strip() for module in modules_to_test: - for test_goal in module.python_test_goals: - task_queue.put((python_exec, test_goal)) + if python_implementation not in module.blacklisted_python_implementations: + for test_goal in module.python_test_goals: + task_queue.put((python_exec, test_goal)) def process_queue(task_queue): while True: From 866b5b9948309b48928f0b1c1d74a043c34119f2 Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Fri, 26 Jun 2015 09:50:45 -0700 Subject: [PATCH 06/15] Fix lazy logging warnings in Prospector checks --- python/run-tests.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/python/run-tests.py b/python/run-tests.py index 9090f494f76f..ea6eb5e086b5 100755 --- a/python/run-tests.py +++ b/python/run-tests.py @@ -56,7 +56,7 @@ def print_red(text): def run_individual_python_test(test_name, pyspark_python): env = {'SPARK_TESTING': '1', 'PYSPARK_PYTHON': which(pyspark_python)} - LOGGER.info("Starting test(%s): %s" % (pyspark_python, test_name)) + LOGGER.info("Starting test(%s): %s", pyspark_python, test_name) start_time = time.time() per_test_output = tempfile.TemporaryFile() retcode = subprocess.Popen( @@ -79,7 +79,7 @@ def run_individual_python_test(test_name, pyspark_python): # this code is invoked from a thread other than the main thread. os._exit(-1) else: - LOGGER.info("Finished test(%s): %s (%is)" % (pyspark_python, test_name, duration)) + LOGGER.info("Finished test(%s): %s (%is)", pyspark_python, test_name, duration) def get_default_python_executables(): @@ -118,7 +118,7 @@ def parse_opts(): def main(): logging.basicConfig(stream=sys.stdout, level=logging.DEBUG, format="%(message)s") opts = parse_opts() - LOGGER.info("Running PySpark tests. Output is in python/%s" % LOG_FILE) + LOGGER.info("Running PySpark tests. Output is in python/%s", LOG_FILE) if os.path.exists(LOG_FILE): os.remove(LOG_FILE) python_execs = opts.python_executables.split(',') @@ -129,8 +129,8 @@ def main(): else: print("Error: unrecognized module %s" % module_name) sys.exit(-1) - LOGGER.info("Will test against the following Python executables: %s" % python_execs) - LOGGER.info("Will test the following Python modules: %s" % [x.name for x in modules_to_test]) + LOGGER.info("Will test against the following Python executables: %s", python_execs) + LOGGER.info("Will test the following Python modules: %s", [x.name for x in modules_to_test]) task_queue = Queue.Queue() for python_exec in python_execs: @@ -163,7 +163,7 @@ def process_queue(task_queue): print_red("Exiting due to interrupt") sys.exit(-1) total_duration = time.time() - start_time - LOGGER.info("Tests passed in %i seconds" % total_duration) + LOGGER.info("Tests passed in %i seconds", total_duration) if __name__ == "__main__": From 5552380cd904b1d25575ea60ff88ff7333d718a4 Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Fri, 26 Jun 2015 10:02:33 -0700 Subject: [PATCH 07/15] Python 3 fix --- python/run-tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/run-tests.py b/python/run-tests.py index ea6eb5e086b5..0d273550453f 100755 --- a/python/run-tests.py +++ b/python/run-tests.py @@ -135,7 +135,7 @@ def main(): task_queue = Queue.Queue() for python_exec in python_execs: python_implementation = subprocess.check_output( - [python_exec, "-c", "import platform; print platform.python_implementation()"]).strip() + [python_exec, "-c", "import platform; print(platform.python_implementation())"]).strip() for module in modules_to_test: if python_implementation not in module.blacklisted_python_implementations: for test_goal in module.python_test_goals: From a2b9094469dda569f3bb94d2ed0b0ab6ccb717f5 Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Fri, 26 Jun 2015 12:19:03 -0700 Subject: [PATCH 08/15] Bump up parallelism. --- python/run-tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/run-tests.py b/python/run-tests.py index 0d273550453f..b173ea7a988c 100755 --- a/python/run-tests.py +++ b/python/run-tests.py @@ -105,7 +105,7 @@ def parse_opts(): help="A comma-separated list of Python modules to test (default: %default)" ) parser.add_option( - "-p", "--parallelism", type="int", default=1, + "-p", "--parallelism", type="int", default=4, help="The number of suites to test in parallel (default %default)" ) From 9e31127b3253c66869e3354c73b51c6b9d3ccdd6 Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Fri, 26 Jun 2015 13:41:59 -0700 Subject: [PATCH 09/15] Log Python --version output for each executable. --- python/run-tests.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/python/run-tests.py b/python/run-tests.py index b173ea7a988c..deb1a9d19fd3 100755 --- a/python/run-tests.py +++ b/python/run-tests.py @@ -136,6 +136,8 @@ def main(): for python_exec in python_execs: python_implementation = subprocess.check_output( [python_exec, "-c", "import platform; print(platform.python_implementation())"]).strip() + LOGGER.debug("`%s` version is: %s", python_exec, subprocess.check_output( + [python_exec, "--version"], stderr=subprocess.STDOUT).strip()) for module in modules_to_test: if python_implementation not in module.blacklisted_python_implementations: for test_goal in module.python_test_goals: From cd13db84075c5ae828beeafa3043e7f9c0d150e9 Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Fri, 26 Jun 2015 13:43:30 -0700 Subject: [PATCH 10/15] Also log python_implementation --- python/run-tests.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/run-tests.py b/python/run-tests.py index deb1a9d19fd3..4012a448dfd4 100755 --- a/python/run-tests.py +++ b/python/run-tests.py @@ -136,7 +136,8 @@ def main(): for python_exec in python_execs: python_implementation = subprocess.check_output( [python_exec, "-c", "import platform; print(platform.python_implementation())"]).strip() - LOGGER.debug("`%s` version is: %s", python_exec, subprocess.check_output( + LOGGER.debug("%s python_implementation is %s", python_exec, python_implementation) + LOGGER.debug("%s version is: %s", python_exec, subprocess.check_output( [python_exec, "--version"], stderr=subprocess.STDOUT).strip()) for module in modules_to_test: if python_implementation not in module.blacklisted_python_implementations: From 110cd9dd9877eb5b6934639b5e5adde15748cd57 Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Fri, 26 Jun 2015 18:33:07 -0700 Subject: [PATCH 11/15] Fix universal_newlines for Python 3 --- python/run-tests.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/python/run-tests.py b/python/run-tests.py index 4012a448dfd4..a2b0ac4aeb8e 100755 --- a/python/run-tests.py +++ b/python/run-tests.py @@ -135,10 +135,11 @@ def main(): task_queue = Queue.Queue() for python_exec in python_execs: python_implementation = subprocess.check_output( - [python_exec, "-c", "import platform; print(platform.python_implementation())"]).strip() + [python_exec, "-c", "import platform; print(platform.python_implementation())"], + universal_newlines=True).strip() LOGGER.debug("%s python_implementation is %s", python_exec, python_implementation) LOGGER.debug("%s version is: %s", python_exec, subprocess.check_output( - [python_exec, "--version"], stderr=subprocess.STDOUT).strip()) + [python_exec, "--version"], stderr=subprocess.STDOUT, universal_newlines=True).strip()) for module in modules_to_test: if python_implementation not in module.blacklisted_python_implementations: for test_goal in module.python_test_goals: From a2717e1ab33f3a3ec82106be4646e5692535c410 Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Mon, 29 Jun 2015 17:08:06 -0700 Subject: [PATCH 12/15] Make parallelism configurable via dev/run-tests --- dev/run-tests | 2 +- dev/run-tests.py | 24 ++++++++++++++++++++++-- dev/sparktestsupport/shellutils.py | 1 + python/run-tests.py | 2 ++ 4 files changed, 26 insertions(+), 3 deletions(-) diff --git a/dev/run-tests b/dev/run-tests index a00d9f0c2763..257d1e8d50bb 100755 --- a/dev/run-tests +++ b/dev/run-tests @@ -20,4 +20,4 @@ FWDIR="$(cd "`dirname $0`"/..; pwd)" cd "$FWDIR" -exec python -u ./dev/run-tests.py +exec python -u ./dev/run-tests.py "$@" diff --git a/dev/run-tests.py b/dev/run-tests.py index b9058642205a..93234aad6875 100755 --- a/dev/run-tests.py +++ b/dev/run-tests.py @@ -19,6 +19,7 @@ from __future__ import print_function import itertools +from optparse import OptionParser import os import re import sys @@ -360,12 +361,13 @@ def run_scala_tests(build_tool, hadoop_version, test_modules): run_scala_tests_sbt(test_modules, test_profiles) -def run_python_tests(test_modules): +def run_python_tests(test_modules, parallelism): set_title_and_block("Running PySpark tests", "BLOCK_PYSPARK_UNIT_TESTS") command = [os.path.join(SPARK_HOME, "python", "run-tests")] if test_modules != [modules.root]: command.append("--modules=%s" % ','.join(m.name for m in test_modules)) + command.append("--parallelism=%i" % parallelism) run_cmd(command) @@ -379,7 +381,25 @@ def run_sparkr_tests(): print("Ignoring SparkR tests as R was not found in PATH") +def parse_opts(): + parser = OptionParser( + prog="run-tests" + ) + parser.add_option( + "-p", "--parallelism", type="int", default=4, + help="The number of suites to test in parallel (default %default)" + ) + + (opts, args) = parser.parse_args() + if args: + parser.error("Unsupported arguments: %s" % ' '.join(args)) + if opts.parallelism < 1: + parser.error("Parallelism cannot be less than 1") + return opts + + def main(): + opts = parse_opts() # Ensure the user home directory (HOME) is valid and is an absolute directory if not USER_HOME or not os.path.isabs(USER_HOME): print("[error] Cannot determine your home directory as an absolute path;", @@ -461,7 +481,7 @@ def main(): modules_with_python_tests = [m for m in test_modules if m.python_test_goals] if modules_with_python_tests: - run_python_tests(modules_with_python_tests) + run_python_tests(modules_with_python_tests, opts.parallelism) # if any(m.should_run_r_tests for m in test_modules): # run_sparkr_tests() diff --git a/dev/sparktestsupport/shellutils.py b/dev/sparktestsupport/shellutils.py index ad9b0cc89e4a..12bd0bf3a4fe 100644 --- a/dev/sparktestsupport/shellutils.py +++ b/dev/sparktestsupport/shellutils.py @@ -15,6 +15,7 @@ # limitations under the License. # +from __future__ import print_function import os import shutil import subprocess diff --git a/python/run-tests.py b/python/run-tests.py index a2b0ac4aeb8e..24681d427c56 100755 --- a/python/run-tests.py +++ b/python/run-tests.py @@ -112,6 +112,8 @@ def parse_opts(): (opts, args) = parser.parse_args() if args: parser.error("Unsupported arguments: %s" % ' '.join(args)) + if opts.parallelism < 1: + parser.error("Parallelism cannot be less than 1") return opts From d4ded7368f6c3d9c7e7c958ee2925f3867116aa9 Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Mon, 29 Jun 2015 17:23:09 -0700 Subject: [PATCH 13/15] Logging improvements --- python/run-tests.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/python/run-tests.py b/python/run-tests.py index 24681d427c56..78bf2a8b7502 100755 --- a/python/run-tests.py +++ b/python/run-tests.py @@ -56,7 +56,7 @@ def print_red(text): def run_individual_python_test(test_name, pyspark_python): env = {'SPARK_TESTING': '1', 'PYSPARK_PYTHON': which(pyspark_python)} - LOGGER.info("Starting test(%s): %s", pyspark_python, test_name) + LOGGER.debug("Starting test(%s): %s", pyspark_python, test_name) start_time = time.time() per_test_output = tempfile.TemporaryFile() retcode = subprocess.Popen( @@ -108,6 +108,10 @@ def parse_opts(): "-p", "--parallelism", type="int", default=4, help="The number of suites to test in parallel (default %default)" ) + parser.add_option( + "--verbose", action="store_true", + help="Enable additional debug logging" + ) (opts, args) = parser.parse_args() if args: @@ -118,8 +122,12 @@ def parse_opts(): def main(): - logging.basicConfig(stream=sys.stdout, level=logging.DEBUG, format="%(message)s") opts = parse_opts() + if (opts.verbose): + log_level = logging.DEBUG + else: + log_level = logging.INFO + logging.basicConfig(stream=sys.stdout, level=log_level, format="%(message)s") LOGGER.info("Running PySpark tests. Output is in python/%s", LOG_FILE) if os.path.exists(LOG_FILE): os.remove(LOG_FILE) From f87ea8128eaf2e0285bf27f4f693e30f87b512b9 Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Mon, 29 Jun 2015 17:32:07 -0700 Subject: [PATCH 14/15] Only log output from failed tests --- python/run-tests.py | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/python/run-tests.py b/python/run-tests.py index 78bf2a8b7502..aaa35e936a80 100755 --- a/python/run-tests.py +++ b/python/run-tests.py @@ -50,7 +50,7 @@ def print_red(text): LOG_FILE = os.path.join(SPARK_HOME, "python/unit-tests.log") -LOG_FILE_LOCK = Lock() +FAILURE_REPORTING_LOCK = Lock() LOGGER = logging.getLogger() @@ -63,22 +63,23 @@ def run_individual_python_test(test_name, pyspark_python): [os.path.join(SPARK_HOME, "bin/pyspark"), test_name], stderr=per_test_output, stdout=per_test_output, env=env).wait() duration = time.time() - start_time - with LOG_FILE_LOCK: - with open(LOG_FILE, 'ab') as log_file: - per_test_output.seek(0) - log_file.writelines(per_test_output.readlines()) - per_test_output.close() # Exit on the first failure. if retcode != 0: - with open(LOG_FILE, 'r') as log_file: - for line in log_file: + with FAILURE_REPORTING_LOCK: + with open(LOG_FILE, 'ab') as log_file: + per_test_output.seek(0) + log_file.writelines(per_test_output.readlines()) + per_test_output.seek(0) + for line in per_test_output: if not re.match('[0-9]+', line): print(line, end='') - print_red("\nHad test failures in %s with %s; see logs." % (test_name, pyspark_python)) - # Here, we use os._exit() instead of sys.exit() in order to force Python to exit even if - # this code is invoked from a thread other than the main thread. - os._exit(-1) + per_test_output.close() + print_red("\nHad test failures in %s with %s; see logs." % (test_name, pyspark_python)) + # Here, we use os._exit() instead of sys.exit() in order to force Python to exit even if + # this code is invoked from a thread other than the main thread. + os._exit(-1) else: + per_test_output.close() LOGGER.info("Finished test(%s): %s (%is)", pyspark_python, test_name, duration) From feb3763df3e7a25c833f4240ccf85c45dd56711f Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Mon, 29 Jun 2015 17:35:28 -0700 Subject: [PATCH 15/15] Re-enable other tests --- dev/run-tests.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dev/run-tests.py b/dev/run-tests.py index 93234aad6875..4596e0701473 100755 --- a/dev/run-tests.py +++ b/dev/run-tests.py @@ -474,16 +474,16 @@ def main(): build_apache_spark(build_tool, hadoop_version) # backwards compatibility checks - # detect_binary_inop_with_mima() + detect_binary_inop_with_mima() # run the test suites - # run_scala_tests(build_tool, hadoop_version, test_modules) + run_scala_tests(build_tool, hadoop_version, test_modules) modules_with_python_tests = [m for m in test_modules if m.python_test_goals] if modules_with_python_tests: run_python_tests(modules_with_python_tests, opts.parallelism) - # if any(m.should_run_r_tests for m in test_modules): - # run_sparkr_tests() + if any(m.should_run_r_tests for m in test_modules): + run_sparkr_tests() def _test():