Skip to content

Commit

Permalink
wait_for_tests will now always specifically wait for the RUN phase
Browse files Browse the repository at this point in the history
It used to terminate as soon as it saw no phases in the PEND state. This
created a race condition between the end of the build phase where no states
were in a PEND state but the run phase hadn't happened yet.
  • Loading branch information
jgfouca committed Aug 9, 2016
1 parent db1538e commit f25a518
Show file tree
Hide file tree
Showing 4 changed files with 52 additions and 31 deletions.
11 changes: 2 additions & 9 deletions scripts/Tools/wait_for_tests
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,6 @@ formatter_class=argparse.ArgumentDefaultsHelpFormatter
parser.add_argument("-n", "--no-wait", action="store_true",
help="Do not wait for tests to finish")

parser.add_argument("-r", "--wait-for-run", action="store_true",
help="Continue to wait until the RUN phase has passed")

parser.add_argument("-t", "--check-throughput", action="store_true",
help="Fail if throughput check fails (fail if tests slow down)")

Expand All @@ -75,10 +72,7 @@ formatter_class=argparse.ArgumentDefaultsHelpFormatter

CIME.utils.handle_standard_logging_options(args)

CIME.utils.expect(not (args.no_wait and args.wait_for_run),
"Does not make sense to use --no-wait and --wait-for-run together")

return args.paths, args.no_wait, args.wait_for_run, args.check_throughput, args.check_memory, args.ignore_namelist_diffs, args.ignore_memleak, args.cdash_build_name, args.cdash_project, args.cdash_build_group
return args.paths, args.no_wait, args.check_throughput, args.check_memory, args.ignore_namelist_diffs, args.ignore_memleak, args.cdash_build_name, args.cdash_project, args.cdash_build_group

###############################################################################
def _main_func(description):
Expand All @@ -87,12 +81,11 @@ def _main_func(description):
CIME.utils.run_cmd_no_fail("python -m doctest %s/wait_for_tests.py -v" % CIME.utils.get_python_libs_root(), arg_stdout=None, arg_stderr=None)
return

test_paths, no_wait, wait_for_run, check_throughput, check_memory, ignore_namelist_diffs, ignore_memleak, cdash_build_name, cdash_project, cdash_build_group = \
test_paths, no_wait, check_throughput, check_memory, ignore_namelist_diffs, ignore_memleak, cdash_build_name, cdash_project, cdash_build_group = \
parse_command_line(sys.argv, description)

sys.exit(0 if CIME.wait_for_tests.wait_for_tests(test_paths,
no_wait=no_wait,
wait_for_run=wait_for_run,
check_throughput=check_throughput,
check_memory=check_memory,
ignore_namelists=ignore_namelist_diffs,
Expand Down
8 changes: 8 additions & 0 deletions utils/python/CIME/test_status.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,14 @@ def get_overall_test_status(self, wait_for_run=False, check_throughput=False, ch
'PASS'
>>> _test_helper2('PASS ERS.foo.A MODEL_BUILD', wait_for_run=True)
'PEND'
>>> _test_helper2('FAIL ERS.foo.A MODEL_BUILD', wait_for_run=True)
'FAIL'
>>> _test_helper2('PASS ERS.foo.A MODEL_BUILD\nPEND ERS.foo.A RUN', wait_for_run=True)
'PEND'
>>> _test_helper2('PASS ERS.foo.A MODEL_BUILD\nFAIL ERS.foo.A RUN', wait_for_run=True)
'FAIL'
>>> _test_helper2('PASS ERS.foo.A MODEL_BUILD\nPASS ERS.foo.A RUN', wait_for_run=True)
'PASS'
"""
rv = TEST_PASS_STATUS
run_phase_found = False
Expand Down
12 changes: 6 additions & 6 deletions utils/python/CIME/wait_for_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,7 +258,7 @@ def create_cdash_xml(results, cdash_build_name, cdash_project, cdash_build_group
CIME.utils.run_cmd_no_fail("ctest -VV -D NightlySubmit", verbose=True)

###############################################################################
def wait_for_test(test_path, results, wait, wait_for_run, check_throughput, check_memory, ignore_namelists, ignore_memleak):
def wait_for_test(test_path, results, wait, check_throughput, check_memory, ignore_namelists, ignore_memleak):
###############################################################################
if (os.path.isdir(test_path)):
test_status_filepath = os.path.join(test_path, TEST_STATUS_FILENAME)
Expand All @@ -271,7 +271,8 @@ def wait_for_test(test_path, results, wait, wait_for_run, check_throughput, chec
if (os.path.exists(test_status_filepath)):
ts = TestStatus(test_dir=os.path.dirname(test_status_filepath))
test_name = ts.get_name()
test_status = ts.get_overall_test_status(wait_for_run=wait_for_run, check_throughput=check_throughput,
test_status = ts.get_overall_test_status(wait_for_run=True, # Important
check_throughput=check_throughput,
check_memory=check_memory, ignore_namelists=ignore_namelists,
ignore_memleak=ignore_memleak)

Expand All @@ -292,12 +293,12 @@ def wait_for_test(test_path, results, wait, wait_for_run, check_throughput, chec
break

###############################################################################
def wait_for_tests_impl(test_paths, no_wait=False, wait_for_run=False, check_throughput=False, check_memory=False, ignore_namelists=False, ignore_memleak=False):
def wait_for_tests_impl(test_paths, no_wait=False, check_throughput=False, check_memory=False, ignore_namelists=False, ignore_memleak=False):
###############################################################################
results = Queue.Queue()

for test_path in test_paths:
t = threading.Thread(target=wait_for_test, args=(test_path, results, not no_wait, wait_for_run, check_throughput, check_memory, ignore_namelists, ignore_memleak))
t = threading.Thread(target=wait_for_test, args=(test_path, results, not no_wait, check_throughput, check_memory, ignore_namelists, ignore_memleak))
t.daemon = True
t.start()

Expand Down Expand Up @@ -328,7 +329,6 @@ def wait_for_tests_impl(test_paths, no_wait=False, wait_for_run=False, check_thr
###############################################################################
def wait_for_tests(test_paths,
no_wait=False,
wait_for_run=False,
check_throughput=False,
check_memory=False,
ignore_namelists=False,
Expand All @@ -341,7 +341,7 @@ def wait_for_tests(test_paths,
# is terminated
set_up_signal_handlers()

test_results = wait_for_tests_impl(test_paths, no_wait, wait_for_run, check_throughput, check_memory, ignore_namelists, ignore_memleak)
test_results = wait_for_tests_impl(test_paths, no_wait, check_throughput, check_memory, ignore_namelists, ignore_memleak)

all_pass = True
for test_name, test_data in sorted(test_results.iteritems()):
Expand Down
52 changes: 36 additions & 16 deletions utils/python/tests/scripts_regression_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,25 +169,24 @@ class D_TestWaitForTests(unittest.TestCase):
def setUp(self):
###########################################################################
self._testroot = MACHINE.get_value("CESMSCRATCHROOT")
self._testdir_all_pass = os.path.join(self._testroot, 'scripts_regression_tests.testdir_all_pass')
self._testdir_with_fail = os.path.join(self._testroot, 'scripts_regression_tests.testdir_with_fail')
self._testdir_unfinished = os.path.join(self._testroot, 'scripts_regression_tests.testdir_unfinished')
for testdir in self._testdir_all_pass, self._testdir_with_fail, self._testdir_unfinished:
self._testdir_all_pass = os.path.join(self._testroot, 'scripts_regression_tests.testdir_all_pass')
self._testdir_with_fail = os.path.join(self._testroot, 'scripts_regression_tests.testdir_with_fail')
self._testdir_unfinished = os.path.join(self._testroot, 'scripts_regression_tests.testdir_unfinished')
self._testdir_unfinished2 = os.path.join(self._testroot, 'scripts_regression_tests.testdir_unfinished2')
testdirs = [self._testdir_all_pass, self._testdir_with_fail, self._testdir_unfinished, self._testdir_unfinished2]
for testdir in testdirs:
if os.path.exists(testdir):
shutil.rmtree(testdir)
os.makedirs(testdir)

for r in range(10):
os.makedirs(os.path.join(self._testdir_all_pass, str(r)))
make_fake_teststatus(os.path.join(self._testdir_all_pass, str(r), "TestStatus"), "Test_%d" % r, "PASS", "RUN")
for testdir in testdirs:
os.makedirs(os.path.join(testdir, str(r)))
make_fake_teststatus(os.path.join(testdir, str(r), "TestStatus"), "Test_%d" % r, "PASS", "RUN")

for r in range(10):
os.makedirs(os.path.join(self._testdir_with_fail, str(r)))
make_fake_teststatus(os.path.join(self._testdir_with_fail, str(r), "TestStatus"), "Test_%d" % r, "PASS" if r % 2 == 0 else "FAIL", "SETUP" )

for r in range(10):
os.makedirs(os.path.join(self._testdir_unfinished, str(r)))
make_fake_teststatus(os.path.join(self._testdir_unfinished, str(r), "TestStatus"), "Test_%d" % r, "PEND" if r == 5 else "PASS", "COMPARE")
make_fake_teststatus(os.path.join(self._testdir_with_fail, "5", "TestStatus"), "Test_5", "FAIL", "RUN")
make_fake_teststatus(os.path.join(self._testdir_unfinished, "5", "TestStatus"), "Test_5", "PEND", "RUN")
make_fake_teststatus(os.path.join(self._testdir_unfinished2, "5", "TestStatus"), "Test_5", "PASS", "MODEL_BUILD")

# Set up proxy if possible
self._unset_proxy = setup_proxy()
Expand Down Expand Up @@ -244,7 +243,7 @@ def test_wait_for_test_all_pass(self):
###########################################################################
def test_wait_for_test_with_fail(self):
###########################################################################
expected_results = ["PASS" if item % 2 == 0 else "FAIL" for item in range(10)]
expected_results = ["FAIL" if item == 5 else "PASS" for item in range(10)]
self.simple_test(self._testdir_with_fail, expected_results)

###########################################################################
Expand All @@ -264,7 +263,28 @@ def test_wait_for_test_wait(self):

self.assertTrue(run_thread.isAlive(), msg="wait_for_tests should have waited")

make_fake_teststatus(os.path.join(self._testdir_unfinished, "5", "TestStatus"), "Test_5", "PASS", "RUN")
with TestStatus(test_dir=os.path.join(self._testdir_unfinished, "5")) as ts:
ts.set_status(RUN_PHASE, TEST_PASS_STATUS)

run_thread.join(timeout=10)

self.assertFalse(run_thread.isAlive(), msg="wait_for_tests should have finished")

self.assertTrue(self._thread_error is None, msg="Thread had failure: %s" % self._thread_error)

###########################################################################
def test_wait_for_test_wait2(self):
###########################################################################
run_thread = threading.Thread(target=self.threaded_test, args=(self._testdir_unfinished2, ["PASS"] * 10))
run_thread.daemon = True
run_thread.start()

time.sleep(5) # Kinda hacky

self.assertTrue(run_thread.isAlive(), msg="wait_for_tests should have waited")

with TestStatus(test_dir=os.path.join(self._testdir_unfinished2, "5")) as ts:
ts.set_status(RUN_PHASE, TEST_PASS_STATUS)

run_thread.join(timeout=10)

Expand Down Expand Up @@ -889,7 +909,7 @@ def test_single_submit(self):
self.assertEqual(stat, 0,
msg="COMMAND SHOULD HAVE WORKED\ncreate_test output:\n%s\n\nerrput:\n%s\n\ncode: %d" % (output, errput, stat))

stat, output, errput = run_cmd("%s/wait_for_tests *%s/TestStatus -r" % (TOOLS_DIR, self._baseline_name),
stat, output, errput = run_cmd("%s/wait_for_tests *%s/TestStatus" % (TOOLS_DIR, self._baseline_name),
from_dir=self._testroot)
self.assertEqual(stat, 0,
msg="COMMAND SHOULD HAVE WORKED\nwait_for_tests output:\n%s\n\nerrput:\n%s\n\ncode: %d" % (output, errput, stat))
Expand Down

0 comments on commit f25a518

Please sign in to comment.