progress

E3SM-Project · Aug 4, 2016 · 6ca6b59 · 6ca6b59
1 parent c4817d7
commit 6ca6b59
Show file tree

Hide file tree

Showing 7 changed files with 203 additions and 222 deletions.
diff --git a/scripts/create_test b/scripts/create_test
@@ -58,6 +58,7 @@ OR
 
         description=description,
 
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter
     )
 
     CIME.utils.setup_standard_logging_options(parser)

diff --git a/utils/python/CIME/SystemTests/eri.py b/utils/python/CIME/SystemTests/eri.py
@@ -99,8 +99,7 @@ def run(self):
                 with open("user_nl_cam", "a") as fd:
                     fd.write("inithist = 'ENDOFRUN'\n")
 
-        success = self._run(suffix="base",
-                            coupler_log_path=os.path.join(dout_sr1, "logs"),
+        success = self._run(coupler_log_path=os.path.join(dout_sr1, "logs"),
                             st_archive=True)
         if not success:
             return False

diff --git a/utils/python/CIME/SystemTests/system_tests_common.py b/utils/python/CIME/SystemTests/system_tests_common.py
@@ -376,13 +376,13 @@ def generate_baseline(self):
             append_status("Error in Baseline Generate: %s"%err,sfile="TestStatus.log")
 
 class FakeTest(SystemTestsCommon):
-    '''
+    """
     Inheriters of the FakeTest Class are intended to test the code.
 
     All members of the FakeTest Class must
     have names beginning with "TEST"; this is so that the find_system_test
     in utils.py will work with these classes.
-    '''
+    """
     def _set_script(self, script):
         self._script = script # pylint: disable=attribute-defined-outside-init
 
@@ -419,9 +419,13 @@ def build(self, sharedlib_only=False, model_only=False):
                        sharedlib_only=sharedlib_only, model_only=model_only)
 
 class TESTRUNDIFF(FakeTest):
-    '''
-    This test is intended to be run from scripts_regression_tests.py only
-    '''
+    """
+    You can generate a diff with this test as follows:
+    1) Run the test and generate a baseline
+    2) Set the TESTRUNDIFF_ALTERNATE environment variable to TRUE
+    3) Re-run the same test from step 1 but do a baseline comparison instead of generation
+      3.a) This should give you a DIFF
+    """
     def build(self, sharedlib_only=False, model_only=False):
         rundir = self._case.get_value("RUNDIR")
         cimeroot = self._case.get_value("CIMEROOT")

diff --git a/utils/python/CIME/test_scheduler.py b/utils/python/CIME/test_scheduler.py
@@ -2,16 +2,19 @@
 A library for scheduling/running through the phases of a set
 of system tests. Supports phase-level parallelism (can make progress
 on multiple system tests at once).
+
+TestScheduler will handle the TestStatus for the 1-time setup
+phases. All other phases need to handle their own status because
+they can be run outside the context of TestScheduler.
 """
 
 import shutil, traceback, stat, glob, threading, time, thread
 from CIME.XML.standard_module_setup import *
 import compare_namelists
 import CIME.utils
 from CIME.utils import append_status
-import wait_for_tests, update_acme_tests
-from wait_for_tests import TEST_PASS_STATUS, TEST_FAIL_STATUS, TEST_PENDING_STATUS, \
-    TEST_STATUS_FILENAME, NAMELIST_FAIL_STATUS, RUN_PHASE, NAMELIST_PHASE
+from test_status import *
+import update_acme_tests
 from CIME.XML.machines import Machines
 from CIME.XML.env_test import EnvTest
 from CIME.XML.files import Files
@@ -20,18 +23,13 @@
 from CIME.case import Case
 import CIME.test_utils
 
-INITIAL_PHASE         = "INIT"
-CREATE_NEWCASE_PHASE  = "CREATE_NEWCASE"
-XML_PHASE             = "XML"
-SETUP_PHASE           = "SETUP"
-SHAREDLIB_BUILD_PHASE = "SHAREDLIB_BUILD"
-MODEL_BUILD_PHASE     = "MODEL_BUILD"
+logger = logging.getLogger(__name__)
+
+# Phases managed by TestScheduler
 PHASES = [INITIAL_PHASE, CREATE_NEWCASE_PHASE, XML_PHASE, SETUP_PHASE,
           NAMELIST_PHASE, SHAREDLIB_BUILD_PHASE, MODEL_BUILD_PHASE, RUN_PHASE] # Order matters
 CONTINUE = [TEST_PASS_STATUS, NAMELIST_FAIL_STATUS]
 
-logger = logging.getLogger(__name__)
-
 ###############################################################################
 class TestScheduler(object):
 ###############################################################################
@@ -610,33 +608,6 @@ def _run_phase(self, test):
 
         return self._shell_cmd_for_phase(test, cmd, RUN_PHASE, from_dir=test_dir)
 
-    ###########################################################################
-    def _update_test_status_file(self, test):
-    ###########################################################################
-        # TODO: The run scripts heavily use the TestStatus file. So we write out
-        # the phases we have taken care of and then let the run scrips go from there
-        # Eventually, it would be nice to have TestStatus management encapsulated
-        # into a single place.
-
-        str_to_write = ""
-        made_it_to_phase = self._get_test_phase(test)
-
-        made_it_to_phase_idx = self._phases.index(made_it_to_phase)
-        for phase in self._phases[0:made_it_to_phase_idx+1]:
-            if "BUILD" not in phase:
-                # the build phase status is always write by the test itself
-                str_to_write += "%s %s %s\n" % (self._get_test_status(test, phase), test, phase)
-
-        if not self._no_run and not self._is_broken(test) and made_it_to_phase == MODEL_BUILD_PHASE:
-            # Ensure PEND state always gets added to TestStatus file if we are
-            # about to run test
-            str_to_write += "%s %s %s\n" % (TEST_PENDING_STATUS, test, RUN_PHASE)
-
-        test_status_file = os.path.join(self._get_test_dir(test), TEST_STATUS_FILENAME)
-
-        with open(test_status_file, "w") as fd:
-            fd.write(str_to_write)
-
     ###########################################################################
     def _run_catch_exceptions(self, test, phase, run):
     ###########################################################################
@@ -672,33 +643,6 @@ def _get_procs_needed(self, test, phase, threads_in_flight=None):
         else:
             return 1
 
-    ###########################################################################
-    def _handle_test_status_file(self, test, test_phase, success):
-    ###########################################################################
-        #
-        # This complexity is due to sharing of TestStatus responsibilities
-        #
-        try:
-            if test_phase != RUN_PHASE and (not success or test_phase == SETUP_PHASE
-                                            or test_phase == self._phases[-1]):
-                self._update_test_status_file(test)
-
-            # If we failed VERY early on in the run phase, it's possible that
-            # the CIME scripts never got a chance to set the state.
-            elif test_phase == RUN_PHASE and not success:
-                test_status_file = os.path.join(self._get_test_dir(test), TEST_STATUS_FILENAME)
-                statuses = wait_for_tests.parse_test_status_file(test_status_file)[0]
-                if RUN_PHASE not in statuses or\
-                   (statuses[RUN_PHASE] in [TEST_PASS_STATUS, TEST_PENDING_STATUS]):
-                    self._update_test_status_file(test)
-
-        except Exception as e:
-            # TODO: What to do here? This failure is very severe because the
-            # only way for test results to be communicated is by the TestStatus
-            # file.
-            logger.critical("VERY BAD! Could not handle TestStatus file '%s': '%s'" %
-                             (os.path.join(self._get_test_dir(test), TEST_STATUS_FILENAME), str(e)))
-            thread.interrupt_main()
 
     ###########################################################################
     def _wait_for_something_to_finish(self, threads_in_flight):
@@ -728,14 +672,18 @@ def _consumer(self, test, test_phase, phase_method):
 
         if status != TEST_PENDING_STATUS:
             self._update_test_status(test, test_phase, status)
-        self._handle_test_status_file(test, test_phase, success)
 
         status_str = "Finished %s for test %s in %f seconds (%s)" %\
                      (test_phase, test, elapsed_time, status)
         if not success:
             status_str += "    Case dir: %s" % self._get_test_dir(test)
         logger.info(status_str)
 
+        if test_phase in [CREATE_NEWCASE_PHASE, XML_PHASE]:
+            # These are the phases for which TestScheduler is responsible for
+            # updating the TestStatus file
+            append_status("%s %s %s" % (status, test, test_phase))
+
         # On batch systems, we want to immediately submit to the queue, because
         # it's very cheap to submit and will get us a better spot in line
         if (success and not self._no_run and not self._no_batch and test_phase == MODEL_BUILD_PHASE):

diff --git a/utils/python/CIME/test_status.py b/utils/python/CIME/test_status.py
@@ -0,0 +1,174 @@
+"""
+Functions for managing the TestStatus file
+"""
+
+from CIME.XML.standard_module_setup import *
+
+from collections import OrderedDict
+
+TEST_STATUS_FILENAME = "TestStatus"
+
+# The statuses that a phase can be in
+TEST_PENDING_STATUS  = "PEND"
+TEST_PASS_STATUS     = "PASS"
+TEST_FAIL_STATUS     = "FAIL"
+
+ALL_PHASE_STATUSES = [TEST_PENDING_STATUS, TEST_PASS_STATUS, TEST_FAIL_STATUS]
+
+# Special statuses that the overall test can be in
+TEST_DIFF_STATUS     = "DIFF"   # Implies a failure in one of the COMPARE phases
+NAMELIST_FAIL_STATUS = "NLFAIL" # Implies a failure in the NLCOMP phase
+
+# The valid phases
+INITIAL_PHASE         = "INIT"
+CREATE_NEWCASE_PHASE  = "CREATE_NEWCASE"
+XML_PHASE             = "XML"
+SETUP_PHASE           = "SETUP"
+NAMELIST_PHASE        = "NLCOMP"
+SHAREDLIB_BUILD_PHASE = "SHAREDLIB_BUILD"
+MODEL_BUILD_PHASE     = "MODEL_BUILD"
+RUN_PHASE             = "RUN"
+THROUGHPUT_PHASE      = "TPUTCOMP"
+MEMORY_PHASE          = "MEMCOMP"
+COMPARE_PHASE         = "COMPARE"
+
+ALL_PHASES = [INITIAL_PHASE,
+              CREATE_NEWCASE_PHASE,
+              XML_PHASE,
+              SETUP_PHASE,
+              NAMELIST_PHASE,
+              SHAREDLIB_BUILD_PHASE,
+              MODEL_BUILD_PHASE,
+              RUN_PHASE,
+              THROUGHPUT_PHASE,
+              MEMORY_PHASE,
+              COMPARE_PHASE]
+
+MULTI_PHASES = [COMPARE_PHASE]
+
+class TestStatus(object):
+
+    def __init__(self, test_dir=os.getcwd()):
+        self._filename = os.path.join(test_dir, TEST_STATUS_FILENAME)
+        self._phase_statuses = OrderedDict() # {name -> (status, comments)}
+        self._test_name = None
+
+        if os.path.exists(self._filename):
+            self._parse_test_status_file()
+
+    def _parse_test_status(self, file_contents):
+        """
+        >>> ts = TestStatus()
+        >>> contents = '''
+        ... CREATE_NEWCASE ERS.foo.A PASS
+        ... XML_PHASE      ERS.foo.A PASS
+        ... SETUP_PHASE    ERS.foo.A FAIL
+        ... '''
+        >>> ts._parse_test_status(contents)
+        >>> ts._phase_statuses
+        '{stuff}'
+        """
+        for line in file_contents.splitlines():
+            line = line.strip()
+            tokens = line.split()
+            if line == "":
+                pass # skip blank lines
+            elif len(tokens) >= 3:
+                status, curr_test_name, phase = tokens[:3]
+                if (self._test_name is None):
+                    self._test_name = curr_test_name
+                else:
+                    expect(self._test_name == curr_test_name, "inconsistent test name in parse_test_status: '%s' != '%s'"%(self._test_name, curr_test_name))
+
+                expect(status in ALL_PHASE_STATUSES,
+                       "Unexpected status '%s' in parse_test_status for test '%s'" % (status, self._test_name))
+                expect(phase in ALL_PHASES,
+                       "phase '%s' not expected in parse_test_status for test '%s'" % (phase, self._test_name))
+
+                if (phase in rv):
+                    # Phase names don't matter here, just need something unique
+                    rv[phase] = reduce_stati({"%s_" % phase : status, phase : rv[phase]})
+                else:
+                    rv[phase] = status
+            else:
+                logging.warning("In TestStatus file for test '%s', line '%s' not in expected format" % (self._test_name, line))
+
+    def _parse_test_status_file(self):
+        with open(self._filename, "r") as fd:
+            self._parse_test_status(fd.read())
+
+
+def reduce_stati(stati, wait_for_run=False, check_throughput=False, check_memory=False, ignore_namelists=False):
+    """
+    Given a collection of stati for a test, produce a single result. Preference
+    is given to unfinished stati since we don't want to stop waiting for a test
+    that hasn't finished. Namelist diffs are given the lowest precedence.
+    """
+    rv = TEST_PASS_STATUS
+    run_phase_found = False
+    for phase, status in stati.iteritems():
+        if phase == RUN_PHASE:
+            run_phase_found = True
+
+        if (status == TEST_PENDING_STATUS):
+            return status
+
+        elif (status != TEST_PASS_STATUS):
+            if ( (not check_throughput and THROUGHPUT_TEST_STR in phase) or
+                 (not check_memory and MEMORY_TEST_STR in phase) or
+                 (ignore_namelists and phase == NAMELIST_PHASE) ):
+                continue
+
+            if (status == NAMELIST_FAIL_STATUS):
+                if (rv == TEST_PASS_STATUS):
+                    rv = NAMELIST_FAIL_STATUS
+
+            elif (rv in [NAMELIST_FAIL_STATUS, TEST_PASS_STATUS] and phase == HIST_COMPARE_PHASE):
+                rv = TEST_DIFF_STATUS
+
+            else:
+                rv = status
+
+    # The test did not fail but the RUN phase was not found, so if the user requested
+    # that we wait for the RUN phase, then the test must still be considered pending.
+    if rv != TEST_FAIL_STATUS and not run_phase_found and wait_for_run:
+        rv = TEST_PENDING_STATUS
+
+    return rv
+
+
+def interpret_status(file_contents, wait_for_run=False, check_throughput=False, check_memory=False, ignore_namelists=False):
+    r"""
+    >>> interpret_status('PASS testname RUN')
+    ('testname', 'PASS')
+    >>> interpret_status('PASS testname SHAREDLIB_BUILD\nPEND testname RUN')
+    ('testname', 'PEND')
+    >>> interpret_status('FAIL testname MODEL_BUILD\nPEND testname RUN')
+    ('testname', 'PEND')
+    >>> interpret_status('PASS testname MODEL_BUILD\nPASS testname RUN')
+    ('testname', 'PASS')
+    >>> interpret_status('PASS testname RUN\nFAIL testname tputcomp')
+    ('testname', 'PASS')
+    >>> interpret_status('PASS testname RUN\nFAIL testname tputcomp', check_throughput=True)
+    ('testname', 'FAIL')
+    >>> interpret_status('PASS testname RUN\nNLFAIL testname nlcomp')
+    ('testname', 'NLFAIL')
+    >>> interpret_status('PASS testname RUN\nFAIL testname memleak')
+    ('testname', 'FAIL')
+    >>> interpret_status('PASS testname RUN\nNLFAIL testname nlcomp', ignore_namelists=True)
+    ('testname', 'PASS')
+    >>> interpret_status('PASS testname compare\nNLFAIL testname nlcomp\nFAIL testname compare\nPASS testname RUN')
+    ('testname', 'DIFF')
+    >>> interpret_status('PASS testname MODEL_BUILD')
+    ('testname', 'PASS')
+    >>> interpret_status('PASS testname MODEL_BUILD', wait_for_run=True)
+    ('testname', 'PEND')
+    """
+    statuses, test_name = parse_test_status(file_contents)
+    reduced_status = reduce_stati(statuses, wait_for_run, check_throughput, check_memory, ignore_namelists)
+
+    return test_name, reduced_status
+
+def interpret_status_file(file_name, wait_for_run=False, check_throughput=False, check_memory=False, ignore_namelists=False):
+    with open(file_name, "r") as fd:
+        return interpret_status(fd.read(), wait_for_run, check_throughput, check_memory, ignore_namelists)
diff --git a/utils/python/CIME/utils.py b/utils/python/CIME/utils.py
@@ -875,13 +875,13 @@ def touch(fname):
         open(fname, 'a').close()
 
 def find_system_test(testname, case):
-    '''
+    """
     Find and import the test matching testname
     Look through the paths set in config_files.xml variable SYSTEM_TESTS_DIR
     for components used in this case to find a test matching testname.  Add the
     path to that directory to sys.path if it's not there and return the test object
     Fail if the test is not found in any of the paths.
-    '''
+    """
     system_test_path = None
     if testname.startswith("TEST"):
         system_test_path =  "CIME.SystemTests.system_tests_common.%s"%(testname)
@@ -913,8 +913,3 @@ def find_system_test(testname, case):
     mod = import_module(path)
     return getattr(mod, m)
 
-
-
-
-
-