Skip to content

Commit

Permalink
Merge branch 'jgfouca/scripts-acme/create_test_impl_parallel' into next (PR #426)
Browse files Browse the repository at this point in the history

create_test: Implement full parallelism

create_test will now automatically parallelize the processing of
test cases, running up to MAX_TASKS_PER_NODE tasks at once by default
(override with -j/--parallel-jobs).

[BFB]
  • Loading branch information
Jeffrey Johnson committed Oct 31, 2015
2 parents c89680f + 7f133b6 commit 357bb6b
Show file tree
Hide file tree
Showing 6 changed files with 264 additions and 125 deletions.
2 changes: 1 addition & 1 deletion cime/machines-acme/config_machines.xml
Original file line number Diff line number Diff line change
Expand Up @@ -298,7 +298,7 @@
<BATCHREDIRECT></BATCHREDIRECT>
<SUPPORTED_BY>jgfouca at sandia dot gov</SUPPORTED_BY>
<GMAKE_J>4</GMAKE_J>
<MAX_TASKS_PER_NODE>8</MAX_TASKS_PER_NODE>
<MAX_TASKS_PER_NODE>16</MAX_TASKS_PER_NODE>
<PIO_BUFFER_SIZE_LIMIT>1</PIO_BUFFER_SIZE_LIMIT>
<PROJECT_REQUIRED>TRUE</PROJECT_REQUIRED>
<batch_system type="slurm" version="x.y">
Expand Down
6 changes: 3 additions & 3 deletions cime/scripts-acme/bless_test_results
Original file line number Diff line number Diff line change
Expand Up @@ -148,16 +148,16 @@ def bless_test_results(baseline_name, test_root, compiler, namelists_only=False,
expect(create_test_impl.NAMELIST_PHASE in test_result,
"Test '%s' had no namelist phase" % test_name)

run_phase_pass = test_result[wait_for_tests.RUN_PHASE] == wait_for_tests.TEST_PASSED_STATUS
nl_pass = test_result[create_test_impl.NAMELIST_PHASE] == wait_for_tests.TEST_PASSED_STATUS
run_phase_pass = test_result[wait_for_tests.RUN_PHASE] == wait_for_tests.TEST_PASS_STATUS
nl_pass = test_result[create_test_impl.NAMELIST_PHASE] == wait_for_tests.TEST_PASS_STATUS

if (not run_phase_pass):
warning("Test '%s' did not run successfully, it is not safe to bless results" % test_name)
time.sleep(2)
else:
expect(wait_for_tests.HIST_COMPARE_PHASE in test_result,
"Test '%s' had no history compare phase" % test_name)
hist_pass = test_result[wait_for_tests.HIST_COMPARE_PHASE] == wait_for_tests.TEST_PASSED_STATUS
hist_pass = test_result[wait_for_tests.HIST_COMPARE_PHASE] == wait_for_tests.TEST_PASS_STATUS

if ( (nl_pass and hist_pass) or (nl_pass and namelists_only) or (hist_pass and hist_only) ):
print "Nothing to bless for test:", test_name, " overall status:", overall_result
Expand Down
20 changes: 15 additions & 5 deletions cime/scripts-acme/create_test
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,10 @@ formatter_class=argparse.ArgumentDefaultsHelpFormatter
"If no testid is specified, then a time stamp will be"
"used.")

parser.add_argument("-j", "--parallel-jobs", type=int, default=None,
help="Number of tasks create_test should perform simultaneously. Default "
"will be min(num_cores, num_tests).")

parser.add_argument("--old", action="store_true", help="Use CIME Perl impl")

args = parser.parse_args(args[1:])
Expand All @@ -123,6 +127,9 @@ formatter_class=argparse.ArgumentDefaultsHelpFormatter
"Provided baseline name but did not specify compare or generate")
expect(not (args.namelists_only and not (args.generate or args.compare)),
"Must provide either --compare or --generate with --namelists-only")
if (args.parallel_jobs is not None):
expect(args.parallel_jobs > 0,
"Invalid value for parallel_jobs: %d" % args.parallel_jobs)

if (args.no_build):
args.no_run = True
Expand Down Expand Up @@ -163,7 +170,7 @@ formatter_class=argparse.ArgumentDefaultsHelpFormatter
args.test_id = acme_util.get_utc_timestamp()

return args.testargs, args.compiler, args.no_run, args.no_build, args.no_batch, args.test_root, args.baseline_root, \
args.clean, args.compare, args.generate, args.baseline_name, args.namelists_only, args.project, args.test_id, args.old
args.clean, args.compare, args.generate, args.baseline_name, args.namelists_only, args.project, args.test_id, args.old, args.parallel_jobs

###############################################################################
def get_tests_from_args(testargs, machine, compiler):
Expand Down Expand Up @@ -205,12 +212,15 @@ def get_tests_from_args(testargs, machine, compiler):
###############################################################################
def create_test(testargs, compiler, no_run, no_build, no_batch, test_root,
baseline_root, clean, compare, generate,
baseline_name, namelists_only, project, test_id, old):
baseline_name, namelists_only, project, test_id, old, parallel_jobs):
###############################################################################
machine = acme_util.probe_machine_name()

tests_to_run = get_tests_from_args(testargs, machine, compiler)

if (parallel_jobs is None):
parallel_jobs = min(len(tests_to_run), int(acme_util.get_machine_info("MAX_TASKS_PER_NODE")))

expect(len(tests_to_run) > 0, "No tests to run")

if (not old):
Expand All @@ -220,7 +230,7 @@ def create_test(testargs, compiler, no_run, no_build, no_batch, test_root,
baseline_root, baseline_name,
clean,
compare, generate, namelists_only,
project)
project, parallel_jobs)
return 0 if impl.create_test() else 1
else:

Expand Down Expand Up @@ -274,11 +284,11 @@ def _main_func(description):
acme_util.stop_buffering_output()

testargs, compiler, no_run, no_build, no_batch, test_root, baseline_root, clean, \
compare, generate, baseline_name, namelists_only, project, test_id, old = \
compare, generate, baseline_name, namelists_only, project, test_id, old, parallel_jobs = \
parse_command_line(sys.argv, description)

sys.exit(create_test(testargs, compiler, no_run, no_build, no_batch, test_root, baseline_root, clean,
compare, generate, baseline_name, namelists_only, project, test_id, old))
compare, generate, baseline_name, namelists_only, project, test_id, old, parallel_jobs))

###############################################################################

Expand Down
Loading

0 comments on commit 357bb6b

Please sign in to comment.