Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Changes needed to support jenkins testing on cori-knl #1432

Merged
merged 3 commits into from
Apr 24, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions cime/config/acme/machines/config_machines.xml
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@
<TESTS>acme_developer</TESTS>
<COMPILERS>intel,gnu,cray</COMPILERS>
<MPILIBS>mpt,mpi-serial</MPILIBS>
<CIME_OUTPUT_ROOT>$ENV{SCRATCH}/acme_scratch</CIME_OUTPUT_ROOT>
<CIME_OUTPUT_ROOT>$ENV{SCRATCH}/acme_scratch/cori-haswell</CIME_OUTPUT_ROOT>
<RUNDIR>$CIME_OUTPUT_ROOT/$CASE/run</RUNDIR>
<EXEROOT>$CIME_OUTPUT_ROOT/$CASE/bld</EXEROOT>
<DIN_LOC_ROOT>/project/projectdirs/acme/inputdata</DIN_LOC_ROOT>
Expand Down Expand Up @@ -321,7 +321,7 @@
<TESTS>acme_developer</TESTS>
<COMPILERS>intel,gnu,cray</COMPILERS>
<MPILIBS>mpt,mpi-serial</MPILIBS>
<CIME_OUTPUT_ROOT>$ENV{SCRATCH}/acme_scratch</CIME_OUTPUT_ROOT>
<CIME_OUTPUT_ROOT>$ENV{SCRATCH}/acme_scratch/cori-knl</CIME_OUTPUT_ROOT>
<RUNDIR>$CIME_OUTPUT_ROOT/$CASE/run</RUNDIR>
<EXEROOT>$CIME_OUTPUT_ROOT/$CASE/bld</EXEROOT>
<DIN_LOC_ROOT>/project/projectdirs/acme/inputdata</DIN_LOC_ROOT>
Expand Down
44 changes: 30 additions & 14 deletions cime/scripts/Tools/jenkins_generic_job
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,6 @@ from jenkins_generic_job import jenkins_generic_job
# Not sure whether this list belongs here long-term
MACHINES_THAT_MAINTAIN_BASELINES = ("redsky", "melvin", "skybridge")

_MACHINE = Machines()

###############################################################################
def parse_command_line(args, description):
###############################################################################
Expand All @@ -43,18 +41,13 @@ description=description,
formatter_class=argparse.ArgumentDefaultsHelpFormatter
)

machine = _MACHINE.get_machine_name()
default_test_suite = _MACHINE.get_value("TESTS")
default_maintain_baselines = machine in MACHINES_THAT_MAINTAIN_BASELINES
default_scratch_root = _MACHINE.get_value("CIME_OUTPUT_ROOT")

CIME.utils.setup_standard_logging_options(parser)

parser.add_argument("-g", "--generate-baselines", action="store_true",
help="Generate baselines")

parser.add_argument("--baseline-compare", action="store", choices=("yes", "no"), default=("yes" if default_maintain_baselines else "no"),
help="Do baseline comparisons")
parser.add_argument("--baseline-compare",
help="Do baseline comparisons (yes/no)")

parser.add_argument("--submit-to-cdash", action="store_true",
help="Send results to CDash")
Expand All @@ -71,10 +64,10 @@ formatter_class=argparse.ArgumentDefaultsHelpFormatter
parser.add_argument("-b", "--baseline-name", default=CIME.utils.get_current_branch(repo=CIME.utils.get_cime_root()),
help="Baseline name for baselines to use. Also impacts dashboard job name. Useful for testing a branch other than next or master")

parser.add_argument("-t", "--test-suite", default=default_test_suite,
parser.add_argument("-t", "--test-suite",
help="Override default acme test suite that will be run")

parser.add_argument("-r", "--scratch-root", default=default_scratch_root,
parser.add_argument("-r", "--scratch-root",
help="Override default acme scratch root. Use this to avoid conflicting with other jenkins jobs")

parser.add_argument("--cdash-build-group", default=CIME.wait_for_tests.CDASH_DEFAULT_BUILD_GROUP,
Expand All @@ -87,6 +80,17 @@ formatter_class=argparse.ArgumentDefaultsHelpFormatter
parser.add_argument("--walltime",
help="Force a specific walltime for all tests.")

parser.add_argument("-m", "--machine",
help="The machine for which to build tests, this machine must be defined"
" in the config_machines.xml file for the given model. "
"Default is to match the name of the machine in the test name or "
"the name of the machine this script is run on to the "
"NODENAME_REGEX field in config_machines.xml. This option is highly "
"unsafe and should only be used if you know what you're doing.")

parser.add_argument("--compiler",
help="Compiler to use to build cime. Default will be the default defined for the machine.")

args = parser.parse_args(args[1:])

CIME.utils.handle_standard_logging_options(args)
Expand All @@ -97,9 +101,21 @@ formatter_class=argparse.ArgumentDefaultsHelpFormatter
"Does not make sense to use --cdash-build-name without --submit-to-cdash")
expect(not (args.cdash_project is not CIME.wait_for_tests.ACME_MAIN_CDASH and not args.submit_to_cdash),
"Does not make sense to use --cdash-project without --submit-to-cdash")
expect (args.baseline_compare in [None, "yes", "no"],
"Valid args for --baseline-compare are 'yes' or 'no'")

machine = Machines(machine=args.machine)
machine_name = machine.get_machine_name()

args.machine = machine
args.test_suite = machine.get_value("TESTS") if args.test_suite is None else args.test_suite
default_maintain_baselines = machine_name in MACHINES_THAT_MAINTAIN_BASELINES
args.baseline_compare = default_maintain_baselines if args.baseline_compare is None else args.baseline_compare == "yes"
args.scratch_root = machine.get_value("CIME_OUTPUT_ROOT") if args.scratch_root is None else args.scratch_root
args.compiler = machine.get_default_compiler() if args.compiler is None else args.compiler

return args.generate_baselines, args.submit_to_cdash, args.no_batch, args.baseline_name, args.cdash_build_name, \
args.cdash_project, args.test_suite, args.cdash_build_group, args.baseline_compare, args.scratch_root, args.parallel_jobs, args.walltime
args.cdash_project, args.test_suite, args.cdash_build_group, args.baseline_compare, args.scratch_root, args.parallel_jobs, args.walltime, args.machine, args.compiler

###############################################################################
def _main_func(description):
Expand All @@ -108,10 +124,10 @@ def _main_func(description):
test_results = doctest.testmod(verbose=True)
sys.exit(1 if test_results.failed > 0 else 0)

generate_baselines, submit_to_cdash, no_batch, cdash_build_name, cdash_project, baseline_branch, test_suite, cdash_build_group, no_baseline_compare, scratch_root, parallel_jobs, walltime = \
generate_baselines, submit_to_cdash, no_batch, cdash_build_name, cdash_project, baseline_branch, test_suite, cdash_build_group, no_baseline_compare, scratch_root, parallel_jobs, walltime, machine, compiler = \
parse_command_line(sys.argv, description)

sys.exit(0 if jenkins_generic_job(generate_baselines, submit_to_cdash, no_batch, cdash_build_name, cdash_project, baseline_branch, test_suite, cdash_build_group, no_baseline_compare, scratch_root, parallel_jobs, walltime)
sys.exit(0 if jenkins_generic_job(generate_baselines, submit_to_cdash, no_batch, cdash_build_name, cdash_project, baseline_branch, test_suite, cdash_build_group, no_baseline_compare, scratch_root, parallel_jobs, walltime, machine, compiler)
else CIME.utils.TESTS_FAILED_ERR_CODE)

###############################################################################
Expand Down
8 changes: 8 additions & 0 deletions cime/scripts/lib/CIME/aprun.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,17 @@ def _get_aprun_cmd_for_case_impl(ntasks, nthreads, rootpes, pstrids,
>>> run_exe = "acme.exe"
>>> _get_aprun_cmd_for_case_impl(ntasks, nthreads, rootpes, pstrids, max_tasks_per_node, pes_per_node, pio_numtasks, pio_async_interface, compiler, machine, run_exe)
('aprun -S 4 -n 680 -N 8 -d 2 acme.exe : -S 2 -n 128 -N 4 -d 4 acme.exe ', 117)

>>> compiler = "intel"
>>> _get_aprun_cmd_for_case_impl(ntasks, nthreads, rootpes, pstrids, max_tasks_per_node, pes_per_node, pio_numtasks, pio_async_interface, compiler, machine, run_exe)
('aprun -S 4 -cc numa_node -n 680 -N 8 -d 2 acme.exe : -S 2 -cc numa_node -n 128 -N 4 -d 4 acme.exe ', 117)

>>> ntasks = [64, 64, 64, 64, 64, 64, 64, 64, 1]
>>> nthreads = [1, 1, 1, 1, 1, 1, 1, 1, 1]
>>> rootpes = [0, 0, 0, 0, 0, 0, 0, 0, 0]
>>> pstrids = [1, 1, 1, 1, 1, 1, 1, 1, 1]
>>> _get_aprun_cmd_for_case_impl(ntasks, nthreads, rootpes, pstrids, max_tasks_per_node, pes_per_node, pio_numtasks, pio_async_interface, compiler, machine, run_exe)
('aprun -S 8 -cc numa_node -n 64 -N 16 -d 1 acme.exe ', 4)
"""
max_tasks_per_node = 1 if max_tasks_per_node < 1 else max_tasks_per_node

Expand Down
21 changes: 9 additions & 12 deletions cime/scripts/lib/jenkins_generic_job.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,8 @@
import CIME.wait_for_tests
from CIME.utils import expect
from CIME.XML.machines import Machines

import os, shutil, glob, signal, logging

_MACHINE = Machines()

###############################################################################
def cleanup_queue(set_of_jobs_we_created):
###############################################################################
Expand All @@ -27,20 +24,20 @@ def jenkins_generic_job(generate_baselines, submit_to_cdash, no_batch,
arg_cdash_build_name, cdash_project,
arg_test_suite,
cdash_build_group, baseline_compare,
scratch_root, parallel_jobs, walltime):
scratch_root, parallel_jobs, walltime,
machine, compiler):
###############################################################################
"""
Return True if all tests passed
"""
use_batch = _MACHINE.has_batch_system() and not no_batch
compiler = _MACHINE.get_default_compiler()
test_suite = _MACHINE.get_value("TESTS")
proxy = _MACHINE.get_value("PROXY")
use_batch = machine.has_batch_system() and not no_batch
test_suite = machine.get_value("TESTS")
proxy = machine.get_value("PROXY")
test_suite = test_suite if arg_test_suite is None else arg_test_suite
test_root = os.path.join(scratch_root, "jenkins")

if (use_batch):
batch_system = _MACHINE.get_value("BATCH_SYSTEM")
batch_system = machine.get_value("BATCH_SYSTEM")
expect(batch_system is not None, "Bad XML. Batch machine has no batch_system configuration.")

#
Expand Down Expand Up @@ -101,16 +98,16 @@ def jenkins_generic_job(generate_baselines, submit_to_cdash, no_batch,
baseline_args = ""
if (generate_baselines):
baseline_args = "-g -b %s" % baseline_name
elif (baseline_compare == "yes"):
elif (baseline_compare):
baseline_args = "-c -b %s" % baseline_name

batch_args = "--no-batch" if no_batch else ""
pjob_arg = "" if parallel_jobs is None else "-j %d" % parallel_jobs
walltime_arg = "" if walltime is None else " --walltime %s" % walltime

test_id = "%s_%s" % (test_id_root, CIME.utils.get_timestamp())
create_test_cmd = "./create_test %s --test-root %s -t %s %s %s %s %s" % \
(test_suite, test_root, test_id, baseline_args, batch_args, pjob_arg, walltime_arg)
create_test_cmd = "./create_test %s --test-root %s -t %s --machine %s --compiler %s %s %s %s %s" % \
(test_suite, test_root, test_id, machine.get_machine_name(), compiler, baseline_args, batch_args, pjob_arg, walltime_arg)

if (not CIME.wait_for_tests.SIGNAL_RECEIVED):
create_test_stat = CIME.utils.run_cmd(create_test_cmd, from_dir=CIME.utils.get_scripts_root(),
Expand Down