Skip to content

Commit

Permalink
Merge pull request #1606 from ACME-Climate/sarats/machinefiles/summit…
Browse files Browse the repository at this point in the history
…dev (#1606)

Added Summitdev configuration files for IBM and PGI compilers.

    Added specific machine configuration and modules info for both compilers in config_machines.xml.
    Added appropriate compiler options for IBM and PGI specific to Summitdev in config_compilers.xml.
    Incorporates fix for proper loading of lmod modules.
    Added batch queue, LSF info specific to Summitdev in config_batch.xml.
    Commented out the option "-a {{ poe }}" in the generic lsf section of config_batch.xml. It should be moved to a machine-specific section if needed.

[BFB]

P2-23
  • Loading branch information
minxu74 authored Jul 6, 2017
2 parents 891df3e + b5c8b93 commit 93c74b6
Show file tree
Hide file tree
Showing 5 changed files with 221 additions and 3 deletions.
68 changes: 68 additions & 0 deletions cime_config/acme/machines/Depends.summitdev.ibm
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# File copied from Depends.mira, which also uses the IBM compilers.
#
# Per-file compile-flag overrides for the IBM compiler on Summitdev.
# Each rule below recompiles specific objects with the standard flags
# plus one extra option appended at the end of the command line.
#
## These routines have stack-size problems when OpenMP is invoked;
## adding -qsmallstack resolves them.
SSOBJS = mo_sethet.o mo_drydep.o time_management.o

$(SSOBJS): %.o: %.F90
$(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) $(FREEFLAGS) -qsmallstack $<

# shr_reprosum_mod gets -qsmp=noauto:noomp, but only in threaded builds;
# QSMPFLAGS stays empty otherwise.
QSMPFLAGS:=
ifeq ($(compile_threaded), true)
QSMPFLAGS += -qsmp=noauto:noomp
endif
shr_reprosum_mod.o: shr_reprosum_mod.F90
$(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) $(FREEFLAGS) $(QSMPFLAGS) $<

# These routines benefit from -qnostrict without violating the BFB test.
PERFOBJS=\
prim_advection_mod_base.o \
vertremap_mod_base.o \
edge_mod_base.o \
derivative_mod_base.o \
bndry_mod_base.o \
prim_advance_mod.o \
uwshcu.o \
wetdep.o


# The overrides in this conditional apply to non-debug builds only.
ifeq ($(DEBUG),FALSE)
$(PERFOBJS): %.o: %.F90
$(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) $(FREEFLAGS) -qnostrict $<
# The model crashes if these files are compiled with the default -O3
# optimization, so they are built at -O2 instead.
seasalt_model.o: seasalt_model.F90
$(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) $(FREEFLAGS) -O2 $<
linoz_data.o: linoz_data.F90
$(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) $(FREEFLAGS) -O2 $<
endif

### These files take a long time to compile with the default optimization flags.
### Reducing optimization gives <1 min build times with little impact on model run time.
### begin
## atm files taking more than a minute to compile
# This takes 9 mins to compile at the default -O3 level.
buffer.o: buffer.F90
$(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) $(FREEFLAGS) -O0 $<

## lnd files taking more than a minute to compile
# This takes 4 mins to compile with -O3 -qsmp=omp.
BiogeophysRestMod.o: BiogeophysRestMod.F90
$(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) $(FREEFLAGS) -O0 -qsmp=noopt $<

# This takes 17 mins to compile with -O3 -qsmp=omp.
CNrestMod.o: CNrestMod.F90
$(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) $(FREEFLAGS) -O0 -qsmp=noopt $<

# This takes 4 mins to compile with -qsmp=omp.
clmtypeInitMod.o: clmtypeInitMod.F90
$(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) $(FREEFLAGS) -qsmp=noopt $<

# This takes 2 mins to compile with -qsmp=omp.
clmtype.o: clmtype.F90
$(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) $(FREEFLAGS) -qsmp=noopt $<

# Disable inlining with -Q! (there are some issues with the pure functions
# in these modules).
advance_xm_wpxp_module.o: advance_xm_wpxp_module.F90
$(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) $(FREEFLAGS) -Q! $<
advance_wp2_wp3_module.o: advance_wp2_wp3_module.F90
$(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) $(FREEFLAGS) -Q! $<
### end
22 changes: 20 additions & 2 deletions config/acme/machines/config_batch.xml
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@
</submit_args>
</batch_system>

<batch_system type="lsf" version="9.1">
<batch_system type="lsf" version="10.1">
<batch_query args=" -w" >bjobs</batch_query>
<batch_submit>bsub</batch_submit>
<batch_redirect>&lt;</batch_redirect>
Expand All @@ -82,7 +82,9 @@
<directive > -n {{ total_tasks }} </directive>
<directive > -R "span[ptile={{ tasks_per_node }}]"</directive>
<directive > -N </directive>
<directive default="poe" > -a {{ poe }} </directive>
<!-- The following option causes problems with lsf version on Summitdev.
If desired, this should be in specific machine section. -->
<!-- <directive default="poe" > -a {{ poe }} </directive> -->
<directive default="acme.stdout" > -o {{ output_error_path }}.%J </directive>
<directive default="acme.stderr" > -e {{ output_error_path }}.%J </directive>
<directive > -J {{ job_id }} </directive>
Expand Down Expand Up @@ -393,13 +395,29 @@
<directives>
<directive>-A {{ project }}</directive>
<directive>-l nodes={{ num_nodes }}</directive>
<directive>-env "all"</directive>
</directives>
<queues>
<queue walltimemax="02:00:00" jobmin="0" jobmax="299008" default="true">batch</queue>
<queue walltimemax="01:00:00" jobmin="0" jobmax="299008" strict="true">debug</queue>
</queues>
</batch_system>

<batch_system MACH="summitdev" type="lsf" >
<directives>
<directive>-P {{ project }}</directive>
</directives>
<queues>
<queue walltimemax="01:00" jobmin="0" jobmax="8640" default="true">batch</queue>
<!--
jobmax = 54 nodes * 20 cores/node * 8 HW threads/core = 8640
Queue walltime policy:
Nodes Max Walltime
<=4 4 hours
>4 1 hour
-->
</queues>
</batch_system>

<batch_system MACH="lawrencium-lr2" type="slurm" >
<submit_args>
<arg flag="--qos" name="condo_esd2"/>
Expand Down
26 changes: 26 additions & 0 deletions config/acme/machines/config_compilers.xml
Original file line number Diff line number Diff line change
Expand Up @@ -1026,6 +1026,32 @@ for mct, etc.
<ALBANY_PATH>/projects/ccsm/libs/AlbanyTrilinos/Albany/build/install</ALBANY_PATH>
</compiler>

<compiler COMPILER="ibm" MACH="summitdev">
<!-- Settings for the IBM compilers on Summitdev.
     The ADD_SLIBS link line relies on NETCDF_C_PATH, NETCDF_FORTRAN_PATH,
     HDF5_PATH, ESSL_PATH and NETLIB_LAPACK_PATH being defined when the
     build runs. -->
<SFC> xlf_r </SFC>
<MPIFC> mpixlf </MPIFC>
<SCC> xlc_r </SCC>
<MPICC> mpixlc </MPICC>
<ADD_FFLAGS> -qzerosize -qfree=f90 -qxlf2003=polymorphic</ADD_FFLAGS>
<ADD_FFLAGS_NOOPT> -O0 -g -qfree=f90 </ADD_FFLAGS_NOOPT>
<CONFIG_ARGS> --host=Linux </CONFIG_ARGS>
<PIO_FILESYSTEM_HINTS>lustre</PIO_FILESYSTEM_HINTS>
<ADD_SLIBS>-L$(NETCDF_C_PATH)/lib -lnetcdf -L$(NETCDF_FORTRAN_PATH)/lib -lnetcdff -L$(HDF5_PATH)/lib -lhdf5_hl -lhdf5 -L$(ESSL_PATH)/lib64 -lessl -L$(NETLIB_LAPACK_PATH)/lib64 -llapack</ADD_SLIBS>
</compiler>

<compiler COMPILER="pgi" MACH="summitdev">
  <!-- Settings for the PGI compilers on Summitdev. -->
  <!-- Non-debug builds add -O2 for both C and Fortran. -->
  <ADD_CFLAGS DEBUG="FALSE"> -O2 </ADD_CFLAGS>
  <ADD_FFLAGS DEBUG="FALSE"> -O2 </ADD_FFLAGS>
  <NETCDF_PATH>$(NETCDFROOT)</NETCDF_PATH>
  <PNETCDF_PATH>$(PNETCDFROOT)</PNETCDF_PATH>
  <CONFIG_ARGS> --host=Linux </CONFIG_ARGS>
  <PIO_FILESYSTEM_HINTS>lustre</PIO_FILESYSTEM_HINTS>
  <!-- NetCDF link flags come from the nc-config and nf-config tools.
       The LAPACK/BLAS directory must be written as $(OLCF_NETLIB_LAPACK_ROOT):
       without the parentheses make expands only the single-character
       variable $O and the library path is lost. -->
  <ADD_LDFLAGS> $(shell nc-config --libs) $(shell nf-config --flibs) -L$(OLCF_NETLIB_LAPACK_ROOT)/lib -lblas -llapack </ADD_LDFLAGS>
  <ADD_SLIBS> $(shell nf-config --flibs) </ADD_SLIBS>
  <MPIFC> mpif90 </MPIFC>
  <MPICC> mpicc </MPICC>
  <MPICXX> mpiCC </MPICXX>
</compiler>


<compiler COMPILER="ibm" MACH="cetus">
<SFC> mpixlf2003_r </SFC>
Expand Down
104 changes: 104 additions & 0 deletions config/acme/machines/config_machines.xml
Original file line number Diff line number Diff line change
Expand Up @@ -2554,6 +2554,110 @@
<module_system type="none"/>
</machine>

<machine MACH="summitdev">
  <!-- Machine definition for Summitdev: paths, batch system, MPI launcher,
       and the modules / environment variables set up for each compiler
       and MPI library combination. -->
  <DESC>ORNL pre-Summit testbed. Node: 2x POWER8 + 4x Tesla P100, 20 cores/node, 8 HW threads/core.</DESC>
  <NODENAME_REGEX>summitdev-*</NODENAME_REGEX>
  <NODE_FAIL_REGEX>Received node event ec_node</NODE_FAIL_REGEX>
  <TESTS>acme_developer</TESTS>
  <COMPILERS>ibm,pgi</COMPILERS>
  <!-- The serial MPI library is spelled "mpi-serial" here so that it matches
       the mpilib conditions used by the modules and env entries below. -->
  <MPILIBS>openmpi,mpi-serial</MPILIBS>
  <CIME_OUTPUT_ROOT>$ENV{HOME}/acme_scratch/$PROJECT</CIME_OUTPUT_ROOT>
  <RUNDIR>/lustre/atlas/scratch/$ENV{USER}/$PROJECT/$CASE/run</RUNDIR>
  <EXEROOT>$CIME_OUTPUT_ROOT/$CASE/bld</EXEROOT>
  <DIN_LOC_ROOT>/lustre/atlas1/cli900/world-shared/cesm/inputdata</DIN_LOC_ROOT>
  <DIN_LOC_ROOT_CLMFORC>/lustre/atlas1/cli900/world-shared/cesm/inputdata/atm/datm7</DIN_LOC_ROOT_CLMFORC>
  <DOUT_S_ROOT>/lustre/atlas/scratch/$ENV{USER}/$PROJECT/archive/$CASE</DOUT_S_ROOT>
  <DOUT_L_MSROOT>csm/$CASE</DOUT_L_MSROOT>
  <BASELINE_ROOT>/lustre/atlas1/cli900/world-shared/cesm/baselines</BASELINE_ROOT>
  <CCSM_CPRNC>/lustre/atlas1/cli900/world-shared/cesm/tools/cprnc/cprnc</CCSM_CPRNC>
  <SAVE_TIMING_DIR>/lustre/atlas/proj-shared/$PROJECT</SAVE_TIMING_DIR>
  <OS>LINUX</OS>
  <BATCH_SYSTEM>lsf</BATCH_SYSTEM>
  <SUPPORTED_BY>acme</SUPPORTED_BY>
  <GMAKE_J>32</GMAKE_J>
  <!-- 20 cores per node; 20 cores * 8 HW threads per core = 160. -->
  <PES_PER_NODE>20</PES_PER_NODE>
  <MAX_TASKS_PER_NODE>160</MAX_TASKS_PER_NODE>
  <PROJECT_REQUIRED>TRUE</PROJECT_REQUIRED>
  <PROJECT>csc190</PROJECT>
  <PIO_CONFIG_OPTS> -D PIO_BUILD_TIMING:BOOL=ON </PIO_CONFIG_OPTS>

  <mpirun mpilib="openmpi">
    <executable args="default">mpirun</executable>
    <arguments>
      <arg name="num_tasks" > -np $TOTALPES</arg>
      <arg name="binding_core"> --map-by core:PE=$ENV{OMP_NUM_THREADS} --bind-to core </arg>
      <arg name="thread_count"> -x OMP_NUM_THREADS=$ENV{OMP_NUM_THREADS}</arg>
      <arg name="show-binding"> --report-bindings </arg>
      <arg name="show-processmap"> --display-map </arg>
      <!--
      <arg name="tasks_per_node"> map-by ppr:$PES_PER_NODE:node</arg>
      -->
    </arguments>
  </mpirun>

  <module_system type="module_lmod">
    <!-- list of init_path elements, one per supported language e.g. sh, perl, python-->
    <init_path lang="sh">/sw/summitdev/lmod/7.4.0/rhel7.2_gnu4.8.5/lmod/7.4/init/sh</init_path>
    <init_path lang="csh">/sw/summitdev/lmod/7.4.0/rhel7.2_gnu4.8.5/lmod/7.4/init/csh</init_path>
    <init_path lang="python">/sw/summitdev/lmod/7.4.0/rhel7.2_gnu4.8.5/lmod/7.4/init/env_modules_python.py</init_path>
    <init_path lang="perl">/sw/summitdev/lmod/7.4.0/rhel7.2_gnu4.8.5/lmod/7.4/init/perl</init_path>
    <!-- list of cmd_path elements, one for every supported language, e.g. sh, perl, python -->
    <cmd_path lang="perl">module</cmd_path>
    <cmd_path lang="python">module</cmd_path>
    <cmd_path lang="sh">module</cmd_path>
    <cmd_path lang="csh">module</cmd_path>

    <!-- Always execute -->
    <modules>
      <command name="ls"/>
      <command name="purge"/>
      <command name="ls"/>
      <command name="load">DefApps</command>
      <command name="load">python/3.5.2</command>
      <command name="load">subversion/1.9.3</command>
      <command name="load">git/2.13.0</command>
      <command name="load">cmake/3.6.1</command>
      <command name="load">essl/5.5.0-20161110</command>
      <command name="load">netlib-lapack/3.6.1</command>
    </modules>
    <!-- List of modules elements, executing commands if compiler and mpilib condition applies -->
    <modules compiler="pgi">
      <command name="rm">xl</command>
      <command name="load">pgi/17.4</command>
      <command name="load">netcdf-fortran/4.4.4</command>
      <command name="ls"/>
    </modules>
    <modules compiler="ibm">
      <command name="rm">pgi</command>
      <command name="load">xl/20161123</command>
      <command name="ls"/>
    </modules>

    <!-- mpi lib settings -->
    <modules mpilib="mpi-serial">
      <command name="load">netcdf/4.4.1</command>
    </modules>
    <modules mpilib="!mpi-serial">
      <command name="load">spectrum_mpi/10.1.0.2-20161221</command>
      <command name="load">netcdf/4.4.1</command>
      <command name="load">parallel-netcdf/1.7.0</command>
      <command name="load">hdf5/1.10.0-patch1-parallel</command>
    </modules>
    <!-- Default -->
    <environment_variables>
      <env name="COMPILER">$COMPILER</env>
      <env name="MPILIB">$MPILIB</env>
      <env name="OMP_STACKSIZE">128M</env>
      <env name="NETCDF_C_PATH">$ENV{OLCF_NETCDF_ROOT}</env>
      <env name="NETCDF_FORTRAN_PATH" compiler="pgi">$ENV{OLCF_NETCDF_FORTRAN_ROOT}</env>
      <env name="PNETCDF_PATH" compiler="pgi" mpilib="!mpi-serial">$ENV{OLCF_PARALLEL_NETCDF_ROOT}</env>
      <env name="HDF5_PATH">$ENV{OLCF_HDF5_ROOT}</env>
      <env name="ESSL_PATH">$ENV{OLCF_ESSL_ROOT}</env>
      <!-- NETLIB_LAPACK_PATH is referenced by the summitdev IBM link line
           (-L$(NETLIB_LAPACK_PATH)/lib64) in config_compilers.xml.
           Assumes OLCF_NETLIB_LAPACK_ROOT is exported by the netlib-lapack
           module loaded above; TODO confirm on the machine. -->
      <env name="NETLIB_LAPACK_PATH">$ENV{OLCF_NETLIB_LAPACK_ROOT}</env>
      <!-- The IBM build uses a NetCDF Fortran install from project space
           rather than a module, so its lib directory is also appended to
           LD_LIBRARY_PATH. -->
      <env name="NETCDF_FORTRAN_PATH" compiler="ibm">/lustre/atlas/proj-shared/cli115/summitdev/soft/netcdf/fortran-4.4.4</env>
      <env name="LD_LIBRARY_PATH" compiler="ibm">$ENV{LD_LIBRARY_PATH}:/lustre/atlas/proj-shared/cli115/summitdev/soft/netcdf/fortran-4.4.4/lib</env>
    </environment_variables>
  </module_system>
</machine>

<default_run_suffix>
<default_run_exe>${EXEROOT}/acme.exe </default_run_exe>
Expand Down
4 changes: 3 additions & 1 deletion scripts/lib/CIME/XML/env_mach_specific.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,8 @@ def load_modules(self, modules_to_load):
module_system = self.get_module_system_type()
if (module_system == "module"):
self._load_module_modules(modules_to_load)
elif (module_system == "module_lmod"):
self._load_modules_generic(modules_to_load)
elif (module_system == "soft"):
self._load_modules_generic(modules_to_load)
elif (module_system == "generic"):
Expand All @@ -109,7 +111,7 @@ def list_modules(self):
else:
source_cmd = ""

if (module_system == "module"):
if (module_system in ["module", "module_lmod"]):
return run_cmd_no_fail("%smodule list" % source_cmd, combine_output=True)
elif (module_system == "soft"):
# Does soft really not provide this capability?
Expand Down

0 comments on commit 93c74b6

Please sign in to comment.