From c37ddebddfb682e088e33995738bdc37ddbefac9 Mon Sep 17 00:00:00 2001 From: Jinyun Tang Date: Wed, 22 Mar 2017 10:22:06 -0700 Subject: [PATCH] bug fix for array-passing related crash of betr With the most recent intel compiler, the betr code crashes when running on more than 24 cpus on edison. The cause of this crash was traced and found to be related to the passing of arrays between different subroutines. Now all arrays are passed with explicit specification of their lower and upper bounds, so betr can run without noticeable issue. However, the advection capability of betr has to be turned off, because the new intel compiler does not work well with passing generic data types. A workaround is developed in betr-v2, and will be integrated in the future. This fix does not affect codes other than betr, so is bfb. --- components/clm/src/betr/BetrBGCMod.F90 | 120 +++++++++--------- .../src/betr/betr_core/TracerBalanceMod.F90 | 9 +- .../src/betr/betr_core/TracerParamsMod.F90 | 11 +- .../clm/src/betr/betr_core/TransportMod.F90 | 8 +- 4 files changed, 76 insertions(+), 72 deletions(-) diff --git a/components/clm/src/betr/BetrBGCMod.F90 b/components/clm/src/betr/BetrBGCMod.F90 index 40c29c31af5b..223f947c3c96 100644 --- a/components/clm/src/betr/BetrBGCMod.F90 +++ b/components/clm/src/betr/BetrBGCMod.F90 @@ -13,6 +13,7 @@ module BetrBGCMod use clm_varctl , only : iulog use clm_time_manager , only : get_nstep use MathfuncMod , only : dot_sum + use spmdMod , only : iam implicit none private @@ -22,7 +23,8 @@ module BetrBGCMod real(r8), parameter :: tiny_val = 1.e-20_r8 !very small value, for tracer concentration etc. real(r8), parameter :: dtime_min = 1._r8 !minimum time step 1 second real(r8), parameter :: err_tol_transp = 1.e-8_r8 !error tolerance for tracer transport - + logical, parameter :: advection_on=.false. + logical, parameter :: diffusion_on=.true.
public :: run_betr_one_step_without_drainage public :: run_betr_one_step_with_drainage public :: betrBGC_init @@ -157,7 +159,7 @@ subroutine run_betr_one_step_without_drainage(bounds, lbj, ubj, num_soilc, filte chemstate_vars%soil_pH(bounds%begc:bounds%endc,1:ubj)=7._r8 call set_phase_convert_coeff(bounds, lbj, ubj, & - tracerboundarycond_vars%jtops_col, & + tracerboundarycond_vars%jtops_col(bounds%begc:bounds%endc), & num_soilc, & filter_soilc, & col%dz(bounds%begc:bounds%endc, lbj:ubj), & @@ -169,7 +171,7 @@ subroutine run_betr_one_step_without_drainage(bounds, lbj, ubj, num_soilc, filte tracercoeff_vars=tracercoeff_vars) call set_multi_phase_diffusion(bounds, lbj, ubj, & - tracerboundarycond_vars%jtops_col, & + tracerboundarycond_vars%jtops_col(bounds%begc:bounds%endc), & num_soilc, & filter_soilc, & soilstate_vars=soilstate_vars, & @@ -185,9 +187,9 @@ subroutine run_betr_one_step_without_drainage(bounds, lbj, ubj, num_soilc, filte betrtracer_vars, & waterflux_vars, & tracerboundarycond_vars) - + if(advection_on)& call calc_tracer_infiltration(bounds, lbj, ubj, & - tracerboundarycond_vars%jtops_col, & + tracerboundarycond_vars%jtops_col(bounds%begc:bounds%endc), & num_soilc, & filter_soilc, & tracercoeff_vars%bunsencef_col(bounds%begc:bounds%endc, & @@ -196,15 +198,15 @@ subroutine run_betr_one_step_without_drainage(bounds, lbj, ubj, num_soilc, filte betrtracer_vars, & tracerboundarycond_vars, & waterflux_vars, & - tracerflux_vars%tracer_flx_infl_col) + tracerflux_vars%tracer_flx_infl_col(bounds%begc:bounds%endc, 1:betrtracer_vars%ngwmobile_tracers)) call set_gwdif_Rfactor(bounds, lbj, ubj, & - tracerboundarycond_vars%jtops_col, & + tracerboundarycond_vars%jtops_col(bounds%begc:bounds%endc), & num_soilc, & filter_soilc, & tracercoeff_vars, & betrtracer_vars, & - Rfactor) + Rfactor(bounds%begc:bounds%endc, lbj:ubj,1:betrtracer_vars%ngwmobile_tracers)) !calculate flux from merging topsoil with surface ponding water and snow call 
calc_tracer_h2osfc_snow_residual_combine(bounds, num_soilc, filter_soilc, & @@ -231,7 +233,7 @@ subroutine run_betr_one_step_without_drainage(bounds, lbj, ubj, num_soilc, filte filter_soilc, & num_soilp, & filter_soilp, & - tracerboundarycond_vars%jtops_col, & + tracerboundarycond_vars%jtops_col(bounds%begc:bounds%endc), & dtime, & betrtracer_vars, & tracercoeff_vars, & @@ -247,12 +249,11 @@ subroutine run_betr_one_step_without_drainage(bounds, lbj, ubj, num_soilc, filte tracerstate_vars, & tracerflux_vars, & plantsoilnutrientflux_vars) - call tracer_gw_transport(bounds, lbj, ubj, & - tracerboundarycond_vars%jtops_col, & + tracerboundarycond_vars%jtops_col(bounds%begc:bounds%endc), & num_soilc, & filter_soilc, & - Rfactor, & + Rfactor(bounds%begc:bounds%endc, lbj:ubj,1:betrtracer_vars%ngwmobile_tracers), & col%dz(bounds%begc:bounds%endc, lbj:ubj), & col%zi(bounds%begc:bounds%endc,lbj-1:ubj), & waterstate_vars%h2osoi_liqvol_col(bounds%begc:bounds%endc, lbj:ubj), & @@ -266,8 +267,7 @@ subroutine run_betr_one_step_without_drainage(bounds, lbj, ubj, num_soilc, filte tracerstate_vars, & tracerflux_vars, & waterstate_vars) - - call tracer_solid_transport(bounds, 1, ubj, & + call tracer_solid_transport(bounds, 1, ubj, & num_soilc, & filter_soilc, & dtime, & @@ -279,10 +279,10 @@ subroutine run_betr_one_step_without_drainage(bounds, lbj, ubj, num_soilc, filte tracerstate_vars) call calc_ebullition(bounds, 1, ubj, & - tracerboundarycond_vars%jtops_col, & + tracerboundarycond_vars%jtops_col(bounds%begc:bounds%endc), & num_soilc, & filter_soilc, & - atm2lnd_vars%forc_pbot_downscaled_col, & + atm2lnd_vars%forc_pbot_downscaled_col(bounds%begc:bounds%endc), & col%zi(bounds%begc:bounds%endc, 0:ubj), & col%dz(bounds%begc:bounds%endc, 1:ubj), & dtime, & @@ -401,13 +401,13 @@ subroutine tracer_solid_transport(bounds, lbj, ubj, num_soilc, filter_soilc, dti do !do diffusive transport call DiffusTransp(bounds, lbj, ubj, jtops, num_soilc, filter_soilc, ntrcs, & - 
tracer_conc_solid_passive_col(:,:,difs_trc_group(1:ntrcs)), & - hmconductance_col(:,:,j), & - dtime_loc, & - dz, & - source=local_source(:,:,1:ntrcs), & + tracer_conc_solid_passive_col(bounds%begc:bounds%endc,lbj:ubj,difs_trc_group(1:ntrcs)), & + hmconductance_col(bounds%begc:bounds%endc,lbj:ubj-1,j), & + dtime_loc(bounds%begc:bounds%endc), & + dz(bounds%begc:bounds%endc,lbj:ubj), & + source=local_source(bounds%begc:bounds%endc,lbj:ubj,1:ntrcs), & update_col=update_col, & - dtracer=dtracer(:,:,1:ntrcs)) + dtracer=dtracer(bounds%begc:bounds%endc,lbj:ubj,1:ntrcs)) !do tracer update do fc = 1, num_soilc @@ -530,7 +530,6 @@ subroutine tracer_gw_transport(bounds, lbj, ubj, jtops, num_soilc, filter_soilc, SHR_ASSERT_ALL((ubound(zi) == (/bounds%endc, ubj/)), errMsg(__FILE__,__LINE__)) SHR_ASSERT_ALL((ubound(h2osoi_liqvol) == (/bounds%endc, ubj/)), errMsg(__FILE__,__LINE__)) SHR_ASSERT_ALL((ubound(Rfactor) == (/bounds%endc, ubj, betrtracer_vars%ngwmobile_tracer_groups/)), errMsg(__FILE__,__LINE__)) - ! !Exclude solid phase tracers, by doing tracer equilibration !This is equivalent to do aqueous chemistry without biological production/consumption @@ -538,7 +537,7 @@ subroutine tracer_gw_transport(bounds, lbj, ubj, jtops, num_soilc, filter_soilc, !partitioning due to change in hydrological status. 
call bgc_reaction%do_tracer_equilibration(bounds, lbj, ubj, & - jtops, & + jtops(bounds%begc:bounds%endc), & num_soilc, & filter_soilc, & betrtracer_vars, & @@ -547,14 +546,14 @@ subroutine tracer_gw_transport(bounds, lbj, ubj, jtops, num_soilc, filter_soilc, !do diffusive and advective transport, assuming aqueous and gaseous phase are in equilbrium do kk = 1 , 2 if (transp_pathway(kk) == do_diffusion) then - - call do_tracer_gw_diffusion(bounds, lbj, ubj, & - jtops, & + + if(diffusion_on) call do_tracer_gw_diffusion(bounds, lbj, ubj, & + jtops(bounds%begc:bounds%endc), & num_soilc, & filter_soilc, & betrtracer_vars, & tracerboundarycond_vars, & - Rfactor, & + Rfactor(bounds%begc:bounds%endc, lbj:ubj, 1:betrtracer_vars%ngwmobile_tracer_groups), & tracercoeff_vars%hmconductance_col(bounds%begc:bounds%endc, lbj:ubj-1, : ), & dz, & dtime, & @@ -564,15 +563,16 @@ subroutine tracer_gw_transport(bounds, lbj, ubj, jtops, num_soilc, filter_soilc, elseif (transp_pathway(kk) == do_advection)then jtops0(:) = 1 - call do_tracer_advection(bounds, lbj, ubj, & + + if(advection_on)call do_tracer_advection(bounds, lbj, ubj, & jtops0, & num_soilc, & filter_soilc, & betrtracer_vars, & - dz, & - zi, & + dz(bounds%begc:bounds%endc,lbj:ubj), & + zi(bounds%begc:bounds%endc,lbj-1:ubj),& dtime, & - h2osoi_liqvol, & + h2osoi_liqvol(bounds%begc:bounds%endc,lbj:ubj),& waterflux_vars, & tracercoeff_vars, & tracerstate_vars, & @@ -749,22 +749,21 @@ subroutine do_tracer_advection(bounds, lbj, ubj, jtops, num_soilc, filter_soilc, ! 
do semi-lagrangian tracer transport call semi_lagrange_adv_backward(bounds, lbj, ubj, & - jtops, & + jtops(bounds%begc:bounds%endc), & num_soilc, & filter_soilc, & ntrcs, & - dtime_loc, & - dz, & - zi, & + dtime_loc(bounds%begc:bounds%endc), & + dz(bounds%begc:bounds%endc ,lbj:ubj), & + zi(bounds%begc:bounds%endc ,lbj-1:ubj), & qflx_adv_local(bounds%begc:bounds%endc,lbj-1:ubj), & inflx_top(bounds%begc:bounds%endc, 1:ntrcs), & inflx_bot(bounds%begc:bounds%endc, 1:ntrcs), & - update_col, & - halfdt_col, & + update_col(bounds%begc:bounds%endc), & + halfdt_col(bounds%begc:bounds%endc), & tracer_conc_mobile_col(bounds%begc:bounds%endc, lbj:ubj,adv_trc_group(1:ntrcs)), & - trc_conc_out(:,:,1:ntrcs), & + trc_conc_out(bounds%begc:bounds%endc,lbj:ubj,1:ntrcs), & leaching_mass(bounds%begc:bounds%endc,1:ntrcs)) - !do soil-root tracer exchange do k = 1, ntrcs trcid = adv_trc_group(k) @@ -780,16 +779,15 @@ subroutine do_tracer_advection(bounds, lbj, ubj, jtops, num_soilc, filter_soilc, if(vtrans_scal(trcid)>0._r8)then call calc_root_uptake_as_perfect_sink(bounds, lbj, ubj, num_soilc, & filter_soilc, & - dtime_loc, & - dz, & - qflx_rootsoi_local, & - update_col, & - halfdt_col, & + dtime_loc(bounds%begc:bounds%endc), & + dz(bounds%begc:bounds%endc,lbj:ubj), & + qflx_rootsoi_local(bounds%begc:bounds%endc,lbj:ubj), & + update_col(bounds%begc:bounds%endc), & + halfdt_col(bounds%begc:bounds%endc), & tracer_conc_mobile_col(bounds%begc:bounds%endc, lbj:ubj,trcid), & transp_mass(bounds%begc:bounds%endc, k)) endif enddo - !do error budget and tracer flux update do k = 1, ntrcs trcid = adv_trc_group(k) @@ -812,6 +810,7 @@ subroutine do_tracer_advection(bounds, lbj, ubj, jtops, num_soilc, filter_soilc, else write(iulog,'(I8,X,A,6(X,A,X,E18.10))')c,tracernames(trcid),' err=',err_tracer(c,k),' transp=',transp_mass(c,k),' lech=',& leaching_mass(c,k),' infl=',inflx_top(c,k),' dmass=',dmass(c,k), ' mass0=',mass0,'err_rel=',err_relative + write(iulog,*) 'min,max 
val',minval(qflx_adv_local(c,lbj-1:ubj)),maxval(qflx_adv_local(c,lbj-1:ubj)),'lbj,ubj',lbj,ubj call endrun('mass balance error for tracer '//tracernames(j)//errMsg(__FILE__, __LINE__)) endif @@ -836,8 +835,8 @@ subroutine do_tracer_advection(bounds, lbj, ubj, jtops, num_soilc, filter_soilc, enddo ! do loop control test - lexit_loop=exit_loop_by_threshold(bounds%begc, bounds%endc, time_remain, & - dtime_min, num_soilc, filter_soilc, update_col) + lexit_loop=exit_loop_by_threshold(bounds%begc, bounds%endc, time_remain(bounds%begc:bounds%endc), & + dtime_min, num_soilc, filter_soilc, update_col(bounds%begc:bounds%endc)) if(lexit_loop)exit enddo @@ -971,17 +970,20 @@ subroutine do_tracer_gw_diffusion(bounds, lbj, ubj, jtops, num_soilc, filter_soi !Do adpative time stepping to avoid negative tracer do - call DiffusTransp(bounds, lbj, ubj, jtops, & - num_soilc, & - filter_soilc, ntrcs, & - tracer_conc_mobile_col( : , : ,dif_trc_group(1:ntrcs)), Rfactor( : , : ,j), & - hmconductance_col( : , : ,j), dtime_loc, dz, local_source(:,:, 1:ntrcs), & - tracer_gwdif_concflux_top( : , : ,dif_trc_group(1:ntrcs)), & - condc_toplay( : ,j), & - topbc_type(j), & - bot_concflux( : , : ,dif_trc_group(1:ntrcs)), & - update_col, & - dtracer(:,:,1:ntrcs)) + call DiffusTransp(bounds, lbj, ubj, jtops, & + num_soilc, & + filter_soilc, ntrcs, & + tracer_conc_mobile_col(bounds%begc:bounds%endc,lbj:ubj,dif_trc_group(1:ntrcs)),& + Rfactor(bounds%begc:bounds%endc, lbj:ubj ,j), & + hmconductance_col(bounds%begc:bounds%endc,lbj:ubj-1,j), & + dtime_loc(bounds%begc:bounds%endc), dz(bounds%begc:bounds%endc,lbj:ubj), & + local_source(bounds%begc:bounds%endc,lbj:ubj, 1:ntrcs), & + tracer_gwdif_concflux_top(bounds%begc:bounds%endc,1:2,dif_trc_group(1:ntrcs)), & + condc_toplay(bounds%begc :bounds%endc ,j), & + topbc_type(j), & + bot_concflux(bounds%begc:bounds%endc,1:2,dif_trc_group(1:ntrcs)), & + update_col, & + dtracer(bounds%begc:bounds%endc,lbj:ubj,1:ntrcs)) !do tracer update do fc = 1, num_soilc diff 
--git a/components/clm/src/betr/betr_core/TracerBalanceMod.F90 b/components/clm/src/betr/betr_core/TracerBalanceMod.F90 index f19b397b9138..6d52b2107e38 100644 --- a/components/clm/src/betr/betr_core/TracerBalanceMod.F90 +++ b/components/clm/src/betr/betr_core/TracerBalanceMod.F90 @@ -53,7 +53,7 @@ subroutine begin_betr_tracer_massbalance(bounds, lbj, ubj, numf, filter, & call tracerflux_vars%Reset(bounds, numf, filter) call betr_tracer_mass_summary(bounds, lbj, ubj, numf, filter, betrtracer_vars, tracerstate_vars, & - tracerstate_vars%beg_tracer_molarmass_col) + tracerstate_vars%beg_tracer_molarmass_col(bounds%begc:bounds%endc, 1:betrtracer_vars%ntracers)) end subroutine begin_betr_tracer_massbalance @@ -108,7 +108,7 @@ subroutine betr_tracer_massbalance_check(bounds, lbj, ubj, numf, filter, betrtra ) call betr_tracer_mass_summary(bounds, lbj, ubj, numf, filter, betrtracer_vars, tracerstate_vars, & - end_tracer_molarmass) + end_tracer_molarmass(bounds%begc:bounds%endc, 1:betrtracer_vars%ntracers)) dtime=get_step_size() @@ -118,8 +118,9 @@ subroutine betr_tracer_massbalance_check(bounds, lbj, ubj, numf, filter, betrtra call tracerflux_vars%flux_summary(c, betrtracer_vars) do kk = 1, ngwmobile_tracers - errtracer(c,kk) = beg_tracer_molarmass(c,kk)-end_tracer_molarmass(c,kk) & - + tracer_flx_netpro(c,kk)-tracer_flx_netphyloss(c,kk) + if(c>maxval(filter))print*,'crazy happend' + errtracer(c,kk) = beg_tracer_molarmass(c,kk)-end_tracer_molarmass(c,kk) + errtracer(c,kk) = errtracer(c,kk) + tracer_flx_netpro(c,kk)-tracer_flx_netphyloss(c,kk) if(abs(errtracer(c,kk))