diff --git a/components/mpas-ocean/src/mode_forward/mpas_ocn_time_integration_split.F b/components/mpas-ocean/src/mode_forward/mpas_ocn_time_integration_split.F index ceffbed38d1d..09e290ad5f69 100644 --- a/components/mpas-ocean/src/mode_forward/mpas_ocn_time_integration_split.F +++ b/components/mpas-ocean/src/mode_forward/mpas_ocn_time_integration_split.F @@ -1674,6 +1674,59 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ meshPool, swForcingPool, & dt, activeTracersOnly, 2) + call mpas_pool_get_array(tracersPool, 'activeTracers', & + tracersGroupCur, 1) + call mpas_pool_get_array(tracersPool, 'activeTracers', & + tracersGroupNew, 2) + call mpas_pool_get_array(statePool, 'normalVelocity', & + normalVelocityCur, 1) + call mpas_pool_get_array(tracersTendPool,'activeTracersTend', & + activeTracersTend) + +#ifdef MPAS_OPENACC + !$acc enter data copyin(layerThicknessNew, normalVelocityNew) + !$acc enter data copyin(normalBarotropicVelocityNew, & + !$acc normalBaroclinicVelocityNew) + !$acc enter data copyin(activeTracersNew) + !$acc update device(layerThickEdgeFlux) + if (config_use_freq_filtered_thickness) then + !$acc enter data copyin(lowFreqDivergenceNew) + !$acc enter data copyin(lowFreqDivergenceCur) + !$acc enter data copyin(lowFreqDivergenceTend) + endif + if (splitExplicitStep < numTSIterations) then + !$acc update device (normalTransportVelocity, & + !$acc normalGMBolusVelocity) + !$acc enter data copyin(atmosphericPressure, seaIcePressure) + !$acc enter data copyin(sshNew) + !$acc update device(tracersSurfaceValue) + if ( associated(frazilSurfacePressure) ) then + !$acc enter data copyin(frazilSurfacePressure) + endif + if (landIcePressureOn) then + !$acc enter data copyin(landIcePressure) + !$acc enter data copyin(landIceDraft) + endif + if (config_use_freq_filtered_thickness) then + !$acc enter data copyin(highFreqThicknessNew) + !$acc enter data copyin(highFreqThicknessCur) + endif + elseif (splitExplicitStep == numTSIterations) then + !$acc enter data copyin(layerThicknessCur) + !$acc enter data copyin(layerThicknessTend) + !$acc enter data copyin(normalBaroclinicVelocityCur) + if (config_compute_active_tracer_budgets) then + !$acc update device(activeTracerHorizontalAdvectionEdgeFlux) + !$acc update device(activeTracerHorizontalAdvectionTendency) + !$acc update device(activeTracerVerticalAdvectionTendency) + !$acc update device(activeTracerHorMixTendency) + !$acc update device(activeTracerSurfaceFluxTendency) + !$acc update device(temperatureShortWaveTendency) + !$acc update device(activeTracerNonLocalTendency) + endif + endif +#endif + call mpas_timer_stop('se tracer tend') ! update halo for tracer tendencies @@ -1695,14 +1748,6 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ call mpas_timer_stop("se halo tracers") call mpas_timer_start('se loop fini') - call mpas_pool_get_array(tracersPool, 'activeTracers', & - tracersGroupCur, 1) - call mpas_pool_get_array(tracersPool, 'activeTracers', & - tracersGroupNew, 2) - call mpas_pool_get_array(statePool, 'normalVelocity', & - normalVelocityCur, 1) - call mpas_pool_get_array(tracersTendPool,'activeTracersTend', & - activeTracersTend) !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ! If iterating, reset variables for next iteration @@ -1719,8 +1764,15 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ ! Only need T & S for earlier iterations, ! then all the tracers needed the last time through. +!! Keeping this calc on the CPU for now +#ifdef MPAS_OPENACC + !$acc update host(layerThicknessNew) + !$acc update host(activeTracersNew) + !$acc update host(tracersSurfaceValue) +#else !$omp parallel !$omp do schedule(runtime) private(i, k, temp_h, temp) +#endif do iCell = 1, nCellsAll do k = minLevelCell(iCell), maxLevelCell(iCell) @@ -1744,33 +1796,14 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ end do ! tracer index end do ! vertical end do ! iCell +#ifndef MPAS_OPENACC !$omp end do !$omp end parallel +#endif #ifdef MPAS_OPENACC - !$acc enter data copyin(layerThicknessNew, normalVelocityNew) - !$acc update device (normalTransportVelocity, & - !$acc normalGMBolusVelocity) - !$acc enter data copyin(atmosphericPressure, seaIcePressure) - !$acc enter data copyin(sshNew) - !$acc enter data copyin(activeTracersNew) - !$acc update device(tracersSurfaceValue) - if ( associated(frazilSurfacePressure) ) then - !$acc enter data copyin(frazilSurfacePressure) - endif - if (landIcePressureOn) then - !$acc enter data copyin(landIcePressure) - !$acc enter data copyin(landIceDraft) - endif - !$acc enter data copyin(normalBarotropicVelocityNew, & - !$acc normalBaroclinicVelocityNew) - if (config_use_freq_filtered_thickness) then - !$acc enter data copyin(highFreqThicknessNew) - !$acc enter data copyin(highFreqThicknessCur) - !$acc enter data copyin(lowFreqDivergenceNew) - !$acc enter data copyin(lowFreqDivergenceCur) - !$acc enter data copyin(lowFreqDivergenceTend) - endif + !$acc update device(layerThicknessNew) + !$acc update device(activeTracersNew) #endif if (config_use_freq_filtered_thickness) then @@ -1842,48 +1875,6 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ call ocn_diagnostic_solve(dt, statePool, forcingPool, & meshPool, scratchPool, & tracersPool, 2, full=.false.) -#ifdef MPAS_OPENACC - !$acc update host(layerThickEdgeFlux, layerThickEdgeMean) - !$acc update host(relativeVorticity, circulation) - !$acc update host(vertTransportVelocityTop, & - !$acc vertGMBolusVelocityTop, & - !$acc relativeVorticityCell, & - !$acc divergence, & - !$acc kineticEnergyCell, & - !$acc tangentialVelocity, & - !$acc vertVelocityTop) - !$acc update host(normRelVortEdge, normPlanetVortEdge, & - !$acc normalizedRelativeVorticityCell) - !$acc update host (surfacePressure) - !$acc update host(zMid, zTop) - !$acc exit data copyout(sshNew) - !$acc exit data delete(activeTracersNew) - !$acc update host(tracersSurfaceValue) - !$acc update host(normalVelocitySurfaceLayer) - !$acc exit data delete (atmosphericPressure, seaIcePressure) - if ( associated(frazilSurfacePressure) ) then - !$acc exit data delete(frazilSurfacePressure) - endif - if (landIcePressureOn) then - !$acc exit data delete(landIcePressure) - !$acc exit data delete(landIceDraft) - endif - !$acc exit data delete(layerThicknessNew) - !$acc exit data delete(normalBarotropicVelocityNew, & - !$acc normalBaroclinicVelocityNew) - !$acc exit data copyout(normalVelocityNew) - !$acc update host(density, potentialDensity, displacedDensity) - !$acc update host(thermExpCoeff, & - !$acc& salineContractCoeff) - !$acc update host(montgomeryPotential, pressure) - if (config_use_freq_filtered_thickness) then - !$acc exit data copyout(highFreqThicknessNew) - !$acc exit data delete(highFreqThicknessCur) - !$acc exit data copyout(lowFreqDivergenceNew) - !$acc exit data delete(lowFreqDivergenceCur) - !$acc exit data delete(lowFreqDivergenceTend) - endif -#endif !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ! If large iteration complete, compute all variables at @@ -1892,9 +1883,33 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ elseif (splitExplicitStep == numTSIterations) then +#ifdef MPAS_OPENACC + !$acc parallel present(minLevelCell, maxLevelCell) & + !$acc present(layerThicknessTend) & + !$acc present(layerThicknessCur) & + !$acc present(minLevelEdgeBot, maxLevelEdgeTop) & + !$acc present(layerThickEdgeFlux) & + !$acc present(activeTracerHorizontalAdvectionEdgeFlux) & + !$acc present(layerThicknessNew) & + !$acc present(activeTracerHorizontalAdvectionTendency) & + !$acc present(activeTracerVerticalAdvectionTendency) & + !$acc present(activeTracerHorMixTendency) & + !$acc present(activeTracerSurfaceFluxTendency) & + !$acc present(temperatureShortWaveTendency) & + !$acc present(activeTracerNonLocalTendency) +#endif + + +#ifdef MPAS_OPENACC + !$acc loop gang +#else !$omp parallel !$omp do schedule(runtime) private(k) +#endif do iCell = 1, nCellsAll +#ifdef MPAS_OPENACC + !$acc loop vector +#endif do k = minLevelCell(iCell), maxLevelCell(iCell) ! this is h_{n+1} layerThicknessNew(k,iCell) = & @@ -1909,20 +1924,62 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ if (config_compute_active_tracer_budgets) then +#ifdef MPAS_OPENACC + !$acc loop gang +#else !$omp parallel !$omp do schedule(runtime) private(k) +#endif do iEdge = 1, nEdgesAll +#ifdef MPAS_OPENACC + !$acc loop seq +#endif do k= minLevelEdgeBot(iEdge), maxLevelEdgeTop(iEdge) - activeTracerHorizontalAdvectionEdgeFlux(:,k,iEdge) = & - activeTracerHorizontalAdvectionEdgeFlux(:,k,iEdge) / & - layerThickEdgeFlux(k,iEdge) +#ifdef MPAS_OPENACC + !$acc loop vector +#endif + do iTracer = 1, size(activeTracerHorizontalAdvectionEdgeFlux,1) + activeTracerHorizontalAdvectionEdgeFlux(iTracer,k,iEdge) = & + activeTracerHorizontalAdvectionEdgeFlux(iTracer,k,iEdge) / & + layerThickEdgeFlux(k,iEdge) + enddo enddo enddo +#ifndef MPAS_OPENACC !$omp end do +#endif +#ifdef MPAS_OPENACC + !$acc loop gang +#else !$omp do schedule(runtime) private(k) +#endif do iCell = 1, nCellsAll do k= minLevelCell(iCell), maxLevelCell(iCell) +#ifdef MPAS_OPENACC + !$acc loop vector + do iTracer = 1, size(activeTracerHorizontalAdvectionTendency,1) + activeTracerHorizontalAdvectionTendency(iTracer,k,iCell) = & + activeTracerHorizontalAdvectionTendency(iTracer,k,iCell) / & + layerThicknessNew(k,iCell) + + activeTracerVerticalAdvectionTendency(iTracer,k,iCell) = & + activeTracerVerticalAdvectionTendency(iTracer,k,iCell) / & + layerThicknessNew(k,iCell) + + activeTracerHorMixTendency(iTracer,k,iCell) = & + activeTracerHorMixTendency(iTracer,k,iCell) / & + layerThicknessNew(k,iCell) + + activeTracerSurfaceFluxTendency(iTracer,k,iCell) = & + activeTracerSurfaceFluxTendency(iTracer,k,iCell) / & + layerThicknessNew(k,iCell) + + activeTracerNonLocalTendency(iTracer,k,iCell) = & + activeTracerNonLocalTendency(iTracer,k,iCell) / & + layerThicknessNew(k,iCell) + end do +#else activeTracerHorizontalAdvectionTendency(:,k,iCell) = & activeTracerHorizontalAdvectionTendency(:,k,iCell) / & layerThicknessNew(k,iCell) @@ -1939,13 +1996,14 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ activeTracerSurfaceFluxTendency(:,k,iCell) / & layerThicknessNew(k,iCell) - temperatureShortWaveTendency(k,iCell) = & - temperatureShortWaveTendency(k,iCell) / & - layerThicknessNew(k,iCell) - activeTracerNonLocalTendency(:,k,iCell) = & activeTracerNonLocalTendency(:,k,iCell) / & layerThicknessNew(k,iCell) +#endif + + temperatureShortWaveTendency(k,iCell) = & + temperatureShortWaveTendency(k,iCell) / & + layerThicknessNew(k,iCell) end do end do #ifndef MPAS_OPENACC @@ -1954,6 +2012,18 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ #endif endif +#ifdef MPAS_OPENACC + !$acc end parallel +#endif + +! This tracer block is still computed on the CPU +#ifdef MPAS_OPENACC + !$acc update host(layerThicknessNew) + !$acc update host(layerThicknessCur, layerThicknessTend) + !$acc update host(activeTracersNew) + !$acc update host(tracersSurfaceValue) +#endif + call mpas_pool_begin_iteration(tracersPool) do while (mpas_pool_get_next_member(tracersPool, & groupItr) ) @@ -2068,10 +2138,29 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ end if ! tracer end do ! tracer group +#ifdef MPAS_OPENACC + !$acc update device(layerThicknessNew) + !$acc update device(activeTracersNew) +#endif + if (config_use_freq_filtered_thickness) then +#ifdef MPAS_OPENACC + !$acc parallel present(minLevelCell, maxLevelCell) & + !$acc present(lowFreqDivergenceNew) & + !$acc present(lowFreqDivergenceCur) & + !$acc present(lowFreqDivergenceTend) +#endif + +#ifdef MPAS_OPENACC + !$acc loop gang +#else !$omp parallel !$omp do schedule(runtime) private(k) +#endif do iCell = 1, nCellsAll +#ifdef MPAS_OPENACC + !$acc loop vector +#endif do k = minLevelCell(iCell), maxLevelCell(iCell) ! h^{hf}_{n+1} was computed in Stage 1 @@ -2087,6 +2176,10 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ !$omp end parallel #endif +#ifdef MPAS_OPENACC + !$acc end parallel +#endif + end if ! Recompute final u to go on to next step. @@ -2105,9 +2198,24 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ ! so normalBaroclinicVelocity does not have to be ! recomputed here. +#ifdef MPAS_OPENACC + !$acc parallel present(minLevelEdgeBot, maxLevelEdgeTop) & + !$acc present(normalVelocityNew) & + !$acc present(normalBarotropicVelocityNew) & + !$acc present(normalBaroclinicVelocityNew) & + !$acc present(normalBaroclinicVelocityCur) +#endif + +#ifdef MPAS_OPENACC + !$acc loop gang +#else !$omp parallel !$omp do schedule(runtime) private(k) +#endif do iEdge = 1, nEdgesAll +#ifdef MPAS_OPENACC + !$acc loop vector +#endif do k = minLevelEdgeBot(iEdge), maxLevelEdgeTop(iEdge) normalVelocityNew(k,iEdge) = & normalBarotropicVelocityNew(iEdge) + & @@ -2120,8 +2228,72 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ !$omp end parallel #endif +#ifdef MPAS_OPENACC + !$acc end parallel +#endif + endif ! splitExplicitStep +#ifdef MPAS_OPENACC + !$acc exit data copyout(layerThicknessNew) + !$acc exit data copyout(normalVelocityNew) + !$acc exit data delete(normalBarotropicVelocityNew, & + !$acc normalBaroclinicVelocityNew) + !$acc exit data copyout(activeTracersNew) + !$acc update host(layerThickEdgeFlux) + if (config_use_freq_filtered_thickness) then + !$acc exit data copyout(lowFreqDivergenceNew) + !$acc exit data delete(lowFreqDivergenceCur) + !$acc exit data delete(lowFreqDivergenceTend) + endif + if (splitExplicitStep < numTSIterations) then + !$acc exit data delete (atmosphericPressure, seaIcePressure) + !$acc exit data copyout(sshNew) + !$acc update host(layerThickEdgeMean) + !$acc update host(relativeVorticity, circulation) + !$acc update host(vertTransportVelocityTop, & + !$acc vertGMBolusVelocityTop, & + !$acc relativeVorticityCell, & + !$acc divergence, & + !$acc kineticEnergyCell, & + !$acc tangentialVelocity, & + !$acc vertVelocityTop) + !$acc update host(normRelVortEdge, normPlanetVortEdge, & + !$acc normalizedRelativeVorticityCell) + !$acc update host (surfacePressure) + !$acc update host(zMid, zTop) + !$acc update host(tracersSurfaceValue) + !$acc update host(normalVelocitySurfaceLayer) + !$acc update host(density, potentialDensity, displacedDensity) + !$acc update host(thermExpCoeff, & + !$acc& salineContractCoeff) + !$acc update host(montgomeryPotential, pressure) + if (landIcePressureOn) then + !$acc exit data delete(landIcePressure) + !$acc exit data delete(landIceDraft) + endif + if (config_use_freq_filtered_thickness) then + !$acc exit data copyout(highFreqThicknessNew) + !$acc exit data delete(highFreqThicknessCur) + endif + if ( associated(frazilSurfacePressure) ) then + !$acc exit data delete(frazilSurfacePressure) + endif + elseif (splitExplicitStep == numTSIterations) then + !$acc exit data delete(layerThicknessCur, layerThicknessTend) + !$acc exit data delete(normalBaroclinicVelocityCur) + if (config_compute_active_tracer_budgets) then + !$acc update host(activeTracerHorizontalAdvectionEdgeFlux) + !$acc update host(activeTracerHorizontalAdvectionTendency) + !$acc update host(activeTracerVerticalAdvectionTendency) + !$acc update host(activeTracerHorMixTendency) + !$acc update host(activeTracerSurfaceFluxTendency) + !$acc update host(temperatureShortWaveTendency) + !$acc update host(activeTracerNonLocalTendency) + endif + endif +#endif + call mpas_timer_stop('se loop fini') call mpas_timer_stop('se loop') @@ -2158,6 +2330,9 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ #endif call ocn_diagnostic_solve(dt, statePool, forcingPool, meshPool, & scratchPool, tracersPool, 2) + + call mpas_dmpar_field_halo_exch(domain, 'surfaceFrictionVelocity') + #ifdef MPAS_OPENACC !$acc update host(layerThickEdgeFlux, layerThickEdgeMean) !$acc update host(relativeVorticity, circulation) @@ -2198,8 +2373,6 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ !$acc exit data delete(layerThicknessNew, normalVelocityNew) #endif - call mpas_dmpar_field_halo_exch(domain, 'surfaceFrictionVelocity') - ! Compute normalGMBolusVelocity; it will be added to the ! baroclinic modes in Stage 2 above. !if (config_use_GM.or.config_use_Redi) then diff --git a/components/mpas-ocean/src/shared/mpas_ocn_diagnostics.F b/components/mpas-ocean/src/shared/mpas_ocn_diagnostics.F index d43e2bb7a8f6..abb537b5a029 100644 --- a/components/mpas-ocean/src/shared/mpas_ocn_diagnostics.F +++ b/components/mpas-ocean/src/shared/mpas_ocn_diagnostics.F @@ -110,7 +110,7 @@ subroutine ocn_diagnostic_solve(dt, statePool, forcingPool, meshPool, scratchPoo integer, intent(in), optional :: timeLevelIn !< Input: Time level in state logical, intent(in), optional :: full !< Input: Run all computations? - integer :: iEdge, iCell + integer :: iEdge, iCell, iTracer integer :: err, nCells, nEdges, nVertices real (kind=RKIND) :: coef_3rd_order @@ -331,16 +331,21 @@ subroutine ocn_diagnostic_solve(dt, statePool, forcingPool, meshPool, scratchPoo ! tracersSurfaceValue calc ! inputs: activeTracers, minLevelCell ! outputs: tracersSurfaceValue -! !$acc parallel loop !!!present(tracersSurfaceValue, activeTracers, minLevelCell) + !$acc kernels present(tracersSurfaceValue, activeTracers, minLevelCell) #else !$omp parallel !$omp do schedule(runtime) +#endif +#ifdef MPAS_OPENACC + !$acc loop independent #endif do iCell = 1, nCells - tracersSurfaceValue(:, iCell) = activeTracers(:, minLevelCell(iCell), iCell) + do iTracer = 1,size(tracersSurfaceValue,1) + tracersSurfaceValue(iTracer, iCell) = activeTracers(iTracer, minLevelCell(iCell), iCell) + enddo end do #ifdef MPAS_OPENACC - !$acc update device(tracersSurfaceValue) + !$acc end kernels #else !$omp end do #endif