From d7857cdb2e03e2e59a113640d0e5a9622d107f99 Mon Sep 17 00:00:00 2001 From: Tom Lin Date: Sun, 20 Sep 2020 21:55:38 +0100 Subject: [PATCH] Capture pointers before omp block --- src/PdV.cpp | 185 +++++------ src/accelerate.cpp | 84 ++--- src/advec_cell.cpp | 227 ++++++------- src/advec_mom.cpp | 291 +++++++---------- src/build_field.cpp | 214 ++++++------- src/calc_dt.cpp | 90 ++---- src/definitions.h | 34 +- src/field_summary.cpp | 26 +- src/flux_calc.cpp | 44 +-- src/generate_chunk.cpp | 166 +++++----- src/ideal_gas.cpp | 32 +- src/initialise_chunk.cpp | 66 ++-- src/pack_kernel.cpp | 88 ++--- src/reset_field.cpp | 51 ++- src/revert.cpp | 26 +- src/start.cpp | 3 + src/update_halo.cpp | 300 ++++++++++------- src/update_tile_halo_kernel.cpp | 548 +++++++++++++++++++------------- src/viscosity.cpp | 73 ++--- 19 files changed, 1292 insertions(+), 1256 deletions(-) diff --git a/src/PdV.cpp b/src/PdV.cpp index c6eee82..1333f0d 100644 --- a/src/PdV.cpp +++ b/src/PdV.cpp @@ -39,20 +39,8 @@ void PdV_kernel( bool predict, int x_min, int x_max, int y_min, int y_max, double dt, - clover::Buffer2D &xarea, - clover::Buffer2D &yarea, - clover::Buffer2D &volume, - clover::Buffer2D &density0, - clover::Buffer2D &density1, - clover::Buffer2D &energy0, - clover::Buffer2D &energy1, - clover::Buffer2D &pressure, - clover::Buffer2D &viscosity, - clover::Buffer2D &xvel0, - clover::Buffer2D &xvel1, - clover::Buffer2D &yvel0, - clover::Buffer2D &yvel1, - clover::Buffer2D &volume_change) { + field_type &field +) { // DO k=y_min,y_max @@ -60,95 +48,95 @@ void PdV_kernel( if (predict) { - omp(parallel(2) enable_target(use_target) - mapToFrom2D(xarea) - mapToFrom2D(yarea) - mapToFrom2D(volume) - mapToFrom2D(density0) - mapToFrom2D(density1) - mapToFrom2D(energy0) - mapToFrom2D(energy1) - mapToFrom2D(pressure) - mapToFrom2D(viscosity) - mapToFrom2D(xvel0) - mapToFrom2D(xvel1) - mapToFrom2D(yvel0) - mapToFrom2D(yvel1) - mapToFrom2D(volume_change) - ) + mapToFrom2Df(field, xarea) + mapToFrom2Df(field, yarea) + mapToFrom2Df(field, volume) + mapToFrom2Df(field, density0) + mapToFrom2Df(field, density1) + mapToFrom2Df(field, energy0) + mapToFrom2Df(field, energy1) + mapToFrom2Df(field, pressure) + mapToFrom2Df(field, viscosity) + mapToFrom2Df(field, xvel0) + mapToFrom2Df(field, xvel1) + mapToFrom2Df(field, yvel0) + mapToFrom2Df(field, yvel1) + mapToFrom2Dfn(field, work_array1, volume_change) + + omp(parallel(2) enable_target(use_target)) for (int j = (y_min + 1); j < (y_max + 2); j++) { for (int i = (x_min + 1); i < (x_max + 2); i++) { - double left_flux = (idx2(xarea, i, j) * (idx2(xvel0, i, j) + - idx2(xvel0, i + 0, j + 1) + - idx2(xvel0, i, j) + - idx2(xvel0, i + 0, j + 1))) * 0.25 * dt * 0.5; - double right_flux = (idx2(xarea, i + 1, j + 0) * (idx2(xvel0, i + 1, j + 0) + - idx2(xvel0, i + 1, j + 1) + - idx2(xvel0, i + 1, j + 0) + - idx2(xvel0, i + 1, j + 1))) * 0.25 * dt * 0.5; - double bottom_flux = (idx2(yarea, i, j) * (idx2(yvel0, i, j) + - idx2(yvel0, i + 1, j + 0) + - idx2(yvel0, i, j) + - idx2(yvel0, i + 1, j + 0))) * 0.25 * dt * 0.5; - double top_flux = (idx2(yarea, i + 0, j + 1) * (idx2(yvel0, i + 0, j + 1) + - idx2(yvel0, i + 1, j + 1) + - idx2(yvel0, i + 0, j + 1) + - idx2(yvel0, i + 1, j + 1))) * 0.25 * dt * 0.5; + double left_flux = (idx2f(field, xarea, i, j) * (idx2f(field, xvel0, i, j) + + idx2f(field, xvel0, i + 0, j + 1) + + idx2f(field, xvel0, i, j) + + idx2f(field, xvel0, i + 0, j + 1))) * 0.25 * dt * 0.5; + double right_flux = (idx2f(field, xarea, i + 1, j + 0) * (idx2f(field, xvel0, i + 1, j + 0) + + idx2f(field, xvel0, i + 1, j + 1) + + idx2f(field, xvel0, i + 1, j + 0) + + idx2f(field, xvel0, i + 1, j + 1))) * 0.25 * dt * 0.5; + double bottom_flux = (idx2f(field, yarea, i, j) * (idx2f(field, yvel0, i, j) + + idx2f(field, yvel0, i + 1, j + 0) + + idx2f(field, yvel0, i, j) + + idx2f(field, yvel0, i + 1, j + 0))) * 0.25 * dt * 0.5; + double top_flux = (idx2f(field, yarea, i + 0, j + 1) * (idx2f(field, yvel0, i + 0, j + 1) + + idx2f(field, yvel0, i + 1, j + 1) + + idx2f(field, yvel0, i + 0, j + 1) + + idx2f(field, yvel0, i + 1, j + 1))) * 0.25 * dt * 0.5; double total_flux = right_flux - left_flux + top_flux - bottom_flux; - double volume_change_s = idx2(volume, i, j) / (idx2(volume, i, j) + total_flux); - double min_cell_volume = std::fmin(std::fmin(idx2(volume, i, j) + right_flux - left_flux + top_flux - bottom_flux, idx2(volume, i, j) + right_flux - left_flux), - idx2(volume, i, j) + top_flux - bottom_flux); - double recip_volume = 1.0 / idx2(volume, i, j); - double energy_change = (idx2(pressure, i, j) / idx2(density0, i, j) + idx2(viscosity, i, j) / idx2(density0, i, j)) * total_flux * recip_volume; - idx2(energy1, i, j) = idx2(energy0, i, j) - energy_change; - idx2(density1, i, j) = idx2(density0, i, j) * volume_change_s; + double volume_change_s = idx2f(field, volume, i, j) / (idx2f(field, volume, i, j) + total_flux); + double min_cell_volume = std::fmin(std::fmin(idx2f(field, volume, i, j) + right_flux - left_flux + top_flux - bottom_flux, idx2f(field, volume, i, j) + right_flux - left_flux), + idx2f(field, volume, i, j) + top_flux - bottom_flux); + double recip_volume = 1.0 / idx2f(field, volume, i, j); + double energy_change = (idx2f(field, pressure, i, j) / idx2f(field, density0, i, j) + idx2f(field, viscosity, i, j) / idx2f(field, density0, i, j)) * total_flux * recip_volume; + idx2f(field, energy1, i, j) = idx2f(field, energy0, i, j) - energy_change; + idx2f(field, density1, i, j) = idx2f(field, density0, i, j) * volume_change_s; } } } else { - omp(parallel(2) enable_target(use_target) - mapToFrom2D(xarea) - mapToFrom2D(yarea) - mapToFrom2D(volume) - mapToFrom2D(density0) - mapToFrom2D(density1) - mapToFrom2D(energy0) - mapToFrom2D(energy1) - mapToFrom2D(pressure) - mapToFrom2D(viscosity) - mapToFrom2D(xvel0) - mapToFrom2D(xvel1) - mapToFrom2D(yvel0) - mapToFrom2D(yvel1) - mapToFrom2D(volume_change) - ) + mapToFrom2Df(field, xarea) + mapToFrom2Df(field, yarea) + mapToFrom2Df(field, volume) + mapToFrom2Df(field, density0) + mapToFrom2Df(field, density1) + mapToFrom2Df(field, energy0) + mapToFrom2Df(field, energy1) + mapToFrom2Df(field, pressure) + mapToFrom2Df(field, viscosity) + mapToFrom2Df(field, xvel0) + mapToFrom2Df(field, xvel1) + mapToFrom2Df(field, yvel0) + mapToFrom2Df(field, yvel1) + mapToFrom2Dfn(field, work_array1, volume_change) + + omp(parallel(2) enable_target(use_target)) for (int j = (y_min + 1); j < (y_max + 2); j++) { for (int i = (x_min + 1); i < (x_max + 2); i++) { - double left_flux = (idx2(xarea, i, j) * (idx2(xvel0, i, j) + - idx2(xvel0, i + 0, j + 1) + - idx2(xvel1, i, j) + - idx2(xvel1, i + 0, j + 1))) * 0.25 * dt; - double right_flux = (idx2(xarea, i + 1, j + 0) * (idx2(xvel0, i + 1, j + 0) + - idx2(xvel0, i + 1, j + 1) + - idx2(xvel1, i + 1, j + 0) + - idx2(xvel1, i + 1, j + 1))) * 0.25 * dt; - double bottom_flux = (idx2(yarea, i, j) * (idx2(yvel0, i, j) + - idx2(yvel0, i + 1, j + 0) + - idx2(yvel1, i, j) + - idx2(yvel1, i + 1, j + 0))) * 0.25 * dt; - double top_flux = (idx2(yarea, i + 0, j + 1) * (idx2(yvel0, i + 0, j + 1) + - idx2(yvel0, i + 1, j + 1) + - idx2(yvel1, i + 0, j + 1) + idx2(yvel1, i + 1, j + 1))) * 0.25 * dt; + double left_flux = (idx2f(field, xarea, i, j) * (idx2f(field, xvel0, i, j) + + idx2f(field, xvel0, i + 0, j + 1) + + idx2f(field, xvel1, i, j) + + idx2f(field, xvel1, i + 0, j + 1))) * 0.25 * dt; + double right_flux = (idx2f(field, xarea, i + 1, j + 0) * (idx2f(field, xvel0, i + 1, j + 0) + + idx2f(field, xvel0, i + 1, j + 1) + + idx2f(field, xvel1, i + 1, j + 0) + + idx2f(field, xvel1, i + 1, j + 1))) * 0.25 * dt; + double bottom_flux = (idx2f(field, yarea, i, j) * (idx2f(field, yvel0, i, j) + + idx2f(field, yvel0, i + 1, j + 0) + + idx2f(field, yvel1, i, j) + + idx2f(field, yvel1, i + 1, j + 0))) * 0.25 * dt; + double top_flux = (idx2f(field, yarea, i + 0, j + 1) * (idx2f(field, yvel0, i + 0, j + 1) + + idx2f(field, yvel0, i + 1, j + 1) + + idx2f(field, yvel1, i + 0, j + 1) + idx2f(field, yvel1, i + 1, j + 1))) * 0.25 * dt; double total_flux = right_flux - left_flux + top_flux - bottom_flux; - double volume_change_s = idx2(volume, i, j) / (idx2(volume, i, j) + total_flux); + double volume_change_s = idx2f(field, volume, i, j) / (idx2f(field, volume, i, j) + total_flux); double min_cell_volume = std::fmin(std::fmin( - idx2(volume, i, j) + right_flux - left_flux + top_flux - bottom_flux, idx2(volume, i, j) + right_flux - left_flux), - idx2(volume, i, j) + top_flux - bottom_flux); - double recip_volume = 1.0 / idx2(volume, i, j); - double energy_change = (idx2(pressure, i, j) / idx2(density0, i, j) + idx2(viscosity, i, j) / idx2(density0, i, j)) * total_flux * recip_volume; - idx2(energy1, i, j) = idx2(energy0, i, j) - energy_change; - idx2(density1, i, j) = idx2(density0, i, j) * volume_change_s; + idx2f(field, volume, i, j) + right_flux - left_flux + top_flux - bottom_flux, idx2f(field, volume, i, j) + right_flux - left_flux), + idx2f(field, volume, i, j) + top_flux - bottom_flux); + double recip_volume = 1.0 / idx2f(field, volume, i, j); + double energy_change = (idx2f(field, pressure, i, j) / idx2f(field, density0, i, j) + idx2f(field, viscosity, i, j) / idx2f(field, density0, i, j)) * total_flux * recip_volume; + idx2f(field, energy1, i, j) = idx2f(field, energy0, i, j) - energy_change; + idx2f(field, density1, i, j) = idx2f(field, density0, i, j) * volume_change_s; } } } @@ -179,20 +167,7 @@ void PdV(global_variables &globals, bool predict) { t.info.t_ymin, t.info.t_ymax, globals.dt, - t.field.xarea, - t.field.yarea, - t.field.volume, - t.field.density0, - t.field.density1, - t.field.energy0, - t.field.energy1, - t.field.pressure, - t.field.viscosity, - t.field.xvel0, - t.field.xvel1, - t.field.yvel0, - t.field.yvel1, - t.field.work_array1); + t.field); } #if FLUSH_BUFFER diff --git a/src/accelerate.cpp b/src/accelerate.cpp index 8dbe2f7..7985179 100644 --- a/src/accelerate.cpp +++ b/src/accelerate.cpp @@ -32,16 +32,7 @@ void accelerate_kernel( bool use_target, int x_min, int x_max, int y_min, int y_max, double dt, - clover::Buffer2D &xarea, - clover::Buffer2D &yarea, - clover::Buffer2D &volume, - clover::Buffer2D &density0, - clover::Buffer2D &pressure, - clover::Buffer2D &viscosity, - clover::Buffer2D &xvel0, - clover::Buffer2D &yvel0, - clover::Buffer2D &xvel1, - clover::Buffer2D &yvel1) { + field_type &field) { double halfdt = 0.5 * dt; @@ -53,39 +44,39 @@ void accelerate_kernel( //for(int j = ) - omp(parallel(2) enable_target(use_target) - mapToFrom2D(xarea) - mapToFrom2D(yarea) - mapToFrom2D(volume) - mapToFrom2D(density0) - mapToFrom2D(pressure) - mapToFrom2D(viscosity) - mapToFrom2D(xvel0) - mapToFrom2D(yvel0) - mapToFrom2D(xvel1) - mapToFrom2D(yvel1) - ) + mapToFrom2Df(field, xarea) + mapToFrom2Df(field, yarea) + mapToFrom2Df(field, volume) + mapToFrom2Df(field, density0) + mapToFrom2Df(field, pressure) + mapToFrom2Df(field, viscosity) + mapToFrom2Df(field, xvel0) + mapToFrom2Df(field, yvel0) + mapToFrom2Df(field, xvel1) + mapToFrom2Df(field, yvel1) + + omp(parallel(2) enable_target(use_target)) for (int j = (y_min + 1); j < (y_max + 1 + 2); j++) { for (int i = (x_min + 1); i < (x_max + 1 + 2); i++) { - double stepbymass_s = halfdt / ((idx2(density0, i - 1, j - 1) * idx2(volume, i - 1, j - 1) + - idx2(density0, i - 1, j + 0) * idx2(volume, i - 1, j + 0) + idx2(density0, i, j) * idx2(volume, i, j) + - idx2(density0, i + 0, j - 1) * idx2(volume, i + 0, j - 1)) * 0.25); - idx2(xvel1, i, j) = idx2(xvel0, i, j) - - stepbymass_s * (idx2(xarea, i, j) * (idx2(pressure, i, j) - idx2(pressure, i - 1, j + 0)) + - idx2(xarea, i + 0, j - 1) * (idx2(pressure, i + 0, j - 1) - idx2(pressure, i - 1, j - 1))); - idx2(yvel1, i, j) = idx2(yvel0, i, j) - - stepbymass_s * (idx2(yarea, i, j) * - (idx2(pressure, i, j) - idx2(pressure, i + 0, j - 1)) + - idx2(yarea, i - 1, j + 0) * (idx2(pressure, i - 1, j + 0) - idx2(pressure, i - 1, j - 1))); - idx2(xvel1, i, j) = idx2(xvel1, i, j) - - stepbymass_s * (idx2(xarea, i, j) * - (idx2(viscosity, i, j) - - idx2(viscosity, i - 1, j + 0)) + - idx2(xarea, i + 0, j - 1) * (idx2(viscosity, i + 0, j - 1) - idx2(viscosity, i - 1, j - 1))); - idx2(yvel1, i, j) = idx2(yvel1, i, j) - - stepbymass_s * (idx2(yarea, i, j) * - (idx2(viscosity, i, j) - idx2(viscosity, i + 0, j - 1)) + - idx2(yarea, i - 1, j + 0) * (idx2(viscosity, i - 1, j + 0) - idx2(viscosity, i - 1, j - 1))); + double stepbymass_s = halfdt / ((idx2f(field, density0, i - 1, j - 1) * idx2f(field, volume, i - 1, j - 1) + + idx2f(field, density0, i - 1, j + 0) * idx2f(field, volume, i - 1, j + 0) + idx2f(field, density0, i, j) * idx2f(field, volume, i, j) + + idx2f(field, density0, i + 0, j - 1) * idx2f(field, volume, i + 0, j - 1)) * 0.25); + idx2f(field, xvel1, i, j) = idx2f(field, xvel0, i, j) - + stepbymass_s * (idx2f(field, xarea, i, j) * (idx2f(field, pressure, i, j) - idx2f(field, pressure, i - 1, j + 0)) + + idx2f(field, xarea, i + 0, j - 1) * (idx2f(field, pressure, i + 0, j - 1) - idx2f(field, pressure, i - 1, j - 1))); + idx2f(field, yvel1, i, j) = idx2f(field, yvel0, i, j) - + stepbymass_s * (idx2f(field, yarea, i, j) * + (idx2f(field, pressure, i, j) - idx2f(field, pressure, i + 0, j - 1)) + + idx2f(field, yarea, i - 1, j + 0) * (idx2f(field, pressure, i - 1, j + 0) - idx2f(field, pressure, i - 1, j - 1))); + idx2f(field, xvel1, i, j) = idx2f(field, xvel1, i, j) - + stepbymass_s * (idx2f(field, xarea, i, j) * + (idx2f(field, viscosity, i, j) - + idx2f(field, viscosity, i - 1, j + 0)) + + idx2f(field, xarea, i + 0, j - 1) * (idx2f(field, viscosity, i + 0, j - 1) - idx2f(field, viscosity, i - 1, j - 1))); + idx2f(field, yvel1, i, j) = idx2f(field, yvel1, i, j) - + stepbymass_s * (idx2f(field, yarea, i, j) * + (idx2f(field, viscosity, i, j) - idx2f(field, viscosity, i + 0, j - 1)) + + idx2f(field, yarea, i - 1, j + 0) * (idx2f(field, viscosity, i - 1, j + 0) - idx2f(field, viscosity, i - 1, j - 1))); } } } @@ -113,16 +104,7 @@ void accelerate(global_variables &globals) { t.info.t_ymin, t.info.t_ymax, globals.dt, - t.field.xarea, - t.field.yarea, - t.field.volume, - t.field.density0, - t.field.pressure, - t.field.viscosity, - t.field.xvel0, - t.field.yvel0, - t.field.xvel1, - t.field.yvel1); + t.field); } diff --git a/src/advec_cell.cpp b/src/advec_cell.cpp index 39f5f81..ab20a11 100644 --- a/src/advec_cell.cpp +++ b/src/advec_cell.cpp @@ -35,22 +35,7 @@ void advec_cell_kernel( int y_max, int dir, int sweep_number, - clover::Buffer1D &vertexdx, - clover::Buffer1D &vertexdy, - clover::Buffer2D &volume, - clover::Buffer2D &density1, - clover::Buffer2D &energy1, - clover::Buffer2D &mass_flux_x, - clover::Buffer2D &vol_flux_x, - clover::Buffer2D &mass_flux_y, - clover::Buffer2D &vol_flux_y, - clover::Buffer2D &pre_vol, - clover::Buffer2D &post_vol, - clover::Buffer2D &pre_mass, - clover::Buffer2D &post_mass, - clover::Buffer2D &advec_vol, - clover::Buffer2D &post_ener, - clover::Buffer2D &ener_flux) { + field_type &field) { const double one_by_six = 1.0 / 6.0; @@ -62,17 +47,19 @@ void advec_cell_kernel( if (sweep_number == 1) { - omp(parallel(2) enable_target(use_target) - mapToFrom2D(volume) - mapToFrom2D(vol_flux_x) - mapToFrom2D(vol_flux_y) - mapToFrom2D(pre_vol) - mapToFrom2D(post_vol) - ) + mapToFrom2Df(field, volume) + mapToFrom2Df(field, vol_flux_x) + mapToFrom2Df(field, vol_flux_y) + mapToFrom2Dfn(field, work_array1, pre_vol) + mapToFrom2Dfn(field, work_array2, post_vol) + + omp(parallel(2) enable_target(use_target)) for (int j = (y_min - 2 + 1); j < (y_max + 2 + 2); j++) { for (int i = (x_min - 2 + 1); i < (x_max + 2 + 2); i++) { - idx2(pre_vol, i, j) = idx2(volume, i, j) + (idx2(vol_flux_x, i + 1, j + 0) - idx2(vol_flux_x, i, j) + idx2(vol_flux_y, i + 0, j + 1) - idx2(vol_flux_y, i, j)); - idx2(post_vol, i, j) = idx2(pre_vol, i, j) - (idx2(vol_flux_x, i + 1, j + 0) - idx2(vol_flux_x, i, j)); + idx2f(field, pre_vol, i, j) = idx2f(field, volume, i, j) + + (idx2f(field, vol_flux_x, i + 1, j + 0) - idx2f(field, vol_flux_x, i, j) + idx2f(field, vol_flux_y, i + 0, j + 1) - + idx2f(field, vol_flux_y, i, j)); + idx2f(field, post_vol, i, j) = idx2f(field, pre_vol, i, j) - (idx2f(field, vol_flux_x, i + 1, j + 0) - idx2f(field, vol_flux_x, i, j)); } } @@ -80,16 +67,16 @@ void advec_cell_kernel( } else { - omp(parallel(2) enable_target(use_target) - mapToFrom2D(volume) - mapToFrom2D(vol_flux_x) - mapToFrom2D(pre_vol) - mapToFrom2D(post_vol) - ) + mapToFrom2Df(field, volume) + mapToFrom2Df(field, vol_flux_x) + mapToFrom2Dfn(field, work_array1, pre_vol) + mapToFrom2Dfn(field, work_array2, post_vol) + + omp(parallel(2) enable_target(use_target)) for (int j = (y_min - 2 + 1); j < (y_max + 2 + 2); j++) { for (int i = (x_min - 2 + 1); i < (x_max + 2 + 2); i++) { - idx2(pre_vol, i, j) = idx2(volume, i, j) + idx2(vol_flux_x, i + 1, j + 0) - idx2(vol_flux_x, i, j); - idx2(post_vol, i, j) = idx2(volume, i, j); + idx2f(field, pre_vol, i, j) = idx2f(field, volume, i, j) + idx2f(field, vol_flux_x, i + 1, j + 0) - idx2f(field, vol_flux_x, i, j); + idx2f(field, post_vol, i, j) = idx2f(field, volume, i, j); } } @@ -97,21 +84,21 @@ void advec_cell_kernel( // DO k=y_min,y_max // DO j=x_min,x_max+2 - omp(parallel(2) enable_target(use_target) - mapToFrom1D(vertexdx) - mapToFrom2D(density1) - mapToFrom2D(energy1) - mapToFrom2D(mass_flux_x) - mapToFrom2D(vol_flux_x) - mapToFrom2D(pre_vol) - mapToFrom2D(ener_flux) - ) + mapToFrom1Df(field, vertexdx) + mapToFrom2Df(field, density1) + mapToFrom2Df(field, energy1) + mapToFrom2Df(field, mass_flux_x) + mapToFrom2Df(field, vol_flux_x) + mapToFrom2Dfn(field, work_array1, pre_vol) + mapToFrom2Dfn(field, work_array7, ener_flux) + + omp(parallel(2) enable_target(use_target)) for (int j = (y_min + 1); j < (y_max + 2); j++) { for (int i = (x_min + 1); i < (x_max + 2 + 2); i++) ({ int upwind, donor, downwind, dif; double sigmat, sigma3, sigma4, sigmav, sigma, sigmam, diffuw, diffdw, limiter, wind; - if (idx2(vol_flux_x, i, j) > 0.0) { + if (idx2f(field, vol_flux_x, i, j) > 0.0) { upwind = i - 2; donor = i - 1; downwind = i; @@ -122,13 +109,13 @@ void advec_cell_kernel( downwind = i - 1; dif = upwind; } - sigmat = std::fabs(idx2(vol_flux_x, i, j)) / idx2(pre_vol, donor, j); - sigma3 = (1.0 + sigmat) * (idx1(vertexdx, i) / idx1(vertexdx, dif)); + sigmat = std::fabs(idx2f(field, vol_flux_x, i, j)) / idx2f(field, pre_vol, donor, j); + sigma3 = (1.0 + sigmat) * (idx1f(field, vertexdx, i) / idx1f(field, vertexdx, dif)); sigma4 = 2.0 - sigmat; // sigma = sigmat; sigmav = sigmat; - diffuw = idx2(density1, donor, j) - idx2(density1, upwind, j); - diffdw = idx2(density1, downwind, j) - idx2(density1, donor, j); + diffuw = idx2f(field, density1, donor, j) - idx2f(field, density1, upwind, j); + diffdw = idx2f(field, density1, downwind, j) - idx2f(field, density1, donor, j); wind = 1.0; if (diffdw <= 0.0)wind = -1.0; if (diffuw * diffdw > 0.0) { @@ -140,10 +127,10 @@ void advec_cell_kernel( } else { limiter = 0.0; } - idx2(mass_flux_x, i, j) = idx2(vol_flux_x, i, j) * (idx2(density1, donor, j) + limiter); - sigmam = std::fabs(idx2(mass_flux_x, i, j)) / (idx2(density1, donor, j) * idx2(pre_vol, donor, j)); - diffuw = idx2(energy1, donor, j) - idx2(energy1, upwind, j); - diffdw = idx2(energy1, downwind, j) - idx2(energy1, donor, j); + idx2f(field, mass_flux_x, i, j) = idx2f(field, vol_flux_x, i, j) * (idx2f(field, density1, donor, j) + limiter); + sigmam = std::fabs(idx2f(field, mass_flux_x, i, j)) / (idx2f(field, density1, donor, j) * idx2f(field, pre_vol, donor, j)); + diffuw = idx2f(field, energy1, donor, j) - idx2f(field, energy1, upwind, j); + diffdw = idx2f(field, energy1, downwind, j) - idx2f(field, energy1, donor, j); wind = 1.0; if (diffdw <= 0.0)wind = -1.0; if (diffuw * diffdw > 0.0) { @@ -156,7 +143,7 @@ void advec_cell_kernel( } else { limiter = 0.0; } - idx2(ener_flux, i, j) = idx2(mass_flux_x, i, j) * (idx2(energy1, donor, j) + limiter); + idx2f(field, ener_flux, i, j) = idx2f(field, mass_flux_x, i, j) * (idx2f(field, energy1, donor, j) + limiter); }); } @@ -165,22 +152,17 @@ void advec_cell_kernel( // DO k=y_min,y_max // DO j=x_min,x_max - omp(parallel(2) enable_target(use_target) - mapToFrom2D(density1) - mapToFrom2D(energy1) - mapToFrom2D(mass_flux_x) - mapToFrom2D(vol_flux_x) - mapToFrom2D(pre_vol) - mapToFrom2D(ener_flux) - ) + + + omp(parallel(2) enable_target(use_target)) for (int j = (y_min + 1); j < (y_max + 2); j++) { for (int i = (x_min + 1); i < (x_max + 2); i++) { - double pre_mass_s = idx2(density1, i, j) * idx2(pre_vol, i, j); - double post_mass_s = pre_mass_s + idx2(mass_flux_x, i, j) - idx2(mass_flux_x, i + 1, j + 0); - double post_ener_s = (idx2(energy1, i, j) * pre_mass_s + idx2(ener_flux, i, j) - idx2(ener_flux, i + 1, j + 0)) / post_mass_s; - double advec_vol_s = idx2(pre_vol, i, j) + idx2(vol_flux_x, i, j) - idx2(vol_flux_x, i + 1, j + 0); - idx2(density1, i, j) = post_mass_s / advec_vol_s; - idx2(energy1, i, j) = post_ener_s; + double pre_mass_s = idx2f(field, density1, i, j) * idx2f(field, pre_vol, i, j); + double post_mass_s = pre_mass_s + idx2f(field, mass_flux_x, i, j) - idx2f(field, mass_flux_x, i + 1, j + 0); + double post_ener_s = (idx2f(field, energy1, i, j) * pre_mass_s + idx2f(field, ener_flux, i, j) - idx2f(field, ener_flux, i + 1, j + 0)) / post_mass_s; + double advec_vol_s = idx2f(field, pre_vol, i, j) + idx2f(field, vol_flux_x, i, j) - idx2f(field, vol_flux_x, i + 1, j + 0); + idx2f(field, density1, i, j) = post_mass_s / advec_vol_s; + idx2f(field, energy1, i, j) = post_ener_s; } } @@ -192,17 +174,19 @@ void advec_cell_kernel( if (sweep_number == 1) { - omp(parallel(2) enable_target(use_target) - mapToFrom2D(volume) - mapToFrom2D(vol_flux_x) - mapToFrom2D(vol_flux_y) - mapToFrom2D(pre_vol) - mapToFrom2D(post_vol) - ) + mapToFrom2Df(field, volume) + mapToFrom2Df(field, vol_flux_x) + mapToFrom2Df(field, vol_flux_y) + mapToFrom2Dfn(field, work_array1, pre_vol) + mapToFrom2Dfn(field, work_array2, post_vol) + + omp(parallel(2) enable_target(use_target)) for (int j = (y_min - 2 + 1); j < (y_max + 2 + 2); j++) { for (int i = (x_min - 2 + 1); i < (x_max + 2 + 2); i++) { - idx2(pre_vol, i, j) = idx2(volume, i, j) + (idx2(vol_flux_y, i + 0, j + 1) - idx2(vol_flux_y, i, j) + idx2(vol_flux_x, i + 1, j + 0) - idx2(vol_flux_x, i, j)); - idx2(post_vol, i, j) = idx2(pre_vol, i, j) - (idx2(vol_flux_y, i + 0, j + 1) - idx2(vol_flux_y, i, j)); + idx2f(field, pre_vol, i, j) = idx2f(field, volume, i, j) + + (idx2f(field, vol_flux_y, i + 0, j + 1) - idx2f(field, vol_flux_y, i, j) + idx2f(field, vol_flux_x, i + 1, j + 0) - + idx2f(field, vol_flux_x, i, j)); + idx2f(field, post_vol, i, j) = idx2f(field, pre_vol, i, j) - (idx2f(field, vol_flux_y, i + 0, j + 1) - idx2f(field, vol_flux_y, i, j)); } } @@ -210,16 +194,16 @@ void advec_cell_kernel( } else { - omp(parallel(2) enable_target(use_target) - mapToFrom2D(volume) - mapToFrom2D(vol_flux_y) - mapToFrom2D(pre_vol) - mapToFrom2D(post_vol) - ) + mapToFrom2Df(field, volume) + mapToFrom2Df(field, vol_flux_y) + mapToFrom2Dfn(field, work_array1, pre_vol) + mapToFrom2Dfn(field, work_array2, post_vol) + + omp(parallel(2) enable_target(use_target)) for (int j = (y_min - 2 + 1); j < (y_max + 2 + 2); j++) { for (int i = (x_min - 2 + 1); i < (x_max + 2 + 2); i++) { - idx2(pre_vol, i, j) = idx2(volume, i, j) + idx2(vol_flux_y, i + 0, j + 1) - idx2(vol_flux_y, i, j); - idx2(post_vol, i, j) = idx2(volume, i, j); + idx2f(field, pre_vol, i, j) = idx2f(field, volume, i, j) + idx2f(field, vol_flux_y, i + 0, j + 1) - idx2f(field, vol_flux_y, i, j); + idx2f(field, post_vol, i, j) = idx2f(field, volume, i, j); } } @@ -229,21 +213,20 @@ void advec_cell_kernel( // DO k=y_min,y_max+2 // DO j=x_min,x_max - omp(parallel(2) enable_target(use_target) - mapToFrom1D(vertexdy) - mapToFrom2D(density1) - mapToFrom2D(energy1) - mapToFrom2D(mass_flux_y) - mapToFrom2D(vol_flux_y) - mapToFrom2D(pre_vol) - mapToFrom2D(ener_flux) - ) + mapToFrom1Df(field, vertexdy) + mapToFrom2Df(field, density1) + mapToFrom2Df(field, energy1) + mapToFrom2Df(field, mass_flux_y) + mapToFrom2Df(field, vol_flux_y) + mapToFrom2Dfn(field, work_array1, pre_vol) + mapToFrom2Dfn(field, work_array7, ener_flux) + omp(parallel(2) enable_target(use_target)) for (int j = (y_min + 1); j < (y_max + 2 + 2); j++) { for (int i = (x_min + 1); i < (x_max + 2); i++) ({ int upwind, donor, downwind, dif; double sigmat, sigma3, sigma4, sigmav, sigma, sigmam, diffuw, diffdw, limiter, wind; - if (idx2(vol_flux_y, i, j) > 0.0) { + if (idx2f(field, vol_flux_y, i, j) > 0.0) { upwind = j - 2; donor = j - 1; downwind = j; @@ -254,13 +237,13 @@ void advec_cell_kernel( downwind = j - 1; dif = upwind; } - sigmat = std::fabs(idx2(vol_flux_y, i, j)) / idx2(pre_vol, i, donor); - sigma3 = (1.0 + sigmat) * (idx1(vertexdy, j) / idx1(vertexdy, dif)); + sigmat = std::fabs(idx2f(field, vol_flux_y, i, j)) / idx2f(field, pre_vol, i, donor); + sigma3 = (1.0 + sigmat) * (idx1f(field, vertexdy, j) / idx1f(field, vertexdy, dif)); sigma4 = 2.0 - sigmat; // sigma = sigmat; sigmav = sigmat; - diffuw = idx2(density1, i, donor) - idx2(density1, i, upwind); - diffdw = idx2(density1, i, downwind) - idx2(density1, i, donor); + diffuw = idx2f(field, density1, i, donor) - idx2f(field, density1, i, upwind); + diffdw = idx2f(field, density1, i, downwind) - idx2f(field, density1, i, donor); wind = 1.0; if (diffdw <= 0.0)wind = -1.0; if (diffuw * diffdw > 0.0) { @@ -272,10 +255,10 @@ void advec_cell_kernel( } else { limiter = 0.0; } - idx2(mass_flux_y, i, j) = idx2(vol_flux_y, i, j) * (idx2(density1, i, donor) + limiter); - sigmam = std::fabs(idx2(mass_flux_y, i, j)) / (idx2(density1, i, donor) * idx2(pre_vol, i, donor)); - diffuw = idx2(energy1, i, donor) - idx2(energy1, i, upwind); - diffdw = idx2(energy1, i, downwind) - idx2(energy1, i, donor); + idx2f(field, mass_flux_y, i, j) = idx2f(field, vol_flux_y, i, j) * (idx2f(field, density1, i, donor) + limiter); + sigmam = std::fabs(idx2f(field, mass_flux_y, i, j)) / (idx2f(field, density1, i, donor) * idx2f(field, pre_vol, i, donor)); + diffuw = idx2f(field, energy1, i, donor) - idx2f(field, energy1, i, upwind); + diffdw = idx2f(field, energy1, i, downwind) - idx2f(field, energy1, i, donor); wind = 1.0; if (diffdw <= 0.0)wind = -1.0; if (diffuw * diffdw > 0.0) { @@ -287,29 +270,24 @@ void advec_cell_kernel( } else { limiter = 0.0; } - idx2(ener_flux, i, j) = idx2(mass_flux_y, i, j) * (idx2(energy1, i, donor) + limiter); + idx2f(field, ener_flux, i, j) = idx2f(field, mass_flux_y, i, j) * (idx2f(field, energy1, i, donor) + limiter); }); } // DO k=y_min,y_max // DO j=x_min,x_max - omp(parallel(2) enable_target(use_target) - mapToFrom2D(density1) - mapToFrom2D(energy1) - mapToFrom2D(mass_flux_y) - mapToFrom2D(vol_flux_y) - mapToFrom2D(pre_vol) - mapToFrom2D(ener_flux) - ) + + + omp(parallel(2) enable_target(use_target)) for (int j = (y_min + 1); j < (y_max + 2); j++) { for (int i = (x_min + 1); i < (x_max + 2); i++) { - double pre_mass_s = idx2(density1, i, j) * idx2(pre_vol, i, j); - double post_mass_s = pre_mass_s + idx2(mass_flux_y, i, j) - idx2(mass_flux_y, i + 0, j + 1); - double post_ener_s = (idx2(energy1, i, j) * pre_mass_s + idx2(ener_flux, i, j) - idx2(ener_flux, i + 0, j + 1)) / post_mass_s; - double advec_vol_s = idx2(pre_vol, i, j) + idx2(vol_flux_y, i, j) - idx2(vol_flux_y, i + 0, j + 1); - idx2(density1, i, j) = post_mass_s / advec_vol_s; - idx2(energy1, i, j) = post_ener_s; + double pre_mass_s = idx2f(field, density1, i, j) * idx2f(field, pre_vol, i, j); + double post_mass_s = pre_mass_s + idx2f(field, mass_flux_y, i, j) - idx2f(field, mass_flux_y, i + 0, j + 1); + double post_ener_s = (idx2f(field, energy1, i, j) * pre_mass_s + idx2f(field, ener_flux, i, j) - idx2f(field, ener_flux, i + 0, j + 1)) / post_mass_s; + double advec_vol_s = idx2f(field, pre_vol, i, j) + idx2f(field, vol_flux_y, i, j) - idx2f(field, vol_flux_y, i + 0, j + 1); + idx2f(field, density1, i, j) = post_mass_s / advec_vol_s; + idx2f(field, energy1, i, j) = post_ener_s; } } @@ -336,22 +314,7 @@ void advec_cell_driver(global_variables &globals, int tile, int sweep_number, in t.info.t_ymax, direction, sweep_number, - t.field.vertexdx, - t.field.vertexdy, - t.field.volume, - t.field.density1, - t.field.energy1, - t.field.mass_flux_x, - t.field.vol_flux_x, - t.field.mass_flux_y, - t.field.vol_flux_y, - t.field.work_array1, - t.field.work_array2, - t.field.work_array3, - t.field.work_array4, - t.field.work_array5, - t.field.work_array6, - t.field.work_array7); + t.field); #if FLUSH_BUFFER globals.deviceToHost(); diff --git a/src/advec_mom.cpp b/src/advec_mom.cpp index a8016ea..155d5ba 100644 --- a/src/advec_mom.cpp +++ b/src/advec_mom.cpp @@ -31,21 +31,8 @@ void advec_mom_kernel( bool use_target, int x_min, int x_max, int y_min, int y_max, - clover::Buffer2D &vel1, - clover::Buffer2D &mass_flux_x, - clover::Buffer2D &vol_flux_x, - clover::Buffer2D &mass_flux_y, - clover::Buffer2D &vol_flux_y, - clover::Buffer2D &volume, - clover::Buffer2D &density1, - clover::Buffer2D &node_flux, - clover::Buffer2D &node_mass_post, - clover::Buffer2D &node_mass_pre, - clover::Buffer2D &mom_flux, - clover::Buffer2D &pre_vol, - clover::Buffer2D &post_vol, - clover::Buffer1D &celldx, - clover::Buffer1D &celldy, + clover::Buffer2D &vel1_buffer, + field_type &field, int which_vel, int sweep_number, int direction) { @@ -59,63 +46,63 @@ void advec_mom_kernel( if (mom_sweep == 1) { // x 1 - omp(parallel(2) enable_target(use_target) - mapToFrom2D(vol_flux_y) - mapToFrom2D(vol_flux_x) - mapToFrom2D(volume) - mapToFrom2D(pre_vol) - mapToFrom2D(post_vol) - ) + mapToFrom2Df(field, vol_flux_y) + mapToFrom2Df(field, vol_flux_x) + mapToFrom2Df(field, volume) + mapToFrom2Dfn(field, work_array5, pre_vol) + mapToFrom2Dfn(field, work_array6, post_vol) + + omp(parallel(2) enable_target(use_target)) for (int j = (y_min - 2 + 1); j < (y_max + 2 + 2); j++) { for (int i = (x_min - 2 + 1); i < (x_max + 2 + 2); i++) { - idx2(post_vol, i, j) = idx2(volume, i, j) + idx2(vol_flux_y, i + 0, j + 1) - idx2(vol_flux_y, i, j); - idx2(pre_vol, i, j) = idx2(post_vol, i, j) + idx2(vol_flux_x, i + 1, j + 0) - idx2(vol_flux_x, i, j); + idx2f(, post_vol, i, j) = idx2f(, volume, i, j) + idx2f(, vol_flux_y, i + 0, j + 1) - idx2f(, vol_flux_y, i, j); + idx2f(, pre_vol, i, j) = idx2f(, post_vol, i, j) + idx2f(, vol_flux_x, i + 1, j + 0) - idx2f(, vol_flux_x, i, j); } } } else if (mom_sweep == 2) { // y 1 - omp(parallel(2) enable_target(use_target) - mapToFrom2D(vol_flux_y) - mapToFrom2D(vol_flux_x) - mapToFrom2D(volume) - mapToFrom2D(pre_vol) - mapToFrom2D(post_vol) - ) + mapToFrom2Df(field, vol_flux_y) + mapToFrom2Df(field, vol_flux_x) + mapToFrom2Df(field, volume) + mapToFrom2Dfn(field, work_array5, pre_vol) + mapToFrom2Dfn(field, work_array6, post_vol) + + omp(parallel(2) enable_target(use_target)) for (int j = (y_min - 2 + 1); j < (y_max + 2 + 2); j++) { for (int i = (x_min - 2 + 1); i < (x_max + 2 + 2); i++) { - idx2(post_vol, i, j) = idx2(volume, i, j) + idx2(vol_flux_x, i + 1, j + 0) - idx2(vol_flux_x, i, j); - idx2(pre_vol, i, j) = idx2(post_vol, i, j) + idx2(vol_flux_y, i + 0, j + 1) - idx2(vol_flux_y, i, j); + idx2f(, post_vol, i, j) = idx2f(, volume, i, j) + idx2f(, vol_flux_x, i + 1, j + 0) - idx2f(, vol_flux_x, i, j); + idx2f(, pre_vol, i, j) = idx2f(, post_vol, i, j) + idx2f(, vol_flux_y, i + 0, j + 1) - idx2f(, vol_flux_y, i, j); } } } else if (mom_sweep == 3) { // x 2 - omp(parallel(2) enable_target(use_target) - mapToFrom2D(vol_flux_y) - mapToFrom2D(volume) - mapToFrom2D(pre_vol) - mapToFrom2D(post_vol) - ) + mapToFrom2Df(field, vol_flux_y) + mapToFrom2Df(field, volume) + mapToFrom2Dfn(field, work_array5, pre_vol) + mapToFrom2Dfn(field, work_array6, post_vol) + + omp(parallel(2) enable_target(use_target)) for (int j = (y_min - 2 + 1); j < (y_max + 2 + 2); j++) { for (int i = (x_min - 2 + 1); i < (x_max + 2 + 2); i++) { - idx2(post_vol, i, j) = idx2(volume, i, j); - idx2(pre_vol, i, j) = idx2(post_vol, i, j) + idx2(vol_flux_y, i + 0, j + 1) - idx2(vol_flux_y, i, j); + idx2f(, post_vol, i, j) = idx2f(, volume, i, j); + idx2f(, pre_vol, i, j) = idx2f(, post_vol, i, j) + idx2f(, vol_flux_y, i + 0, j + 1) - idx2f(, vol_flux_y, i, j); } } } else if (mom_sweep == 4) { // y 2 - omp(parallel(2) enable_target(use_target) - mapToFrom2D(vol_flux_x) - mapToFrom2D(volume) - mapToFrom2D(pre_vol) - mapToFrom2D(post_vol) - ) + mapToFrom2Df(field, vol_flux_x) + mapToFrom2Df(field, volume) + mapToFrom2Dfn(field, work_array5, pre_vol) + mapToFrom2Dfn(field, work_array6, post_vol) + + omp(parallel(2) enable_target(use_target)) for (int j = (y_min - 2 + 1); j < (y_max + 2 + 2); j++) { for (int i = (x_min - 2 + 1); i < (x_max + 2 + 2); i++) { - idx2(post_vol, i, j) = idx2(volume, i, j); - idx2(pre_vol, i, j) = idx2(post_vol, i, j) + idx2(vol_flux_x, i + 1, j + 0) - idx2(vol_flux_x, i, j); + idx2f(, post_vol, i, j) = idx2f(, volume, i, j); + idx2f(, pre_vol, i, j) = idx2f(, post_vol, i, j) + idx2f(, vol_flux_x, i + 1, j + 0) - idx2f(, vol_flux_x, i, j); } } } @@ -127,14 +114,14 @@ void advec_mom_kernel( - omp(parallel(2) enable_target(use_target) - mapToFrom2D(mass_flux_x) - mapToFrom2D(node_flux) - ) + mapToFrom2Df(field, mass_flux_x) + mapToFrom2Dfn(field, work_array1, node_flux) + + omp(parallel(2) enable_target(use_target)) for (int j = (y_min + 1); j < (y_max + 1 + 2); j++) { for (int i = (x_min - 2 + 1); i < (x_max + 2 + 2); i++) { - idx2(node_flux, i, j) = 0.25 * (idx2(mass_flux_x, i + 0, j - 1) + idx2(mass_flux_x, i, j) + - idx2(mass_flux_x, i + 1, j - 1) + idx2(mass_flux_x, i + 1, j + 0)); + idx2f(, node_flux, i, j) = 0.25 * (idx2f(, mass_flux_x, i + 0, j - 1) + idx2f(, mass_flux_x, i, j) + + idx2f(, mass_flux_x, i + 1, j - 1) + idx2f(, mass_flux_x, i + 1, j + 0)); } } @@ -142,45 +129,44 @@ void advec_mom_kernel( // DO j=x_min-1,x_max+2 - omp(parallel(2) enable_target(use_target) - mapToFrom2D(density1) - mapToFrom2D(node_flux) - mapToFrom2D(node_mass_post) - mapToFrom2D(node_mass_pre) - mapToFrom2D(post_vol) - ) + mapToFrom2Df(field, density1) + mapToFrom2Dfn(field, work_array2, node_mass_post) + mapToFrom2Dfn(field, work_array3, node_mass_pre) + mapToFrom2Dfn(field, work_array6, post_vol) + + omp(parallel(2) enable_target(use_target)) for (int j = (y_min + 1); j < (y_max + 1 + 2); j++) { for (int i = (x_min - 1 + 1); i < (x_max + 2 + 2); i++) { - idx2(node_mass_post, i, j) = 0.25 * (idx2(density1, i + 0, j - 1) * - idx2(post_vol, i + 0, j - 1) + - idx2(density1, i, j) * - idx2(post_vol, i, j) + - idx2(density1, i - 1, j - 1) * - idx2(post_vol, i - 1, j - 1) + - idx2(density1, i - 1, j + 0) * idx2(post_vol, i - 1, j + 0)); - idx2(node_mass_pre, i, j) = idx2(node_mass_post, i, j) - idx2(node_flux, i - 1, j + 0) + idx2(node_flux, i, j); + idx2f(, node_mass_post, i, j) = 0.25 * (idx2f(, density1, i + 0, j - 1) * + idx2f(, post_vol, i + 0, j - 1) + + idx2f(, density1, i, j) * + idx2f(, post_vol, i, j) + + idx2f(, density1, i - 1, j - 1) * + idx2f(, post_vol, i - 1, j - 1) + + idx2f(, density1, i - 1, j + 0) * idx2f(, post_vol, i - 1, j + 0)); + idx2f(, node_mass_pre, i, j) = idx2f(, node_mass_post, i, j) - idx2f(, node_flux, i - 1, j + 0) + idx2f(, node_flux, i, j); } } } - // DO k=y_min,y_max+1 - // DO j=x_min-1,x_max+1 + // DO k=y_min,y_max+1 + // DO j=x_min-1,x_max+1 - omp(parallel(2) enable_target(use_target) - mapToFrom2D(vel1) - mapToFrom2D(node_flux) - mapToFrom2D(node_mass_pre) - mapToFrom2D(mom_flux) - mapToFrom1D(celldx) - ) + mapToFrom2Dfe(vel1_buffer, vel1) + mapToFrom2Dfn(field, work_array1, node_flux) + mapToFrom2Dfn(field, work_array3, node_mass_pre) + mapToFrom2Dfn(field, work_array4, mom_flux) + mapToFrom1Df(field, celldx) + + omp(parallel(2) enable_target(use_target)) for (int j = (y_min + 1); j < (y_max + 1 + 2); j++) { for (int i = (x_min - 1 + 1); i < (x_max + 1 + 2); i++) ({ int upwind, donor, downwind, dif; double sigma, width, limiter, vdiffuw, vdiffdw, auw, adw, wind, advec_vel_s; - if (idx2(node_flux, i, j) < 0.0) { + if (idx2f(, node_flux, i, j) < 0.0) { upwind = i + 2; donor = i + 1; downwind = i; @@ -191,10 +177,10 @@ void advec_mom_kernel( downwind = i + 1; dif = upwind; } - sigma = std::fabs(idx2(node_flux, i, j)) / (idx2(node_mass_pre, donor, j)); - width = idx1(celldx, i); - vdiffuw = idx2(vel1, donor, j) - idx2(vel1, upwind, j); - vdiffdw = idx2(vel1, downwind, j) - idx2(vel1, donor, j); + sigma = std::fabs(idx2f(, node_flux, i, j)) / (idx2f(, node_mass_pre, donor, j)); + width = idx1f(, celldx, i); + vdiffuw = idx2f(, vel1, donor, j) - idx2f(, vel1, upwind, j); + vdiffdw = idx2f(, vel1, downwind, j) - idx2f(, vel1, donor, j); limiter = 0.0; if (vdiffuw * vdiffdw > 0.0) { auw = std::fabs(vdiffuw); @@ -202,10 +188,10 @@ void advec_mom_kernel( wind = 1.0; if (vdiffdw <= 0.0)wind = -1.0; limiter = wind * std::fmin(std::fmin( - width * ((2.0 - sigma) * adw / width + (1.0 + sigma) * auw / idx1(celldx, dif)) / 6.0, auw), adw); + width * ((2.0 - sigma) * adw / width + (1.0 + sigma) * auw / idx1f(, celldx, dif)) / 6.0, auw), adw); } - advec_vel_s = idx2(vel1, donor, j) + (1.0 - sigma) * limiter; - idx2(mom_flux, i, j) = advec_vel_s * idx2(node_flux, i, j); + advec_vel_s = idx2f(, vel1, donor, j) + (1.0 - sigma) * limiter; + idx2f(, mom_flux, i, j) = advec_vel_s * idx2f(, node_flux, i, j); }); } @@ -214,15 +200,13 @@ void advec_mom_kernel( - omp(parallel(2) enable_target(use_target) - mapToFrom2D(vel1) - mapToFrom2D(node_mass_post) - mapToFrom2D(node_mass_pre) - mapToFrom2D(mom_flux) - ) + mapToFrom2Dfn(field, work_array2, node_mass_post) + + omp(parallel(2) enable_target(use_target)) for (int j = (y_min + 1); j < (y_max + 1 + 2); j++) { for (int i = (x_min + 1); i < (x_max + 1 + 2); i++) { - idx2(vel1, i, j) = (idx2(vel1, i, j) * idx2(node_mass_pre, i, j) + idx2(mom_flux, i - 1, j + 0) - idx2(mom_flux, i, j)) / idx2(node_mass_post, i, j); + idx2f(, vel1, i, j) = + (idx2f(, vel1, i, j) * idx2f(, node_mass_pre, i, j) + idx2f(, mom_flux, i - 1, j + 0) - idx2f(, mom_flux, i, j)) / idx2f(, node_mass_post, i, j); } } } else if (direction == 2) { @@ -232,14 +216,14 @@ void advec_mom_kernel( - omp(parallel(2) enable_target(use_target) - mapToFrom2D(node_flux) - mapToFrom2D(mass_flux_y) - ) + mapToFrom2Dfn(field, work_array1, node_flux) + mapToFrom2Df(field, mass_flux_y) + + omp(parallel(2) enable_target(use_target)) for (int j = (y_min - 2 + 1); j < (y_max + 2 + 2); j++) { for (int i = (x_min + 1); i < (x_max + 1 + 2); i++) { - idx2(node_flux, i, j) = 0.25 * (idx2(mass_flux_y, i - 1, j + 0) + idx2(mass_flux_y, i, j) + - idx2(mass_flux_y, i - 1, j + 1) + idx2(mass_flux_y, i + 0, j + 1)); + idx2f(, node_flux, i, j) = 0.25 * (idx2f(, mass_flux_y, i - 1, j + 0) + idx2f(, mass_flux_y, i, j) + + idx2f(, mass_flux_y, i - 1, j + 1) + idx2f(, mass_flux_y, i + 0, j + 1)); } } @@ -247,44 +231,43 @@ void advec_mom_kernel( // DO k=y_min-1,y_max+2 // DO j=x_min,x_max+1 - omp(parallel(2) enable_target(use_target) - mapToFrom2D(density1) - mapToFrom2D(node_flux) - mapToFrom2D(node_mass_post) - mapToFrom2D(node_mass_pre) - mapToFrom2D(post_vol) - ) + mapToFrom2Df(field, density1) + mapToFrom2Dfn(field, work_array2, node_mass_post) + mapToFrom2Dfn(field, work_array3, node_mass_pre) + mapToFrom2Dfn(field, work_array6, post_vol) + + omp(parallel(2) enable_target(use_target)) for (int j = (y_min - 1 + 1); j < (y_max + 2 + 2); j++) { for (int i = (x_min + 1); i < (x_max + 1 + 2); i++) { - idx2(node_mass_post, i, j) = 0.25 * (idx2(density1, i + 0, j - 1) * - idx2(post_vol, i + 0, j - 1) + - idx2(density1, i, j) * - idx2(post_vol, i, j) + - idx2(density1, i - 1, j - 1) * - idx2(post_vol, i - 1, j - 1) + - idx2(density1, i - 1, j + 0) * - idx2(post_vol, i - 1, j + 0)); - idx2(node_mass_pre, i, j) = idx2(node_mass_post, i, j) - idx2(node_flux, i + 0, j - 1) + idx2(node_flux, i, j); + idx2f(, node_mass_post, i, j) = 0.25 * (idx2f(, density1, i + 0, j - 1) * + idx2f(, post_vol, i + 0, j - 1) + + idx2f(, density1, i, j) * + idx2f(, post_vol, i, j) + + idx2f(, density1, i - 1, j - 1) * + idx2f(, post_vol, i - 1, j - 1) + + idx2f(, density1, i - 1, j + 0) * + idx2f(, post_vol, i - 1, j + 0)); + idx2f(, node_mass_pre, i, j) = idx2f(, node_mass_post, i, j) - idx2f(, node_flux, i + 0, j - 1) + idx2f(, node_flux, i, j); } } } - // DO k=y_min-1,y_max+1 - // DO j=x_min,x_max+1 + // DO k=y_min-1,y_max+1 + // DO j=x_min,x_max+1 - omp(parallel(2) enable_target(use_target) - mapToFrom2D(vel1) - mapToFrom2D(node_flux) - mapToFrom2D(node_mass_pre) - mapToFrom2D(mom_flux) - mapToFrom1D(celldy) - ) + mapToFrom2Dfe(vel1_buffer, vel1) + mapToFrom2Dfn(field, work_array1, node_flux) + mapToFrom2Dfn(field, work_array3, node_mass_pre) + mapToFrom2Dfn(field, work_array4, mom_flux) + mapToFrom1Df(field, celldy) + + omp(parallel(2) enable_target(use_target)) for (int j = (y_min - 1 + 1); j < (y_max + 1 + 2); j++) { for (int i = (x_min + 1); i < (x_max + 1 + 2); i++) ({ int upwind, donor, downwind, dif; double sigma, width, limiter, vdiffuw, vdiffdw, auw, adw, wind, advec_vel_s; - if (idx2(node_flux, i, j) < 0.0) { + if (idx2f(, node_flux, i, j) < 0.0) { upwind = j + 2; donor = j + 1; downwind = j; @@ -295,10 +278,10 @@ void advec_mom_kernel( downwind = j + 1; dif = upwind; } - sigma = std::fabs(idx2(node_flux, i, j)) / (idx2(node_mass_pre, i, donor)); - width = idx1(celldy, j); - vdiffuw = idx2(vel1, i, donor) - idx2(vel1, i, upwind); - vdiffdw = idx2(vel1, i, downwind) - idx2(vel1, i, donor); + sigma = std::fabs(idx2f(, node_flux, i, j)) / (idx2f(, node_mass_pre, i, donor)); + width = idx1f(, celldy, j); + vdiffuw = idx2f(, vel1, i, donor) - idx2f(, vel1, i, upwind); + vdiffdw = idx2f(, vel1, i, downwind) - idx2f(, vel1, i, donor); limiter = 0.0; if (vdiffuw * vdiffdw > 0.0) { auw = std::fabs(vdiffuw); @@ -306,10 +289,10 @@ void advec_mom_kernel( wind = 1.0; if (vdiffdw <= 0.0)wind = -1.0; limiter = wind * std::fmin(std::fmin( - width * ((2.0 - sigma) * adw / width + (1.0 + sigma) * auw / idx1(celldy, dif)) / 6.0, auw), adw); + width * ((2.0 - sigma) * adw / width + (1.0 + sigma) * auw / idx1f(, celldy, dif)) / 6.0, auw), adw); } - advec_vel_s = idx2(vel1, i, donor) + (1.0 - sigma) * limiter; - idx2(mom_flux, i, j) = advec_vel_s * idx2(node_flux, i, j); + advec_vel_s = idx2f(, vel1, i, donor) + (1.0 - sigma) * limiter; + idx2f(, mom_flux, i, j) = advec_vel_s * idx2f(, node_flux, i, j); }); } @@ -319,15 +302,13 @@ void advec_mom_kernel( - omp(parallel(2) enable_target(use_target) - mapToFrom2D(vel1) - mapToFrom2D(node_mass_post) - mapToFrom2D(node_mass_pre) - mapToFrom2D(mom_flux) - ) + mapToFrom2Dfn(field, work_array2, node_mass_post) + + omp(parallel(2) enable_target(use_target)) for (int j = (y_min + 1); j < (y_max + 1 + 2); j++) { for (int i = (x_min + 1); i < (x_max + 1 + 2); i++) { - idx2(vel1, i, j) = (idx2(vel1, i, j) * idx2(node_mass_pre, i, j) + idx2(mom_flux, i + 0, j - 1) - idx2(mom_flux, i, j)) / idx2(node_mass_post, i, j); + idx2f(, vel1, i, j) = + (idx2f(, vel1, i, j) * idx2f(, node_mass_pre, i, j) + idx2f(, mom_flux, i + 0, j - 1) - idx2f(, mom_flux, i, j)) / idx2f(, node_mass_post, i, j); } } } @@ -355,20 +336,7 @@ void advec_mom_driver(global_variables &globals, int tile, int which_vel, int di t.info.t_ymin, t.info.t_ymax, t.field.xvel1, - t.field.mass_flux_x, - t.field.vol_flux_x, - t.field.mass_flux_y, - t.field.vol_flux_y, - t.field.volume, - t.field.density1, - t.field.work_array1, - t.field.work_array2, - t.field.work_array3, - t.field.work_array4, - t.field.work_array5, - t.field.work_array6, - t.field.celldx, - t.field.celldy, + t.field, which_vel, sweep_number, direction); @@ -380,20 +348,7 @@ void advec_mom_driver(global_variables &globals, int tile, int which_vel, int di t.info.t_ymin, t.info.t_ymax, t.field.yvel1, - t.field.mass_flux_x, - t.field.vol_flux_x, - t.field.mass_flux_y, - t.field.vol_flux_y, - t.field.volume, - t.field.density1, - t.field.work_array1, - t.field.work_array2, - t.field.work_array3, - t.field.work_array4, - t.field.work_array5, - t.field.work_array6, - t.field.celldx, - t.field.celldy, + t.field, which_vel, sweep_number, direction); diff --git a/src/build_field.cpp b/src/build_field.cpp index 504d712..0bfe0f8 100644 --- a/src/build_field.cpp +++ b/src/build_field.cpp @@ -41,28 +41,28 @@ void build_field(global_variables &globals) { // #pragma omp target enter data \ - map(alloc: field.density0.data[:field.density0.N()]) map(to: field.density0.sizeX) \ - map(alloc: field.density1.data[:field.density1.N()]) map(to: field.density1.sizeX) \ - map(alloc: field.energy0.data[:field.energy0.N()]) map(to: field.energy0.sizeX) \ - map(alloc: field.energy1.data[:field.energy1.N()]) map(to: field.energy1.sizeX) \ - map(alloc: field.pressure.data[:field.pressure.N()]) map(to: field.pressure.sizeX) \ - map(alloc: field.viscosity.data[:field.viscosity.N()]) map(to: field.viscosity.sizeX) \ - map(alloc: field.soundspeed.data[:field.soundspeed.N()]) map(to: field.soundspeed.sizeX) \ - map(alloc: field.yvel0.data[:field.yvel0.N()]) map(to: field.yvel0.sizeX) \ - map(alloc: field.yvel1.data[:field.yvel1.N()]) map(to: field.yvel1.sizeX) \ - map(alloc: field.xvel0.data[:field.xvel0.N()]) map(to: field.xvel0.sizeX) \ - map(alloc: field.xvel1.data[:field.xvel1.N()]) map(to: field.xvel1.sizeX) \ - map(alloc: field.vol_flux_x.data[:field.vol_flux_x.N()]) map(to: field.vol_flux_x.sizeX) \ - map(alloc: field.vol_flux_y.data[:field.vol_flux_y.N()]) map(to: field.vol_flux_y.sizeX) \ - map(alloc: field.mass_flux_x.data[:field.mass_flux_x.N()]) map(to: field.mass_flux_x.sizeX) \ - map(alloc: field.mass_flux_y.data[:field.mass_flux_y.N()]) map(to: field.mass_flux_y.sizeX) \ - map(alloc: field.work_array1.data[:field.work_array1.N()]) map(to: field.work_array1.sizeX) \ - map(alloc: field.work_array2.data[:field.work_array2.N()]) map(to: field.work_array2.sizeX) \ - map(alloc: field.work_array3.data[:field.work_array3.N()]) map(to: field.work_array3.sizeX) \ - map(alloc: field.work_array4.data[:field.work_array4.N()]) map(to: field.work_array4.sizeX) \ - map(alloc: field.work_array5.data[:field.work_array5.N()]) map(to: field.work_array5.sizeX) \ - map(alloc: field.work_array6.data[:field.work_array6.N()]) map(to: field.work_array6.sizeX) \ - map(alloc: field.work_array7.data[:field.work_array7.N()]) map(to: field.work_array7.sizeX) \ + map(alloc: field.density0.data[:field.density0.N()]) \ + map(alloc: field.density1.data[:field.density1.N()]) \ + map(alloc: field.energy0.data[:field.energy0.N()]) \ + map(alloc: field.energy1.data[:field.energy1.N()]) \ + map(alloc: field.pressure.data[:field.pressure.N()]) \ + map(alloc: field.viscosity.data[:field.viscosity.N()]) \ + map(alloc: field.soundspeed.data[:field.soundspeed.N()]) \ + map(alloc: field.yvel0.data[:field.yvel0.N()]) \ + map(alloc: field.yvel1.data[:field.yvel1.N()]) \ + map(alloc: field.xvel0.data[:field.xvel0.N()]) \ + map(alloc: field.xvel1.data[:field.xvel1.N()]) \ + map(alloc: field.vol_flux_x.data[:field.vol_flux_x.N()]) \ + map(alloc: field.vol_flux_y.data[:field.vol_flux_y.N()]) \ + map(alloc: field.mass_flux_x.data[:field.mass_flux_x.N()]) \ + map(alloc: field.mass_flux_y.data[:field.mass_flux_y.N()]) \ + map(alloc: field.work_array1.data[:field.work_array1.N()]) \ + map(alloc: field.work_array2.data[:field.work_array2.N()]) \ + map(alloc: field.work_array3.data[:field.work_array3.N()]) \ + map(alloc: field.work_array4.data[:field.work_array4.N()]) \ + map(alloc: field.work_array5.data[:field.work_array5.N()]) \ + map(alloc: field.work_array6.data[:field.work_array6.N()]) \ + map(alloc: field.work_array7.data[:field.work_array7.N()]) \ map(alloc: field.cellx.data[:field.cellx.N()]) \ map(alloc: field.celldx.data[:field.celldx.N()]) \ map(alloc: field.celly.data[:field.celly.N()]) \ @@ -71,9 +71,9 @@ void build_field(global_variables &globals) { map(alloc: field.vertexdx.data[:field.vertexdx.N()]) \ map(alloc: field.vertexy.data[:field.vertexy.N()]) \ map(alloc: field.vertexdy.data[:field.vertexdy.N()]) \ - map(alloc: field.volume.data[:field.volume.N()]) map(to: field.volume.sizeX) \ - map(alloc: field.xarea.data[:field.xarea.N()]) map(to: field.xarea.sizeX) \ - map(alloc: field.yarea.data[:field.yarea.N()]) map(to: field.yarea.sizeX) \ + map(alloc: field.volume.data[:field.volume.N()]) \ + map(alloc: field.xarea.data[:field.xarea.N()]) \ + map(alloc: field.yarea.data[:field.yarea.N()]) \ const int xrange = (t.info.t_xmax + 2) - (t.info.t_xmin - 2) + 1; const int yrange = (t.info.t_ymax + 2) - (t.info.t_ymin - 2) + 1; @@ -147,125 +147,125 @@ void build_field(global_variables &globals) { - omp(parallel(2) enable_target(globals.use_target) - mapToFrom2D(field.work_array1) - mapToFrom2D(field.work_array2) - mapToFrom2D(field.work_array3) - mapToFrom2D(field.work_array4) - mapToFrom2D(field.work_array5) - mapToFrom2D(field.work_array6) - mapToFrom2D(field.work_array7) - mapToFrom2D(field.xvel0) - mapToFrom2D(field.xvel1) - mapToFrom2D(field.yvel0) - mapToFrom2D(field.yvel1) - ) + mapToFrom2Df(field, work_array1) + mapToFrom2Df(field, work_array2) + mapToFrom2Df(field, work_array3) + mapToFrom2Df(field, work_array4) + mapToFrom2Df(field, work_array5) + mapToFrom2Df(field, work_array6) + mapToFrom2Df(field, work_array7) + mapToFrom2Df(field, xvel0) + mapToFrom2Df(field, xvel1) + mapToFrom2Df(field, yvel0) + mapToFrom2Df(field, yvel1) + + omp(parallel(2) enable_target(globals.use_target)) for (int j = (0); j < (yrange + 1); j++) { for (int i = (0); i < (xrange + 1); i++) { - idx2(field.work_array1, i, j) = 0.0; - idx2(field.work_array2, i, j) = 0.0; - idx2(field.work_array3, i, j) = 0.0; - idx2(field.work_array4, i, j) = 0.0; - idx2(field.work_array5, i, j) = 0.0; - idx2(field.work_array6, i, j) = 0.0; - idx2(field.work_array7, i, j) = 0.0; - idx2(field.xvel0, i, j) = 0.0; - idx2(field.xvel1, i, j) = 0.0; - idx2(field.yvel0, i, j) = 0.0; - idx2(field.yvel1, i, j) = 0.0; + idx2f(field, work_array1, i, j) = 0.0; + idx2f(field, work_array2, i, j) = 0.0; + idx2f(field, work_array3, i, j) = 0.0; + idx2f(field, work_array4, i, j) = 0.0; + idx2f(field, work_array5, i, j) = 0.0; + idx2f(field, work_array6, i, j) = 0.0; + idx2f(field, work_array7, i, j) = 0.0; + idx2f(field, xvel0, i, j) = 0.0; + idx2f(field, xvel1, i, j) = 0.0; + idx2f(field, yvel0, i, j) = 0.0; + idx2f(field, yvel1, i, j) = 0.0; } } // Nested loop over (t_ymin-2:t_ymax+2) and (t_xmin-2:t_xmax+2) inclusive - omp(parallel(2) enable_target(globals.use_target) - mapToFrom2D(field.density0) - mapToFrom2D(field.density1) - mapToFrom2D(field.energy0) - mapToFrom2D(field.energy1) - mapToFrom2D(field.pressure) - mapToFrom2D(field.viscosity) - mapToFrom2D(field.soundspeed) - mapToFrom2D(field.volume) - ) + mapToFrom2Df(field, density0) + mapToFrom2Df(field, density1) + mapToFrom2Df(field, energy0) + mapToFrom2Df(field, energy1) + mapToFrom2Df(field, pressure) + mapToFrom2Df(field, viscosity) + mapToFrom2Df(field, soundspeed) + mapToFrom2Df(field, volume) + + omp(parallel(2) enable_target(globals.use_target)) for (int j = (0); j < (yrange); j++) { for (int i = (0); i < (xrange); i++) { - idx2(field.density0, i, j) = 0.0; - idx2(field.density1, i, j) = 0.0; - idx2(field.energy0, i, j) = 0.0; - idx2(field.energy1, i, j) = 0.0; - idx2(field.pressure, i, j) = 0.0; - idx2(field.viscosity, i, j) = 0.0; - idx2(field.soundspeed, i, j) = 0.0; - idx2(field.volume, i, j) = 0.0; + idx2f(field, density0, i, j) = 0.0; + idx2f(field, density1, i, j) = 0.0; + idx2f(field, energy0, i, j) = 0.0; + idx2f(field, energy1, i, j) = 0.0; + idx2f(field, pressure, i, j) = 0.0; + idx2f(field, viscosity, i, j) = 0.0; + idx2f(field, soundspeed, i, j) = 0.0; + idx2f(field, volume, i, j) = 0.0; } } // Nested loop over (t_ymin-2:t_ymax+2) and (t_xmin-2:t_xmax+3) inclusive - omp(parallel(2) enable_target(globals.use_target) - mapToFrom2D(field.vol_flux_x) - mapToFrom2D(field.mass_flux_x) - mapToFrom2D(field.xarea) - ) + mapToFrom2Df(field, vol_flux_x) + mapToFrom2Df(field, mass_flux_x) + mapToFrom2Df(field, xarea) + + omp(parallel(2) enable_target(globals.use_target)) for (int j = (0); j < (yrange); j++) { for (int i = (0); i < (xrange); i++) { - idx2(field.vol_flux_x, i, j) = 0.0; - idx2(field.mass_flux_x, i, j) = 0.0; - idx2(field.xarea, i, j) = 0.0; + idx2f(field, vol_flux_x, i, j) = 0.0; + idx2f(field, mass_flux_x, i, j) = 0.0; + idx2f(field, xarea, i, j) = 0.0; } } // Nested loop over (t_ymin-2:t_ymax+3) and (t_xmin-2:t_xmax+2) inclusive - omp(parallel(2) enable_target(globals.use_target) - mapToFrom2D(field.vol_flux_y) - mapToFrom2D(field.mass_flux_y) - mapToFrom2D(field.yarea) - ) + mapToFrom2Df(field, vol_flux_y) + mapToFrom2Df(field, mass_flux_y) + mapToFrom2Df(field, yarea) + + omp(parallel(2) enable_target(globals.use_target)) for (int j = (0); j < (yrange + 1); j++) { for (int i = (0); i < (xrange); i++) { - idx2(field.vol_flux_y, i, j) = 0.0; - idx2(field.mass_flux_y, i, j) = 0.0; - idx2(field.yarea, i, j) = 0.0; + idx2f(field, vol_flux_y, i, j) = 0.0; + idx2f(field, mass_flux_y, i, j) = 0.0; + idx2f(field, yarea, i, j) = 0.0; } } // (t_xmin-2:t_xmax+2) inclusive - omp(parallel(1) enable_target(globals.use_target) - mapToFrom1D(field.cellx) - mapToFrom1D(field.celldx) - ) + mapToFrom1Df(field, cellx) + mapToFrom1Df(field, celldx) + + omp(parallel(1) enable_target(globals.use_target)) for (int id = (0); id < (xrange); id++) { - idx1(field.cellx, id) = 0.0; - idx1(field.celldx, id) = 0.0; + idx1f(field, cellx, id) = 0.0; + idx1f(field, celldx, id) = 0.0; } // (t_ymin-2:t_ymax+2) inclusive - omp(parallel(1) enable_target(globals.use_target) - mapToFrom1D(field.celly) - mapToFrom1D(field.celldy) - ) + mapToFrom1Df(field, celly) + mapToFrom1Df(field, celldy) + + omp(parallel(1) enable_target(globals.use_target)) for (int id = (0); id < (yrange); id++) { - idx1(field.celly, id) = 0.0; - idx1(field.celldy, id) = 0.0; + idx1f(field, celly, id) = 0.0; + idx1f(field, celldy, id) = 0.0; } // (t_xmin-2:t_xmax+3) inclusive - omp(parallel(1) enable_target(globals.use_target) - mapToFrom1D(field.vertexx) - mapToFrom1D(field.vertexdx) - ) + mapToFrom1Df(field, vertexx) + mapToFrom1Df(field, vertexdx) + + omp(parallel(1) enable_target(globals.use_target)) for (int id = (0); id < (xrange + 1); id++) { - idx1(field.vertexx, id) = 0.0; - idx1(field.vertexdx, id) = 0.0; + idx1f(field, vertexx, id) = 0.0; + idx1f(field, vertexdx, id) = 0.0; } // (t_ymin-2:t_ymax+3) inclusive - omp(parallel(1) enable_target(globals.use_target) - mapToFrom1D(field.vertexy) - mapToFrom1D(field.vertexdy) - ) + mapToFrom1Df(field, vertexy) + mapToFrom1Df(field, vertexdy) + + omp(parallel(1) enable_target(globals.use_target)) for (int id = (0); id < (yrange + 1); id++) { - idx1(field.vertexy, id) = 0.0; - idx1(field.vertexdy, id) = 0.0; + idx1f(field, vertexy, id) = 0.0; + idx1f(field, vertexdy, id) = 0.0; } diff --git a/src/calc_dt.cpp b/src/calc_dt.cpp index 5dc84d9..01bf275 100644 --- a/src/calc_dt.cpp +++ b/src/calc_dt.cpp @@ -39,20 +39,7 @@ void calc_dt_kernel( double dtu_safe, double dtv_safe, double dtdiv_safe, - clover::Buffer2D &xarea, - clover::Buffer2D &yarea, - clover::Buffer1D &cellx, - clover::Buffer1D &celly, - clover::Buffer1D &celldx, - clover::Buffer1D &celldy, - clover::Buffer2D &volume, - clover::Buffer2D &density0, - clover::Buffer2D &energy0, - clover::Buffer2D &pressure, - clover::Buffer2D &viscosity_a, - clover::Buffer2D &soundspeed, - clover::Buffer2D &xvel0, - clover::Buffer2D &yvel0, + field_type &field, double &dt_min_val, int &dtl_control, double &xl_pos, @@ -71,38 +58,40 @@ void calc_dt_kernel( // Kokkos::MDRangePolicy > policy({x_min + 1, y_min + 1}, {x_max + 2, y_max + 2}); + mapToFrom2Df(field, xarea) + mapToFrom2Df(field, yarea) + mapToFrom1Df(field, celldx) + mapToFrom1Df(field, celldy) + mapToFrom2Df(field, volume) + mapToFrom2Df(field, density0) + mapToFrom2Df(field, viscosity) + mapToFrom2Df(field, soundspeed) + mapToFrom2Df(field, xvel0) + mapToFrom2Df(field, yvel0) + + omp(parallel(2) enable_target(use_target) - mapToFrom2D(xarea) - mapToFrom2D(yarea) - mapToFrom1D(celldx) - mapToFrom1D(celldy) - mapToFrom2D(volume) - mapToFrom2D(density0) - mapToFrom2D(viscosity_a) - mapToFrom2D(soundspeed) - mapToFrom2D(xvel0) - mapToFrom2D(yvel0) map(tofrom:dt_min_val) reduction(min:dt_min_val) ) for (int j = (y_min + 1); j < (y_max + 2); j++) { for (int i = (x_min + 1); i < (x_max + 2); i++) { - double dsx = idx1(celldx, i); - double dsy = idx1(celldy, j); - double cc = idx2(soundspeed, i, j) * idx2(soundspeed, i, j); - cc = cc + 2.0 * idx2(viscosity_a, i, j) / idx2(density0, i, j); + double dsx = idx1f(field, celldx, i); + double dsy = idx1f(field, celldy, j); + double cc = idx2f(field, soundspeed, i, j) * idx2f(field, soundspeed, i, j); + cc = cc + 2.0 * idx2f(field, viscosity, i, j) / idx2f(field, density0, i, j); cc = std::fmax(std::sqrt(cc), g_small); double dtct = dtc_safe * std::fmin(dsx, dsy) / cc; double div = 0.0; - double dv1 = (idx2(xvel0, i, j) + idx2(xvel0, i + 0, j + 1)) * idx2(xarea, i, j); - double dv2 = (idx2(xvel0, i + 1, j + 0) + idx2(xvel0, i + 1, j + 1)) * idx2(xarea, i + 1, j + 0); + double dv1 = (idx2f(field, xvel0, i, j) + idx2f(field, xvel0, i + 0, j + 1)) * idx2f(field, xarea, i, j); + double dv2 = (idx2f(field, xvel0, i + 1, j + 0) + idx2f(field, xvel0, i + 1, j + 1)) * idx2f(field, xarea, i + 1, j + 0); div = div + dv2 - dv1; - double dtut = dtu_safe * 2.0 * idx2(volume, i, j) / std::fmax(std::fmax(std::fabs(dv1), std::fabs(dv2)), g_small * idx2(volume, i, j)); - dv1 = (idx2(yvel0, i, j) + idx2(yvel0, i + 1, j + 0)) * idx2(yarea, i, j); - dv2 = (idx2(yvel0, i + 0, j + 1) + idx2(yvel0, i + 1, j + 1)) * idx2(yarea, i + 0, j + 1); + double dtut = dtu_safe * 2.0 * idx2f(field, volume, i, j) / std::fmax(std::fmax(std::fabs(dv1), std::fabs(dv2)), g_small * idx2f(field, volume, i, j)); + dv1 = (idx2f(field, yvel0, i, j) + idx2f(field, yvel0, i + 1, j + 0)) * idx2f(field, yarea, i, j); + dv2 = (idx2f(field, yvel0, i + 0, j + 1) + idx2f(field, yvel0, i + 1, j + 1)) * idx2f(field, yarea, i + 0, j + 1); div = div + dv2 - dv1; - double dtvt = dtv_safe * 2.0 * idx2(volume, i, j) / std::fmax(std::fmax(std::fabs(dv1), std::fabs(dv2)), g_small * idx2(volume, i, j)); - div = div / (2.0 * idx2(volume, i, j)); + double dtvt = dtv_safe * 2.0 * idx2f(field, volume, i, j) / std::fmax(std::fmax(std::fabs(dv1), std::fabs(dv2)), g_small * idx2f(field, volume, i, j)); + div = div / (2.0 * idx2f(field, volume, i, j)); double dtdivt; if (div < -g_small) { dtdivt = dtdiv_safe * (-1.0 / div); @@ -125,14 +114,14 @@ void calc_dt_kernel( if (small != 0) { - auto cellx_acc = cellx; - auto celly_acc = celly; - auto density0_acc = density0; - auto energy0_acc = energy0; - auto pressure_acc = pressure; - auto soundspeed_acc = soundspeed; - auto xvel0_acc = xvel0; - auto yvel0_acc = yvel0; + auto &cellx_acc = field.cellx; + auto &celly_acc = field.celly; + auto &density0_acc = field.density0; + auto &energy0_acc = field.energy0; + auto &pressure_acc = field.pressure; + auto &soundspeed_acc = field.soundspeed; + auto &xvel0_acc = field.xvel0; + auto &yvel0_acc = field.yvel0; std::cout << "Timestep information:" << std::endl @@ -183,20 +172,7 @@ void calc_dt(global_variables &globals, int tile, double &local_dt, std::string globals.config.dtu_safe, globals.config.dtv_safe, globals.config.dtdiv_safe, - t.field.xarea, - t.field.yarea, - t.field.cellx, - t.field.celly, - t.field.celldx, - t.field.celldy, - t.field.volume, - t.field.density0, - t.field.energy0, - t.field.pressure, - t.field.viscosity, - t.field.soundspeed, - t.field.xvel0, - t.field.yvel0, + t.field, local_dt, l_control, xl_pos, diff --git a/src/definitions.h b/src/definitions.h index 4b2be4b..661e3e8 100644 --- a/src/definitions.h +++ b/src/definitions.h @@ -142,14 +142,35 @@ namespace clover { #define _str(s) #s #define parallel(n) omp target teams distribute parallel for simd collapse(n) device(0) + #define enable_target(enable) if(target: (enable)) + + +// #define mapToFrom1D(xs) map(tofrom: xs.data[:0]) +// #define mapToFrom2D(xs) map(tofrom: xs.data[:0]) map(from: xs.sizeX) +// +//// #define mapTo(xs) map(to: xs.data[:xs.N()]) +// #define mapTo1D(xs) map(to: xs.data[:xs.N()]) + + + + + #define mapToFrom2Df(f, xs) double * xs = f.xs.data; const int xs##_sizex = f.xs.sizeX; + #define mapToFrom1Df(f, xs) double * xs = f.xs.data; - #define mapToFrom1D(xs) map(tofrom: xs.data[:0]) - #define mapToFrom2D(xs) map(tofrom: xs.data[:0]) map(from: xs.sizeX) - #define enable_target(enable) if(target: (enable)) -// #define mapTo(xs) map(to: xs.data[:xs.N()]) - #define mapTo1D(xs) map(to: xs.data[:xs.N()]) + #define mapToFrom2Dfn(f, xs, name) double * name = f.xs.data; const int name##_sizex = f.xs.sizeX; + + #define mapToFrom2Dfe( xs, name) double * name = xs.data; const int name##_sizex = xs.sizeX; + #define mapToFrom1Dfe(xs, name) double * name = xs.data; +// #define idx2fn(f, xs, i, j) xs[(i) + (j) * f.xs.sizeX] + + + + #define idx1f(f, xs, i) xs[i] + #define idx2f(f, xs, i, j) xs[(i) + (j) * xs##_sizex] + + #define mapTo1D(xs) #define omp(xs) _Pragma(_xstr(xs)) @@ -340,6 +361,7 @@ struct field_type { pressure(xrange, yrange), viscosity(xrange, yrange), soundspeed(xrange, yrange), + xvel0(xrange + 1, yrange + 1), xvel1(xrange + 1, yrange + 1), yvel0(xrange + 1, yrange + 1), @@ -348,6 +370,7 @@ struct field_type { mass_flux_x(xrange + 1, yrange), vol_flux_y(xrange, yrange + 1), mass_flux_y(xrange, yrange + 1), + work_array1(xrange + 1, yrange + 1), work_array2(xrange + 1, yrange + 1), work_array3(xrange + 1, yrange + 1), @@ -359,6 +382,7 @@ struct field_type { celldx(xrange), celly(yrange), celldy(yrange), + vertexx(xrange + 1), vertexdx(xrange + 1), vertexy(yrange + 1), diff --git a/src/field_summary.cpp b/src/field_summary.cpp index 220479a..98fbd0b 100644 --- a/src/field_summary.cpp +++ b/src/field_summary.cpp @@ -88,13 +88,15 @@ void field_summary(global_variables &globals, parallel_ ¶llel) { int xmin = t.info.t_xmin; field_type &field = t.field; - omp(parallel(1) enable_target( globals.use_target) - mapToFrom2D(field.volume) - mapToFrom2D(field.density0) - mapToFrom2D(field.energy0) - mapToFrom2D(field.pressure) - mapToFrom2D(field.xvel0) - mapToFrom2D(field.yvel0) + mapToFrom2Df(field, volume) + mapToFrom2Df(field, density0) + mapToFrom2Df(field, energy0) + mapToFrom2Df(field, pressure) + mapToFrom2Df(field, xvel0) + mapToFrom2Df(field, yvel0) + + + omp(parallel(1) enable_target(globals.use_target) map(from:vol) map(from:mass) map(from:ie) @@ -108,16 +110,16 @@ void field_summary(global_variables &globals, parallel_ ¶llel) { double vsqrd = 0.0; for (int kv = k; kv <= k + 1; ++kv) { for (int jv = j; jv <= j + 1; ++jv) { - vsqrd += 0.25 * (idx2(field.xvel0, jv, kv) * idx2(field.xvel0, jv, kv) + idx2(field.yvel0, jv, kv) * idx2(field.yvel0, jv, kv)); + vsqrd += 0.25 * (idx2f(field, xvel0, jv, kv) * idx2f(field, xvel0, jv, kv) + idx2f(field, yvel0, jv, kv) * idx2f(field, yvel0, jv, kv)); } } - double cell_vol = idx2(field.volume, j, k); - double cell_mass = cell_vol * idx2(field.density0, j, k); + double cell_vol = idx2f(field, volume, j, k); + double cell_mass = cell_vol * idx2f(field, density0, j, k); vol += cell_vol; mass += cell_mass; - ie += cell_mass * idx2(field.energy0, j, k); + ie += cell_mass * idx2f(field, energy0, j, k); ke += cell_mass * 0.5 * vsqrd; - press += cell_vol * idx2(field.pressure, j, k); + press += cell_vol * idx2f(field, pressure, j, k); } diff --git a/src/flux_calc.cpp b/src/flux_calc.cpp index 68c6afe..1565bb5 100644 --- a/src/flux_calc.cpp +++ b/src/flux_calc.cpp @@ -31,33 +31,28 @@ void flux_calc_kernel( bool use_target, int x_min, int x_max, int y_min, int y_max, double dt, - clover::Buffer2D &xarea, - clover::Buffer2D &yarea, - clover::Buffer2D &xvel0, - clover::Buffer2D &yvel0, - clover::Buffer2D &xvel1, - clover::Buffer2D &yvel1, - clover::Buffer2D &vol_flux_x, - clover::Buffer2D &vol_flux_y) { + field_type &field) { // DO k=y_min,y_max+1 // DO j=x_min,x_max+1 // Note that the loops calculate one extra flux than required, but this // allows loop fusion that improves performance - omp(parallel(2) enable_target(use_target) - mapToFrom2D(xarea) - mapToFrom2D(yarea) - mapToFrom2D(xvel0) - mapToFrom2D(yvel0) - mapToFrom2D(xvel1) - mapToFrom2D(yvel1) - mapToFrom2D(vol_flux_x) - mapToFrom2D(vol_flux_y) - ) + mapToFrom2Df(field, xarea) + mapToFrom2Df(field, yarea) + mapToFrom2Df(field, xvel0) + mapToFrom2Df(field, yvel0) + mapToFrom2Df(field, xvel1) + mapToFrom2Df(field, yvel1) + mapToFrom2Df(field, vol_flux_x) + mapToFrom2Df(field, vol_flux_y) + + omp(parallel(2) enable_target(use_target)) for (int j = (y_min + 1); j < (y_max + 1 + 2); j++) { for (int i = (x_min + 1); i < (x_max + 1 + 2); i++) { - idx2(vol_flux_x, i, j) = 0.25 * dt * idx2(xarea, i, j) * (idx2(xvel0, i, j) + idx2(xvel0, i + 0, j + 1) + idx2(xvel1, i, j) + idx2(xvel1, i + 0, j + 1)); - idx2(vol_flux_y, i, j) = 0.25 * dt * idx2(yarea, i, j) * (idx2(yvel0, i, j) + idx2(yvel0, i + 1, j + 0) + idx2(yvel1, i, j) + idx2(yvel1, i + 1, j + 0)); + idx2f(field, vol_flux_x, i, j) = 0.25 * dt * idx2f(field, xarea, i, j) * + (idx2f(field, xvel0, i, j) + idx2f(field, xvel0, i + 0, j + 1) + idx2f(field, xvel1, i, j) + idx2f(field, xvel1, i + 0, j + 1)); + idx2f(field, vol_flux_y, i, j) = 0.25 * dt * idx2f(field, yarea, i, j) * + (idx2f(field, yvel0, i, j) + idx2f(field, yvel0, i + 1, j + 0) + idx2f(field, yvel1, i, j) + idx2f(field, yvel1, i + 1, j + 0)); } } } @@ -85,14 +80,7 @@ void flux_calc(global_variables &globals) { t.info.t_ymin, t.info.t_ymax, globals.dt, - t.field.xarea, - t.field.yarea, - t.field.xvel0, - t.field.yvel0, - t.field.xvel1, - t.field.yvel1, - t.field.vol_flux_x, - t.field.vol_flux_y); + t.field); } #if FLUSH_BUFFER diff --git a/src/generate_chunk.cpp b/src/generate_chunk.cpp index 71039f3..da3bd5c 100644 --- a/src/generate_chunk.cpp +++ b/src/generate_chunk.cpp @@ -28,36 +28,34 @@ #include #include "generate_chunk.h" -#include "comms.h" - void generate_chunk(const int tile, global_variables &globals) { // Need to copy the host array of state input data into a device array - clover::Buffer1D state_density(globals.config.number_of_states); - clover::Buffer1D state_energy(globals.config.number_of_states); - clover::Buffer1D state_xvel(globals.config.number_of_states); - clover::Buffer1D state_yvel(globals.config.number_of_states); - clover::Buffer1D state_xmin(globals.config.number_of_states); - clover::Buffer1D state_xmax(globals.config.number_of_states); - clover::Buffer1D state_ymin(globals.config.number_of_states); - clover::Buffer1D state_ymax(globals.config.number_of_states); - clover::Buffer1D state_radius(globals.config.number_of_states); - clover::Buffer1D state_geometry(globals.config.number_of_states); + clover::Buffer1D state_density_buffer(globals.config.number_of_states); + clover::Buffer1D state_energy_buffer(globals.config.number_of_states); + clover::Buffer1D state_xvel_buffer(globals.config.number_of_states); + clover::Buffer1D state_yvel_buffer(globals.config.number_of_states); + clover::Buffer1D state_xmin_buffer(globals.config.number_of_states); + clover::Buffer1D state_xmax_buffer(globals.config.number_of_states); + clover::Buffer1D state_ymin_buffer(globals.config.number_of_states); + clover::Buffer1D state_ymax_buffer(globals.config.number_of_states); + clover::Buffer1D state_radius_buffer(globals.config.number_of_states); + clover::Buffer1D state_geometry_buffer(globals.config.number_of_states); // Copy the data to the new views for (int state = 0; state < globals.config.number_of_states; ++state) { - idx1(state_density, state) = globals.config.states[state].density; - idx1(state_energy, state) = globals.config.states[state].energy; - idx1(state_xvel, state) = globals.config.states[state].xvel; - idx1(state_yvel, state) = globals.config.states[state].yvel; - idx1(state_xmin, state) = globals.config.states[state].xmin; - idx1(state_xmax, state) = globals.config.states[state].xmax; - idx1(state_ymin, state) = globals.config.states[state].ymin; - idx1(state_ymax, state) = globals.config.states[state].ymax; - idx1(state_radius, state) = globals.config.states[state].radius; - idx1(state_geometry, state) = globals.config.states[state].geometry; + idx1(state_density_buffer, state) = globals.config.states[state].density; + idx1(state_energy_buffer, state) = globals.config.states[state].energy; + idx1(state_xvel_buffer, state) = globals.config.states[state].xvel; + idx1(state_yvel_buffer, state) = globals.config.states[state].yvel; + idx1(state_xmin_buffer, state) = globals.config.states[state].xmin; + idx1(state_xmax_buffer, state) = globals.config.states[state].xmax; + idx1(state_ymin_buffer, state) = globals.config.states[state].ymin; + idx1(state_ymax_buffer, state) = globals.config.states[state].ymax; + idx1(state_radius_buffer, state) = globals.config.states[state].radius; + idx1(state_geometry_buffer, state) = globals.config.states[state].geometry; } // Kokkos::deep_copy (TO, FROM) @@ -77,91 +75,97 @@ void generate_chunk(const int tile, global_variables &globals) { field_type &field = globals.chunk.tiles[tile].field; - const double state_energy_0 = idx1(state_energy, 0); - const double state_density_0 = idx1(state_density, 0); - const double state_xvel_0 = idx1(state_xvel, 0); - const double state_yvel_0 = idx1(state_yvel, 0); + const double state_energy_0 = idx1(state_energy_buffer, 0); + const double state_density_0 = idx1(state_density_buffer, 0); + const double state_xvel_0 = idx1(state_xvel_buffer, 0); + const double state_yvel_0 = idx1(state_yvel_buffer, 0); // State 1 is always the background state - omp(parallel(2) enable_target(globals.use_target) - mapToFrom2D(field.energy0) - mapToFrom2D(field.density0) - mapToFrom2D(field.xvel0) - mapToFrom2D(field.yvel0) - ) + mapToFrom2Df(field, energy0) + mapToFrom2Df(field, density0) + mapToFrom2Df(field, xvel0) + mapToFrom2Df(field, yvel0) + + omp(parallel(2) enable_target(globals.use_target)) for (int j = (0); j < (yrange); j++) { for (int i = (0); i < (xrange); i++) { - idx2(field.energy0, i, j) = state_energy_0; - idx2(field.density0, i, j) = state_density_0; - idx2(field.xvel0, i, j) = state_xvel_0; - idx2(field.yvel0, i, j) = state_yvel_0; + idx2f(field, energy0, i, j) = state_energy_0; + idx2f(field, density0, i, j) = state_density_0; + idx2f(field, xvel0, i, j) = state_xvel_0; + idx2f(field, yvel0, i, j) = state_yvel_0; } } for (int state = 1; state < globals.config.number_of_states; ++state) { - omp(parallel(2) enable_target(globals.use_target) - mapToFrom2D(field.density0) - mapToFrom2D(field.xvel0) - mapToFrom2D(field.yvel0) - mapToFrom2D(field.energy0) - - mapToFrom1D(field.cellx) - mapToFrom1D(field.celly) - - mapToFrom1D(field.vertexx) - mapToFrom1D(field.vertexy) - - mapTo1D(state_density) - mapTo1D(state_energy) - mapTo1D(state_xvel) - mapTo1D(state_yvel) - mapTo1D(state_xmin) - mapTo1D(state_xmax) - mapTo1D(state_ymin) - mapTo1D(state_ymax) - mapTo1D(state_radius) - mapTo1D(state_geometry) + mapToFrom1Df(field, cellx) + mapToFrom1Df(field, celly) + + mapToFrom1Df(field, vertexx) + mapToFrom1Df(field, vertexy) + + const double *state_density = state_density_buffer.data; + const double *state_energy = state_energy_buffer.data; + const double *state_xvel = state_xvel_buffer.data; + const double *state_yvel = state_yvel_buffer.data; + const double *state_xmin = state_xmin_buffer.data; + const double *state_xmax = state_xmax_buffer.data; + const double *state_ymin = state_ymin_buffer.data; + const double *state_ymax = state_ymax_buffer.data; + const double *state_radius = state_radius_buffer.data; + const int *state_geometry = state_geometry_buffer.data; + + omp(parallel(2) enable_target(globals.use_target) + map(to : state_density[:state_density_buffer.N()]) + map(to : state_energy[:state_energy_buffer.N()]) + map(to : state_xvel[:state_xvel_buffer.N()]) + map(to : state_yvel[:state_yvel_buffer.N()]) + map(to : state_xmin[:state_xmin_buffer.N()]) + map(to : state_xmax[:state_xmax_buffer.N()]) + map(to : state_ymin[:state_ymin_buffer.N()]) + map(to : state_ymax[:state_ymax_buffer.N()]) + map(to : state_radius[:state_radius_buffer.N()]) + map(to : state_geometry[:state_geometry_buffer.N()]) ) for (int j = (0); j < (yrange); j++) { for (int i = (0); i < (xrange); i++) { - double x_cent = idx1(state_xmin, state); - double y_cent = idx1(state_ymin, state); - if (idx1(state_geometry, state) == g_rect) { - if (idx1(field.vertexx, i + 1) >= idx1(state_xmin, state) && idx1(field.vertexx, i) < idx1(state_xmax, state)) { - if (idx1(field.vertexy, j + 1) >= idx1(state_ymin, state) && idx1(field.vertexy, j) < idx1(state_ymax, state)) { - idx2(field.energy0, i, j) = idx1(state_energy, state); - idx2(field.density0, i, j) = idx1(state_density, state); + double x_cent = state_xmin[state]; + double y_cent = state_ymin[state]; + if (state_geometry[state] == g_rect) { + if (idx1f(field, vertexx, i + 1) >= state_xmin[state] && idx1f(field, vertexx, i) < state_xmax[state]) { + if (idx1f(field, vertexy, j + 1) >= state_ymin[state] && idx1f(field, vertexy, j) < state_ymax[state]) { + idx2f(field, energy0, i, j) = state_energy[state]; + idx2f(field, density0, i, j) = state_density[state]; for (int kt = j; kt <= j + 1; ++kt) { for (int jt = i; jt <= i + 1; ++jt) { - idx2(field.xvel0, jt, kt) = idx1(state_xvel, state); - idx2(field.yvel0, jt, kt) = idx1(state_yvel, state); + idx2f(field, xvel0, jt, kt) = state_xvel[state]; + idx2f(field, yvel0, jt, kt) = state_yvel[state]; } } } } - } else if (idx1(state_geometry, state) == g_circ) { - double radius = std::sqrt((idx1(field.cellx, i) - x_cent) * - (idx1(field.cellx, i) - x_cent) + (idx1(field.celly, j) - y_cent) * (idx1(field.celly, j) - y_cent)); - if (radius <= idx1(state_radius, state)) { - idx2(field.energy0, i, j) = idx1(state_energy, state); - idx2(field.density0, i, j) = idx1(state_density, state); + } else if (state_geometry[state] == g_circ) { + double radius = std::sqrt((idx1f(field, cellx, i) - x_cent) * + (idx1f(field, cellx, i) - x_cent) + (idx1f(field, celly, j) - y_cent) * (idx1f(field, celly, j) - y_cent)); + if (radius <= state_radius[state]) { + idx2f(field, energy0, i, j) = state_energy[state]; + idx2f(field, density0, i, j) = state_density[state]; for (int kt = j; kt <= j + 1; ++kt) { for (int jt = i; jt <= i + 1; ++jt) { - idx2(field.xvel0, jt, kt) = idx1(state_xvel, state); - idx2(field.yvel0, jt, kt) = idx1(state_yvel, state); + idx2f(field, xvel0, jt, kt) = state_xvel[state]; + idx2f(field, yvel0, jt, kt) = state_yvel[state]; } } } - } else if (idx1(state_geometry, state) == g_point) { - if (idx1(field.vertexx, i) == x_cent && idx1(field.vertexy, j) == y_cent) { - idx2(field.energy0, i, j) = idx1(state_energy, state); - idx2(field.density0, i, j) = idx1(state_density, state); + } else if (state_geometry[state] == g_point) { + if (idx1f(field, vertexx, i) == x_cent && idx1f(field, vertexy, j) == y_cent) { + idx2f(field, energy0, i, j) = state_energy[state]; + idx2f(field, density0, i, j) = state_density[state]; for (int kt = j; kt <= j + 1; ++kt) { for (int jt = i; jt <= i + 1; ++jt) { - idx2(field.xvel0, jt, kt) = idx1(state_xvel, state); - idx2(field.yvel0, jt, kt) = idx1(state_yvel, state); + idx2f(field, xvel0, jt, kt) = state_xvel[state]; + idx2f(field, yvel0, jt, kt) = state_yvel[state]; } } } diff --git a/src/ideal_gas.cpp b/src/ideal_gas.cpp index d5672f1..50a15b1 100644 --- a/src/ideal_gas.cpp +++ b/src/ideal_gas.cpp @@ -37,10 +37,10 @@ int N = 0; void ideal_gas_kernel( bool use_target, int x_min, int x_max, int y_min, int y_max, - clover::Buffer2D &density, - clover::Buffer2D &energy, - clover::Buffer2D &pressure, - clover::Buffer2D &soundspeed) { + clover::Buffer2D &density_buffer, + clover::Buffer2D &energy_buffer, + clover::Buffer2D &pressure_buffer, + clover::Buffer2D &soundspeed_buffer) { //std::cout <<" ideal_gas(" << x_min+1 << ","<< y_min+1<< ","<< x_max+2<< ","<< y_max +2 << ")" << std::endl; // DO k=y_min,y_max @@ -48,20 +48,20 @@ void ideal_gas_kernel( // Kokkos::MDRangePolicy > policy({x_min + 1, y_min + 1}, {x_max + 2, y_max + 2}); - omp(parallel(2) enable_target(use_target) - mapToFrom2D(density) - mapToFrom2D(energy) - mapToFrom2D(pressure) - mapToFrom2D(soundspeed) - ) + mapToFrom2Dfe(density_buffer, density) + mapToFrom2Dfe(energy_buffer, energy) + mapToFrom2Dfe(pressure_buffer, pressure) + mapToFrom2Dfe(soundspeed_buffer, soundspeed) + + omp(parallel(2) enable_target(use_target)) for (int j = (y_min + 1); j < (y_max + 2); j++) { for (int i = (x_min + 1); i < (x_max + 2); i++) { - double v = 1.0 / idx2(density, i, j); - idx2(pressure, i, j) = (1.4 - 1.0) * idx2(density, i, j) * idx2(energy, i, j); - double pressurebyenergy = (1.4 - 1.0) * idx2(density, i, j); - double pressurebyvolume = -idx2(density, i, j) * idx2(pressure, i, j); - double sound_speed_squared = v * v * (idx2(pressure, i, j) * pressurebyenergy - pressurebyvolume); - idx2(soundspeed, i, j) = std::sqrt(sound_speed_squared); + double v = 1.0 / idx2f(,density, i, j); + idx2f(,pressure, i, j) = (1.4 - 1.0) * idx2f(,density, i, j) * idx2f(,energy, i, j); + double pressurebyenergy = (1.4 - 1.0) * idx2f(,density, i, j); + double pressurebyvolume = -idx2f(,density, i, j) * idx2f(,pressure, i, j); + double sound_speed_squared = v * v * (idx2f(,pressure, i, j) * pressurebyenergy - pressurebyvolume); + idx2f(,soundspeed, i, j) = std::sqrt(sound_speed_squared); } }; diff --git a/src/initialise_chunk.cpp b/src/initialise_chunk.cpp index d92d13c..71b13be 100644 --- a/src/initialise_chunk.cpp +++ b/src/initialise_chunk.cpp @@ -58,63 +58,57 @@ void initialise_chunk(const int tile, global_variables &globals) { - omp(parallel(1) enable_target(globals.use_target) - mapToFrom1D(field.vertexx) - mapToFrom1D(field.vertexdx) - ) + mapToFrom1Df(field, vertexx) + mapToFrom1Df(field, vertexdx) + + omp(parallel(1) enable_target(globals.use_target)) for (int j = (0); j < (xrange); j++) { - idx1(field.vertexx, j) = xmin + dx * (j - 1 - x_min); - idx1(field.vertexdx, j) = dx; + idx1f(field, vertexx, j) = xmin + dx * (j - 1 - x_min); + idx1f(field, vertexdx, j) = dx; } - omp(parallel(1) enable_target(globals.use_target) - mapToFrom1D(field.vertexy) - mapToFrom1D(field.vertexdy) - ) + mapToFrom1Df(field, vertexy) + mapToFrom1Df(field, vertexdy) + + omp(parallel(1) enable_target(globals.use_target)) for (int k = (0); k < (yrange); k++) { - idx1(field.vertexy, k) = ymin + dy * (k - 1 - y_min); - idx1(field.vertexdy, k) = dy; + idx1f(field, vertexy, k) = ymin + dy * (k - 1 - y_min); + idx1f(field, vertexdy, k) = dy; } const int xrange1 = (x_max + 2) - (x_min - 2) + 1; const int yrange1 = (y_max + 2) - (y_min - 2) + 1; - omp(parallel(1) enable_target(globals.use_target) - mapToFrom1D(field.cellx) - mapToFrom1D(field.celldx) - mapToFrom1D(field.vertexx) - ) + mapToFrom1Df(field, cellx) + mapToFrom1Df(field, celldx) + omp(parallel(1) enable_target(globals.use_target)) for (int j = (0); j < (xrange1); j++) { - idx1(field.cellx, j) = 0.5 * (idx1(field.vertexx, j) + idx1(field.vertexx, j + 1)); - idx1(field.celldx, j) = dx; + idx1f(field, cellx, j) = 0.5 * (idx1f(field, vertexx, j) + idx1f(field, vertexx, j + 1)); + idx1f(field, celldx, j) = dx; } - omp(parallel(1) enable_target(globals.use_target) - mapToFrom1D(field.celly) - mapToFrom1D(field.celldy) - mapToFrom1D(field.vertexy) - ) + mapToFrom1Df(field, celly) + mapToFrom1Df(field, celldy) + omp(parallel(1) enable_target(globals.use_target)) for (int k = (0); k < (yrange1); k++) { - idx1(field.celly, k) = 0.5 * (idx1(field.vertexy, k) + idx1(field.vertexy, k + 1)); - idx1(field.celldy, k) = dy; + idx1f(field, celly, k) = 0.5 * (idx1f(field, vertexy, k) + idx1f(field, vertexy, k + 1)); + idx1f(field, celldy, k) = dy; } - omp(parallel(2) enable_target(globals.use_target) - mapToFrom2D(field.volume) - mapToFrom2D(field.xarea) - mapToFrom2D(field.yarea) - mapToFrom1D(field.celldx) - mapToFrom1D(field.celldy) - ) + mapToFrom2Df(field, volume) + mapToFrom2Df(field, xarea) + mapToFrom2Df(field, yarea) + + omp(parallel(2) enable_target(globals.use_target)) for (int j = (0); j < (yrange1); j++) { for (int i = (0); i < (xrange1); i++) { - idx2(field.volume, i, j) = dx * dy; - idx2(field.xarea, i, j) = idx1(field.celldy, j); - idx2(field.yarea, i, j) = idx1(field.celldx, i); + idx2f(field, volume, i, j) = dx * dy; + idx2f(field, xarea, i, j) = idx1f(field, celldy, j); + idx2f(field, yarea, i, j) = idx1f(field, celldx, i); } } diff --git a/src/pack_kernel.cpp b/src/pack_kernel.cpp index a626dfd..4a64e6f 100644 --- a/src/pack_kernel.cpp +++ b/src/pack_kernel.cpp @@ -27,8 +27,8 @@ void clover_pack_message_left(bool use_target, int x_min, int x_max, int y_min, int y_max, - clover::Buffer2D &field, - clover::Buffer1D &left_snd, + clover::Buffer2D &field_buffer, + clover::Buffer1D &left_snd_buffer, int cell_data, int vertex_data, int x_face_data, int y_face_data, int depth, int field_type, int buffer_offset) { @@ -54,14 +54,16 @@ void clover_pack_message_left(bool use_target, int x_min, int x_max, int y_min, y_inc = 1; } - // DO k=y_min-depth,y_max+y_inc+depth + // DO k=y_min-depth,y_max+y_inc+depth - omp(parallel(1) enable_target(use_target) mapToFrom1D(left_snd) mapToFrom2D(field)) + mapToFrom1Dfe(left_snd_buffer, left_snd) + mapToFrom2Dfe(field_buffer, field) + omp(parallel(1) enable_target(use_target)) for (int k = (y_min - depth + 1); k < (y_max + y_inc + depth + 2); k++) { for (int j = 0; j < depth; ++j) { int index = buffer_offset + j + (k + depth - 1) * depth; - idx1(left_snd, index) = idx2(field, x_min + x_inc - 1 + j, k); + idx1f(, left_snd, index) = idx2f(, field, x_min + x_inc - 1 + j, k); } } @@ -70,8 +72,8 @@ void clover_pack_message_left(bool use_target, int x_min, int x_max, int y_min, void clover_unpack_message_left(bool use_target, int x_min, int x_max, int y_min, int y_max, - clover::Buffer2D &field, - clover::Buffer1D &left_rcv, + clover::Buffer2D &field_buffer, + clover::Buffer1D &left_rcv_buffer, int cell_data, int vertex_data, int x_face_data, int y_face_data, int depth, int field_type, int buffer_offset) { @@ -93,16 +95,18 @@ void clover_unpack_message_left(bool use_target, int x_min, int x_max, int y_min y_inc = 1; } - // DO k=y_min-depth,y_max+y_inc+depth + // DO k=y_min-depth,y_max+y_inc+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(field) mapToFrom1D(left_rcv)) + mapToFrom2Dfe(field_buffer, field) + mapToFrom1Dfe(left_rcv_buffer, left_rcv) + omp(parallel(1) enable_target(use_target)) for (int k = (y_min - depth + 1); k < (y_max + y_inc + depth + 2); k++) { for (int j = 0; j < depth; ++j) { int index = buffer_offset + j + (k + depth - 1) * depth; - idx2(field, x_min - j, k) = idx1(left_rcv, index); + idx2f(, field, x_min - j, k) = idx1f(, left_rcv, index); } } @@ -111,8 +115,8 @@ void clover_unpack_message_left(bool use_target, int x_min, int x_max, int y_min void clover_pack_message_right(bool use_target, int x_min, int x_max, int y_min, int y_max, - clover::Buffer2D &field, - clover::Buffer1D &right_snd, + clover::Buffer2D &field_buffer, + clover::Buffer1D &right_snd_buffer, int cell_data, int vertex_data, int x_face_data, int y_face_data, int depth, int field_type, int buffer_offset) { @@ -134,12 +138,14 @@ void clover_pack_message_right(bool use_target, int x_min, int x_max, int y_min, y_inc = 1; } - // DO k=y_min-depth,y_max+y_inc+depth - omp(parallel(1) enable_target(use_target) mapToFrom1D(right_snd) mapToFrom2D(field)) + // DO k=y_min-depth,y_max+y_inc+depth + mapToFrom1Dfe(right_snd_buffer, right_snd) + mapToFrom2Dfe(field_buffer, field) + omp(parallel(1) enable_target(use_target)) for (int k = (y_min - depth + 1); k < (y_max + y_inc + depth + 2); k++) { for (int j = 0; j < depth; ++j) { int index = buffer_offset + j + (k + depth - 1) * depth; - idx1(right_snd, index) = idx2(field, x_min + 1 + j, k); + idx1f(, right_snd, index) = idx2f(, field, x_min + 1 + j, k); } } @@ -148,8 +154,8 @@ void clover_pack_message_right(bool use_target, int x_min, int x_max, int y_min, void clover_unpack_message_right(bool use_target, int x_min, int x_max, int y_min, int y_max, - clover::Buffer2D &field, - clover::Buffer1D &right_rcv, + clover::Buffer2D &field_buffer, + clover::Buffer1D &right_rcv_buffer, int cell_data, int vertex_data, int x_face_data, int y_face_data, int depth, int field_type, int buffer_offset) { @@ -175,12 +181,14 @@ void clover_unpack_message_right(bool use_target, int x_min, int x_max, int y_mi y_inc = 1; } - // DO k=y_min-depth,y_max+y_inc+depth - omp(parallel(1) enable_target(use_target) mapToFrom1D(right_rcv) mapToFrom2D(field)) + // DO k=y_min-depth,y_max+y_inc+depth + mapToFrom1Dfe(right_rcv_buffer, right_rcv) + mapToFrom2Dfe(field_buffer, field) + omp(parallel(1) enable_target(use_target)) for (int k = (y_min - depth + 1); k < (y_max + y_inc + depth + 2); k++) { for (int j = 0; j < depth; ++j) { int index = buffer_offset + j + (k + depth - 1) * depth; - idx1(right_rcv, index) = idx2(field, x_max + x_inc + j, k); + idx1f(, right_rcv, index) = idx2f(, field, x_max + x_inc + j, k); } } @@ -188,8 +196,8 @@ void clover_unpack_message_right(bool use_target, int x_min, int x_max, int y_mi } void clover_pack_message_top(bool use_target, int x_min, int x_max, int y_min, int y_max, - clover::Buffer2D &field, - clover::Buffer1D &top_snd, + clover::Buffer2D &field_buffer, + clover::Buffer1D &top_snd_buffer, int cell_data, int vertex_data, int x_face_data, int y_face_data, int depth, int field_type, int buffer_offset) { @@ -214,17 +222,19 @@ void clover_pack_message_top(bool use_target, int x_min, int x_max, int y_min, i for (int k = 0; k < depth; ++k) { // DO j=x_min-depth,x_max+x_inc+depth - omp(parallel(1) enable_target(use_target) mapToFrom1D(top_snd) mapToFrom2D(field)) + mapToFrom1Dfe(top_snd_buffer, top_snd) + mapToFrom2Dfe(field_buffer, field) + omp(parallel(1) enable_target(use_target)) for (int j = (x_min - depth + 1); j < (x_max + x_inc + depth + 2); j++) { int index = buffer_offset + k + (j + depth - 1) * depth; - idx1(top_snd, index) = idx2(field, j, y_max + 1 - k); + idx1f(, top_snd, index) = idx2f(, field, j, y_max + 1 - k); } } } void clover_unpack_message_top(bool use_target, int x_min, int x_max, int y_min, int y_max, - clover::Buffer2D &field, - clover::Buffer1D &top_rcv, + clover::Buffer2D &field_buffer, + clover::Buffer1D &top_rcv_buffer, int cell_data, int vertex_data, int x_face_data, int y_face_data, int depth, int field_type, int buffer_offset) { @@ -254,18 +264,20 @@ void clover_unpack_message_top(bool use_target, int x_min, int x_max, int y_min, // DO j=x_min-depth,x_max+x_inc+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(field) mapToFrom1D(top_rcv)) + mapToFrom2Dfe(field_buffer, field) + mapToFrom1Dfe(top_rcv_buffer, top_rcv) + omp(parallel(1) enable_target(use_target)) for (int j = (x_min - depth + 1); j < (x_max + x_inc + depth + 2); j++) { int index = buffer_offset + k + (j + depth - 1) * depth; - idx2(field, j, y_max + y_inc + k) = idx1(top_rcv, index); + idx2f(, field, j, y_max + y_inc + k) = idx1f(, top_rcv, index); } } } void clover_pack_message_bottom(bool use_target, int x_min, int x_max, int y_min, int y_max, - clover::Buffer2D &field, - clover::Buffer1D &bottom_snd, + clover::Buffer2D &field_buffer, + clover::Buffer1D &bottom_snd_buffer, int cell_data, int vertex_data, int x_face_data, int y_face_data, int depth, int field_type, int buffer_offset) { @@ -294,17 +306,19 @@ void clover_pack_message_bottom(bool use_target, int x_min, int x_max, int y_min for (int k = 0; k < depth; ++k) { // DO j=x_min-depth,x_max+x_inc+depth - omp(parallel(1) enable_target(use_target) mapToFrom1D(bottom_snd) mapToFrom2D(field)) + mapToFrom1Dfe(bottom_snd_buffer, bottom_snd) + mapToFrom2Dfe(field_buffer, field) + omp(parallel(1) enable_target(use_target)) for (int j = (x_min - depth + 1); j < (x_max + x_inc + depth + 2); j++) { int index = buffer_offset + k + (j + depth - 1) * depth; - idx1(bottom_snd, index) = idx2(field, j, y_min + y_inc - 1 + k); + idx1f(, bottom_snd, index) = idx2f(, field, j, y_min + y_inc - 1 + k); } } } void clover_unpack_message_bottom(bool use_target, int x_min, int x_max, int y_min, int y_max, - clover::Buffer2D &field, - clover::Buffer1D &bottom_rcv, + clover::Buffer2D &field_buffer, + clover::Buffer1D &bottom_rcv_buffer, int cell_data, int vertex_data, int x_face_data, int y_face_data, int depth, int field_type, int buffer_offset) { @@ -329,10 +343,12 @@ void clover_unpack_message_bottom(bool use_target, int x_min, int x_max, int y_m for (int k = 0; k < depth; ++k) { // DO j=x_min-depth,x_max+x_inc+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(field) mapToFrom1D(bottom_rcv)) + mapToFrom2Dfe(field_buffer, field) + mapToFrom1Dfe(bottom_rcv_buffer, bottom_rcv) + omp(parallel(1) enable_target(use_target)) for (int j = (x_min - depth + 1); j < (x_max + x_inc + depth + 2); j++) { int index = buffer_offset + k + (j + depth - 1) * depth; - idx2(field, j, y_min - k) = idx1(bottom_rcv, index); + idx2f(, field, j, y_min - k) = idx1f(, bottom_rcv, index); } } } diff --git a/src/reset_field.cpp b/src/reset_field.cpp index 009ae6d..e3ccb01 100644 --- a/src/reset_field.cpp +++ b/src/reset_field.cpp @@ -29,29 +29,22 @@ void reset_field_kernel( bool use_target, int x_min, int x_max, int y_min, int y_max, - clover::Buffer2D &density0, - clover::Buffer2D &density1, - clover::Buffer2D &energy0, - clover::Buffer2D &energy1, - clover::Buffer2D &xvel0, - clover::Buffer2D &xvel1, - clover::Buffer2D &yvel0, - clover::Buffer2D &yvel1) { + field_type &field) { // DO k=y_min,y_max // DO j=x_min,x_max - omp(parallel(2) enable_target(use_target) - mapToFrom2D(density0) - mapToFrom2D(density1) - mapToFrom2D(energy0) - mapToFrom2D(energy1) - ) + mapToFrom2Df(field, density0) + mapToFrom2Df(field, density1) + mapToFrom2Df(field, energy0) + mapToFrom2Df(field, energy1) + + omp(parallel(2) enable_target(use_target)) for (int j = (y_min + 1); j < (y_max + 2); j++) { for (int i = (x_min + 1); i < (x_max + 2); i++) { - idx2(density0, i, j) = idx2(density1, i, j); - idx2(energy0, i, j) = idx2(energy1, i, j); + idx2f(field, density0, i, j) = idx2f(field, density1, i, j); + idx2f(field, energy0, i, j) = idx2f(field, energy1, i, j); } } @@ -60,16 +53,16 @@ void reset_field_kernel( // DO k=y_min,y_max+1 // DO j=x_min,x_max+1 - omp(parallel(2) enable_target(use_target) - mapToFrom2D(xvel0) - mapToFrom2D(xvel1) - mapToFrom2D(yvel0) - mapToFrom2D(yvel1) - ) + mapToFrom2Df(field, xvel0) + mapToFrom2Df(field, xvel1) + mapToFrom2Df(field, yvel0) + mapToFrom2Df(field, yvel1) + + omp(parallel(2) enable_target(use_target)) for (int j = (y_min + 1); j < (y_max + 1 + 2); j++) { for (int i = (x_min + 1); i < (x_max + 1 + 2); i++) { - idx2(xvel0, i, j) = idx2(xvel1, i, j); - idx2(yvel0, i, j) = idx2(yvel1, i, j); + idx2f(field, xvel0, i, j) = idx2f(field, xvel1, i, j); + idx2f(field, yvel0, i, j) = idx2f(field, yvel1, i, j); } } @@ -97,15 +90,7 @@ void reset_field(global_variables &globals) { t.info.t_xmax, t.info.t_ymin, t.info.t_ymax, - - t.field.density0, - t.field.density1, - t.field.energy0, - t.field.energy1, - t.field.xvel0, - t.field.xvel1, - t.field.yvel0, - t.field.yvel1); + t.field); } #if FLUSH_BUFFER diff --git a/src/revert.cpp b/src/revert.cpp index 150ec98..7013a15 100644 --- a/src/revert.cpp +++ b/src/revert.cpp @@ -30,23 +30,20 @@ void revert_kernel( bool use_target, int x_min, int x_max, int y_min, int y_max, - clover::Buffer2D &density0, - clover::Buffer2D &density1, - clover::Buffer2D &energy0, - clover::Buffer2D &energy1) { + field_type &field) { // DO k=y_min,y_max // DO j=x_min,x_max - omp(parallel(2) enable_target(use_target) - mapToFrom2D(density0) - mapToFrom2D(density1) - mapToFrom2D(energy0) - mapToFrom2D(energy1) - ) + mapToFrom2Df(field, density0) + mapToFrom2Df(field, density1) + mapToFrom2Df(field, energy0) + mapToFrom2Df(field, energy1) + + omp(parallel(2) enable_target(use_target)) for (int j = (y_min + 1); j < (y_max + 2); j++) { for (int i = (x_min + 1); i < (x_max + 2); i++) { - idx2(density1, i, j) = idx2(density0, i, j); - idx2(energy1, i, j) = idx2(energy0, i, j); + idx2f(field, density1, i, j) = idx2f(field, density0, i, j); + idx2f(field, energy1, i, j) = idx2f(field, energy0, i, j); } } @@ -70,10 +67,7 @@ void revert(global_variables &globals) { t.info.t_xmax, t.info.t_ymin, t.info.t_ymax, - t.field.density0, - t.field.density1, - t.field.energy0, - t.field.energy1); + t.field); } #if FLUSH_BUFFER diff --git a/src/start.cpp b/src/start.cpp index db51b4c..8f2eb70 100644 --- a/src/start.cpp +++ b/src/start.cpp @@ -108,7 +108,10 @@ global_variables start(parallel_ ¶llel, for (int tile = 0; tile < config.tiles_per_chunk; ++tile) { initialise_chunk(tile, globals); + if (DEBUG) std::cout << "Field initialised2" << std::endl; + generate_chunk(tile, globals); + if (DEBUG) std::cout << "Field initialised3" << std::endl; } diff --git a/src/update_halo.cpp b/src/update_halo.cpp index b70bf39..16d5933 100644 --- a/src/update_halo.cpp +++ b/src/update_halo.cpp @@ -52,10 +52,11 @@ void update_halo_kernel( // DO j=x_min-depth,x_max+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(field.density0)) + mapToFrom2Df(field, density0) + omp(parallel(1) enable_target(use_target)) for (int j = (x_min - depth + 1); j < (x_max + depth + 2); j++) { for (int k = 0; k < depth; ++k) { - idx2(field.density0, j, 1 - k) = idx2(field.density0, j, 2 + k); + idx2f(field, density0, j, 1 - k) = idx2f(field, density0, j, 2 + k); } } @@ -65,10 +66,11 @@ void update_halo_kernel( // DO j=x_min-depth,x_max+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(field.density0)) + mapToFrom2Df(field, density0) + omp(parallel(1) enable_target(use_target)) for (int j = (x_min - depth + 1); j < (x_max + depth + 2); j++) { for (int k = 0; k < depth; ++k) { - idx2(field.density0, j, y_max + 2 + k) = idx2(field.density0, j, y_max + 1 - k); + idx2f(field, density0, j, y_max + 2 + k) = idx2f(field, density0, j, y_max + 1 - k); } } @@ -78,10 +80,11 @@ void update_halo_kernel( // DO k=y_min-depth,y_max+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(field.density0)) + mapToFrom2Df(field, density0) + omp(parallel(1) enable_target(use_target)) for (int k = (y_min - depth + 1); k < (y_max + depth + 2); k++) { for (int j = 0; j < depth; ++j) { - idx2(field.density0, 1 - j, k) = idx2(field.density0, 2 + j, k); + idx2f(field, density0, 1 - j, k) = idx2f(field, density0, 2 + j, k); } } @@ -91,10 +94,11 @@ void update_halo_kernel( // DO k=y_min-depth,y_max+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(field.density0)) + mapToFrom2Df(field, density0) + omp(parallel(1) enable_target(use_target)) for (int k = (y_min - depth + 1); k < (y_max + depth + 2); k++) { for (int j = 0; j < depth; ++j) { - idx2(field.density0, x_max + 2 + j, k) = idx2(field.density0, x_max + 1 - j, k); + idx2f(field, density0, x_max + 2 + j, k) = idx2f(field, density0, x_max + 1 - j, k); } } @@ -108,10 +112,11 @@ void update_halo_kernel( // DO k=y_min-depth,y_max+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(field.density1)) + mapToFrom2Df(field, density1) + omp(parallel(1) enable_target(use_target)) for (int j = (x_min - depth + 1); j < (x_max + depth + 2); j++) { for (int k = 0; k < depth; ++k) { - idx2(field.density1, j, 1 - k) = idx2(field.density1, j, 2 + k); + idx2f(field, density1, j, 1 - k) = idx2f(field, density1, j, 2 + k); } } @@ -121,10 +126,11 @@ void update_halo_kernel( // DO j=x_min-depth,x_max+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(field.density1)) + mapToFrom2Df(field, density1) + omp(parallel(1) enable_target(use_target)) for (int j = (x_min - depth + 1); j < (x_max + depth + 2); j++) { for (int k = 0; k < depth; ++k) { - idx2(field.density1, j, y_max + 2 + k) = idx2(field.density1, j, y_max + 1 - k); + idx2f(field, density1, j, y_max + 2 + k) = idx2f(field, density1, j, y_max + 1 - k); } } @@ -134,10 +140,11 @@ void update_halo_kernel( // DO k=y_min-depth,y_max+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(field.density1)) + mapToFrom2Df(field, density1) + omp(parallel(1) enable_target(use_target)) for (int k = (y_min - depth + 1); k < (y_max + depth + 2); k++) { for (int j = 0; j < depth; ++j) { - idx2(field.density1, 1 - j, k) = idx2(field.density1, 2 + j, k); + idx2f(field, density1, 1 - j, k) = idx2f(field, density1, 2 + j, k); } } @@ -147,10 +154,11 @@ void update_halo_kernel( // DO k=y_min-depth,y_max+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(field.density1)) + mapToFrom2Df(field, density1) + omp(parallel(1) enable_target(use_target)) for (int k = (y_min - depth + 1); k < (y_max + depth + 2); k++) { for (int j = 0; j < depth; ++j) { - idx2(field.density1, x_max + 2 + j, k) = idx2(field.density1, x_max + 1 - j, k); + idx2f(field, density1, x_max + 2 + j, k) = idx2f(field, density1, x_max + 1 - j, k); } } @@ -162,10 +170,11 @@ void update_halo_kernel( (tile_neighbours[tile_bottom] == external_tile)) { // DO j=x_min-depth,x_max+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(field.energy0)) + mapToFrom2Df(field, energy0) + omp(parallel(1) enable_target(use_target)) for (int j = (x_min - depth + 1); j < (x_max + depth + 2); j++) { for (int k = 0; k < depth; ++k) { - idx2(field.energy0, j, 1 - k) = idx2(field.energy0, j, 2 + k); + idx2f(field, energy0, j, 1 - k) = idx2f(field, energy0, j, 2 + k); } } @@ -174,10 +183,11 @@ void update_halo_kernel( (tile_neighbours[tile_top] == external_tile)) { // DO j=x_min-depth,x_max+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(field.energy0)) + mapToFrom2Df(field, energy0) + omp(parallel(1) enable_target(use_target)) for (int j = (x_min - depth + 1); j < (x_max + depth + 2); j++) { for (int k = 0; k < depth; ++k) { - idx2(field.energy0, j, y_max + 2 + k) = idx2(field.energy0, j, y_max + 1 - k); + idx2f(field, energy0, j, y_max + 2 + k) = idx2f(field, energy0, j, y_max + 1 - k); } } @@ -186,10 +196,11 @@ void update_halo_kernel( (tile_neighbours[tile_left] == external_tile)) { // DO k=y_min-depth,y_max+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(field.energy0)) + mapToFrom2Df(field, energy0) + omp(parallel(1) enable_target(use_target)) for (int k = (y_min - depth + 1); k < (y_max + depth + 2); k++) { for (int j = 0; j < depth; ++j) { - idx2(field.energy0, 1 - j, k) = idx2(field.energy0, 2 + j, k); + idx2f(field, energy0, 1 - j, k) = idx2f(field, energy0, 2 + j, k); } } @@ -198,10 +209,11 @@ void update_halo_kernel( (tile_neighbours[tile_right] == external_tile)) { // DO k=y_min-depth,y_max+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(field.energy0)) + mapToFrom2Df(field, energy0) + omp(parallel(1) enable_target(use_target)) for (int k = (y_min - depth + 1); k < (y_max + depth + 2); k++) { for (int j = 0; j < depth; ++j) { - idx2(field.energy0, x_max + 2 + j, k) = idx2(field.energy0, x_max + 1 - j, k); + idx2f(field, energy0, x_max + 2 + j, k) = idx2f(field, energy0, x_max + 1 - j, k); } } @@ -215,10 +227,11 @@ void update_halo_kernel( // DO j=x_min-depth,x_max+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(field.energy1)) + mapToFrom2Df(field, energy1) + omp(parallel(1) enable_target(use_target)) for (int j = (x_min - depth + 1); j < (x_max + depth + 2); j++) { for (int k = 0; k < depth; ++k) { - idx2(field.energy1, j, 1 - k) = idx2(field.energy1, j, 2 + k); + idx2f(field, energy1, j, 1 - k) = idx2f(field, energy1, j, 2 + k); } } @@ -228,10 +241,11 @@ void update_halo_kernel( // DO j=x_min-depth,x_max+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(field.energy1)) + mapToFrom2Df(field, energy1) + omp(parallel(1) enable_target(use_target)) for (int j = (x_min - depth + 1); j < (x_max + depth + 2); j++) { for (int k = 0; k < depth; ++k) { - idx2(field.energy1, j, y_max + 2 + k) = idx2(field.energy1, j, y_max + 1 - k); + idx2f(field, energy1, j, y_max + 2 + k) = idx2f(field, energy1, j, y_max + 1 - k); } } @@ -241,10 +255,11 @@ void update_halo_kernel( // DO k=y_min-depth,y_max+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(field.energy1)) + mapToFrom2Df(field, energy1) + omp(parallel(1) enable_target(use_target)) for (int k = (y_min - depth + 1); k < (y_max + depth + 2); k++) { for (int j = 0; j < depth; ++j) { - idx2(field.energy1, 1 - j, k) = idx2(field.energy1, 2 + j, k); + idx2f(field, energy1, 1 - j, k) = idx2f(field, energy1, 2 + j, k); } } @@ -254,10 +269,11 @@ void update_halo_kernel( // DO k=y_min-depth,y_max+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(field.energy1)) + mapToFrom2Df(field, energy1) + omp(parallel(1) enable_target(use_target)) for (int k = (y_min - depth + 1); k < (y_max + depth + 2); k++) { for (int j = 0; j < depth; ++j) { - idx2(field.energy1, x_max + 2 + j, k) = idx2(field.energy1, x_max + 1 - j, k); + idx2f(field, energy1, x_max + 2 + j, k) = idx2f(field, energy1, x_max + 1 - j, k); } } @@ -270,10 +286,11 @@ void update_halo_kernel( // DO j=x_min-depth,x_max+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(field.pressure)) + mapToFrom2Df(field, pressure) + omp(parallel(1) enable_target(use_target)) for (int j = (x_min - depth + 1); j < (x_max + depth + 2); j++) { for (int k = 0; k < depth; ++k) { - idx2(field.pressure, j, 1 - k) = idx2(field.pressure, j, 2 + k); + idx2f(field, pressure, j, 1 - k) = idx2f(field, pressure, j, 2 + k); } } @@ -283,10 +300,11 @@ void update_halo_kernel( // DO j=x_min-depth,x_max+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(field.pressure)) + mapToFrom2Df(field, pressure) + omp(parallel(1) enable_target(use_target)) for (int j = (x_min - depth + 1); j < (x_max + depth + 2); j++) { for (int k = 0; k < depth; ++k) { - idx2(field.pressure, j, y_max + 2 + k) = idx2(field.pressure, j, y_max + 1 - k); + idx2f(field, pressure, j, y_max + 2 + k) = idx2f(field, pressure, j, y_max + 1 - k); } } @@ -296,10 +314,11 @@ void update_halo_kernel( // DO k=y_min-depth,y_max+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(field.pressure)) + mapToFrom2Df(field, pressure) + omp(parallel(1) enable_target(use_target)) for (int k = (y_min - depth + 1); k < (y_max + depth + 2); k++) { for (int j = 0; j < depth; ++j) { - idx2(field.pressure, 1 - j, k) = idx2(field.pressure, 2 + j, k); + idx2f(field, pressure, 1 - j, k) = idx2f(field, pressure, 2 + j, k); } } @@ -309,10 +328,11 @@ void update_halo_kernel( // DO k=y_min-depth,y_max+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(field.pressure)) + mapToFrom2Df(field, pressure) + omp(parallel(1) enable_target(use_target)) for (int k = (y_min - depth + 1); k < (y_max + depth + 2); k++) { for (int j = 0; j < depth; ++j) { - idx2(field.pressure, x_max + 2 + j, k) = idx2(field.pressure, x_max + 1 - j, k); + idx2f(field, pressure, x_max + 2 + j, k) = idx2f(field, pressure, x_max + 1 - j, k); } } @@ -325,10 +345,11 @@ void update_halo_kernel( // DO j=x_min-depth,x_max+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(field.viscosity)) + mapToFrom2Df(field, viscosity) + omp(parallel(1) enable_target(use_target)) for (int j = (x_min - depth + 1); j < (x_max + depth + 2); j++) { for (int k = 0; k < depth; ++k) { - idx2(field.viscosity, j, 1 - k) = idx2(field.viscosity, j, 2 + k); + idx2f(field, viscosity, j, 1 - k) = idx2f(field, viscosity, j, 2 + k); } } @@ -338,10 +359,11 @@ void update_halo_kernel( // DO j=x_min-depth,x_max+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(field.viscosity)) + mapToFrom2Df(field, viscosity) + omp(parallel(1) enable_target(use_target)) for (int j = (x_min - depth + 1); j < (x_max + depth + 2); j++) { for (int k = 0; k < depth; ++k) { - idx2(field.viscosity, j, y_max + 2 + k) = idx2(field.viscosity, j, y_max + 1 - k); + idx2f(field, viscosity, j, y_max + 2 + k) = idx2f(field, viscosity, j, y_max + 1 - k); } } @@ -351,10 +373,11 @@ void update_halo_kernel( // DO k=y_min-depth,y_max+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(field.viscosity)) + mapToFrom2Df(field, viscosity) + omp(parallel(1) enable_target(use_target)) for (int k = (y_min - depth + 1); k < (y_max + depth + 2); k++) { for (int j = 0; j < depth; ++j) { - idx2(field.viscosity, 1 - j, k) = idx2(field.viscosity, 2 + j, k); + idx2f(field, viscosity, 1 - j, k) = idx2f(field, viscosity, 2 + j, k); } } @@ -364,10 +387,11 @@ void update_halo_kernel( // DO k=y_min-depth,y_max+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(field.viscosity)) + mapToFrom2Df(field, viscosity) + omp(parallel(1) enable_target(use_target)) for (int k = (y_min - depth + 1); k < (y_max + depth + 2); k++) { for (int j = 0; j < depth; ++j) { - idx2(field.viscosity, x_max + 2 + j, k) = idx2(field.viscosity, x_max + 1 - j, k); + idx2f(field, viscosity, x_max + 2 + j, k) = idx2f(field, viscosity, x_max + 1 - j, k); } } @@ -380,10 +404,11 @@ void update_halo_kernel( // DO j=x_min-depth,x_max+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(field.soundspeed)) + mapToFrom2Df(field, soundspeed) + omp(parallel(1) enable_target(use_target)) for (int j = (x_min - depth + 1); j < (x_max + depth + 2); j++) { for (int k = 0; k < depth; ++k) { - idx2(field.soundspeed, j, 1 - k) = idx2(field.soundspeed, j, +k); + idx2f(field, soundspeed, j, 1 - k) = idx2f(field, soundspeed, j, +k); } } @@ -393,10 +418,11 @@ void update_halo_kernel( // DO j=x_min-depth,x_max+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(field.soundspeed)) + mapToFrom2Df(field, soundspeed) + omp(parallel(1) enable_target(use_target)) for (int j = (x_min - depth + 1); j < (x_max + depth + 2); j++) { for (int k = 0; k < depth; ++k) { - idx2(field.soundspeed, j, y_max + 2 + k) = idx2(field.soundspeed, j, y_max + 1 - k); + idx2f(field, soundspeed, j, y_max + 2 + k) = idx2f(field, soundspeed, j, y_max + 1 - k); } } @@ -406,10 +432,11 @@ void update_halo_kernel( // DO k=y_min-depth,y_max+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(field.soundspeed)) + mapToFrom2Df(field, soundspeed) + omp(parallel(1) enable_target(use_target)) for (int k = (y_min - depth + 1); k < (y_max + depth + 2); k++) { for (int j = 0; j < depth; ++j) { - idx2(field.soundspeed, 1 - j, k) = idx2(field.soundspeed, 2 + j, k); + idx2f(field, soundspeed, 1 - j, k) = idx2f(field, soundspeed, 2 + j, k); } } @@ -419,10 +446,11 @@ void update_halo_kernel( // DO k=y_min-depth,y_max+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(field.soundspeed)) + mapToFrom2Df(field, soundspeed) + omp(parallel(1) enable_target(use_target)) for (int k = (y_min - depth + 1); k < (y_max + depth + 2); k++) { for (int j = 0; j < depth; ++j) { - idx2(field.soundspeed, x_max + 2 + j, k) = idx2(field.soundspeed, x_max + 1 - j, k); + idx2f(field, soundspeed, x_max + 2 + j, k) = idx2f(field, soundspeed, x_max + 1 - j, k); } } @@ -436,10 +464,11 @@ void update_halo_kernel( // DO j=x_min-depth,x_max+1+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(field.xvel0)) + mapToFrom2Df(field, xvel0) + omp(parallel(1) enable_target(use_target)) for (int j = (x_min - depth + 1); j < (x_max + 1 + depth + 2); j++) { for (int k = 0; k < depth; ++k) { - idx2(field.xvel0, j, 1 - k) = idx2(field.xvel0, j, + idx2f(field, xvel0, j, 1 - k) = idx2f(field, xvel0, j, 1 + 2 + k); } @@ -451,10 +480,11 @@ void update_halo_kernel( // DO j=x_min-depth,x_max+1+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(field.xvel0)) + mapToFrom2Df(field, xvel0) + omp(parallel(1) enable_target(use_target)) for (int j = (x_min - depth + 1); j < (x_max + 1 + depth + 2); j++) { for (int k = 0; k < depth; ++k) { - idx2(field.xvel0, j, y_max + 1 + 2 + k) = idx2(field.xvel0, j, y_max + 1 - k); + idx2f(field, xvel0, j, y_max + 1 + 2 + k) = idx2f(field, xvel0, j, y_max + 1 - k); } } @@ -464,10 +494,11 @@ void update_halo_kernel( // DO k=y_min-depth,y_max+1+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(field.xvel0)) + mapToFrom2Df(field, xvel0) + omp(parallel(1) enable_target(use_target)) for (int k = (y_min - depth + 1); k < (y_max + 1 + depth + 2); k++) { for (int j = 0; j < depth; ++j) { - idx2(field.xvel0, 1 - j, k) = -idx2(field.xvel0, 1 + 2 + j, k); + idx2f(field, xvel0, 1 - j, k) = -idx2f(field, xvel0, 1 + 2 + j, k); } } @@ -477,10 +508,11 @@ void update_halo_kernel( // DO k=y_min-depth,y_max+1+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(field.xvel0)) + mapToFrom2Df(field, xvel0) + omp(parallel(1) enable_target(use_target)) for (int k = (y_min - depth + 1); k < (y_max + 1 + depth + 2); k++) { for (int j = 0; j < depth; ++j) { - idx2(field.xvel0, x_max + 2 + 1 + j, k) = -idx2(field.xvel0, x_max + 1 - j, k); + idx2f(field, xvel0, x_max + 2 + 1 + j, k) = -idx2f(field, xvel0, x_max + 1 - j, k); } } @@ -493,10 +525,11 @@ void update_halo_kernel( // DO j=x_min-depth,x_max+1+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(field.xvel1)) + mapToFrom2Df(field, xvel1) + omp(parallel(1) enable_target(use_target)) for (int j = (x_min - depth + 1); j < (x_max + 1 + depth + 2); j++) { for (int k = 0; k < depth; ++k) { - idx2(field.xvel1, j, 1 - k) = idx2(field.xvel1, j, 1 + 2 + k); + idx2f(field, xvel1, j, 1 - k) = idx2f(field, xvel1, j, 1 + 2 + k); } } @@ -506,10 +539,11 @@ void update_halo_kernel( // DO j=x_min-depth,x_max+1+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(field.xvel1)) + mapToFrom2Df(field, xvel1) + omp(parallel(1) enable_target(use_target)) for (int j = (x_min - depth + 1); j < (x_max + 1 + depth + 2); j++) { for (int k = 0; k < depth; ++k) { - idx2(field.xvel1, j, y_max + 1 + 2 + k) = idx2(field.xvel1, j, y_max + 1 - k); + idx2f(field, xvel1, j, y_max + 1 + 2 + k) = idx2f(field, xvel1, j, y_max + 1 - k); } } @@ -519,10 +553,11 @@ void update_halo_kernel( // DO k=y_min-depth,y_max+1+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(field.xvel1)) + mapToFrom2Df(field, xvel1) + omp(parallel(1) enable_target(use_target)) for (int k = (y_min - depth + 1); k < (y_max + 1 + depth + 2); k++) { for (int j = 0; j < depth; ++j) { - idx2(field.xvel1, 1 - j, k) = -idx2(field.xvel1, 1 + 2 + j, k); + idx2f(field, xvel1, 1 - j, k) = -idx2f(field, xvel1, 1 + 2 + j, k); } } @@ -532,10 +567,11 @@ void update_halo_kernel( // DO k=y_min-depth,y_max+1+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(field.xvel1)) + mapToFrom2Df(field, xvel1) + omp(parallel(1) enable_target(use_target)) for (int k = (y_min - depth + 1); k < (y_max + 1 + depth + 2); k++) { for (int j = 0; j < depth; ++j) { - idx2(field.xvel1, x_max + 2 + 1 + j, k) = -idx2(field.xvel1, x_max + 1 - j, k); + idx2f(field, xvel1, x_max + 2 + 1 + j, k) = -idx2f(field, xvel1, x_max + 1 - j, k); } } @@ -548,10 +584,11 @@ void update_halo_kernel( // DO j=x_min-depth,x_max+1+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(field.yvel0)) + mapToFrom2Df(field, yvel0) + omp(parallel(1) enable_target(use_target)) for (int j = (x_min - depth + 1); j < (x_max + 1 + depth + 2); j++) { for (int k = 0; k < depth; ++k) { - idx2(field.yvel0, j, 1 - k) = -idx2(field.yvel0, j, 1 + 2 + k); + idx2f(field, yvel0, j, 1 - k) = -idx2f(field, yvel0, j, 1 + 2 + k); } } @@ -561,10 +598,11 @@ void update_halo_kernel( // DO j=x_min-depth,x_max+1+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(field.yvel0)) + mapToFrom2Df(field, yvel0) + omp(parallel(1) enable_target(use_target)) for (int j = (x_min - depth + 1); j < (x_max + 1 + depth + 2); j++) { for (int k = 0; k < depth; ++k) { - idx2(field.yvel0, j, y_max + 1 + 2 + k) = -idx2(field.yvel0, j, y_max + 1 - k); + idx2f(field, yvel0, j, y_max + 1 + 2 + k) = -idx2f(field, yvel0, j, y_max + 1 - k); } } @@ -574,10 +612,11 @@ void update_halo_kernel( // DO k=y_min-depth,y_max+1+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(field.yvel0)) + mapToFrom2Df(field, yvel0) + omp(parallel(1) enable_target(use_target)) for (int k = (y_min - depth + 1); k < (y_max + 1 + depth + 2); k++) { for (int j = 0; j < depth; ++j) { - idx2(field.yvel0, 1 - j, k) = idx2(field.yvel0, 1 + 2 + j, k); + idx2f(field, yvel0, 1 - j, k) = idx2f(field, yvel0, 1 + 2 + j, k); } } @@ -587,10 +626,11 @@ void update_halo_kernel( // DO k=y_min-depth,y_max+1+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(field.yvel0)) + mapToFrom2Df(field, yvel0) + omp(parallel(1) enable_target(use_target)) for (int k = (y_min - depth + 1); k < (y_max + 1 + depth + 2); k++) { for (int j = 0; j < depth; ++j) { - idx2(field.yvel0, x_max + 2 + 1 + j, k) = idx2(field.yvel0, x_max + 1 - j, k); + idx2f(field, yvel0, x_max + 2 + 1 + j, k) = idx2f(field, yvel0, x_max + 1 - j, k); } } @@ -603,10 +643,11 @@ void update_halo_kernel( // DO j=x_min-depth,x_max+1+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(field.yvel1)) + mapToFrom2Df(field, yvel1) + omp(parallel(1) enable_target(use_target)) for (int j = (x_min - depth + 1); j < (x_max + 1 + depth + 2); j++) { for (int k = 0; k < depth; ++k) { - idx2(field.yvel1, j, 1 - k) = -idx2(field.yvel1, j, 1 + 2 + k); + idx2f(field, yvel1, j, 1 - k) = -idx2f(field, yvel1, j, 1 + 2 + k); } } @@ -616,10 +657,11 @@ void update_halo_kernel( // DO j=x_min-depth,x_max+1+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(field.yvel1)) + mapToFrom2Df(field, yvel1) + omp(parallel(1) enable_target(use_target)) for (int j = (x_min - depth + 1); j < (x_max + 1 + depth + 2); j++) { for (int k = 0; k < depth; ++k) { - idx2(field.yvel1, j, y_max + 1 + 2 + k) = -idx2(field.yvel1, j, y_max + 1 - k); + idx2f(field, yvel1, j, y_max + 1 + 2 + k) = -idx2f(field, yvel1, j, y_max + 1 - k); } } @@ -629,10 +671,11 @@ void update_halo_kernel( // DO k=y_min-depth,y_max+1+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(field.yvel1)) + mapToFrom2Df(field, yvel1) + omp(parallel(1) enable_target(use_target)) for (int k = (y_min - depth + 1); k < (y_max + 1 + depth + 2); k++) { for (int j = 0; j < depth; ++j) { - idx2(field.yvel1, 1 - j, k) = idx2(field.yvel1, 1 + 2 + j, k); + idx2f(field, yvel1, 1 - j, k) = idx2f(field, yvel1, 1 + 2 + j, k); } } @@ -642,10 +685,11 @@ void update_halo_kernel( // DO k=y_min-depth,y_max+1+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(field.yvel1)) + mapToFrom2Df(field, yvel1) + omp(parallel(1) enable_target(use_target)) for (int k = (y_min - depth + 1); k < (y_max + 1 + depth + 2); k++) { for (int j = 0; j < depth; ++j) { - idx2(field.yvel1, x_max + 2 + 1 + j, k) = idx2(field.yvel1, x_max + 1 - j, k); + idx2f(field, yvel1, x_max + 2 + 1 + j, k) = idx2f(field, yvel1, x_max + 1 - j, k); } } @@ -659,10 +703,11 @@ void update_halo_kernel( // DO j=x_min-depth,x_max+1+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(field.vol_flux_x)) + mapToFrom2Df(field, vol_flux_x) + omp(parallel(1) enable_target(use_target)) for (int j = (x_min - depth + 1); j < (x_max + 1 + depth + 2); j++) { for (int k = 0; k < depth; ++k) { - idx2(field.vol_flux_x, j, 1 - k) = idx2(field.vol_flux_x, j, 1 + 2 + k); + idx2f(field, vol_flux_x, j, 1 - k) = idx2f(field, vol_flux_x, j, 1 + 2 + k); } } @@ -672,10 +717,11 @@ void update_halo_kernel( // DO j=x_min-depth,x_max+1+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(field.vol_flux_x)) + mapToFrom2Df(field, vol_flux_x) + omp(parallel(1) enable_target(use_target)) for (int j = (x_min - depth + 1); j < (x_max + 1 + depth + 2); j++) { for (int k = 0; k < depth; ++k) { - idx2(field.vol_flux_x, j, y_max + 2 + k) = idx2(field.vol_flux_x, j, y_max - k); + idx2f(field, vol_flux_x, j, y_max + 2 + k) = idx2f(field, vol_flux_x, j, y_max - k); } } @@ -685,10 +731,11 @@ void update_halo_kernel( // DO k=y_min-depth,y_max+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(field.vol_flux_x)) + mapToFrom2Df(field, vol_flux_x) + omp(parallel(1) enable_target(use_target)) for (int k = (y_min - depth + 1); k < (y_max + depth + 2); k++) { for (int j = 0; j < depth; ++j) { - idx2(field.vol_flux_x, 1 - j, k) = -idx2(field.vol_flux_x, 1 + 2 + j, k); + idx2f(field, vol_flux_x, 1 - j, k) = -idx2f(field, vol_flux_x, 1 + 2 + j, k); } } @@ -698,10 +745,11 @@ void update_halo_kernel( // DO k=y_min-depth,y_max+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(field.vol_flux_x)) + mapToFrom2Df(field, vol_flux_x) + omp(parallel(1) enable_target(use_target)) for (int k = (y_min - depth + 1); k < (y_max + depth + 2); k++) { for (int j = 0; j < depth; ++j) { - idx2(field.vol_flux_x, x_max + j + 1 + 2, k) = -idx2(field.vol_flux_x, x_max + 1 - j, k); + idx2f(field, vol_flux_x, x_max + j + 1 + 2, k) = -idx2f(field, vol_flux_x, x_max + 1 - j, k); } } @@ -715,10 +763,11 @@ void update_halo_kernel( // DO j=x_min-depth,x_max+1+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(field.mass_flux_x)) + mapToFrom2Df(field, mass_flux_x) + omp(parallel(1) enable_target(use_target)) for (int j = (x_min - depth + 1); j < (x_max + 1 + depth + 2); j++) { for (int k = 0; k < depth; ++k) { - idx2(field.mass_flux_x, j, 1 - k) = idx2(field.mass_flux_x, j, 1 + 2 + k); + idx2f(field, mass_flux_x, j, 1 - k) = idx2f(field, mass_flux_x, j, 1 + 2 + k); } } @@ -728,10 +777,11 @@ void update_halo_kernel( // DO j=x_min-depth,x_max+1+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(field.mass_flux_x)) + mapToFrom2Df(field, mass_flux_x) + omp(parallel(1) enable_target(use_target)) for (int j = (x_min - depth + 1); j < (x_max + 1 + depth + 2); j++) { for (int k = 0; k < depth; ++k) { - idx2(field.mass_flux_x, j, y_max + 2 + k) = idx2(field.mass_flux_x, j, y_max - k); + idx2f(field, mass_flux_x, j, y_max + 2 + k) = idx2f(field, mass_flux_x, j, y_max - k); } } @@ -741,10 +791,11 @@ void update_halo_kernel( // DO k=y_min-depth,y_max+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(field.mass_flux_x)) + mapToFrom2Df(field, mass_flux_x) + omp(parallel(1) enable_target(use_target)) for (int k = (y_min - depth + 1); k < (y_max + depth + 2); k++) { for (int j = 0; j < depth; ++j) { - idx2(field.mass_flux_x, 1 - j, k) = -idx2(field.mass_flux_x, 1 + 2 + j, k); + idx2f(field, mass_flux_x, 1 - j, k) = -idx2f(field, mass_flux_x, 1 + 2 + j, k); } } @@ -754,10 +805,11 @@ void update_halo_kernel( // DO k=y_min-depth,y_max+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(field.mass_flux_x)) + mapToFrom2Df(field, mass_flux_x) + omp(parallel(1) enable_target(use_target)) for (int k = (y_min - depth + 1); k < (y_max + depth + 2); k++) { for (int j = 0; j < depth; ++j) { - idx2(field.mass_flux_x, x_max + j + 1 + 2, k) = -idx2(field.mass_flux_x, x_max + 1 - j, k); + idx2f(field, mass_flux_x, x_max + j + 1 + 2, k) = -idx2f(field, mass_flux_x, x_max + 1 - j, k); } } @@ -771,10 +823,11 @@ void update_halo_kernel( // DO j=x_min-depth,x_max+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(field.vol_flux_y)) + mapToFrom2Df(field, vol_flux_y) + omp(parallel(1) enable_target(use_target)) for (int j = (x_min - depth + 1); j < (x_max + depth + 2); j++) { for (int k = 0; k < depth; ++k) { - idx2(field.vol_flux_y, j, 1 - k) = -idx2(field.vol_flux_y, j, 1 + 2 + k); + idx2f(field, vol_flux_y, j, 1 - k) = -idx2f(field, vol_flux_y, j, 1 + 2 + k); } } @@ -784,10 +837,11 @@ void update_halo_kernel( // DO j=x_min-depth,x_max+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(field.vol_flux_y)) + mapToFrom2Df(field, vol_flux_y) + omp(parallel(1) enable_target(use_target)) for (int j = (x_min - depth + 1); j < (x_max + depth + 2); j++) { for (int k = 0; k < depth; ++k) { - idx2(field.vol_flux_y, j, y_max + k + 1 + 2) = -idx2(field.vol_flux_y, j, y_max + 1 - k); + idx2f(field, vol_flux_y, j, y_max + k + 1 + 2) = -idx2f(field, vol_flux_y, j, y_max + 1 - k); } } @@ -797,10 +851,11 @@ void update_halo_kernel( // DO k=y_min-depth,y_max+1+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(field.vol_flux_y)) + mapToFrom2Df(field, vol_flux_y) + omp(parallel(1) enable_target(use_target)) for (int k = (y_min - depth + 1); k < (y_max + 1 + depth + 2); k++) { for (int j = 0; j < depth; ++j) { - idx2(field.vol_flux_y, 1 - j, k) = idx2(field.vol_flux_y, 1 + 2 + j, k); + idx2f(field, vol_flux_y, 1 - j, k) = idx2f(field, vol_flux_y, 1 + 2 + j, k); } } @@ -810,10 +865,11 @@ void update_halo_kernel( // DO k=y_min-depth,y_max+1+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(field.vol_flux_y)) + mapToFrom2Df(field, vol_flux_y) + omp(parallel(1) enable_target(use_target)) for (int k = (y_min - depth + 1); k < (y_max + 1 + depth + 2); k++) { for (int j = 0; j < depth; ++j) { - idx2(field.vol_flux_y, x_max + 2 + j, k) = idx2(field.vol_flux_y, x_max - j, k); + idx2f(field, vol_flux_y, x_max + 2 + j, k) = idx2f(field, vol_flux_y, x_max - j, k); } } @@ -826,10 +882,11 @@ void update_halo_kernel( // DO j=x_min-depth,x_max+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(field.mass_flux_y)) + mapToFrom2Df(field, mass_flux_y) + omp(parallel(1) enable_target(use_target)) for (int j = (x_min - depth + 1); j < (x_max + depth + 2); j++) { for (int k = 0; k < depth; ++k) { - idx2(field.mass_flux_y, j, 1 - k) = -idx2(field.mass_flux_y, j, 1 + 2 + k); + idx2f(field, mass_flux_y, j, 1 - k) = -idx2f(field, mass_flux_y, j, 1 + 2 + k); } } @@ -839,10 +896,11 @@ void update_halo_kernel( // DO j=x_min-depth,x_max+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(field.mass_flux_y)) + mapToFrom2Df(field, mass_flux_y) + omp(parallel(1) enable_target(use_target)) for (int j = (x_min - depth + 1); j < (x_max + depth + 2); j++) { for (int k = 0; k < depth; ++k) { - idx2(field.mass_flux_y, j, y_max + k + 1 + 2) = -idx2(field.mass_flux_y, j, y_max + 1 - k); + idx2f(field, mass_flux_y, j, y_max + k + 1 + 2) = -idx2f(field, mass_flux_y, j, y_max + 1 - k); } } @@ -852,10 +910,11 @@ void update_halo_kernel( // DO k=y_min-depth,y_max+1+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(field.mass_flux_y)) + mapToFrom2Df(field, mass_flux_y) + omp(parallel(1) enable_target(use_target)) for (int k = (y_min - depth + 1); k < (y_max + 1 + depth + 2); k++) { for (int j = 0; j < depth; ++j) { - idx2(field.mass_flux_y, 1 - j, k) = idx2(field.mass_flux_y, 1 + 2 + j, k); + idx2f(field, mass_flux_y, 1 - j, k) = idx2f(field, mass_flux_y, 1 + 2 + j, k); } } @@ -865,10 +924,11 @@ void update_halo_kernel( // DO k=y_min-depth,y_max+1+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(field.mass_flux_y)) + mapToFrom2Df(field, mass_flux_y) + omp(parallel(1) enable_target(use_target)) for (int k = (y_min - depth + 1); k < (y_max + 1 + depth + 2); k++) { for (int j = 0; j < depth; ++j) { - idx2(field.mass_flux_y, x_max + 2 + j, k) = idx2(field.mass_flux_y, x_max - j, k); + idx2f(field, mass_flux_y, x_max + 2 + j, k) = idx2f(field, mass_flux_y, x_max - j, k); } } diff --git a/src/update_tile_halo_kernel.cpp b/src/update_tile_halo_kernel.cpp index d0fe087..0767735 100644 --- a/src/update_tile_halo_kernel.cpp +++ b/src/update_tile_halo_kernel.cpp @@ -30,39 +30,41 @@ void update_tile_halo_l_kernel( bool use_target, int x_min, int x_max, int y_min, int y_max, - clover::Buffer2D &density0, clover::Buffer2D &energy0, - clover::Buffer2D &pressure, clover::Buffer2D &viscosity, - clover::Buffer2D &soundspeed, clover::Buffer2D &density1, - clover::Buffer2D &energy1, clover::Buffer2D &xvel0, - clover::Buffer2D &yvel0, clover::Buffer2D &xvel1, - clover::Buffer2D &yvel1, clover::Buffer2D &vol_flux_x, - clover::Buffer2D &vol_flux_y, - clover::Buffer2D &mass_flux_x, - clover::Buffer2D &mass_flux_y, int left_xmin, int left_xmax, - int left_ymin, int left_ymax, clover::Buffer2D &left_density0, - clover::Buffer2D &left_energy0, - clover::Buffer2D &left_pressure, - clover::Buffer2D &left_viscosity, - clover::Buffer2D &left_soundspeed, - clover::Buffer2D &left_density1, - clover::Buffer2D &left_energy1, - clover::Buffer2D &left_xvel0, - clover::Buffer2D &left_yvel0, - clover::Buffer2D &left_xvel1, - clover::Buffer2D &left_yvel1, - clover::Buffer2D &left_vol_flux_x, - clover::Buffer2D &left_vol_flux_y, - clover::Buffer2D &left_mass_flux_x, - clover::Buffer2D &left_mass_flux_y, const int fields[NUM_FIELDS], + clover::Buffer2D &density0_buffer, clover::Buffer2D &energy0_buffer, + clover::Buffer2D &pressure_buffer, clover::Buffer2D &viscosity_buffer, + clover::Buffer2D &soundspeed_buffer, clover::Buffer2D &density1_buffer, + clover::Buffer2D &energy1_buffer, clover::Buffer2D &xvel0_buffer, + clover::Buffer2D &yvel0_buffer, clover::Buffer2D &xvel1_buffer, + clover::Buffer2D &yvel1_buffer, clover::Buffer2D &vol_flux_x_buffer, + clover::Buffer2D &vol_flux_y_buffer, + clover::Buffer2D &mass_flux_x_buffer, + clover::Buffer2D &mass_flux_y_buffer, int left_xmin, int left_xmax, + int left_ymin, int left_ymax, clover::Buffer2D &left_density0_buffer, + clover::Buffer2D &left_energy0_buffer, + clover::Buffer2D &left_pressure_buffer, + clover::Buffer2D &left_viscosity_buffer, + clover::Buffer2D &left_soundspeed_buffer, + clover::Buffer2D &left_density1_buffer, + clover::Buffer2D &left_energy1_buffer, + clover::Buffer2D &left_xvel0_buffer, + clover::Buffer2D &left_yvel0_buffer, + clover::Buffer2D &left_xvel1_buffer, + clover::Buffer2D &left_yvel1_buffer, + clover::Buffer2D &left_vol_flux_x_buffer, + clover::Buffer2D &left_vol_flux_y_buffer, + clover::Buffer2D &left_mass_flux_x_buffer, + clover::Buffer2D &left_mass_flux_y_buffer, const int fields[NUM_FIELDS], int depth) { // Density 0 if (fields[field_density0] == 1) { // DO k=y_min-depth,y_max+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(density0) mapToFrom2D(left_density0)) + mapToFrom2Dfe(density0_buffer, density0) + mapToFrom2Dfe(left_density0_buffer, left_density0) + omp(parallel(1) enable_target(use_target)) for (int k = (y_min - depth + 1); k < (y_max + depth + 2); k++) { for (int j = 0; j < depth; ++j) { - idx2(density0, x_min - j, k) = idx2(left_density0, left_xmax + 1 - j, k); + idx2f(,density0, x_min - j, k) = idx2f(,left_density0, left_xmax + 1 - j, k); } } } @@ -71,10 +73,12 @@ void update_tile_halo_l_kernel( if (fields[field_density1] == 1) { // DO k=y_min-depth,y_max+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(density1) mapToFrom2D(left_density1)) + mapToFrom2Dfe(density1_buffer, density1) + mapToFrom2Dfe(left_density1_buffer, left_density1) + omp(parallel(1) enable_target(use_target)) for (int k = (y_min - depth + 1); k < (y_max + depth + 2); k++) { for (int j = 0; j < depth; ++j) { - idx2(density1, x_min - j, k) = idx2(left_density1, left_xmax + 1 - j, k); + idx2f(,density1, x_min - j, k) = idx2f(,left_density1, left_xmax + 1 - j, k); } } } @@ -83,10 +87,12 @@ void update_tile_halo_l_kernel( if (fields[field_energy0] == 1) { // DO k=y_min-depth,y_max+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(energy0) mapToFrom2D(left_energy0)) + mapToFrom2Dfe(energy0_buffer, energy0) + mapToFrom2Dfe(left_energy0_buffer, left_energy0) + omp(parallel(1) enable_target(use_target)) for (int k = (y_min - depth + 1); k < (y_max + depth + 2); k++) { for (int j = 0; j < depth; ++j) { - idx2(energy0, x_min - j, k) = idx2(left_energy0, left_xmax + 1 - j, k); + idx2f(,energy0, x_min - j, k) = idx2f(,left_energy0, left_xmax + 1 - j, k); } } } @@ -95,10 +101,12 @@ void update_tile_halo_l_kernel( if (fields[field_energy1] == 1) { // DO k=y_min-depth,y_max+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(energy1) mapToFrom2D(left_energy1)) + mapToFrom2Dfe(energy1_buffer, energy1) + mapToFrom2Dfe(left_energy1_buffer, left_energy1) + omp(parallel(1) enable_target(use_target)) for (int k = (y_min - depth + 1); k < (y_max + depth + 2); k++) { for (int j = 0; j < depth; ++j) { - idx2(energy1, x_min - j, k) = idx2(left_energy1, left_xmax + 1 - j, k); + idx2f(,energy1, x_min - j, k) = idx2f(,left_energy1, left_xmax + 1 - j, k); } } } @@ -107,10 +115,12 @@ void update_tile_halo_l_kernel( if (fields[field_pressure] == 1) { // DO k=y_min-depth,y_max+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(pressure) mapToFrom2D(left_pressure)) + mapToFrom2Dfe(pressure_buffer, pressure) + mapToFrom2Dfe(left_pressure_buffer, left_pressure) + omp(parallel(1) enable_target(use_target)) for (int k = (y_min - depth + 1); k < (y_max + depth + 2); k++) { for (int j = 0; j < depth; ++j) { - idx2(pressure, x_min - j, k) = idx2(left_pressure, left_xmax + 1 - j, k); + idx2f(,pressure, x_min - j, k) = idx2f(,left_pressure, left_xmax + 1 - j, k); } } } @@ -119,10 +129,12 @@ void update_tile_halo_l_kernel( if (fields[field_viscosity] == 1) { // DO k=y_min-depth,y_max+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(viscosity) mapToFrom2D(left_viscosity)) + mapToFrom2Dfe(viscosity_buffer, viscosity) + mapToFrom2Dfe(left_viscosity_buffer, left_viscosity) + omp(parallel(1) enable_target(use_target)) for (int k = (y_min - depth + 1); k < (y_max + depth + 2); k++) { for (int j = 0; j < depth; ++j) { - idx2(viscosity, x_min - j, k) = idx2(left_viscosity, left_xmax + 1 - j, k); + idx2f(,viscosity, x_min - j, k) = idx2f(,left_viscosity, left_xmax + 1 - j, k); } } } @@ -131,10 +143,12 @@ void update_tile_halo_l_kernel( if (fields[field_soundspeed] == 1) { // DO k=y_min-depth,y_max+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(soundspeed) mapToFrom2D(left_soundspeed)) + mapToFrom2Dfe(soundspeed_buffer, soundspeed) + mapToFrom2Dfe(left_soundspeed_buffer, left_soundspeed) + omp(parallel(1) enable_target(use_target)) for (int k = (y_min - depth + 1); k < (y_max + depth + 2); k++) { for (int j = 0; j < depth; ++j) { - idx2(soundspeed, x_min - j, k) = idx2(left_soundspeed, left_xmax + 1 - j, k); + idx2f(,soundspeed, x_min - j, k) = idx2f(,left_soundspeed, left_xmax + 1 - j, k); } } } @@ -143,10 +157,12 @@ void update_tile_halo_l_kernel( if (fields[field_xvel0] == 1) { // DO k=y_min-depth,y_max+1+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(xvel0) mapToFrom2D(left_xvel0)) + mapToFrom2Dfe(xvel0_buffer, xvel0) + mapToFrom2Dfe(left_xvel0_buffer, left_xvel0) + omp(parallel(1) enable_target(use_target)) for (int k = (y_min - depth + 1); k < (y_max + 1 + depth + 2); k++) { for (int j = 0; j < depth; ++j) { - idx2(xvel0, x_min - j, k) = idx2(left_xvel0, left_xmax + 1 - j, k); + idx2f(,xvel0, x_min - j, k) = idx2f(,left_xvel0, left_xmax + 1 - j, k); } } } @@ -155,10 +171,12 @@ void update_tile_halo_l_kernel( if (fields[field_xvel1] == 1) { // DO k=y_min-depth,y_max+1+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(xvel1) mapToFrom2D(left_xvel1)) + mapToFrom2Dfe(xvel1_buffer, xvel1) + mapToFrom2Dfe(left_xvel1_buffer, left_xvel1) + omp(parallel(1) enable_target(use_target)) for (int k = (y_min - depth + 1); k < (y_max + 1 + depth + 2); k++) { for (int j = 0; j < depth; ++j) { - idx2(xvel1, x_min - j, k) = idx2(left_xvel1, left_xmax + 1 - j, k); + idx2f(,xvel1, x_min - j, k) = idx2f(,left_xvel1, left_xmax + 1 - j, k); } } } @@ -167,10 +185,12 @@ void update_tile_halo_l_kernel( if (fields[field_yvel0] == 1) { // DO k=y_min-depth,y_max+1+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(yvel0) mapToFrom2D(left_yvel0)) + mapToFrom2Dfe(yvel0_buffer, yvel0) + mapToFrom2Dfe(left_yvel0_buffer, left_yvel0) + omp(parallel(1) enable_target(use_target)) for (int k = (y_min - depth + 1); k < (y_max + 1 + depth + 2); k++) { for (int j = 0; j < depth; ++j) { - idx2(yvel0, x_min - j, k) = idx2(left_yvel0, left_xmax + 1 - j, k); + idx2f(,yvel0, x_min - j, k) = idx2f(,left_yvel0, left_xmax + 1 - j, k); } } } @@ -179,10 +199,12 @@ void update_tile_halo_l_kernel( if (fields[field_yvel1] == 1) { // DO k=y_min-depth,y_max+1+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(yvel1) mapToFrom2D(left_yvel1)) + mapToFrom2Dfe(yvel1_buffer, yvel1) + mapToFrom2Dfe(left_yvel1_buffer, left_yvel1) + omp(parallel(1) enable_target(use_target)) for (int k = (y_min - depth + 1); k < (y_max + 1 + depth + 2); k++) { for (int j = 0; j < depth; ++j) { - idx2(yvel1, x_min - j, k) = idx2(left_yvel1, left_xmax + 1 - j, k); + idx2f(,yvel1, x_min - j, k) = idx2f(,left_yvel1, left_xmax + 1 - j, k); } } } @@ -191,10 +213,12 @@ void update_tile_halo_l_kernel( if (fields[field_vol_flux_x] == 1) { // DO k=y_min-depth,y_max+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(vol_flux_x) mapToFrom2D(left_vol_flux_x)) + mapToFrom2Dfe(vol_flux_x_buffer, vol_flux_x) + mapToFrom2Dfe(left_vol_flux_x_buffer, left_vol_flux_x) + omp(parallel(1) enable_target(use_target)) for (int k = (y_min - depth + 1); k < (y_max + depth + 2); k++) { for (int j = 0; j < depth; ++j) { - idx2(vol_flux_x, x_min - j, k) = idx2(left_vol_flux_x, left_xmax + 1 - j, k); + idx2f(,vol_flux_x, x_min - j, k) = idx2f(,left_vol_flux_x, left_xmax + 1 - j, k); } } } @@ -203,10 +227,12 @@ void update_tile_halo_l_kernel( if (fields[field_mass_flux_x] == 1) { // DO k=y_min-depth,y_max+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(mass_flux_x) mapToFrom2D(left_mass_flux_x)) + mapToFrom2Dfe(mass_flux_x_buffer, mass_flux_x) + mapToFrom2Dfe(left_mass_flux_x_buffer, left_mass_flux_x) + omp(parallel(1) enable_target(use_target)) for (int k = (y_min - depth + 1); k < (y_max + depth + 2); k++) { for (int j = 0; j < depth; ++j) { - idx2(mass_flux_x, x_min - j, k) = idx2(left_mass_flux_x, left_xmax + 1 - j, k); + idx2f(,mass_flux_x, x_min - j, k) = idx2f(,left_mass_flux_x, left_xmax + 1 - j, k); } } } @@ -215,10 +241,12 @@ void update_tile_halo_l_kernel( if (fields[field_vol_flux_y] == 1) { // DO k=y_min-depth,y_max+1+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(vol_flux_y) mapToFrom2D(left_vol_flux_y)) + mapToFrom2Dfe(vol_flux_y_buffer, vol_flux_y) + mapToFrom2Dfe(left_vol_flux_y_buffer, left_vol_flux_y) + omp(parallel(1) enable_target(use_target)) for (int k = (y_min - depth + 1); k < (y_max + 1 + depth + 2); k++) { for (int j = 0; j < depth; ++j) { - idx2(vol_flux_y, x_min - j, k) = idx2(left_vol_flux_y, left_xmax + 1 - j, k); + idx2f(,vol_flux_y, x_min - j, k) = idx2f(,left_vol_flux_y, left_xmax + 1 - j, k); } } } @@ -227,10 +255,12 @@ void update_tile_halo_l_kernel( if (fields[field_mass_flux_y] == 1) { // DO k=y_min-depth,y_max+1+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(mass_flux_y) mapToFrom2D(left_mass_flux_y)) + mapToFrom2Dfe(mass_flux_y_buffer, mass_flux_y) + mapToFrom2Dfe(left_mass_flux_y_buffer, left_mass_flux_y) + omp(parallel(1) enable_target(use_target)) for (int k = (y_min - depth + 1); k < (y_max + 1 + depth + 2); k++) { for (int j = 0; j < depth; ++j) { - idx2(mass_flux_y, x_min - j, k) = idx2(left_mass_flux_y, left_xmax + 1 - j, k); + idx2f(,mass_flux_y, x_min - j, k) = idx2f(,left_mass_flux_y, left_xmax + 1 - j, k); } } } @@ -239,39 +269,41 @@ void update_tile_halo_l_kernel( void update_tile_halo_r_kernel( bool use_target, int x_min, int x_max, int y_min, int y_max, - clover::Buffer2D &density0, clover::Buffer2D &energy0, - clover::Buffer2D &pressure, clover::Buffer2D &viscosity, - clover::Buffer2D &soundspeed, clover::Buffer2D &density1, - clover::Buffer2D &energy1, clover::Buffer2D &xvel0, - clover::Buffer2D &yvel0, clover::Buffer2D &xvel1, - clover::Buffer2D &yvel1, clover::Buffer2D &vol_flux_x, - clover::Buffer2D &vol_flux_y, - clover::Buffer2D &mass_flux_x, - clover::Buffer2D &mass_flux_y, int right_xmin, int right_xmax, - int right_ymin, int right_ymax, clover::Buffer2D &right_density0, - clover::Buffer2D &right_energy0, - clover::Buffer2D &right_pressure, - clover::Buffer2D &right_viscosity, - clover::Buffer2D &right_soundspeed, - clover::Buffer2D &right_density1, - clover::Buffer2D &right_energy1, - clover::Buffer2D &right_xvel0, - clover::Buffer2D &right_yvel0, - clover::Buffer2D &right_xvel1, - clover::Buffer2D &right_yvel1, - clover::Buffer2D &right_vol_flux_x, - clover::Buffer2D &right_vol_flux_y, - clover::Buffer2D &right_mass_flux_x, - clover::Buffer2D &right_mass_flux_y, const int fields[NUM_FIELDS], + clover::Buffer2D &density0_buffer, clover::Buffer2D &energy0_buffer, + clover::Buffer2D &pressure_buffer, clover::Buffer2D &viscosity_buffer, + clover::Buffer2D &soundspeed_buffer, clover::Buffer2D &density1_buffer, + clover::Buffer2D &energy1_buffer, clover::Buffer2D &xvel0_buffer, + clover::Buffer2D &yvel0_buffer, clover::Buffer2D &xvel1_buffer, + clover::Buffer2D &yvel1_buffer, clover::Buffer2D &vol_flux_x_buffer, + clover::Buffer2D &vol_flux_y_buffer, + clover::Buffer2D &mass_flux_x_buffer, + clover::Buffer2D &mass_flux_y_buffer, int right_xmin, int right_xmax, + int right_ymin, int right_ymax, clover::Buffer2D &right_density0_buffer, + clover::Buffer2D &right_energy0_buffer, + clover::Buffer2D &right_pressure_buffer, + clover::Buffer2D &right_viscosity_buffer, + clover::Buffer2D &right_soundspeed_buffer, + clover::Buffer2D &right_density1_buffer, + clover::Buffer2D &right_energy1_buffer, + clover::Buffer2D &right_xvel0_buffer, + clover::Buffer2D &right_yvel0_buffer, + clover::Buffer2D &right_xvel1_buffer, + clover::Buffer2D &right_yvel1_buffer, + clover::Buffer2D &right_vol_flux_x_buffer, + clover::Buffer2D &right_vol_flux_y_buffer, + clover::Buffer2D &right_mass_flux_x_buffer, + clover::Buffer2D &right_mass_flux_y_buffer, const int fields[NUM_FIELDS], int depth) { // Density 0 if (fields[field_density0] == 1) { // DO k=y_min-depth,y_max+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(density0) mapToFrom2D(right_density0)) + mapToFrom2Dfe(density0_buffer, density0) + mapToFrom2Dfe(right_density0_buffer, right_density0) + omp(parallel(1) enable_target(use_target)) for (int k = (y_min - depth + 1); k < (y_max + depth + 2); k++) { for (int j = 0; j < depth; ++j) { - idx2(density0, x_max + 2 + j, k) = idx2(right_density0, right_xmin - 1 + 2 + j, k); + idx2f(,density0, x_max + 2 + j, k) = idx2f(,right_density0, right_xmin - 1 + 2 + j, k); } } } @@ -280,10 +312,12 @@ void update_tile_halo_r_kernel( if (fields[field_density1] == 1) { // DO k=y_min-depth,y_max+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(density1) mapToFrom2D(right_density1)) + mapToFrom2Dfe(density1_buffer, density1) + mapToFrom2Dfe(right_density1_buffer, right_density1) + omp(parallel(1) enable_target(use_target)) for (int k = (y_min - depth + 1); k < (y_max + depth + 2); k++) { for (int j = 0; j < depth; ++j) { - idx2(density1, x_max + 2 + j, k) = idx2(right_density1, right_xmin - 1 + 2 + j, k); + idx2f(,density1, x_max + 2 + j, k) = idx2f(,right_density1, right_xmin - 1 + 2 + j, k); } } } @@ -292,10 +326,12 @@ void update_tile_halo_r_kernel( if (fields[field_energy0] == 1) { // DO k=y_min-depth,y_max+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(energy0) mapToFrom2D(right_energy0)) + mapToFrom2Dfe(energy0_buffer, energy0) + mapToFrom2Dfe(right_energy0_buffer, right_energy0) + omp(parallel(1) enable_target(use_target)) for (int k = (y_min - depth + 1); k < (y_max + depth + 2); k++) { for (int j = 0; j < depth; ++j) { - idx2(energy0, x_max + 2 + j, k) = idx2(right_energy0, right_xmin - 1 + 2 + j, k); + idx2f(,energy0, x_max + 2 + j, k) = idx2f(,right_energy0, right_xmin - 1 + 2 + j, k); } } } @@ -304,10 +340,12 @@ void update_tile_halo_r_kernel( if (fields[field_energy1] == 1) { // DO k=y_min-depth,y_max+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(energy1) mapToFrom2D(right_energy1)) + mapToFrom2Dfe(energy1_buffer, energy1) + mapToFrom2Dfe(right_energy1_buffer, right_energy1) + omp(parallel(1) enable_target(use_target)) for (int k = (y_min - depth + 1); k < (y_max + depth + 2); k++) { for (int j = 0; j < depth; ++j) { - idx2(energy1, x_max + 2 + j, k) = idx2(right_energy1, right_xmin - 1 + 2 + j, k); + idx2f(,energy1, x_max + 2 + j, k) = idx2f(,right_energy1, right_xmin - 1 + 2 + j, k); } } } @@ -316,10 +354,12 @@ void update_tile_halo_r_kernel( if (fields[field_pressure] == 1) { // DO k=y_min-depth,y_max+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(pressure) mapToFrom2D(right_pressure)) + mapToFrom2Dfe(pressure_buffer, pressure) + mapToFrom2Dfe(right_pressure_buffer, right_pressure) + omp(parallel(1) enable_target(use_target)) for (int k = (y_min - depth + 1); k < (y_max + depth + 2); k++) { for (int j = 0; j < depth; ++j) { - idx2(pressure, x_max + 2 + j, k) = idx2(right_pressure, right_xmin - 1 + 2 + j, k); + idx2f(,pressure, x_max + 2 + j, k) = idx2f(,right_pressure, right_xmin - 1 + 2 + j, k); } } } @@ -328,10 +368,12 @@ void update_tile_halo_r_kernel( if (fields[field_viscosity] == 1) { // DO k=y_min-depth,y_max+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(viscosity) mapToFrom2D(right_viscosity)) + mapToFrom2Dfe(viscosity_buffer, viscosity) + mapToFrom2Dfe(right_viscosity_buffer, right_viscosity) + omp(parallel(1) enable_target(use_target)) for (int k = (y_min - depth + 1); k < (y_max + depth + 2); k++) { for (int j = 0; j < depth; ++j) { - idx2(viscosity, x_max + 2 + j, k) = idx2(right_viscosity, right_xmin - 1 + 2 + j, k); + idx2f(,viscosity, x_max + 2 + j, k) = idx2f(,right_viscosity, right_xmin - 1 + 2 + j, k); } } } @@ -340,10 +382,12 @@ void update_tile_halo_r_kernel( if (fields[field_soundspeed] == 1) { // DO k=y_min-depth,y_max+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(soundspeed) mapToFrom2D(right_soundspeed)) + mapToFrom2Dfe(soundspeed_buffer, soundspeed) + mapToFrom2Dfe(right_soundspeed_buffer, right_soundspeed) + omp(parallel(1) enable_target(use_target)) for (int k = (y_min - depth + 1); k < (y_max + depth + 2); k++) { for (int j = 0; j < depth; ++j) { - idx2(soundspeed, x_max + 2 + j, k) = idx2(right_soundspeed, right_xmin - 1 + 2 + j, k); + idx2f(,soundspeed, x_max + 2 + j, k) = idx2f(,right_soundspeed, right_xmin - 1 + 2 + j, k); } } } @@ -352,10 +396,12 @@ void update_tile_halo_r_kernel( if (fields[field_xvel0] == 1) { // DO k=y_min-depth,y_max+1+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(xvel0) mapToFrom2D(right_xvel0)) + mapToFrom2Dfe(xvel0_buffer, xvel0) + mapToFrom2Dfe(right_xvel0_buffer, right_xvel0) + omp(parallel(1) enable_target(use_target)) for (int k = (y_min - depth + 1); k < (y_max + 1 + depth + 2); k++) { for (int j = 0; j < depth; ++j) { - idx2(xvel0, x_max + 1 + 2 + j, k) = idx2(right_xvel0, right_xmin + 1 - 1 + 2 + j, k); + idx2f(,xvel0, x_max + 1 + 2 + j, k) = idx2f(,right_xvel0, right_xmin + 1 - 1 + 2 + j, k); } } } @@ -364,10 +410,12 @@ void update_tile_halo_r_kernel( if (fields[field_xvel1] == 1) { // DO k=y_min-depth,y_max+1+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(xvel1) mapToFrom2D(right_xvel1)) + mapToFrom2Dfe(xvel1_buffer, xvel1) + mapToFrom2Dfe(right_xvel1_buffer, right_xvel1) + omp(parallel(1) enable_target(use_target)) for (int k = (y_min - depth + 1); k < (y_max + 1 + depth + 2); k++) { for (int j = 0; j < depth; ++j) { - idx2(xvel1, x_max + 1 + 2 + j, k) = idx2(right_xvel1, right_xmin + 1 - 1 + 2 + j, k); + idx2f(,xvel1, x_max + 1 + 2 + j, k) = idx2f(,right_xvel1, right_xmin + 1 - 1 + 2 + j, k); } } } @@ -376,10 +424,12 @@ void update_tile_halo_r_kernel( if (fields[field_yvel0] == 1) { // DO k=y_min-depth,y_max+1+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(yvel0) mapToFrom2D(right_yvel0)) + mapToFrom2Dfe(yvel0_buffer, yvel0) + mapToFrom2Dfe(right_yvel0_buffer, right_yvel0) + omp(parallel(1) enable_target(use_target)) for (int k = (y_min - depth + 1); k < (y_max + 1 + depth + 2); k++) { for (int j = 0; j < depth; ++j) { - idx2(yvel0, x_max + 1 + 2 + j, k) = idx2(right_yvel0, right_xmin + 1 - 1 + 2 + j, k); + idx2f(,yvel0, x_max + 1 + 2 + j, k) = idx2f(,right_yvel0, right_xmin + 1 - 1 + 2 + j, k); } } } @@ -388,10 +438,12 @@ void update_tile_halo_r_kernel( if (fields[field_yvel1] == 1) { // DO k=y_min-depth,y_max+1+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(yvel1) mapToFrom2D(right_yvel1)) + mapToFrom2Dfe(yvel1_buffer, yvel1) + mapToFrom2Dfe(right_yvel1_buffer, right_yvel1) + omp(parallel(1) enable_target(use_target)) for (int k = (y_min - depth + 1); k < (y_max + 1 + depth + 2); k++) { for (int j = 0; j < depth; ++j) { - idx2(yvel1, x_max + 1 + 2 + j, k) = idx2(right_yvel1, right_xmin + 1 - 1 + 2 + j, k); + idx2f(,yvel1, x_max + 1 + 2 + j, k) = idx2f(,right_yvel1, right_xmin + 1 - 1 + 2 + j, k); } } } @@ -400,10 +452,12 @@ void update_tile_halo_r_kernel( if (fields[field_vol_flux_x] == 1) { // DO k=y_min-depth,y_max+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(vol_flux_x) mapToFrom2D(right_vol_flux_x)) + mapToFrom2Dfe(vol_flux_x_buffer, vol_flux_x) + mapToFrom2Dfe(right_vol_flux_x_buffer, right_vol_flux_x) + omp(parallel(1) enable_target(use_target)) for (int k = (y_min - depth + 1); k < (y_max + depth + 2); k++) { for (int j = 0; j < depth; ++j) { - idx2(vol_flux_x, x_max + 1 + 2 + j, k) = idx2(right_vol_flux_x, right_xmin + 1 - 1 + 2 + j, k); + idx2f(,vol_flux_x, x_max + 1 + 2 + j, k) = idx2f(,right_vol_flux_x, right_xmin + 1 - 1 + 2 + j, k); } } } @@ -412,10 +466,12 @@ void update_tile_halo_r_kernel( if (fields[field_mass_flux_x] == 1) { // DO k=y_min-depth,y_max+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(mass_flux_x) mapToFrom2D(right_mass_flux_x)) + mapToFrom2Dfe(mass_flux_x_buffer, mass_flux_x) + mapToFrom2Dfe(right_mass_flux_x_buffer, right_mass_flux_x) + omp(parallel(1) enable_target(use_target)) for (int k = (y_min - depth + 1); k < (y_max + depth + 2); k++) { for (int j = 0; j < depth; ++j) { - idx2(mass_flux_x, x_max + 1 + 2 + j, k) = idx2(right_mass_flux_x, right_xmin + 1 - 1 + 2 + j, k); + idx2f(,mass_flux_x, x_max + 1 + 2 + j, k) = idx2f(,right_mass_flux_x, right_xmin + 1 - 1 + 2 + j, k); } } } @@ -424,10 +480,12 @@ void update_tile_halo_r_kernel( if (fields[field_vol_flux_y] == 1) { // DO k=y_min-depth,y_max+1+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(vol_flux_y) mapToFrom2D(right_vol_flux_y)) + mapToFrom2Dfe(vol_flux_y_buffer, vol_flux_y) + mapToFrom2Dfe(right_vol_flux_y_buffer, right_vol_flux_y) + omp(parallel(1) enable_target(use_target)) for (int k = (y_min - depth + 1); k < (y_max + 1 + depth + 2); k++) { for (int j = 0; j < depth; ++j) { - idx2(vol_flux_y, x_max + 2 + j, k) = idx2(right_vol_flux_y, right_xmin - 1 + 2 + j, k); + idx2f(,vol_flux_y, x_max + 2 + j, k) = idx2f(,right_vol_flux_y, right_xmin - 1 + 2 + j, k); } } } @@ -436,10 +494,12 @@ void update_tile_halo_r_kernel( if (fields[field_mass_flux_y] == 1) { // DO k=y_min-depth,y_max+1+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(mass_flux_y) mapToFrom2D(right_mass_flux_y)) + mapToFrom2Dfe(mass_flux_y_buffer, mass_flux_y) + mapToFrom2Dfe(right_mass_flux_y_buffer, right_mass_flux_y) + omp(parallel(1) enable_target(use_target)) for (int k = (y_min - depth + 1); k < (y_max + 1 + depth + 2); k++) { for (int j = 0; j < depth; ++j) { - idx2(mass_flux_y, x_max + 2 + j, k) = idx2(right_mass_flux_y, right_xmin - 1 + 2 + j, k); + idx2f(,mass_flux_y, x_max + 2 + j, k) = idx2f(,right_mass_flux_y, right_xmin - 1 + 2 + j, k); } } } @@ -452,37 +512,39 @@ void update_tile_halo_r_kernel( void update_tile_halo_t_kernel( bool use_target, int x_min, int x_max, int y_min, int y_max, - clover::Buffer2D &density0, clover::Buffer2D &energy0, - clover::Buffer2D &pressure, clover::Buffer2D &viscosity, - clover::Buffer2D &soundspeed, clover::Buffer2D &density1, - clover::Buffer2D &energy1, clover::Buffer2D &xvel0, - clover::Buffer2D &yvel0, clover::Buffer2D &xvel1, - clover::Buffer2D &yvel1, clover::Buffer2D &vol_flux_x, - clover::Buffer2D &vol_flux_y, - clover::Buffer2D &mass_flux_x, - clover::Buffer2D &mass_flux_y, int top_xmin, int top_xmax, - int top_ymin, int top_ymax, clover::Buffer2D &top_density0, - clover::Buffer2D &top_energy0, - clover::Buffer2D &top_pressure, - clover::Buffer2D &top_viscosity, - clover::Buffer2D &top_soundspeed, - clover::Buffer2D &top_density1, - clover::Buffer2D &top_energy1, - clover::Buffer2D &top_xvel0, clover::Buffer2D &top_yvel0, - clover::Buffer2D &top_xvel1, clover::Buffer2D &top_yvel1, - clover::Buffer2D &top_vol_flux_x, - clover::Buffer2D &top_vol_flux_y, - clover::Buffer2D &top_mass_flux_x, - clover::Buffer2D &top_mass_flux_y, const int fields[NUM_FIELDS], + clover::Buffer2D &density0_buffer, clover::Buffer2D &energy0_buffer, + clover::Buffer2D &pressure_buffer, clover::Buffer2D &viscosity_buffer, + clover::Buffer2D &soundspeed_buffer, clover::Buffer2D &density1_buffer, + clover::Buffer2D &energy1_buffer, clover::Buffer2D &xvel0_buffer, + clover::Buffer2D &yvel0_buffer, clover::Buffer2D &xvel1_buffer, + clover::Buffer2D &yvel1_buffer, clover::Buffer2D &vol_flux_x_buffer, + clover::Buffer2D &vol_flux_y_buffer, + clover::Buffer2D &mass_flux_x_buffer, + clover::Buffer2D &mass_flux_y_buffer, int top_xmin, int top_xmax, + int top_ymin, int top_ymax, clover::Buffer2D &top_density0_buffer, + clover::Buffer2D &top_energy0_buffer, + clover::Buffer2D &top_pressure_buffer, + clover::Buffer2D &top_viscosity_buffer, + clover::Buffer2D &top_soundspeed_buffer, + clover::Buffer2D &top_density1_buffer, + clover::Buffer2D &top_energy1_buffer, + clover::Buffer2D &top_xvel0_buffer, clover::Buffer2D &top_yvel0_buffer, + clover::Buffer2D &top_xvel1_buffer, clover::Buffer2D &top_yvel1_buffer, + clover::Buffer2D &top_vol_flux_x_buffer, + clover::Buffer2D &top_vol_flux_y_buffer, + clover::Buffer2D &top_mass_flux_x_buffer, + clover::Buffer2D &top_mass_flux_y_buffer, const int fields[NUM_FIELDS], int depth) { // Density 0 if (fields[field_density0] == 1) { for (int k = 0; k < depth; ++k) { // DO j=x_min-depth, x_max+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(density0) mapToFrom2D(top_density0)) + mapToFrom2Dfe(density0_buffer, density0) + mapToFrom2Dfe(top_density0_buffer, top_density0) + omp(parallel(1) enable_target(use_target)) for (int j = (x_min - depth + 1); j < (x_max + depth + 2); j++) { - idx2(density0, j, y_max + 2 + k) = idx2(top_density0, j, top_ymin - 1 + 2 + k); + idx2f(,density0, j, y_max + 2 + k) = idx2f(,top_density0, j, top_ymin - 1 + 2 + k); } } } @@ -492,9 +554,11 @@ void update_tile_halo_t_kernel( for (int k = 0; k < depth; ++k) { // DO j=x_min-depth, x_max+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(density1) mapToFrom2D(top_density1)) + mapToFrom2Dfe(density1_buffer, density1) + mapToFrom2Dfe(top_density1_buffer, top_density1) + omp(parallel(1) enable_target(use_target)) for (int j = (x_min - depth + 1); j < (x_max + depth + 2); j++) { - idx2(density1, j, y_max + 2 + k) = idx2(top_density1, j, top_ymin - 1 + 2 + k); + idx2f(,density1, j, y_max + 2 + k) = idx2f(,top_density1, j, top_ymin - 1 + 2 + k); } } } @@ -504,9 +568,11 @@ void update_tile_halo_t_kernel( for (int k = 0; k < depth; ++k) { // DO j=x_min-depth, x_max+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(energy0) mapToFrom2D(top_energy0)) + mapToFrom2Dfe(energy0_buffer, energy0) + mapToFrom2Dfe(top_energy0_buffer, top_energy0) + omp(parallel(1) enable_target(use_target)) for (int j = (x_min - depth + 1); j < (x_max + depth + 2); j++) { - idx2(energy0, j, y_max + 2 + k) = idx2(top_energy0, j, top_ymin - 1 + 2 + k); + idx2f(,energy0, j, y_max + 2 + k) = idx2f(,top_energy0, j, top_ymin - 1 + 2 + k); } } } @@ -516,9 +582,11 @@ void update_tile_halo_t_kernel( for (int k = 0; k < depth; ++k) { // DO j=x_min-depth, x_max+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(energy1) mapToFrom2D(top_energy1)) + mapToFrom2Dfe(energy1_buffer, energy1) + mapToFrom2Dfe(top_energy1_buffer, top_energy1) + omp(parallel(1) enable_target(use_target)) for (int j = (x_min - depth + 1); j < (x_max + depth + 2); j++) { - idx2(energy1, j, y_max + 2 + k) = idx2(top_energy1, j, top_ymin - 1 + 2 + k); + idx2f(,energy1, j, y_max + 2 + k) = idx2f(,top_energy1, j, top_ymin - 1 + 2 + k); } } } @@ -528,9 +596,11 @@ void update_tile_halo_t_kernel( for (int k = 0; k < depth; ++k) { // DO j=x_min-depth, x_max+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(pressure) mapToFrom2D(top_pressure)) + mapToFrom2Dfe(pressure_buffer, pressure) + mapToFrom2Dfe(top_pressure_buffer, top_pressure) + omp(parallel(1) enable_target(use_target)) for (int j = (x_min - depth + 1); j < (x_max + depth + 2); j++) { - idx2(pressure, j, y_max + 2 + k) = idx2(top_pressure, j, top_ymin - 1 + 2 + k); + idx2f(,pressure, j, y_max + 2 + k) = idx2f(,top_pressure, j, top_ymin - 1 + 2 + k); } } } @@ -540,9 +610,11 @@ void update_tile_halo_t_kernel( for (int k = 0; k < depth; ++k) { // DO j=x_min-depth, x_max+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(viscosity) mapToFrom2D(top_viscosity)) + mapToFrom2Dfe(viscosity_buffer, viscosity) + mapToFrom2Dfe(top_viscosity_buffer, top_viscosity) + omp(parallel(1) enable_target(use_target)) for (int j = (x_min - depth + 1); j < (x_max + depth + 2); j++) { - idx2(viscosity, j, y_max + 2 + k) = idx2(top_viscosity, j, top_ymin - 1 + 2 + k); + idx2f(,viscosity, j, y_max + 2 + k) = idx2f(,top_viscosity, j, top_ymin - 1 + 2 + k); } } } @@ -552,9 +624,11 @@ void update_tile_halo_t_kernel( for (int k = 0; k < depth; ++k) { // DO j=x_min-depth, x_max+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(soundspeed) mapToFrom2D(top_soundspeed)) + mapToFrom2Dfe(soundspeed_buffer, soundspeed) + mapToFrom2Dfe(top_soundspeed_buffer, top_soundspeed) + omp(parallel(1) enable_target(use_target)) for (int j = (x_min - depth + 1); j < (x_max + depth + 2); j++) { - idx2(soundspeed, j, y_max + 2 + k) = idx2(top_soundspeed, j, top_ymin - 1 + 2 + k); + idx2f(,soundspeed, j, y_max + 2 + k) = idx2f(,top_soundspeed, j, top_ymin - 1 + 2 + k); } } } @@ -564,9 +638,11 @@ void update_tile_halo_t_kernel( for (int k = 0; k < depth; ++k) { // DO j=x_min-depth, x_max+1+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(xvel0) mapToFrom2D(top_xvel0)) + mapToFrom2Dfe(xvel0_buffer, xvel0) + mapToFrom2Dfe(top_xvel0_buffer, top_xvel0) + omp(parallel(1) enable_target(use_target)) for (int j = (x_min - depth + 1); j < (x_max + 1 + depth + 2); j++) { - idx2(xvel0, j, y_max + 1 + 2 + k) = idx2(top_xvel0, j, top_ymin + 1 - 1 + 2 + k); + idx2f(,xvel0, j, y_max + 1 + 2 + k) = idx2f(,top_xvel0, j, top_ymin + 1 - 1 + 2 + k); } } } @@ -576,9 +652,11 @@ void update_tile_halo_t_kernel( for (int k = 0; k < depth; ++k) { // DO j=x_min-depth, x_max+1+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(xvel1) mapToFrom2D(top_xvel1)) + mapToFrom2Dfe(xvel1_buffer, xvel1) + mapToFrom2Dfe(top_xvel1_buffer, top_xvel1) + omp(parallel(1) enable_target(use_target)) for (int j = (x_min - depth + 1); j < (x_max + 1 + depth + 2); j++) { - idx2(xvel1, j, y_max + 1 + 2 + k) = idx2(top_xvel1, j, top_ymin + 1 - 1 + 2 + k); + idx2f(,xvel1, j, y_max + 1 + 2 + k) = idx2f(,top_xvel1, j, top_ymin + 1 - 1 + 2 + k); } } } @@ -588,9 +666,11 @@ void update_tile_halo_t_kernel( for (int k = 0; k < depth; ++k) { // DO j=x_min-depth, x_max+1+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(yvel0) mapToFrom2D(top_yvel0)) + mapToFrom2Dfe(yvel0_buffer, yvel0) + mapToFrom2Dfe(top_yvel0_buffer, top_yvel0) + omp(parallel(1) enable_target(use_target)) for (int j = (x_min - depth + 1); j < (x_max + 1 + depth + 2); j++) { - idx2(yvel0, j, y_max + 1 + 2 + k) = idx2(top_yvel0, j, top_ymin + 1 - 1 + 2 + k); + idx2f(,yvel0, j, y_max + 1 + 2 + k) = idx2f(,top_yvel0, j, top_ymin + 1 - 1 + 2 + k); } } } @@ -600,9 +680,11 @@ void update_tile_halo_t_kernel( for (int k = 0; k < depth; ++k) { // DO j=x_min-depth, x_max+1+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(yvel1) mapToFrom2D(top_yvel1)) + mapToFrom2Dfe(yvel1_buffer, yvel1) + mapToFrom2Dfe(top_yvel1_buffer, top_yvel1) + omp(parallel(1) enable_target(use_target)) for (int j = (x_min - depth + 1); j < (x_max + 1 + depth + 2); j++) { - idx2(yvel1, j, y_max + 1 + 2 + k) = idx2(top_yvel1, j, top_ymin + 1 - 1 + 2 + k); + idx2f(,yvel1, j, y_max + 1 + 2 + k) = idx2f(,top_yvel1, j, top_ymin + 1 - 1 + 2 + k); } } } @@ -612,9 +694,11 @@ void update_tile_halo_t_kernel( for (int k = 0; k < depth; ++k) { // DO j=x_min-depth, x_max+1+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(vol_flux_x) mapToFrom2D(top_vol_flux_x)) + mapToFrom2Dfe(vol_flux_x_buffer, vol_flux_x) + mapToFrom2Dfe(top_vol_flux_x_buffer, top_vol_flux_x) + omp(parallel(1) enable_target(use_target)) for (int j = (x_min - depth + 1); j < (x_max + 1 + depth + 2); j++) { - idx2(vol_flux_x, j, y_max + 2 + k) = idx2(top_vol_flux_x, j, top_ymin - 1 + 2 + k); + idx2f(,vol_flux_x, j, y_max + 2 + k) = idx2f(,top_vol_flux_x, j, top_ymin - 1 + 2 + k); } } } @@ -624,9 +708,11 @@ void update_tile_halo_t_kernel( for (int k = 0; k < depth; ++k) { // DO j=x_min-depth, x_max+1+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(mass_flux_x) mapToFrom2D(top_mass_flux_x)) + mapToFrom2Dfe(mass_flux_x_buffer, mass_flux_x) + mapToFrom2Dfe(top_mass_flux_x_buffer, top_mass_flux_x) + omp(parallel(1) enable_target(use_target)) for (int j = (x_min - depth + 1); j < (x_max + 1 + depth + 2); j++) { - idx2(mass_flux_x, j, y_max + 2 + k) = idx2(top_mass_flux_x, j, top_ymin - 1 + 2 + k); + idx2f(,mass_flux_x, j, y_max + 2 + k) = idx2f(,top_mass_flux_x, j, top_ymin - 1 + 2 + k); } } } @@ -636,9 +722,11 @@ void update_tile_halo_t_kernel( for (int k = 0; k < depth; ++k) { // DO j=x_min-depth, x_max+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(vol_flux_y) mapToFrom2D(top_vol_flux_y)) + mapToFrom2Dfe(vol_flux_y_buffer, vol_flux_y) + mapToFrom2Dfe(top_vol_flux_y_buffer, top_vol_flux_y) + omp(parallel(1) enable_target(use_target)) for (int j = (x_min - depth + 1); j < (x_max + depth + 2); j++) { - idx2(vol_flux_y, j, y_max + 1 + 2 + k) = idx2(top_vol_flux_y, j, top_ymin + 1 - 1 + 2 + k); + idx2f(,vol_flux_y, j, y_max + 1 + 2 + k) = idx2f(,top_vol_flux_y, j, top_ymin + 1 - 1 + 2 + k); } } } @@ -648,9 +736,11 @@ void update_tile_halo_t_kernel( for (int k = 0; k < depth; ++k) { // DO j=x_min-depth, x_max+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(mass_flux_y) mapToFrom2D(top_mass_flux_y)) + mapToFrom2Dfe(mass_flux_y_buffer, mass_flux_y) + mapToFrom2Dfe(top_mass_flux_y_buffer, top_mass_flux_y) + omp(parallel(1) enable_target(use_target)) for (int j = (x_min - depth + 1); j < (x_max + depth + 2); j++) { - idx2(mass_flux_y, j, y_max + 1 + 2 + k) = idx2(top_mass_flux_y, j, top_ymin + 1 - 1 + 2 + k); + idx2f(,mass_flux_y, j, y_max + 1 + 2 + k) = idx2f(,top_mass_flux_y, j, top_ymin + 1 - 1 + 2 + k); } } } @@ -659,40 +749,42 @@ void update_tile_halo_t_kernel( void update_tile_halo_b_kernel( bool use_target, int x_min, int x_max, int y_min, int y_max, - clover::Buffer2D &density0, clover::Buffer2D &energy0, - clover::Buffer2D &pressure, clover::Buffer2D &viscosity, - clover::Buffer2D &soundspeed, clover::Buffer2D &density1, - clover::Buffer2D &energy1, clover::Buffer2D &xvel0, - clover::Buffer2D &yvel0, clover::Buffer2D &xvel1, - clover::Buffer2D &yvel1, clover::Buffer2D &vol_flux_x, - clover::Buffer2D &vol_flux_y, - clover::Buffer2D &mass_flux_x, - clover::Buffer2D &mass_flux_y, int bottom_xmin, int bottom_xmax, + clover::Buffer2D &density0_buffer, clover::Buffer2D &energy0_buffer, + clover::Buffer2D &pressure_buffer, clover::Buffer2D &viscosity_buffer, + clover::Buffer2D &soundspeed_buffer, clover::Buffer2D &density1_buffer, + clover::Buffer2D &energy1_buffer, clover::Buffer2D &xvel0_buffer, + clover::Buffer2D &yvel0_buffer, clover::Buffer2D &xvel1_buffer, + clover::Buffer2D &yvel1_buffer, clover::Buffer2D &vol_flux_x_buffer, + clover::Buffer2D &vol_flux_y_buffer, + clover::Buffer2D &mass_flux_x_buffer, + clover::Buffer2D &mass_flux_y_buffer, int bottom_xmin, int bottom_xmax, int bottom_ymin, int bottom_ymax, - clover::Buffer2D &bottom_density0, - clover::Buffer2D &bottom_energy0, - clover::Buffer2D &bottom_pressure, - clover::Buffer2D &bottom_viscosity, - clover::Buffer2D &bottom_soundspeed, - clover::Buffer2D &bottom_density1, - clover::Buffer2D &bottom_energy1, - clover::Buffer2D &bottom_xvel0, - clover::Buffer2D &bottom_yvel0, - clover::Buffer2D &bottom_xvel1, - clover::Buffer2D &bottom_yvel1, - clover::Buffer2D &bottom_vol_flux_x, - clover::Buffer2D &bottom_vol_flux_y, - clover::Buffer2D &bottom_mass_flux_x, - clover::Buffer2D &bottom_mass_flux_y, const int fields[NUM_FIELDS], + clover::Buffer2D &bottom_density0_buffer, + clover::Buffer2D &bottom_energy0_buffer, + clover::Buffer2D &bottom_pressure_buffer, + clover::Buffer2D &bottom_viscosity_buffer, + clover::Buffer2D &bottom_soundspeed_buffer, + clover::Buffer2D &bottom_density1_buffer, + clover::Buffer2D &bottom_energy1_buffer, + clover::Buffer2D &bottom_xvel0_buffer, + clover::Buffer2D &bottom_yvel0_buffer, + clover::Buffer2D &bottom_xvel1_buffer, + clover::Buffer2D &bottom_yvel1_buffer, + clover::Buffer2D &bottom_vol_flux_x_buffer, + clover::Buffer2D &bottom_vol_flux_y_buffer, + clover::Buffer2D &bottom_mass_flux_x_buffer, + clover::Buffer2D &bottom_mass_flux_y_buffer, const int fields[NUM_FIELDS], int depth) { // Density 0 if (fields[field_density0] == 1) { for (int k = 0; k < depth; ++k) { // DO j=x_min-depth, x_max+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(density0) mapToFrom2D(bottom_density0)) + mapToFrom2Dfe(density0_buffer, density0) + mapToFrom2Dfe(bottom_density0_buffer, bottom_density0) + omp(parallel(1) enable_target(use_target)) for (int j = (x_min - depth + 1); j < (x_max + depth + 2); j++) { - idx2(density0, j, y_min - k) = idx2(bottom_density0, j, bottom_ymax + 1 - k); + idx2f(,density0, j, y_min - k) = idx2f(,bottom_density0, j, bottom_ymax + 1 - k); } } } @@ -702,9 +794,11 @@ void update_tile_halo_b_kernel( for (int k = 0; k < depth; ++k) { // DO j=x_min-depth, x_max+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(density1) mapToFrom2D(bottom_density1)) + mapToFrom2Dfe(density1_buffer, density1) + mapToFrom2Dfe(bottom_density1_buffer, bottom_density1) + omp(parallel(1) enable_target(use_target)) for (int j = (x_min - depth + 1); j < (x_max + depth + 2); j++) { - idx2(density1, j, y_min - k) = idx2(bottom_density1, j, bottom_ymax + 1 - k); + idx2f(,density1, j, y_min - k) = idx2f(,bottom_density1, j, bottom_ymax + 1 - k); } } } @@ -714,9 +808,11 @@ void update_tile_halo_b_kernel( for (int k = 0; k < depth; ++k) { // DO j=x_min-depth, x_max+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(energy0) mapToFrom2D(bottom_energy0)) + mapToFrom2Dfe(energy0_buffer, energy0) + mapToFrom2Dfe(bottom_energy0_buffer, bottom_energy0) + omp(parallel(1) enable_target(use_target)) for (int j = (x_min - depth + 1); j < (x_max + depth + 2); j++) { - idx2(energy0, j, y_min - k) = idx2(bottom_energy0, j, bottom_ymax + 1 - k); + idx2f(,energy0, j, y_min - k) = idx2f(,bottom_energy0, j, bottom_ymax + 1 - k); } } } @@ -726,9 +822,11 @@ void update_tile_halo_b_kernel( for (int k = 0; k < depth; ++k) { // DO j=x_min-depth, x_max+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(energy1) mapToFrom2D(bottom_energy1)) + mapToFrom2Dfe(energy1_buffer, energy1) + mapToFrom2Dfe(bottom_energy1_buffer, bottom_energy1) + omp(parallel(1) enable_target(use_target)) for (int j = (x_min - depth + 1); j < (x_max + depth + 2); j++) { - idx2(energy1, j, y_min - k) = idx2(bottom_energy1, j, bottom_ymax + 1 - k); + idx2f(,energy1, j, y_min - k) = idx2f(,bottom_energy1, j, bottom_ymax + 1 - k); } } } @@ -738,9 +836,11 @@ void update_tile_halo_b_kernel( for (int k = 0; k < depth; ++k) { // DO j=x_min-depth, x_max+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(pressure) mapToFrom2D(bottom_pressure)) + mapToFrom2Dfe(pressure_buffer, pressure) + mapToFrom2Dfe(bottom_pressure_buffer, bottom_pressure) + omp(parallel(1) enable_target(use_target)) for (int j = (x_min - depth + 1); j < (x_max + depth + 2); j++) { - idx2(pressure, j, y_min - k) = idx2(bottom_pressure, j, bottom_ymax + 1 - k); + idx2f(,pressure, j, y_min - k) = idx2f(,bottom_pressure, j, bottom_ymax + 1 - k); } } } @@ -750,9 +850,11 @@ void update_tile_halo_b_kernel( for (int k = 0; k < depth; ++k) { // DO j=x_min-depth, x_max+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(viscosity) mapToFrom2D(bottom_viscosity)) + mapToFrom2Dfe(viscosity_buffer, viscosity) + mapToFrom2Dfe(bottom_viscosity_buffer, bottom_viscosity) + omp(parallel(1) enable_target(use_target)) for (int j = (x_min - depth + 1); j < (x_max + depth + 2); j++) { - idx2(viscosity, j, y_min - k) = idx2(bottom_viscosity, j, bottom_ymax + 1 - k); + idx2f(,viscosity, j, y_min - k) = idx2f(,bottom_viscosity, j, bottom_ymax + 1 - k); } } } @@ -762,9 +864,11 @@ void update_tile_halo_b_kernel( for (int k = 0; k < depth; ++k) { // DO j=x_min-depth, x_max+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(soundspeed) mapToFrom2D(bottom_soundspeed)) + mapToFrom2Dfe(soundspeed_buffer, soundspeed) + mapToFrom2Dfe(bottom_soundspeed_buffer, bottom_soundspeed) + omp(parallel(1) enable_target(use_target)) for (int j = (x_min - depth + 1); j < (x_max + depth + 2); j++) { - idx2(soundspeed, j, y_min - k) = idx2(bottom_soundspeed, j, bottom_ymax + 1 - k); + idx2f(,soundspeed, j, y_min - k) = idx2f(,bottom_soundspeed, j, bottom_ymax + 1 - k); } } } @@ -774,9 +878,11 @@ void update_tile_halo_b_kernel( for (int k = 0; k < depth; ++k) { // DO j=x_min-depth, x_max+1+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(xvel0) mapToFrom2D(bottom_xvel0)) + mapToFrom2Dfe(xvel0_buffer, xvel0) + mapToFrom2Dfe(bottom_xvel0_buffer, bottom_xvel0) + omp(parallel(1) enable_target(use_target)) for (int j = (x_min - depth + 1); j < (x_max + 1 + depth + 2); j++) { - idx2(xvel0, j, y_min - k) = idx2(bottom_xvel0, j, bottom_ymax + 1 - k); + idx2f(,xvel0, j, y_min - k) = idx2f(,bottom_xvel0, j, bottom_ymax + 1 - k); } } } @@ -786,9 +892,11 @@ void update_tile_halo_b_kernel( for (int k = 0; k < depth; ++k) { // DO j=x_min-depth, x_max+1+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(xvel1) mapToFrom2D(bottom_xvel1)) + mapToFrom2Dfe(xvel1_buffer, xvel1) + mapToFrom2Dfe(bottom_xvel1_buffer, bottom_xvel1) + omp(parallel(1) enable_target(use_target)) for (int j = (x_min - depth + 1); j < (x_max + 1 + depth + 2); j++) { - idx2(xvel1, j, y_min - k) = idx2(bottom_xvel1, j, bottom_ymax + 1 - k); + idx2f(,xvel1, j, y_min - k) = idx2f(,bottom_xvel1, j, bottom_ymax + 1 - k); } } } @@ -798,9 +906,11 @@ void update_tile_halo_b_kernel( for (int k = 0; k < depth; ++k) { // DO j=x_min-depth, x_max+1+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(yvel0) mapToFrom2D(bottom_yvel0)) + mapToFrom2Dfe(yvel0_buffer, yvel0) + mapToFrom2Dfe(bottom_yvel0_buffer, bottom_yvel0) + omp(parallel(1) enable_target(use_target)) for (int j = (x_min - depth + 1); j < (x_max + 1 + depth + 2); j++) { - idx2(yvel0, j, y_min - k) = idx2(bottom_yvel0, j, bottom_ymax + 1 - k); + idx2f(,yvel0, j, y_min - k) = idx2f(,bottom_yvel0, j, bottom_ymax + 1 - k); } } } @@ -810,9 +920,11 @@ void update_tile_halo_b_kernel( for (int k = 0; k < depth; ++k) { // DO j=x_min-depth, x_max+1+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(yvel1) mapToFrom2D(bottom_yvel1)) + mapToFrom2Dfe(yvel1_buffer, yvel1) + mapToFrom2Dfe(bottom_yvel1_buffer, bottom_yvel1) + omp(parallel(1) enable_target(use_target)) for (int j = (x_min - depth + 1); j < (x_max + 1 + depth + 2); j++) { - idx2(yvel1, j, y_min - k) = idx2(bottom_yvel1, j, bottom_ymax + 1 - k); + idx2f(,yvel1, j, y_min - k) = idx2f(,bottom_yvel1, j, bottom_ymax + 1 - k); } } } @@ -822,9 +934,11 @@ void update_tile_halo_b_kernel( for (int k = 0; k < depth; ++k) { // DO j=x_min-depth, x_max+1+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(vol_flux_x) mapToFrom2D(bottom_vol_flux_x)) + mapToFrom2Dfe(vol_flux_x_buffer, vol_flux_x) + mapToFrom2Dfe(bottom_vol_flux_x_buffer, bottom_vol_flux_x) + omp(parallel(1) enable_target(use_target)) for (int j = (x_min - depth + 1); j < (x_max + 1 + depth + 2); j++) { - idx2(vol_flux_x, j, y_min - k) = idx2(bottom_vol_flux_x, j, bottom_ymax + 1 - k); + idx2f(,vol_flux_x, j, y_min - k) = idx2f(,bottom_vol_flux_x, j, bottom_ymax + 1 - k); } } } @@ -834,9 +948,11 @@ void update_tile_halo_b_kernel( for (int k = 0; k < depth; ++k) { // DO j=x_min-depth, x_max+1+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(mass_flux_x) mapToFrom2D(bottom_mass_flux_x)) + mapToFrom2Dfe(mass_flux_x_buffer, mass_flux_x) + mapToFrom2Dfe(bottom_mass_flux_x_buffer, bottom_mass_flux_x) + omp(parallel(1) enable_target(use_target)) for (int j = (x_min - depth + 1); j < (x_max + 1 + depth + 2); j++) { - idx2(mass_flux_x, j, y_min - k) = idx2(bottom_mass_flux_x, j, bottom_ymax + 1 - k); + idx2f(,mass_flux_x, j, y_min - k) = idx2f(,bottom_mass_flux_x, j, bottom_ymax + 1 - k); } } } @@ -846,9 +962,11 @@ void update_tile_halo_b_kernel( for (int k = 0; k < depth; ++k) { // DO j=x_min-depth, x_max+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(vol_flux_y) mapToFrom2D(bottom_vol_flux_y)) + mapToFrom2Dfe(vol_flux_y_buffer, vol_flux_y) + mapToFrom2Dfe(bottom_vol_flux_y_buffer, bottom_vol_flux_y) + omp(parallel(1) enable_target(use_target)) for (int j = (x_min - depth + 1); j < (x_max + depth + 2); j++) { - idx2(vol_flux_y, j, y_min - k) = idx2(bottom_vol_flux_y, j, bottom_ymax + 1 - k); + idx2f(,vol_flux_y, j, y_min - k) = idx2f(,bottom_vol_flux_y, j, bottom_ymax + 1 - k); } } } @@ -858,9 +976,11 @@ void update_tile_halo_b_kernel( for (int k = 0; k < depth; ++k) { // DO j=x_min-depth, x_max+depth - omp(parallel(1) enable_target(use_target) mapToFrom2D(mass_flux_y) mapToFrom2D(bottom_mass_flux_y)) + mapToFrom2Dfe(mass_flux_y_buffer, mass_flux_y) + mapToFrom2Dfe(bottom_mass_flux_y_buffer, bottom_mass_flux_y) + omp(parallel(1) enable_target(use_target)) for (int j = (x_min - depth + 1); j < (x_max + depth + 2); j++) { - idx2(mass_flux_y, j, y_min - k) = idx2(bottom_mass_flux_y, j, bottom_ymax + 1 - k); + idx2f(,mass_flux_y, j, y_min - k) = idx2f(,bottom_mass_flux_y, j, bottom_ymax + 1 - k); } } } diff --git a/src/viscosity.cpp b/src/viscosity.cpp index 16e49d2..0fa95df 100644 --- a/src/viscosity.cpp +++ b/src/viscosity.cpp @@ -29,46 +29,40 @@ void viscosity_kernel( bool use_target, int x_min, int x_max, int y_min, int y_max, - clover::Buffer1D &celldx, - clover::Buffer1D &celldy, - clover::Buffer2D &density0, - clover::Buffer2D &pressure, - clover::Buffer2D &viscosity, - clover::Buffer2D &xvel0, - clover::Buffer2D &yvel0) { + field_type &field) { // DO k=y_min,y_max // DO j=x_min,x_max - omp(parallel(2) enable_target(use_target) - mapToFrom1D(celldx) - mapToFrom1D(celldy) - mapToFrom2D(density0) - mapToFrom2D(pressure) - mapToFrom2D(viscosity) - mapToFrom2D(xvel0) - mapToFrom2D(yvel0) - ) + mapToFrom1Df(field, celldx) + mapToFrom1Df(field, celldy) + mapToFrom2Df(field, density0) + mapToFrom2Df(field, pressure) + mapToFrom2Df(field, viscosity) + mapToFrom2Df(field, xvel0) + mapToFrom2Df(field, yvel0) + + omp(parallel(2) enable_target(use_target)) for (int j = (y_min + 1); j < (y_max + 2); j++) { for (int i = (x_min + 1); i < (x_max + 2); i++) { - double ugrad = (idx2(xvel0, i + 1, j + 0) + idx2(xvel0, i + 1, j + 1)) - (idx2(xvel0, i, j) + idx2(xvel0, i + 0, j + 1)); - double vgrad = (idx2(yvel0, i + 0, j + 1) + idx2(yvel0, i + 1, j + 1)) - (idx2(yvel0, i, j) + idx2(yvel0, i + 1, j + 0)); - double div = (idx1(celldx, i) * (ugrad) + idx1(celldy, j) * (vgrad)); - double strain2 = 0.5 * (idx2(xvel0, i + 0, j + 1) + - idx2(xvel0, i + 1, j + 1) - - idx2(xvel0, i, j) - - idx2(xvel0, i + 1, j + 0)) / idx1(celldy, j) + - 0.5 * (idx2(yvel0, i + 1, j + 0) + - idx2(yvel0, i + 1, j + 1) - - idx2(yvel0, i, j) - - idx2(yvel0, i + 0, j + 1)) / idx1(celldx, i); - double pgradx = (idx2(pressure, i + 1, j + 0) - idx2(pressure, i - 1, j + 0)) / (idx1(celldx, i) + idx1(celldx, i + 1)); - double pgrady = (idx2(pressure, i + 0, j + 1) - idx2(pressure, i + 0, j - 1)) / (idx1(celldy, j) + idx1(celldy, j + 2)); + double ugrad = (idx2f(field, xvel0, i + 1, j + 0) + idx2f(field, xvel0, i + 1, j + 1)) - (idx2f(field, xvel0, i, j) + idx2f(field, xvel0, i + 0, j + 1)); + double vgrad = (idx2f(field, yvel0, i + 0, j + 1) + idx2f(field, yvel0, i + 1, j + 1)) - (idx2f(field, yvel0, i, j) + idx2f(field, yvel0, i + 1, j + 0)); + double div = (idx1f(field, celldx, i) * (ugrad) + idx1f(field, celldy, j) * (vgrad)); + double strain2 = 0.5 * (idx2f(field, xvel0, i + 0, j + 1) + + idx2f(field, xvel0, i + 1, j + 1) - + idx2f(field, xvel0, i, j) - + idx2f(field, xvel0, i + 1, j + 0)) / idx1f(field, celldy, j) + + 0.5 * (idx2f(field, yvel0, i + 1, j + 0) + + idx2f(field, yvel0, i + 1, j + 1) - + idx2f(field, yvel0, i, j) - + idx2f(field, yvel0, i + 0, j + 1)) / idx1f(field, celldx, i); + double pgradx = (idx2f(field, pressure, i + 1, j + 0) - idx2f(field, pressure, i - 1, j + 0)) / (idx1f(field, celldx, i) + idx1f(field, celldx, i + 1)); + double pgrady = (idx2f(field, pressure, i + 0, j + 1) - idx2f(field, pressure, i + 0, j - 1)) / (idx1f(field, celldy, j) + idx1f(field, celldy, j + 2)); double pgradx2 = pgradx * pgradx; double pgrady2 = pgrady * pgrady; - double limiter = ((0.5 * (ugrad) / idx1(celldx, i)) * pgradx2 + - (0.5 * (vgrad) / idx1(celldy, j)) * pgrady2 + strain2 * pgradx * pgrady) / + double limiter = ((0.5 * (ugrad) / idx1f(field, celldx, i)) * pgradx2 + + (0.5 * (vgrad) / idx1f(field, celldy, j)) * pgrady2 + strain2 * pgradx * pgrady) / std::fmax(pgradx2 + pgrady2, g_small); - if ((limiter > 0.0) || (div >= 0.0)) { idx2(viscosity, i, j) = 0.0; } + if ((limiter > 0.0) || (div >= 0.0)) { idx2f(field, viscosity, i, j) = 0.0; } else { double dirx = 1.0; if (pgradx < 0.0)dirx = -1.0; @@ -77,11 +71,11 @@ void viscosity_kernel( if (pgradx < 0.0)diry = -1.0; pgrady = diry * std::fmax(g_small, std::fabs(pgrady)); double pgrad = std::sqrt(pgradx * pgradx + pgrady * pgrady); - double xgrad = std::fabs(idx1(celldx, i) * pgrad / pgradx); - double ygrad = std::fabs(idx1(celldy, j) * pgrad / pgrady); + double xgrad = std::fabs(idx1f(field, celldx, i) * pgrad / pgradx); + double ygrad = std::fabs(idx1f(field, celldy, j) * pgrad / pgrady); double grad = std::fmin(xgrad, ygrad); double grad2 = grad * grad; - idx2(viscosity, i, j) = 2.0 * idx2(density0, i, j) * grad2 * limiter * limiter; + idx2f(field, viscosity, i, j) = 2.0 * idx2f(field, density0, i, j) * grad2 * limiter * limiter; } } } @@ -101,10 +95,11 @@ void viscosity(global_variables &globals) { for (int tile = 0; tile < globals.config.tiles_per_chunk; ++tile) { tile_type &t = globals.chunk.tiles[tile]; viscosity_kernel(globals.use_target, - t.info.t_xmin, t.info.t_xmax, t.info.t_ymin, t.info.t_ymax, - t.field.celldx, t.field.celldy, t.field.density0, - t.field.pressure, t.field.viscosity, t.field.xvel0, - t.field.yvel0); + t.info.t_xmin, + t.info.t_xmax, + t.info.t_ymin, + t.info.t_ymax, + t.field); } #if FLUSH_BUFFER