cholla-hydro · evaneschneider · May 22, 2024 · Feb 26, 2024 · Feb 5, 2024 · Feb 5, 2024
diff --git a/src/integrators/VL_1D_cuda.cu b/src/integrators/VL_1D_cuda.cu
@@ -92,7 +92,7 @@ void VL_Algorithm_1D_CUDA(Real *d_conserved, int nx, int x_off, int n_ghost, Rea
                      n_fields);
   #endif
   #ifdef PLMC
-  hipLaunchKernelGGL(PLMC_cuda, dimGrid, dimBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama, 0,
+  hipLaunchKernelGGL(PLMC_cuda<0>, dimGrid, dimBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama,
                      n_fields);
   #endif
   #ifdef PLMP
@@ -104,7 +104,7 @@ void VL_Algorithm_1D_CUDA(Real *d_conserved, int nx, int x_off, int n_ghost, Rea
                      gama, 0, n_fields);
   #endif
   #ifdef PPMC
-  hipLaunchKernelGGL(PPMC_VL, dimGrid, dimBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, gama, 0);
+  hipLaunchKernelGGL(PPMC_VL<0>, dimGrid, dimBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, gama);
   #endif
   GPU_Error_Check();
 

diff --git a/src/integrators/VL_2D_cuda.cu b/src/integrators/VL_2D_cuda.cu
@@ -102,10 +102,10 @@ void VL_Algorithm_2D_CUDA(Real *d_conserved, int nx, int ny, int x_off, int y_of
                      dt, gama, 1, n_fields);
   #endif
   #ifdef PLMC
-  hipLaunchKernelGGL(PLMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama,
-                     0, n_fields);
-  hipLaunchKernelGGL(PLMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, dy, dt, gama,
-                     1, n_fields);
+  hipLaunchKernelGGL(PLMC_cuda<0>, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, dx, dt,
+                     gama, n_fields);
+  hipLaunchKernelGGL(PLMC_cuda<1>, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, dy, dt,
+                     gama, n_fields);
   #endif
   #ifdef PPMP
   hipLaunchKernelGGL(PPMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx,
@@ -114,8 +114,8 @@ void VL_Algorithm_2D_CUDA(Real *d_conserved, int nx, int ny, int x_off, int y_of
                      dt, gama, 1, n_fields);
   #endif  // PPMP
   #ifdef PPMC
-  hipLaunchKernelGGL(PPMC_VL, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, gama, 0);
-  hipLaunchKernelGGL(PPMC_VL, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, gama, 1);
+  hipLaunchKernelGGL(PPMC_VL<0>, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, gama);
+  hipLaunchKernelGGL(PPMC_VL<1>, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, gama);
   #endif  // PPMC
   GPU_Error_Check();
 

diff --git a/src/integrators/VL_3D_cuda.cu b/src/integrators/VL_3D_cuda.cu
@@ -145,8 +145,7 @@ void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int
 
   // Step 2: Calculate first-order upwind fluxes
   #ifdef EXACT
-  cuda_utilities::AutomaticLaunchParams static const exact_launch_params(Calculate_Exact_Fluxes_CUDA,
-                                                                         n_cellsCalculate_Exact_Fluxes_CUDA);
+  cuda_utilities::AutomaticLaunchParams static const exact_launch_params(Calculate_Exact_Fluxes_CUDA, n_cells);
   hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, exact_launch_params.get_numBlocks(),
                      exact_launch_params.get_threadsPerBlock(), 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0,
                      n_fields);
@@ -250,13 +249,13 @@ void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int
                      dev_conserved_half, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dz, dt, gama, 2, n_fields);
   #endif  // PLMP
   #ifdef PLMC
-  cuda_utilities::AutomaticLaunchParams static const plmc_vl_launch_params(PLMC_cuda, n_cells);
-  hipLaunchKernelGGL(PLMC_cuda, plmc_vl_launch_params.get_numBlocks(), plmc_vl_launch_params.get_threadsPerBlock(), 0,
-                     0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama, 0, n_fields);
-  hipLaunchKernelGGL(PLMC_cuda, plmc_vl_launch_params.get_numBlocks(), plmc_vl_launch_params.get_threadsPerBlock(), 0,
-                     0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, dy, dt, gama, 1, n_fields);
-  hipLaunchKernelGGL(PLMC_cuda, plmc_vl_launch_params.get_numBlocks(), plmc_vl_launch_params.get_threadsPerBlock(), 0,
-                     0, dev_conserved_half, Q_Lz, Q_Rz, nx, ny, nz, dz, dt, gama, 2, n_fields);
+  cuda_utilities::AutomaticLaunchParams static const plmc_vl_launch_params(PLMC_cuda<0>, n_cells);
+  hipLaunchKernelGGL(PLMC_cuda<0>, plmc_vl_launch_params.get_numBlocks(), plmc_vl_launch_params.get_threadsPerBlock(),
+                     0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama, n_fields);
+  hipLaunchKernelGGL(PLMC_cuda<1>, plmc_vl_launch_params.get_numBlocks(), plmc_vl_launch_params.get_threadsPerBlock(),
+                     0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, dy, dt, gama, n_fields);
+  hipLaunchKernelGGL(PLMC_cuda<2>, plmc_vl_launch_params.get_numBlocks(), plmc_vl_launch_params.get_threadsPerBlock(),
+                     0, 0, dev_conserved_half, Q_Lz, Q_Rz, nx, ny, nz, dz, dt, gama, n_fields);
   #endif  // PLMC
   #ifdef PPMP
   cuda_utilities::AutomaticLaunchParams static const ppmp_launch_params(PPMP_cuda, n_cells);
@@ -268,13 +267,13 @@ void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int
                      dev_conserved_half, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dz, dt, gama, 2, n_fields);
   #endif  // PPMP
   #ifdef PPMC
-  cuda_utilities::AutomaticLaunchParams static const ppmc_vl_launch_params(PPMC_VL, n_cells);
-  hipLaunchKernelGGL(PPMC_VL, ppmc_vl_launch_params.get_numBlocks(), ppmc_vl_launch_params.get_threadsPerBlock(), 0, 0,
-                     dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, gama, 0);
-  hipLaunchKernelGGL(PPMC_VL, ppmc_vl_launch_params.get_numBlocks(), ppmc_vl_launch_params.get_threadsPerBlock(), 0, 0,
-                     dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, gama, 1);
-  hipLaunchKernelGGL(PPMC_VL, ppmc_vl_launch_params.get_numBlocks(), ppmc_vl_launch_params.get_threadsPerBlock(), 0, 0,
-                     dev_conserved_half, Q_Lz, Q_Rz, nx, ny, nz, gama, 2);
+  cuda_utilities::AutomaticLaunchParams static const ppmc_vl_launch_params(PPMC_VL<0>, n_cells);
+  hipLaunchKernelGGL(PPMC_VL<0>, ppmc_vl_launch_params.get_numBlocks(), ppmc_vl_launch_params.get_threadsPerBlock(), 0,
+                     0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, gama);
+  hipLaunchKernelGGL(PPMC_VL<1>, ppmc_vl_launch_params.get_numBlocks(), ppmc_vl_launch_params.get_threadsPerBlock(), 0,
+                     0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, gama);
+  hipLaunchKernelGGL(PPMC_VL<2>, ppmc_vl_launch_params.get_numBlocks(), ppmc_vl_launch_params.get_threadsPerBlock(), 0,
+                     0, dev_conserved_half, Q_Lz, Q_Rz, nx, ny, nz, gama);
   #endif  // PPMC
   GPU_Error_Check();
 

diff --git a/src/integrators/simple_1D_cuda.cu b/src/integrators/simple_1D_cuda.cu
@@ -64,7 +64,7 @@ void Simple_Algorithm_1D_CUDA(Real *d_conserved, int nx, int x_off, int n_ghost,
   GPU_Error_Check();
 #endif
 #ifdef PLMC
-  hipLaunchKernelGGL(PLMC_cuda, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama, 0,
+  hipLaunchKernelGGL(PLMC_cuda<0>, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama,
                      n_fields);
   GPU_Error_Check();
 #endif
@@ -74,7 +74,7 @@ void Simple_Algorithm_1D_CUDA(Real *d_conserved, int nx, int x_off, int n_ghost,
   GPU_Error_Check();
 #endif
 #ifdef PPMC
-  hipLaunchKernelGGL(PPMC_CTU, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama, 0);
+  hipLaunchKernelGGL(PPMC_CTU<0>, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama);
   GPU_Error_Check();
 #endif
 

diff --git a/src/integrators/simple_2D_cuda.cu b/src/integrators/simple_2D_cuda.cu
@@ -65,9 +65,9 @@ void Simple_Algorithm_2D_CUDA(Real *d_conserved, int nx, int ny, int x_off, int
                      gama, 1, n_fields);
 #endif
 #ifdef PLMC
-  hipLaunchKernelGGL(PLMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama, 0,
+  hipLaunchKernelGGL(PLMC_cuda<0>, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama,
                      n_fields);
-  hipLaunchKernelGGL(PLMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, dy, dt, gama, 1,
+  hipLaunchKernelGGL(PLMC_cuda<1>, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, dy, dt, gama,
                      n_fields);
 #endif
 #ifdef PPMP
@@ -77,8 +77,8 @@ void Simple_Algorithm_2D_CUDA(Real *d_conserved, int nx, int ny, int x_off, int
                      gama, 1, n_fields);
 #endif
 #ifdef PPMC
-  hipLaunchKernelGGL(PPMC_CTU, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama, 0);
-  hipLaunchKernelGGL(PPMC_CTU, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, dy, dt, gama, 1);
+  hipLaunchKernelGGL(PPMC_CTU<0>, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama);
+  hipLaunchKernelGGL(PPMC_CTU<1>, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, dy, dt, gama);
 #endif
   GPU_Error_Check();
 

diff --git a/src/integrators/simple_3D_cuda.cu b/src/integrators/simple_3D_cuda.cu
@@ -99,11 +99,11 @@ void Simple_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx,
                      gama, 2, n_fields);
   #endif  // PLMP
   #ifdef PLMC
-  hipLaunchKernelGGL(PLMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama, 0,
+  hipLaunchKernelGGL(PLMC_cuda<0>, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama,
                      n_fields);
-  hipLaunchKernelGGL(PLMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, dy, dt, gama, 1,
+  hipLaunchKernelGGL(PLMC_cuda<1>, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, dy, dt, gama,
                      n_fields);
-  hipLaunchKernelGGL(PLMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lz, Q_Rz, nx, ny, nz, dz, dt, gama, 2,
+  hipLaunchKernelGGL(PLMC_cuda<2>, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lz, Q_Rz, nx, ny, nz, dz, dt, gama,
                      n_fields);
   #endif
   #ifdef PPMP
@@ -115,9 +115,9 @@ void Simple_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx,
                      gama, 2, n_fields);
   #endif  // PPMP
   #ifdef PPMC
-  hipLaunchKernelGGL(PPMC_CTU, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama, 0);
-  hipLaunchKernelGGL(PPMC_CTU, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, dy, dt, gama, 1);
-  hipLaunchKernelGGL(PPMC_CTU, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lz, Q_Rz, nx, ny, nz, dz, dt, gama, 2);
+  hipLaunchKernelGGL(PPMC_CTU<0>, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama);
+  hipLaunchKernelGGL(PPMC_CTU<1>, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, dy, dt, gama);
+  hipLaunchKernelGGL(PPMC_CTU<2>, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lz, Q_Rz, nx, ny, nz, dz, dt, gama);
   GPU_Error_Check();
   #endif  // PPMC
 

diff --git a/src/reconstruction/plmc_cuda.cu b/src/reconstruction/plmc_cuda.cu
@@ -21,8 +21,9 @@
  gamma, int dir)
  *  \brief When passed a stencil of conserved variables, returns the left and
  right boundary values for the interface calculated using plm. */
+template <int dir>
 __global__ __launch_bounds__(TPB) void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx,
-                                                 int ny, int nz, Real dx, Real dt, Real gamma, int dir, int n_fields)
+                                                 int ny, int nz, Real dx, Real dt, Real gamma, int n_fields)
 {
   // get a thread ID
   int const thread_id = threadIdx.x + blockIdx.x * blockDim.x;
@@ -60,15 +61,15 @@ __global__ __launch_bounds__(TPB) void PLMC_cuda(Real *dev_conserved, Real *dev_
   // load the 3-cell stencil into registers
   // cell i
   hydro_utilities::Primitive const cell_i =
-      reconstruction::Load_Data(dev_conserved, xid, yid, zid, nx, ny, n_cells, o1, o2, o3, gamma);
+      hydro_utilities::Load_Cell_Primitive<dir>(dev_conserved, xid, yid, zid, nx, ny, n_cells, gamma);
 
   // cell i-1. The equality checks the direction and will subtract one from the correct direction
-  hydro_utilities::Primitive const cell_imo = reconstruction::Load_Data(
-      dev_conserved, xid - int(dir == 0), yid - int(dir == 1), zid - int(dir == 2), nx, ny, n_cells, o1, o2, o3, gamma);
+  hydro_utilities::Primitive const cell_imo = hydro_utilities::Load_Cell_Primitive<dir>(
+      dev_conserved, xid - int(dir == 0), yid - int(dir == 1), zid - int(dir == 2), nx, ny, n_cells, gamma);
 
   // cell i+1. The equality checks the direction and add one to the correct direction
-  hydro_utilities::Primitive const cell_ipo = reconstruction::Load_Data(
-      dev_conserved, xid + int(dir == 0), yid + int(dir == 1), zid + int(dir == 2), nx, ny, n_cells, o1, o2, o3, gamma);
+  hydro_utilities::Primitive const cell_ipo = hydro_utilities::Load_Cell_Primitive<dir>(
+      dev_conserved, xid + int(dir == 0), yid + int(dir == 1), zid + int(dir == 2), nx, ny, n_cells, gamma);
 
   // calculate the adiabatic sound speed in cell i
   Real const sound_speed         = hydro_utilities::Calc_Sound_Speed(cell_i.pressure, cell_i.density, gamma);
@@ -295,3 +296,14 @@ __global__ __launch_bounds__(TPB) void PLMC_cuda(Real *dev_conserved, Real *dev_
   id = cuda_utilities::compute1DIndex(xid - int(dir == 0), yid - int(dir == 1), zid - int(dir == 2), nx, ny);
   reconstruction::Write_Data(interface_R_imh, dev_bounds_R, dev_conserved, id, n_cells, o1, o2, o3, gamma);
 }
+
+// Instantiate the relevant template specifications
+template __global__ __launch_bounds__(TPB) void PLMC_cuda<0>(Real *dev_conserved, Real *dev_bounds_L,
+                                                             Real *dev_bounds_R, int nx, int ny, int nz, Real dx,
+                                                             Real dt, Real gamma, int n_fields);
+template __global__ __launch_bounds__(TPB) void PLMC_cuda<1>(Real *dev_conserved, Real *dev_bounds_L,
+                                                             Real *dev_bounds_R, int nx, int ny, int nz, Real dx,
+                                                             Real dt, Real gamma, int n_fields);
+template __global__ __launch_bounds__(TPB) void PLMC_cuda<2>(Real *dev_conserved, Real *dev_bounds_L,
+                                                             Real *dev_bounds_R, int nx, int ny, int nz, Real dx,
+                                                             Real dt, Real gamma, int n_fields);
diff --git a/src/reconstruction/plmc_cuda.h b/src/reconstruction/plmc_cuda.h
@@ -15,7 +15,8 @@
  gamma, int dir)
  *  \brief When passed a stencil of conserved variables, returns the left and
  right boundary values for the interface calculated using plm. */
+template <int dir>
 __global__ __launch_bounds__(TPB) void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx,
-                                                 int ny, int nz, Real dx, Real dt, Real gamma, int dir, int n_fields);
+                                                 int ny, int nz, Real dx, Real dt, Real gamma, int n_fields);
 
 #endif  // PLMC_CUDA_H
diff --git a/src/reconstruction/plmc_cuda_tests.cu b/src/reconstruction/plmc_cuda_tests.cu
@@ -148,8 +148,21 @@ TEST(tHYDROPlmcReconstructor, CorrectInputExpectCorrectOutput)
     cuda_utilities::DeviceVector<double> dev_interface_right(host_grid.size(), true);
 
     // Launch kernel
-    hipLaunchKernelGGL(PLMC_cuda, dev_grid.size(), 1, 0, 0, dev_grid.data(), dev_interface_left.data(),
-                       dev_interface_right.data(), nx_rot, ny_rot, nz_rot, dx, dt, gamma, direction, n_fields);
+    std::cout << "direction = " << direction << std::endl;
+    switch (direction) {
+      case 0:
+        hipLaunchKernelGGL(PLMC_cuda<0>, dev_grid.size(), 1, 0, 0, dev_grid.data(), dev_interface_left.data(),
+                           dev_interface_right.data(), nx_rot, ny_rot, nz_rot, dx, dt, gamma, n_fields);
+        break;
+      case 1:
+        hipLaunchKernelGGL(PLMC_cuda<1>, dev_grid.size(), 1, 0, 0, dev_grid.data(), dev_interface_left.data(),
+                           dev_interface_right.data(), nx_rot, ny_rot, nz_rot, dx, dt, gamma, n_fields);
+        break;
+      case 2:
+        hipLaunchKernelGGL(PLMC_cuda<2>, dev_grid.size(), 1, 0, 0, dev_grid.data(), dev_interface_left.data(),
+                           dev_interface_right.data(), nx_rot, ny_rot, nz_rot, dx, dt, gamma, n_fields);
+        break;
+    }
     GPU_Error_Check();
     GPU_Error_Check(cudaDeviceSynchronize());
 
@@ -261,8 +274,20 @@ TEST(tMHDPlmcReconstructor, CorrectInputExpectCorrectOutput)
     cuda_utilities::DeviceVector<double> dev_interface_right(n_cells_interface, true);
 
     // Launch kernel
-    hipLaunchKernelGGL(PLMC_cuda, dev_grid.size(), 1, 0, 0, dev_grid.data(), dev_interface_left.data(),
-                       dev_interface_right.data(), nx, ny, nz, dx, dt, gamma, direction, n_fields);
+    switch (direction) {
+      case 0:
+        hipLaunchKernelGGL(PLMC_cuda<0>, dev_grid.size(), 1, 0, 0, dev_grid.data(), dev_interface_left.data(),
+                           dev_interface_right.data(), nx, ny, nz, dx, dt, gamma, n_fields);
+        break;
+      case 1:
+        hipLaunchKernelGGL(PLMC_cuda<1>, dev_grid.size(), 1, 0, 0, dev_grid.data(), dev_interface_left.data(),
+                           dev_interface_right.data(), nx, ny, nz, dx, dt, gamma, n_fields);
+        break;
+      case 2:
+        hipLaunchKernelGGL(PLMC_cuda<2>, dev_grid.size(), 1, 0, 0, dev_grid.data(), dev_interface_left.data(),
+                           dev_interface_right.data(), nx, ny, nz, dx, dt, gamma, n_fields);
+        break;
+    }
     GPU_Error_Check();
     GPU_Error_Check(cudaDeviceSynchronize());