Merge pull request eth-cscs#79 from eth-cscs/device_type

Enable device_type to be none
marinlauber · Dec 1, 2023 · 36ecf9b · 36ecf9b
2 parents c9d6a55 + 9b668ac
commit 36ecf9b
Show file tree

Hide file tree

Showing 3 changed files with 57 additions and 17 deletions.
diff --git a/src/init_global_grid.jl b/src/init_global_grid.jl
@@ -18,8 +18,8 @@ Initialize a Cartesian grid of MPI processes (and also MPI itself by default) de
  - `reorder::Integer=1`: the reorder argument to `MPI.Cart_create` in order to create the Cartesian process topology.
  - `comm::MPI.Comm=MPI.COMM_WORLD`: the input communicator argument to `MPI.Cart_create` in order to create the Cartesian process topology.
  - `init_MPI::Bool=true`: whether to initialize MPI (`true`) or not (`false`).
- - `device_type::String="auto"`: the type of the device to be used if available: "CUDA", "AMDGPU" or "auto". If `device_type` is "auto" (default), it is automatically determined, depending on which of the modules used for programming the devices (CUDA.jl or AMDGPU.jl) is functional; if both are functional, an error will be given if `device_type` is set as "auto".
- - `select_device::Bool=true`: whether to automatically select the device (GPU) (`true`) or not (`false`) if CUDA is functional. If `true`, it selects the device corresponding to the node-local MPI rank. This method of device selection suits both single and multi-device compute nodes and is recommended in general. It is also the default method of device selection of the *function* [`select_device`](@ref).
+ - `device_type::String="auto"`: the type of the device to be used if available: `"CUDA"`, `"AMDGPU"`, `"none"` or `"auto"`. Set `device_type="none"` if you want to use only CPUs on a system that also has GPUs. If `device_type` is `"auto"` (default), the device type is determined automatically, depending on which of the modules used for programming the devices (CUDA.jl or AMDGPU.jl) is functional; if both are functional, an error is raised and `device_type` must be set explicitly.
+ - `select_device::Bool=true`: whether to automatically select the device (GPU) (`true`) or not (`false`) if CUDA or AMDGPU is functional and `device_type` is not `"none"`. If `true`, it selects the device corresponding to the node-local MPI rank. This method of device selection suits both single and multi-device compute nodes and is recommended in general. It is also the default method of device selection of the *function* [`select_device`](@ref).
  For more information, refer to the documentation of MPI.jl / MPI.
 
 # Return values
@@ -68,10 +68,12 @@ function init_global_grid(nx::Integer, ny::Integer, nz::Integer; dimx::Integer=0
  if haskey(ENV, "IGG_LOOPVECTORIZATION_DIMY") loopvectorization[2] = (parse(Int64, ENV["IGG_LOOPVECTORIZATION_DIMY"]) > 0); end
  if haskey(ENV, "IGG_LOOPVECTORIZATION_DIMZ") loopvectorization[3] = (parse(Int64, ENV["IGG_LOOPVECTORIZATION_DIMZ"]) > 0); end
  end
- if !(device_type in [DEVICE_TYPE_AUTO, DEVICE_TYPE_CUDA, DEVICE_TYPE_AMDGPU]) error("Argument `device_type`: invalid value obtained ($device_type). Valid values are: $DEVICE_TYPE_CUDA, $DEVICE_TYPE_AMDGPU, $DEVICE_TYPE_AUTO") end
+ if !(device_type in [DEVICE_TYPE_NONE, DEVICE_TYPE_AUTO, DEVICE_TYPE_CUDA, DEVICE_TYPE_AMDGPU]) error("Argument `device_type`: invalid value obtained ($device_type). Valid values are: $DEVICE_TYPE_CUDA, $DEVICE_TYPE_AMDGPU, $DEVICE_TYPE_NONE, $DEVICE_TYPE_AUTO") end
  if ((device_type == DEVICE_TYPE_AUTO) && cuda_functional() && amdgpu_functional()) error("Automatic detection of the device type to be used not possible: both CUDA and AMDGPU are functional. Set keyword argument `device_type` to $DEVICE_TYPE_CUDA or $DEVICE_TYPE_AMDGPU.") end
- if (device_type in [DEVICE_TYPE_CUDA, DEVICE_TYPE_AUTO]) cuda_enabled = cuda_functional() end # NOTE: cuda could be enabled/disabled depending on some additional criteria.
- if (device_type in [DEVICE_TYPE_AMDGPU, DEVICE_TYPE_AUTO]) amdgpu_enabled = amdgpu_functional() end # NOTE: amdgpu could be enabled/disabled depending on some additional criteria.
+ if (device_type != DEVICE_TYPE_NONE)
+ if (device_type in [DEVICE_TYPE_CUDA, DEVICE_TYPE_AUTO]) cuda_enabled = cuda_functional() end # NOTE: cuda could be enabled/disabled depending on some additional criteria.
+ if (device_type in [DEVICE_TYPE_AMDGPU, DEVICE_TYPE_AUTO]) amdgpu_enabled = amdgpu_functional() end # NOTE: amdgpu could be enabled/disabled depending on some additional criteria.
+ end
  if (any(nxyz .< 1)) error("Invalid arguments: nx, ny, and nz cannot be less than 1."); end
  if (any(dims .< 0)) error("Invalid arguments: dimx, dimy, and dimz cannot be negative."); end
  if (any(periods .∉ ((0,1),))) error("Invalid arguments: periodx, periody, and periodz must be either 0 or 1."); end

diff --git a/src/shared.jl b/src/shared.jl
@@ -30,6 +30,7 @@ const NDIMS_MPI = 3 # Internally, we set the number of dimens
 const NNEIGHBORS_PER_DIM = 2 # Number of neighbors per dimension (left neighbor + right neighbor).
 const GG_ALLOC_GRANULARITY = 32 # Internal buffers are allocated with a granularity of GG_ALLOC_GRANULARITY elements in order to ensure correct reinterpretation when used for different types and to reduce the amount of re-allocations.
 const GG_THREADCOPY_THRESHOLD = 32768 # When LoopVectorization is deactivated, then the GG_THREADCOPY_THRESHOLD defines the size in bytes upon which memory copy is performed with multiple threads.
+const DEVICE_TYPE_NONE = "none"
 const DEVICE_TYPE_AUTO = "auto"
 const DEVICE_TYPE_CUDA = "CUDA"
 const DEVICE_TYPE_AMDGPU = "AMDGPU"

diff --git a/test/test_select_device.jl b/test/test_select_device.jl
@@ -16,20 +16,57 @@ nprocs = MPI.Comm_size(MPI.COMM_WORLD); # NOTE: these tests can run with any num
 
 @testset "$(basename(@__FILE__)) (processes: $nprocs)" begin
  @testset "1. select_device" begin
- @static if test_cuda
- me, = init_global_grid(3, 4, 5; quiet=true, init_MPI=false, device_type="CUDA");
- gpu_id = select_device();
- @test gpu_id < length(CUDA.devices())
- finalize_global_grid(finalize_MPI=false);
+ @static if test_cuda && !test_amdgpu
+ @testset "\"CUDA\"" begin
+ me, = init_global_grid(3, 4, 5; quiet=true, init_MPI=false, device_type="CUDA");
+ gpu_id = select_device();
+ @test gpu_id < length(CUDA.devices())
+ finalize_global_grid(finalize_MPI=false);
+ end;
+ @testset "\"auto\"" begin
+ me, = init_global_grid(3, 4, 5; quiet=true, init_MPI=false, device_type="auto");
+ gpu_id = select_device();
+ @test gpu_id < length(CUDA.devices())
+ finalize_global_grid(finalize_MPI=false);
+ end;
  end
- @static if test_amdgpu
- me, = init_global_grid(3, 4, 5; quiet=true, init_MPI=false, device_type="AMDGPU");
- gpu_id = select_device();
- @test gpu_id < length(AMDGPU.devices())
- finalize_global_grid(finalize_MPI=false);
+ @static if test_amdgpu && !test_cuda
+ @testset "\"AMDGPU\"" begin
+ me, = init_global_grid(3, 4, 5; quiet=true, init_MPI=false, device_type="AMDGPU");
+ gpu_id = select_device();
+ @test gpu_id < length(AMDGPU.devices())
+ finalize_global_grid(finalize_MPI=false);
+ end;
+ @testset "\"auto\"" begin
+ me, = init_global_grid(3, 4, 5; quiet=true, init_MPI=false, device_type="auto");
+ gpu_id = select_device();
+ @test gpu_id < length(AMDGPU.devices())
+ finalize_global_grid(finalize_MPI=false);
+ end;
+ end
+ @static if !(test_cuda || test_amdgpu) || (test_cuda && test_amdgpu)
+ @testset "\"auto\"" begin
+ me, = init_global_grid(3, 4, 5; quiet=true, init_MPI=false, device_type="auto");
+ @test_throws ErrorException select_device()
+ finalize_global_grid(finalize_MPI=false);
+ end;
+ end
+ @static if !test_cuda
+ @testset "\"CUDA\"" begin
+ me, = init_global_grid(3, 4, 5; quiet=true, init_MPI=false, device_type="CUDA");
+ @test_throws ErrorException select_device()
+ finalize_global_grid(finalize_MPI=false);
+ end;
+ end
+ @static if !test_amdgpu
+ @testset "\"AMDGPU\"" begin
+ me, = init_global_grid(3, 4, 5; quiet=true, init_MPI=false, device_type="AMDGPU");
+ @test_throws ErrorException select_device()
+ finalize_global_grid(finalize_MPI=false);
+ end;
  end
- @static if !(test_cuda || test_amdgpu)
- me, = init_global_grid(3, 4, 5; quiet=true, init_MPI=false);
+ @testset "\"none\"" begin
+ me, = init_global_grid(3, 4, 5; quiet=true, init_MPI=false, device_type="none");
  @test_throws ErrorException select_device()
  finalize_global_grid(finalize_MPI=false);
  end