Skip to content

Commit

Permalink
Untested: fix broken types for querying peer-to-peer properties
Browse files Browse the repository at this point in the history
  • Loading branch information
lukstafi committed Sep 23, 2024
1 parent 92066f9 commit 6002725
Show file tree
Hide file tree
Showing 4 changed files with 58 additions and 56 deletions.
4 changes: 4 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@

- Partitioned the API into modules.

### Fixed

- Broken types for `can_access_peer` and `get_p2p_attributes`.

## [0.4.1] 2024-09-12

### Fixed
Expand Down
4 changes: 2 additions & 2 deletions cuda_ffi/bindings.ml
Original file line number Diff line number Diff line change
Expand Up @@ -94,12 +94,12 @@ module Functions (F : Ctypes.FOREIGN) = struct

let cu_device_can_access_peer =
F.foreign "cuDeviceCanAccessPeer"
F.(ptr int @-> cu_deviceptr @-> cu_deviceptr @-> returning E.cu_result)
F.(ptr int @-> E.cu_device @-> E.cu_device @-> returning E.cu_result)

let cu_device_get_p2p_attribute =
F.foreign "cuDeviceGetP2PAttribute"
F.(
ptr int @-> cu_deviceptr @-> cu_deviceptr @-> E.cu_device_p2p_attribute
ptr int @-> E.cu_device_p2p_attribute @-> E.cu_device @-> E.cu_device
@-> returning E.cu_result)

(* Foreign binding for the driver entry point "cuMemFree": takes one device pointer and yields a
   [cu_result] status code. *)
let cu_mem_free =
  let signature = F.(cu_deviceptr @-> returning E.cu_result) in
  F.foreign "cuMemFree" signature
Expand Down
68 changes: 33 additions & 35 deletions cudajit.ml
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,38 @@ module Device = struct
(** Compute mode of a device; [computemode_of_cu] maps the [CU_COMPUTEMODE_*] constants onto these
    constructors. *)
type computemode = DEFAULT | PROHIBITED | EXCLUSIVE_PROCESS [@@deriving sexp]
(** NOTE(review): presumably mirrors the driver's flush-GPUDirect-RDMA-writes options enum —
    confirm against the conversion function that consumes it. *)
type flush_GPU_direct_RDMA_writes_options = HOST | MEMOPS [@@deriving sexp]

(** One queried peer-to-peer attribute together with the value reported for it. Each constructor
    corresponds to one [CU_DEVICE_P2P_ATTRIBUTE_*] query issued by [get_p2p_attributes]. *)
type p2p_attribute =
| PERFORMANCE_RANK of int
| ACCESS_SUPPORTED of bool
| NATIVE_ATOMIC_SUPPORTED of bool
| CUDA_ARRAY_ACCESS_SUPPORTED of bool

(** [get_p2p_attributes ~dst ~src] queries all four peer-to-peer attributes of the link whose
    source device is [src] and whose destination device is [dst].

    The returned list holds exactly one entry per {!p2p_attribute} constructor, in the order
    [CUDA_ARRAY_ACCESS_SUPPORTED; NATIVE_ATOMIC_SUPPORTED; ACCESS_SUPPORTED; PERFORMANCE_RANK].
    The boolean attributes are [true] when the driver reports the value [1].

    Each driver status is passed to [check] (defined elsewhere in this file; presumably it raises
    on a non-success status — confirm there). *)
let get_p2p_attributes ~dst ~src =
  let open Ctypes in
  (* A single int cell is reused as the out-parameter of every query. *)
  let value = allocate int 0 in
  (* The C signature is cuDeviceGetP2PAttribute(value, attrib, srcDevice, dstDevice): the source
     device comes BEFORE the destination device, so [src] must be passed first. *)
  let query attribute =
    check "cu_device_get_p2p_attribute"
    @@ Cuda.cu_device_get_p2p_attribute value attribute src dst;
    !@value
  in
  (* Sequential let-bindings keep the driver calls in a fixed, documented order. *)
  let performance_rank = query CU_DEVICE_P2P_ATTRIBUTE_PERFORMANCE_RANK in
  let access_supported = query CU_DEVICE_P2P_ATTRIBUTE_ACCESS_SUPPORTED = 1 in
  let native_atomic_supported = query CU_DEVICE_P2P_ATTRIBUTE_NATIVE_ATOMIC_SUPPORTED = 1 in
  let cuda_array_access_supported =
    query CU_DEVICE_P2P_ATTRIBUTE_CUDA_ARRAY_ACCESS_SUPPORTED = 1
  in
  [
    CUDA_ARRAY_ACCESS_SUPPORTED cuda_array_access_supported;
    NATIVE_ATOMIC_SUPPORTED native_atomic_supported;
    ACCESS_SUPPORTED access_supported;
    PERFORMANCE_RANK performance_rank;
  ]

(** [can_access_peer ~dst ~src] asks the driver ("cuDeviceCanAccessPeer") about the device pair,
    passing [dst] as the first device argument and [src] as the second, and returns [true] when
    the reported flag is non-zero. The driver status is passed to [check]. *)
let can_access_peer ~dst ~src =
  let open Ctypes in
  (* Out-parameter cell, initialised to 0. *)
  let flag = allocate int 0 in
  let status = Cuda.cu_device_can_access_peer flag dst src in
  check "cu_device_can_access_peer" status;
  !@flag <> 0

let computemode_of_cu = function
| CU_COMPUTEMODE_DEFAULT -> DEFAULT
| CU_COMPUTEMODE_PROHIBITED -> PROHIBITED
Expand Down Expand Up @@ -1048,9 +1080,7 @@ module Context = struct
let open Ctypes in
let ctx = allocate_n cu_context ~count:1 in
let open Unsigned.UInt in
let flags =
List.fold_left (fun flags flag -> Infix.(flags lor uint_of_flag flag)) zero flags
in
let flags = List.fold_left (fun flags flag -> Infix.(flags lor uint_of_flag flag)) zero flags in
check "cu_ctx_create" @@ Cuda.cu_ctx_create ctx flags device;
!@ctx

Expand Down Expand Up @@ -1254,38 +1284,6 @@ module Deviceptr = struct
check "cu_memcpy_D_to_D" @@ Cuda.cu_memcpy_D_to_D dst src
@@ Unsigned.Size_t.of_int size_in_bytes

(** One queried peer-to-peer attribute together with the value reported for it. Each constructor
    corresponds to one [CU_DEVICE_P2P_ATTRIBUTE_*] query issued by [get_p2p_attributes]. *)
type p2p_attribute =
| PERFORMANCE_RANK of int
| ACCESS_SUPPORTED of bool
| NATIVE_ATOMIC_SUPPORTED of bool
| CUDA_ARRAY_ACCESS_SUPPORTED of bool

(** [get_p2p_attributes ~dst ~src] unwraps the two [Deviceptr] arguments and issues four
    "cu_device_get_p2p_attribute" queries, one per {!p2p_attribute} constructor. Results are
    prepended to an accumulator, so the returned list is in reverse query order
    ([CUDA_ARRAY_ACCESS_SUPPORTED] first, [PERFORMANCE_RANK] last). Each status goes to [check]. *)
let get_p2p_attributes ~dst:(Deviceptr dst) ~src:(Deviceptr src) =
let open Ctypes in
let result = ref [] in
(* A single int cell is reused as the out-parameter of every query. *)
let value = allocate int 0 in
(* This variant passes the attribute selector as the LAST argument of the binding. *)
check "cu_device_get_p2p_attribute"
@@ Cuda.cu_device_get_p2p_attribute value dst src CU_DEVICE_P2P_ATTRIBUTE_PERFORMANCE_RANK;
result := PERFORMANCE_RANK !@value :: !result;
check "cu_device_get_p2p_attribute"
@@ Cuda.cu_device_get_p2p_attribute value dst src CU_DEVICE_P2P_ATTRIBUTE_ACCESS_SUPPORTED;
(* Boolean attributes are true only when the reported value is exactly 1. *)
result := ACCESS_SUPPORTED (!@value = 1) :: !result;
check "cu_device_get_p2p_attribute"
@@ Cuda.cu_device_get_p2p_attribute value dst src
CU_DEVICE_P2P_ATTRIBUTE_NATIVE_ATOMIC_SUPPORTED;
result := NATIVE_ATOMIC_SUPPORTED (!@value = 1) :: !result;
check "cu_device_get_p2p_attribute"
@@ Cuda.cu_device_get_p2p_attribute value dst src
CU_DEVICE_P2P_ATTRIBUTE_CUDA_ARRAY_ACCESS_SUPPORTED;
result := CUDA_ARRAY_ACCESS_SUPPORTED (!@value = 1) :: !result;
!result

(** [can_access_peer ~dst ~src] unwraps the two [Deviceptr] arguments, calls
    "cu_device_can_access_peer" with [dst] then [src], passes the status to [check], and returns
    [true] when the reported flag is non-zero. *)
let can_access_peer ~dst:(Deviceptr dst) ~src:(Deviceptr src) =
let open Ctypes in
(* Out-parameter cell, initialised to 0. *)
let can_access_peer = allocate int 0 in
check "cu_device_can_access_peer" @@ Cuda.cu_device_can_access_peer can_access_peer dst src;
!@can_access_peer <> 0

(** Provide either both [kind] and [length], or just [size_in_bytes]. *)
let memcpy_peer ?kind ?length ?size_in_bytes ~dst:(Deviceptr dst) ~dst_ctx ~src:(Deviceptr src)
~src_ctx () =
Expand Down
38 changes: 19 additions & 19 deletions cudajit.mli
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,25 @@ module Device : sig
{{:https://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__PRIMARY__CTX.html#group__CUDA__PRIMARY__CTX_1g5d38802e8600340283958a117466ce12}
cuDevicePrimaryCtxReset}. *)

(** See
{{:https://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__TYPES.html#group__CUDA__TYPES_1g578d7cf687ce20f7e99468e8c14e22de}
CUdevice_P2PAttribute}. *)
type p2p_attribute =
| PERFORMANCE_RANK of int
| ACCESS_SUPPORTED of bool
| NATIVE_ATOMIC_SUPPORTED of bool
| CUDA_ARRAY_ACCESS_SUPPORTED of bool

val get_p2p_attributes : dst:t -> src:t -> p2p_attribute list
(** [get_p2p_attributes ~dst ~src] queries every {!p2p_attribute} for the given pair of devices and
returns a list with one entry per attribute. See
{{:https://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__PEER__ACCESS.html#group__CUDA__PEER__ACCESS_1g4c55c60508f8eba4546b51f2ee545393}
cuDeviceGetP2PAttribute}. *)

val can_access_peer : dst:t -> src:t -> bool
(** [can_access_peer ~dst ~src] is [true] when the driver reports a non-zero peer-access flag for
the given pair of devices. See
{{:https://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__PEER__ACCESS.html#group__CUDA__PEER__ACCESS_1g496bdaae1f632ebfb695b99d2c40f19e}
cuDeviceCanAccessPeer}. *)

(** See
{{:https://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__TYPES.html#group__CUDA__TYPES_1g637aab2eadb52e1c1c048b8bad9592d1}
CUcomputemode}. *)
Expand Down Expand Up @@ -424,25 +443,6 @@ module Deviceptr : sig
{{:https://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__MEM.html#group__CUDA__MEM_1g1725774abf8b51b91945f3336b778c8b}
cuMemcpyDtoD}. *)

(** See
{{:https://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__TYPES.html#group__CUDA__TYPES_1g578d7cf687ce20f7e99468e8c14e22de}
CUdevice_P2PAttribute}. *)
type p2p_attribute =
| PERFORMANCE_RANK of int
| ACCESS_SUPPORTED of bool
| NATIVE_ATOMIC_SUPPORTED of bool
| CUDA_ARRAY_ACCESS_SUPPORTED of bool

val get_p2p_attributes : dst:t -> src:t -> p2p_attribute list
(** [get_p2p_attributes ~dst ~src] queries every {!p2p_attribute} for the given pair of device
pointers and returns a list with one entry per attribute. See
{{:https://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__PEER__ACCESS.html#group__CUDA__PEER__ACCESS_1g4c55c60508f8eba4546b51f2ee545393}
cuDeviceGetP2PAttribute}. *)

val can_access_peer : dst:t -> src:t -> bool
(** [can_access_peer ~dst ~src] is [true] when the driver reports a non-zero peer-access flag for
the given pair of device pointers. See
{{:https://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__PEER__ACCESS.html#group__CUDA__PEER__ACCESS_1g496bdaae1f632ebfb695b99d2c40f19e}
cuDeviceCanAccessPeer}. *)

val memcpy_peer :
?kind:('a, 'b) Bigarray.kind ->
?length:int ->
Expand Down

0 comments on commit 6002725

Please sign in to comment.