intel · fwyzard · Apr 24, 2020 · Apr 23, 2020 · Apr 30, 2020 · Apr 27, 2020
@@ -1298,7 +1298,8 @@ using pi_usm_migration_flags = _pi_usm_migration_flags;
 /// \param context is the pi_context
 /// \param pi_usm_mem_properties are optional allocation properties
 /// \param size_t is the size of the allocation
-/// \param alignment is the desired alignment of the allocation
+/// \param alignment is the desired alignment of the allocation. 0 indicates no
+///        requirements, and uses the backend default alignment.
 pi_result piextUSMHostAlloc(void **result_ptr, pi_context context,
                             pi_usm_mem_properties *properties, size_t size,
                             pi_uint32 alignment);
@@ -1310,7 +1311,8 @@ pi_result piextUSMHostAlloc(void **result_ptr, pi_context context,
 /// \param device is the device the memory will be allocated on
 /// \param pi_usm_mem_properties are optional allocation properties
 /// \param size_t is the size of the allocation
-/// \param alignment is the desired alignment of the allocation
+/// \param alignment is the desired alignment of the allocation. 0 indicates no
+///        requirements, and uses the backend default alignment.
 pi_result piextUSMDeviceAlloc(void **result_ptr, pi_context context,
                               pi_device device,
                               pi_usm_mem_properties *properties, size_t size,
@@ -1323,7 +1325,8 @@ pi_result piextUSMDeviceAlloc(void **result_ptr, pi_context context,
 /// \param device is the device the memory will be allocated on
 /// \param pi_usm_mem_properties are optional allocation properties
 /// \param size_t is the size of the allocation
-/// \param alignment is the desired alignment of the allocation
+/// \param alignment is the desired alignment of the allocation. 0 indicates no
+///        requirements, and uses the backend default alignment.
 pi_result piextUSMSharedAlloc(void **result_ptr, pi_context context,
                               pi_device device,
                               pi_usm_mem_properties *properties, size_t size,
@@ -1340,8 +1343,7 @@ pi_result piextUSMFree(pi_context context, void *ptr);
 /// \param queue is the queue to submit to
 /// \param ptr is the ptr to memset
 /// \param value is value to set.  It is interpreted as an 8-bit value and the
-/// upper
-///        24 bits are ignored
+///        upper 24 bits are ignored
 /// \param count is the size in bytes to memset
 /// \param num_events_in_waitlist is the number of events to wait on
 /// \param events_waitlist is an array of events to wait on

@@ -3396,17 +3396,29 @@ pi_result cuda_piEnqueueMemUnmap(pi_queue command_queue, pi_mem memobj,
 pi_result cuda_piextUSMHostAlloc(void **result_ptr, pi_context context,
                                  pi_usm_mem_properties *properties, size_t size,
                                  pi_uint32 alignment) {
+  // from empirical testing with CUDA 10.2 on a Tesla K40
+  static constexpr pi_uint32 max_alignment = 0x200;
+
+  // enforce a valid pointer to the allocated memory
   assert(result_ptr != nullptr);
+  // check that the context is valid
   assert(context != nullptr);
+  // check that the property list is empty
   assert(properties == nullptr);
+  // check that the alignment is not larger than max_alignment, and is either 0
+  // or a power of 2
+  assert(alignment <= max_alignment && (alignment & (alignment - 1)) == 0);
+
   pi_result result = PI_SUCCESS;
   try {
     ScopedContext active(context);
     result = PI_CHECK_ERROR(cuMemAllocHost(result_ptr, size));
   } catch (pi_result error) {
     result = error;
   }
-  assert(reinterpret_cast<std::uintptr_t>(*result_ptr) % alignment == 0);
+  // check that the result is suitably aligned
+  assert((alignment == 0) ||
+         (reinterpret_cast<std::uintptr_t>(*result_ptr) % alignment == 0));
   return result;
 }
 
@@ -3416,18 +3428,31 @@ pi_result cuda_piextUSMDeviceAlloc(void **result_ptr, pi_context context,
                                    pi_device device,
                                    pi_usm_mem_properties *properties,
                                    size_t size, pi_uint32 alignment) {
+  // from empirical testing with CUDA 10.2 on a Tesla K40
+  static constexpr pi_uint32 max_alignment = 0x200;
+
+  // enforce a valid pointer to the allocated memory
   assert(result_ptr != nullptr);
+  // check that the context is valid
   assert(context != nullptr);
+  // check that the device is valid
   assert(device != nullptr);
+  // check that the property list is empty
   assert(properties == nullptr);
+  // check that the alignment is not larger than max_alignment, and is either 0
+  // or a power of 2
+  assert(alignment <= max_alignment && (alignment & (alignment - 1)) == 0);
+
   pi_result result = PI_SUCCESS;
   try {
     ScopedContext active(context);
     result = PI_CHECK_ERROR(cuMemAlloc((CUdeviceptr *)result_ptr, size));
   } catch (pi_result error) {
     result = error;
   }
-  assert(reinterpret_cast<std::uintptr_t>(*result_ptr) % alignment == 0);
+  // check that the result is suitably aligned
+  assert((alignment == 0) ||
+         (reinterpret_cast<std::uintptr_t>(*result_ptr) % alignment == 0));
   return result;
 }
 
@@ -3437,10 +3462,21 @@ pi_result cuda_piextUSMSharedAlloc(void **result_ptr, pi_context context,
                                    pi_device device,
                                    pi_usm_mem_properties *properties,
                                    size_t size, pi_uint32 alignment) {
+  // from empirical testing with CUDA 10.2 on a Tesla K40
+  static constexpr pi_uint32 max_alignment = 0x200;
+
+  // enforce a valid pointer to the allocated memory
   assert(result_ptr != nullptr);
+  // check that the context is valid
   assert(context != nullptr);
+  // check that the device is valid
   assert(device != nullptr);
+  // check that the property list is empty
   assert(properties == nullptr);
+  // check that the alignment is not larger than max_alignment, and is either 0
+  // or a power of 2
+  assert(alignment <= max_alignment && (alignment & (alignment - 1)) == 0);
+
   pi_result result = PI_SUCCESS;
   try {
     ScopedContext active(context);
@@ -3449,7 +3485,9 @@ pi_result cuda_piextUSMSharedAlloc(void **result_ptr, pi_context context,
   } catch (pi_result error) {
     result = error;
   }
-  assert(reinterpret_cast<std::uintptr_t>(*result_ptr) % alignment == 0);
+  // check that the result is suitably aligned
+  assert((alignment == 0) ||
+         (reinterpret_cast<std::uintptr_t>(*result_ptr) % alignment == 0));
   return result;
 }
 
@@ -3481,8 +3519,12 @@ pi_result cuda_piextUSMEnqueueMemset(pi_queue queue, void *ptr, pi_int32 value,
                                      pi_uint32 num_events_in_waitlist,
                                      const pi_event *events_waitlist,
                                      pi_event *event) {
+  // enforce that the queue is valid
   assert(queue != nullptr);
-  assert(ptr != nullptr);
+  // check that the pointer is valid
+  if (ptr == nullptr) {
+    return PI_INVALID_VALUE;
+  }
   CUstream cuStream = queue->get();
   pi_result result = PI_SUCCESS;
   std::unique_ptr<_pi_event> event_ptr{nullptr};
@@ -3514,9 +3556,12 @@ pi_result cuda_piextUSMEnqueueMemcpy(pi_queue queue, pi_bool blocking,
                                      pi_uint32 num_events_in_waitlist,
                                      const pi_event *events_waitlist,
                                      pi_event *event) {
+  // enforce that the queue is valid
   assert(queue != nullptr);
-  assert(dst_ptr != nullptr);
-  assert(src_ptr != nullptr);
+  // check that the source and destination pointers are valid
+  if (dst_ptr == nullptr || src_ptr == nullptr) {
+    return PI_INVALID_VALUE;
+  }
   CUstream cuStream = queue->get();
   pi_result result = PI_SUCCESS;
   std::unique_ptr<_pi_event> event_ptr{nullptr};
@@ -3553,8 +3598,12 @@ pi_result cuda_piextUSMEnqueuePrefetch(pi_queue queue, const void *ptr,
                                        pi_uint32 num_events_in_waitlist,
                                        const pi_event *events_waitlist,
                                        pi_event *event) {
+  // enforce that the queue is valid
   assert(queue != nullptr);
-  assert(ptr != nullptr);
+  // check that the pointer is valid
+  if (ptr == nullptr) {
+    return PI_INVALID_VALUE;
+  }
   CUstream cuStream = queue->get();
   pi_result result = PI_SUCCESS;
   std::unique_ptr<_pi_event> event_ptr{nullptr};
@@ -3589,8 +3638,12 @@ pi_result cuda_piextUSMEnqueuePrefetch(pi_queue queue, const void *ptr,
 pi_result cuda_piextUSMEnqueueMemAdvise(pi_queue queue, const void *ptr,
                                         size_t length, int advice,
                                         pi_event *event) {
+  // enforce that the queue is valid
   assert(queue != nullptr);
-  assert(ptr != nullptr);
+  // check that the pointer is valid
+  if (ptr == nullptr) {
+    return PI_INVALID_VALUE;
+  }
   // TODO implement a mapping to cuMemAdvise once the expected behaviour
   // of piextUSMEnqueueMemAdvise is detailed in the USM extension
   return cuda_piEnqueueEventsWait(queue, 0, nullptr, event);

@@ -1,4 +1,3 @@
-// XFAIL: cuda
 // piextUSM*Alloc functions for CUDA are not behaving as described in
 // https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/USM.adoc
 // https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/cl_intel_unified_shared_memory.asciidoc

@@ -1,4 +1,3 @@
-// XFAIL: cuda
 // piextUSM*Alloc functions for CUDA are not behaving as described in
 // https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/USM.adoc
 // https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/cl_intel_unified_shared_memory.asciidoc

@@ -1,4 +1,3 @@
-// XFAIL: cuda
 // piextUSM*Alloc functions for CUDA are not behaving as described in
 // https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/USM.adoc
 // https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/cl_intel_unified_shared_memory.asciidoc

@@ -1,5 +1,4 @@
 // UNSUPPORTED: windows
-// XFAIL: cuda
 // piextUSM*Alloc functions for CUDA are not behaving as described in
 // https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/USM.adoc
 // https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/cl_intel_unified_shared_memory.asciidoc

@@ -1,4 +1,3 @@
-// XFAIL: cuda
 // piextUSM*Alloc functions for CUDA are not behaving as described in
 // https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/USM.adoc
 // https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/cl_intel_unified_shared_memory.asciidoc

@@ -1,4 +1,3 @@
-// XFAIL: cuda
 // piextUSM*Alloc functions for CUDA are not behaving as described in
 // https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/USM.adoc
 // https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/cl_intel_unified_shared_memory.asciidoc

@@ -1,4 +1,3 @@
-// XFAIL: cuda
 // piextUSM*Alloc functions for CUDA are not behaving as described in
 // https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/USM.adoc
 // https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/cl_intel_unified_shared_memory.asciidoc

@@ -3,7 +3,6 @@
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 
 // REQUIRES: cpu
-// XFAIL: cuda
 // TODO: ptxas fatal   : Unresolved extern function '_Z20__spirv_ocl_lgamma_rfPi'
 
 #include <CL/sycl.hpp>

@@ -1,4 +1,3 @@
-// XFAIL: cuda
 // SYCL runtime and piextUSM*Alloc functions for CUDA not behaving as described
 // in: https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/USM.adoc
 //

@@ -5,7 +5,6 @@
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
-// XFAIL: cuda
 // piextUSM*Alloc functions for CUDA are not behaving as described in
 // https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/USM.adoc
 // https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/cl_intel_unified_shared_memory.asciidoc

@@ -1,4 +1,3 @@
-// XFAIL: cuda
 // piextUSM*Alloc functions for CUDA are not behaving as described in
 // https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/USM.adoc
 // https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/cl_intel_unified_shared_memory.asciidoc

@@ -1,4 +1,3 @@
-// XFAIL: cuda
 // piextUSM*Alloc functions for CUDA are not behaving as described in
 // https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/USM.adoc
 // https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/cl_intel_unified_shared_memory.asciidoc

@@ -1,4 +1,3 @@
-// XFAIL: cuda
 // piextUSM*Alloc functions for CUDA are not behaving as described in
 // https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/USM.adoc
 // https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/cl_intel_unified_shared_memory.asciidoc

@@ -1,4 +1,3 @@
-// XFAIL: cuda
 // piextUSM*Alloc functions for CUDA are not behaving as described in
 // https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/USM.adoc
 // https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/cl_intel_unified_shared_memory.asciidoc

@@ -1,4 +1,3 @@
-// XFAIL: cuda
 // piextUSM*Alloc functions for CUDA are not behaving as described in
 // https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/USM.adoc
 // https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/cl_intel_unified_shared_memory.asciidoc

@@ -1,4 +1,3 @@
-// XFAIL: cuda
 // piextUSM*Alloc functions for CUDA are not behaving as described in
 // https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/USM.adoc
 // https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/cl_intel_unified_shared_memory.asciidoc

@@ -1,4 +1,3 @@
-// XFAIL: cuda
 // piextUSM*Alloc functions for CUDA are not behaving as described in
 // https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/USM.adoc
 // https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/cl_intel_unified_shared_memory.asciidoc