Skip to content

Commit

Permalink
[libc] Round up time for GPU nanosleep implementation (#81630)
Browse files Browse the repository at this point in the history
Summary:
The GPU `nanosleep` tests would occasionally fail. The cause was the
integer division used to convert the requested duration into clock ticks:
the division truncates, so the computed end time could fall slightly short
of the requested duration. When that happened, the final elapsed-time check
saw that too little time had passed and `nanosleep` returned `-1`. This
patch rounds the tick count up to the next whole tick so that we always
sleep for at least the requested duration.
  • Loading branch information
jhuber6 authored Feb 13, 2024
1 parent d79c3c5 commit 1dacfd1
Showing 1 changed file with 8 additions and 7 deletions.
15 changes: 8 additions & 7 deletions libc/src/time/gpu/nanosleep.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,18 +12,19 @@

namespace LIBC_NAMESPACE {

constexpr uint64_t TICKS_PER_NS = 1000000000UL;
constexpr uint64_t TICKS_PER_SEC = 1000000000UL;

LLVM_LIBC_FUNCTION(int, nanosleep,
(const struct timespec *req, struct timespec *rem)) {
if (!GPU_CLOCKS_PER_SEC || !req)
return -1;

uint64_t nsecs = req->tv_nsec + req->tv_sec * TICKS_PER_NS;
uint64_t nsecs = req->tv_nsec + req->tv_sec * TICKS_PER_SEC;
uint64_t tick_rate = TICKS_PER_SEC / GPU_CLOCKS_PER_SEC;

uint64_t start = gpu::fixed_frequency_clock();
#if defined(LIBC_TARGET_ARCH_IS_NVPTX) && __CUDA_ARCH__ >= 700
uint64_t end = start + nsecs / (TICKS_PER_NS / GPU_CLOCKS_PER_SEC);
uint64_t end = start + (nsecs + tick_rate - 1) / tick_rate;
uint64_t cur = gpu::fixed_frequency_clock();
// The NVPTX architecture supports sleeping and guarantees the actual time
// slept will be somewhere between zero and twice the requested amount. Here
Expand All @@ -34,7 +35,7 @@ LLVM_LIBC_FUNCTION(int, nanosleep,
nsecs -= nsecs > cur - start ? cur - start : 0;
}
#elif defined(LIBC_TARGET_ARCH_IS_AMDGPU)
uint64_t end = start + nsecs / (TICKS_PER_NS / GPU_CLOCKS_PER_SEC);
uint64_t end = start + (nsecs + tick_rate - 1) / tick_rate;
uint64_t cur = gpu::fixed_frequency_clock();
// The AMDGPU architecture does not provide a sleep implementation with a
// known delay so we simply repeatedly sleep with a large value of ~960 clock
Expand All @@ -56,11 +57,11 @@ LLVM_LIBC_FUNCTION(int, nanosleep,

// Check to make sure we slept for at least the desired duration and set the
// remaining time if not.
uint64_t elapsed = (stop - start) * (TICKS_PER_NS / GPU_CLOCKS_PER_SEC);
uint64_t elapsed = (stop - start) * tick_rate;
if (elapsed < nsecs) {
if (rem) {
rem->tv_sec = (nsecs - elapsed) / TICKS_PER_NS;
rem->tv_nsec = (nsecs - elapsed) % TICKS_PER_NS;
rem->tv_sec = (nsecs - elapsed) / TICKS_PER_SEC;
rem->tv_nsec = (nsecs - elapsed) % TICKS_PER_SEC;
}
return -1;
}
Expand Down

0 comments on commit 1dacfd1

Please sign in to comment.