Skip to content
This repository was archived by the owner on May 3, 2024. It is now read-only.

Commit

Permalink
Merge pull request #9 from benkno/master
Browse files Browse the repository at this point in the history
Nvidia busywait fix
  • Loading branch information
RadiantBlockchain authored Sep 12, 2022
2 parents e07693b + 1eef3cc commit 6b08b23
Show file tree
Hide file tree
Showing 7 changed files with 133 additions and 25 deletions.
8 changes: 4 additions & 4 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@ dnl * any later version. See COPYING for more details.

##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--##
##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--##
m4_define([v_maj], [5])
m4_define([v_min], [5])
m4_define([v_mic], [0])
m4_define([v_maj], [1])
m4_define([v_min], [0])
m4_define([v_mic], [3])
##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--##
m4_define([v_ver], [v_maj.v_min.v_mic])
m4_define([lt_rev], m4_eval(v_maj + v_min))
Expand All @@ -24,7 +24,7 @@ m4_define([lt_age], v_min)
##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--##
##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--##

AC_INIT([bfgminer], [v_ver], [luke-jr+bfgminer@utopios.org])
AC_INIT([rad-bfgminer], [v_ver], [])

AC_PREREQ([2.59c])
AC_CANONICAL_SYSTEM
Expand Down
100 changes: 98 additions & 2 deletions driver-opencl.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <time.h>

#include <sys/types.h>

Expand All @@ -47,6 +48,33 @@
#include "adl.h"
#include "util.h"

#ifdef WIN32
uint64_t get_nano(void)
{
static uint64_t ticks_per_sec;
LARGE_INTEGER t;

if (!ticks_per_sec) {
QueryPerformanceFrequency(&t);
ticks_per_sec = t.QuadPart;
}

QueryPerformanceCounter(&t);

return t.QuadPart * 1000000000ULL / ticks_per_sec;
}
#else
uint64_t get_nano(void)
{
struct timespec t;

clock_gettime(CLOCK_MONOTONIC, &t);

return (uint64_t)t.tv_sec * 1000000000ULL + (uint64_t)t.tv_nsec;
}
#endif


/* TODO: cleanup externals ********************/


Expand Down Expand Up @@ -166,6 +194,9 @@ CL_API_ENTRY cl_int CL_API_CALL
const void * /* arg_value */) CL_API_SUFFIX__VERSION_1_0;

/* Flush and Finish APIs */
CL_API_ENTRY cl_int CL_API_CALL
(*clFlush)(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0;

CL_API_ENTRY cl_int CL_API_CALL
(*clFinish)(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0;

Expand Down Expand Up @@ -203,6 +234,21 @@ CL_API_ENTRY cl_int CL_API_CALL
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;

/* Event Object APIs */
CL_API_ENTRY cl_int CL_API_CALL
(*clWaitForEvents)(cl_uint /* num_events */,
const cl_event * /* event_list */) CL_API_SUFFIX__VERSION_1_0;

CL_API_ENTRY cl_int CL_API_CALL
(*clGetEventInfo)(cl_event /* event */,
cl_event_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;

CL_API_ENTRY cl_int CL_API_CALL
(*clReleaseEvent)(cl_event /* event */) CL_API_SUFFIX__VERSION_1_0;

#ifdef WIN32
#define dlsym (void*)GetProcAddress
#define dlclose FreeLibrary
Expand Down Expand Up @@ -249,10 +295,14 @@ load_opencl_symbols() {
LOAD_OCL_SYM(clCreateKernel);
LOAD_OCL_SYM(clReleaseKernel);
LOAD_OCL_SYM(clSetKernelArg);
LOAD_OCL_SYM(clFlush);
LOAD_OCL_SYM(clFinish);
LOAD_OCL_SYM(clEnqueueReadBuffer);
LOAD_OCL_SYM(clEnqueueWriteBuffer);
LOAD_OCL_SYM(clEnqueueNDRangeKernel);
LOAD_OCL_SYM(clGetEventInfo);
LOAD_OCL_SYM(clWaitForEvents);
LOAD_OCL_SYM(clReleaseEvent);

return true;
}
Expand All @@ -277,6 +327,7 @@ extern char *opt_kernel_path;
extern int gpur_thr_id;
extern bool opt_noadl;
extern bool have_opencl;
extern bool opt_no_sleep;
static _clState *clStates[MAX_GPUDEVICES];
static struct opencl_kernel_interface kernel_interfaces[];

Expand Down Expand Up @@ -1896,21 +1947,66 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
return -1;
}

// On Windows clEnqueueNDRangeKernel is blocking without this
clFinish(clState->commandQueue);

cl_event kernelEvent;
if (kinfo->goffset)
{
size_t global_work_offset[1];

global_work_offset[0] = work->blk.nonce;
status = clEnqueueNDRangeKernel(clState->commandQueue, *kernel, 1, global_work_offset,
globalThreads, localThreads, 0, NULL, NULL);
globalThreads, localThreads, 0, NULL, &kernelEvent);
} else
status = clEnqueueNDRangeKernel(clState->commandQueue, *kernel, 1, NULL,
globalThreads, localThreads, 0, NULL, NULL);
globalThreads, localThreads, 0, NULL, &kernelEvent);
if (unlikely(status != CL_SUCCESS)) {
applog(LOG_ERR, "Error %d: Enqueueing kernel onto command queue. (clEnqueueNDRangeKernel)", status);
return -1;
}

clFlush(clState->commandQueue);

// Sleep to avoid Nvidia busywait
double us = gpu->kernel_wait_us;
uint64_t startnano = get_nano();
if (!opt_no_sleep) {
cl_int kernelStatus;
clGetEventInfo(kernelEvent, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &kernelStatus, NULL);
while(us >= 200 && kernelStatus != CL_COMPLETE) {
us *= 0.5;
usleep(us);
clGetEventInfo(kernelEvent, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &kernelStatus, NULL);
}
}

clWaitForEvents(1, &kernelEvent);
uint64_t delta = get_nano() - startnano;
unsigned int timerIndex = gpu->kernel_wait_index;
gpu->kernel_wait_times[timerIndex] = delta;

if (timerIndex == gpu->kernel_wait_min) {
// Previous minimum is being removed, find the new minimum
uint64_t min = 0;
for (unsigned int i = 1; i < 50; i++) {
if (gpu->kernel_wait_times[i] < gpu->kernel_wait_times[min]) {
min = i;
}
}
gpu->kernel_wait_min = min;
gpu->kernel_wait_us = gpu->kernel_wait_times[min] >> 10;
} else if (delta < gpu->kernel_wait_times[gpu->kernel_wait_min]) {
gpu->kernel_wait_min = timerIndex;
gpu->kernel_wait_us = delta >> 10;
}
gpu->kernel_wait_index++;
if (gpu->kernel_wait_index == 50) {
gpu->kernel_wait_index = 0;
}

clReleaseEvent(kernelEvent);

status = clEnqueueReadBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0,
buffersize, thrdata->res, 0, NULL, NULL);
if (unlikely(status != CL_SUCCESS)) {
Expand Down
2 changes: 1 addition & 1 deletion gen-version.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ if [ -e .git ]; then
# Some versions of git require `git diff` to scan and update dirty-or-not status
git diff >/dev/null 2>/dev/null

gitdesc=$(git describe)
gitdesc=$(git describe 2>/dev/null)
fi
if [ -z "$gitdesc" ]; then
current=$(sed 's/^\#define[[:space:]]\+BFG_GIT_DESCRIBE[[:space:]]\+\"\(.*\)\"$/\1/;t;d' version.h)
Expand Down
26 changes: 11 additions & 15 deletions make-release
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ sw="$1"; shift || true

test -n "$DEBUG_RELEASE" || DEBUG_RELEASE=1

builds=(win32 win64)
builds=(win64)

win32_machine='i686-pc-mingw32'
win32_CFLAGS='-march=i686'
Expand All @@ -41,7 +41,7 @@ sed 's/^\[submodule "\(.*\)"\]$/\1/;t;d' .gitmodules |
done
git submodule update --init
{
git archive --prefix "$sw"/ --format tar "$tag"
git archive --prefix "$sw"/ --format tar TMP
git submodule --quiet foreach --recursive 'test x$name = xccan || git archive --prefix "'"$sw"'/$path/" --format tar HEAD'
(
cd ccan-upstream
Expand All @@ -65,7 +65,7 @@ docs='
AUTHORS
COPYING
NEWS
README
README.md
README.ASIC
README.FPGA
README.GPU
Expand All @@ -92,13 +92,6 @@ for build in "${builds[@]}"; do
CFLAGS="${CFLAGS} -Wall" \
--disable-cpumining \
--enable-opencl \
--enable-adl \
--enable-bitforce \
--enable-icarus \
--enable-modminer \
--enable-ztex \
--enable-keccak \
--enable-scrypt \
--without-system-libbase58 \
--host="$machine"
make $MAKEOPTS
Expand All @@ -112,7 +105,7 @@ for build in "${builds[@]}"; do
*.exe \
libblkmaker/.libs/*.dll \
libbase58/.libs/*.dll \
opencl \
opencl/rad.cl \
example.conf \
windows-build.txt \
miner.php \
Expand Down Expand Up @@ -165,6 +158,8 @@ for build in "${builds[@]}"; do
for my $check_libdir (
"/usr/$machine/usr/lib",
"/usr/$machine/usr/bin",
"/usr/$machine/lib",
"/usr/$machine/bin",
) {
$dll = ciexist "$check_libdir/${dlllc}";
if ($dll) {
Expand All @@ -174,11 +169,12 @@ for build in "${builds[@]}"; do
}
if (not defined $libdir) {
return if $opt;
die "Cannot find $dlllc\n"
printf("Cannot find $dlllc\n")
} else {
system("cp -v -L \"$libdir/$dll\" \"$PKGDIR\"") && die "Copy $dll failed\n";
system("\"${machine}-strip\" \"$PKGDIR/$dll\"") && die "Strip $dll failed\n";
push @todo, $dll;
}
system("cp -v -L \"$libdir/$dll\" \"$PKGDIR\"") && die "Copy $dll failed\n";
system("\"${machine}-strip\" \"$PKGDIR/$dll\"") && die "Strip $dll failed\n";
push @todo, $dll;
$have{$dlllc} = undef;
1
}
Expand Down
15 changes: 13 additions & 2 deletions miner.c
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,7 @@ bool opt_disable_client_reconnect = false;
static bool no_work;
bool opt_worktime;
bool opt_weighed_stats;
bool opt_no_sleep = false;

char *opt_kernel_path;
char *cgminer_path;
Expand Down Expand Up @@ -2829,6 +2830,9 @@ static struct opt_table opt_config_table[] = {
"Display extra work time debug information"),
OPT_WITH_ARG("--pools",
opt_set_bool, NULL, NULL, opt_hidden),
OPT_WITHOUT_ARG("--no-sleep",
opt_set_bool, &opt_no_sleep,
"Don't sleep while waiting for OpenCL kernel"),
OPT_ENDTABLE
};

Expand Down Expand Up @@ -6176,7 +6180,9 @@ static void disable_curses(void)
delwin(logwin);
delwin(statuswin);
delwin(mainwin);
//endwin();
#ifndef WIN32
endwin();
#endif
#ifdef WIN32
// Move the cursor to after curses output.
HANDLE hout = GetStdHandle(STD_OUTPUT_HANDLE);
Expand Down Expand Up @@ -8539,7 +8545,8 @@ static void set_options(void)
immedok(logwin, true);
loginput_mode(8);
retry:
wlogprint("\n[L]ongpoll: %s\n", want_longpoll ? "On" : "Off");
wlogprint("\n[N]o sleep: %s\n", opt_no_sleep ? "On" : "Off");
wlogprint("[L]ongpoll: %s\n", want_longpoll ? "On" : "Off");
wlogprint("[Q]ueue: %d\n[S]cantime: %d\n[E]xpiry: %d\n[R]etries: %d\n"
"[W]rite config file\n[B]FGMiner restart\n",
opt_queue, opt_scantime, opt_expiry, opt_retries);
Expand All @@ -8555,6 +8562,10 @@ static void set_options(void)
}
opt_queue = selected;
goto retry;
} else if (!strncasecmp(&input, "n", 1)) {
opt_no_sleep = !opt_no_sleep;
applog(LOG_WARNING, "No sleep %s", opt_no_sleep ? "enabled" : "disabled");
goto retry;
} else if (!strncasecmp(&input, "l", 1)) {
if (want_longpoll)
stop_longpoll();
Expand Down
5 changes: 5 additions & 0 deletions miner.h
Original file line number Diff line number Diff line change
Expand Up @@ -535,6 +535,11 @@ struct cgpu_info {
pthread_mutex_t device_mutex;
pthread_cond_t device_cond;

uint64_t kernel_wait_times[50];
double kernel_wait_us;
unsigned int kernel_wait_index;
unsigned int kernel_wait_min;

enum dev_enable deven;
bool already_set_defaults;
int accepted;
Expand Down
2 changes: 1 addition & 1 deletion version.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,4 @@

const char * const bfgminer_name_space_ver = PACKAGE " " VERSION;
const char * const bfgminer_name_slash_ver = PACKAGE "/" VERSION;
const char * const bfgminer_ver = "rad-bfgminer";
const char * const bfgminer_ver = VERSION;

0 comments on commit 6b08b23

Please sign in to comment.