Skip to content

Commit

Permalink
bench : pass memcpy threads from cli
Browse files: browse the repository at this point in the history
  • Loading branch information
ggerganov committed Nov 21, 2023
1 parent 9befab5 commit 146169e
Showing 1 changed file with 8 additions and 8 deletions.
16 changes: 8 additions & 8 deletions whisper.cpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -6138,7 +6138,7 @@ WHISPER_API const char * whisper_bench_memcpy_str(int n_threads) {

// multi-thread

for (uint32_t n_threads = 1; n_threads <= std::thread::hardware_concurrency(); n_threads++) {
for (uint32_t k = 1; k <= n_threads; k++) {
char * src = (char *) malloc(size);
char * dst = (char *) malloc(size);

Expand All @@ -6149,8 +6149,8 @@ WHISPER_API const char * whisper_bench_memcpy_str(int n_threads) {
double tsum = 0.0;

auto helper = [&](int th) {
const int64_t i0 = (th + 0)*size/n_threads;
const int64_t i1 = (th + 1)*size/n_threads;
const int64_t i0 = (th + 0)*size/k;
const int64_t i1 = (th + 1)*size/k;

for (size_t i = 0; i < n; i++) {
memcpy(dst + i0, src + i0, i1 - i0);
Expand All @@ -6161,22 +6161,22 @@ WHISPER_API const char * whisper_bench_memcpy_str(int n_threads) {

const int64_t t0 = ggml_time_us();

std::vector<std::thread> threads(n_threads - 1);
for (uint32_t th = 0; th < n_threads - 1; ++th) {
std::vector<std::thread> threads(k - 1);
for (uint32_t th = 0; th < k - 1; ++th) {
threads[th] = std::thread(helper, th);
}

helper(n_threads - 1);
helper(k - 1);

for (uint32_t th = 0; th < n_threads - 1; ++th) {
for (uint32_t th = 0; th < k - 1; ++th) {
threads[th].join();
}

const int64_t t1 = ggml_time_us();

tsum += (t1 - t0)*1e-6;

snprintf(strbuf, sizeof(strbuf), "memcpy: %7.2f GB/s (%2d thread)\n", (double) (n*size)/(tsum*1e9), n_threads);
snprintf(strbuf, sizeof(strbuf), "memcpy: %7.2f GB/s (%2d thread)\n", (double) (n*size)/(tsum*1e9), k);
s += strbuf;

// needed to prevent the compiler from optimizing the memcpy away
Expand Down

0 comments on commit 146169e

Please sign in to comment.