Skip to content

Commit

Permalink
latency experiments: reduce vGPR data-dependency in fma_f64, and WG size 32
Browse files Browse the repository at this point in the history
  • Loading branch information
preda committed Dec 18, 2024
1 parent f9b750e commit 65f374d
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 7 deletions.
2 changes: 1 addition & 1 deletion src/Gpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1080,7 +1080,7 @@ void Gpu::selftestTrig() {
for (int w = 0; w < int(WHATS.size()); ++w) {
const int what = w;
testTime(what, bufCarry);
vector<i64> times = bufCarry.read(4096);
vector<i64> times = bufCarry.read(4096 * 2);
[[maybe_unused]] i64 prev = 0;
u64 min = -1;
u64 sum = 0;
Expand Down
16 changes: 10 additions & 6 deletions src/cl/selftest.cl
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
#include "fft16.cl"

// Measure instruction latency.
KERNEL(64) testTime(int what, global i64* io) {
KERNEL(32) testTime(int what, global i64* io) {
#if HAS_ASM
i64 clock0, clock1;

Expand All @@ -41,11 +41,12 @@ KERNEL(64) testTime(int what, global i64* io) {
"s_waitcnt lgkmcnt(0)\n\t"
: "=s"(clock1));
} else if (what == 0) { // V_NOP
// clock0 = __builtin_readcyclecounter();
__asm (
"s_waitcnt lgkmcnt(0)\n\t"
"s_memtime %0\n\t"
"s_waitcnt lgkmcnt(0)\n\t"
: "=s"(clock0) : );
: "=s"(clock0));

for (int i = 0; i < 48; ++i) {
__asm("v_nop");
Expand Down Expand Up @@ -107,16 +108,19 @@ KERNEL(64) testTime(int what, global i64* io) {
"s_waitcnt lgkmcnt(0)\n\t"
: "=s"(clock1));
} else if (what == 4) { // V_FMA_F64
double a = 2, b = 3;
double a = 2, b = 3, c = 4, d = 5;

__asm (
"s_waitcnt lgkmcnt(0)\n\t"
"s_memtime %0\n\t"
"s_waitcnt lgkmcnt(0)\n\t"
: "=s"(clock0) : "v"(a), "v"(b));
: "=s"(clock0) : "v"(a), "v"(b), "v"(c), "v"(d));

for (int i = 0; i < 48; ++i) {
__asm("v_fma_f64 %0, %0, %1, %0" : : "v"(a), "v"(b));
for (int i = 0; i < 24; ++i) {
__asm(
"v_fma_f64 %0, %0, %1, %0\n\t"
"v_fma_f64 %2, %2, %3, %2\n\t"
: : "v"(a), "v"(b), "v"(c), "v"(d));
}

__asm(
Expand Down

0 comments on commit 65f374d

Please sign in to comment.