Skip to content

Commit ae9663f

Browse files
authored Jun 9, 2023
Windows nvcc workaround (#1753)
Fix gibberish output on Windows when using CUDA
1 parent b33dee2 commit ae9663f

File tree

1 file changed

+9
-0
lines changed

1 file changed

+9
-0
lines changed
 

‎ggml-cuda.cu

+9
Original file line number | Diff line number | Diff line change
@@ -1512,6 +1512,14 @@ static void ggml_cuda_op(const ggml_tensor * src0, const ggml_tensor * src1, ggm
1512 1512
i01_high = row_high % ne01;
1513 1513
}
1514 1514
}
1515+
1516+
// There is possibly a bug in the Windows nvcc compiler regarding instruction reordering or optimizing out local variables.
1517+
// Removing the first assert or changing the order of the arguments causes the second assert to fail.
1518+
// Removing both asserts results in i01_high becoming 0 which in turn results in garbage output.
1519+
// The root cause seems to be a problem with i0_offset_high becoming 0 when it should always be >0 (for single GPU).
1520+
GGML_ASSERT(i01_low == 0 || g_device_count > 1);
1521+
GGML_ASSERT(i01_high == ne01 || g_device_count > 1);
1522+
1515 1523
const int64_t i01_diff = i01_high - i01_low;
1516 1524
if (i01_diff == 0) {
1517 1525
continue;
@@ -1727,6 +1735,7 @@ void ggml_cuda_load_data(const char * fname, struct ggml_tensor * tensor, const
1727 1735
row_low -= row_low % GGML_CUDA_DMMV_Y;
1728 1736
row_high = id == g_device_count - 1 ? nrows : nrows*g_tensor_split[id + 1];
1729 1737
row_high -= row_high % GGML_CUDA_DMMV_Y;
1738+
GGML_ASSERT(nrows % GGML_CUDA_DMMV_Y == 0);
1730 1739
} else {
1731 1740
GGML_ASSERT(false);
1732 1741
}

0 commit comments

Comments
 (0)
Please sign in to comment.