Skip to content

Commit b2f7e04

Browse files
authored
sync : ggml (conv ops + cuda MSVC fixes) (#3765)
ggml-ci
1 parent abd21fc commit b2f7e04

File tree

3 files changed: +371 lines added, -96 lines removed

ggml-cuda.cu

+5 -5
Original file line number | Diff line number | Diff line change
@@ -5664,10 +5664,10 @@ void ggml_init_cublas() {
56645664
GGML_ASSERT(g_device_count <= GGML_CUDA_MAX_DEVICES);
56655665
int64_t total_vram = 0;
56665666
fprintf(stderr, "%s: found %d " GGML_CUDA_NAME " devices:\n", __func__, g_device_count);
5667-
for (int64_t id = 0; id < g_device_count; ++id) {
5667+
for (int id = 0; id < g_device_count; ++id) {
56685668
cudaDeviceProp prop;
56695669
CUDA_CHECK(cudaGetDeviceProperties(&prop, id));
5670-
fprintf(stderr, " Device %ld: %s, compute capability %d.%d\n", id, prop.name, prop.major, prop.minor);
5670+
fprintf(stderr, " Device %d: %s, compute capability %d.%d\n", id, prop.name, prop.major, prop.minor);
56715671

56725672
g_tensor_split[id] = total_vram;
56735673
total_vram += prop.totalGlobalMem;
@@ -5677,15 +5677,15 @@ void ggml_init_cublas() {
56775677
g_compute_capabilities[id] = 100*prop.major + 10*prop.minor;
56785678
#endif // defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)
56795679
}
5680-
for (int64_t id = 0; id < g_device_count; ++id) {
5680+
for (int id = 0; id < g_device_count; ++id) {
56815681
g_tensor_split[id] /= total_vram;
56825682
}
56835683

5684-
for (int64_t id = 0; id < g_device_count; ++id) {
5684+
for (int id = 0; id < g_device_count; ++id) {
56855685
CUDA_CHECK(ggml_cuda_set_device(id));
56865686

56875687
// create cuda streams
5688-
for (int64_t is = 0; is < MAX_STREAMS; ++is) {
5688+
for (int is = 0; is < MAX_STREAMS; ++is) {
56895689
CUDA_CHECK(cudaStreamCreateWithFlags(&g_cudaStreams[id][is], cudaStreamNonBlocking));
56905690
}
56915691

0 commit comments

Comments
 (0)