Skip to content

Commit 610c63b

Browse files
committed
fix: serialize calls to ggml_backend_tensor_set
1 parent 79426d5 commit 610c63b

File tree

1 file changed: 17 additions and 8 deletions.

model.cpp

Lines changed: 17 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -2024,6 +2024,7 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_thread
20242024
const size_t total_tensors_to_process = processed_tensor_storages.size();
20252025
const int64_t t_start = ggml_time_ms();
20262026
int last_n_threads = 1;
2027+
std::mutex tensor_backend_mutex;
20272028

20282029
for (size_t file_index = 0; file_index < file_paths_.size(); file_index++) {
20292030
std::string file_path = file_paths_[file_index];
@@ -2215,21 +2216,29 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_thread
22152216
// copy to device memory
22162217
t1 = ggml_time_ms();
22172218
convert_time_ms.fetch_add(t1 - t0);
2218-
t0 = ggml_time_ms();
2219-
ggml_backend_tensor_set(dst_tensor, read_buffer.data(), 0, ggml_nbytes(dst_tensor));
2220-
t1 = ggml_time_ms();
2221-
copy_to_backend_time_ms.fetch_add(t1 - t0);
2219+
2220+
{
2221+
std::lock_guard<std::mutex> lock(tensor_backend_mutex);
2222+
t0 = ggml_time_ms();
2223+
ggml_backend_tensor_set(dst_tensor, read_buffer.data(), 0, ggml_nbytes(dst_tensor));
2224+
t1 = ggml_time_ms();
2225+
copy_to_backend_time_ms.fetch_add(t1 - t0);
2226+
}
22222227
} else {
22232228
// convert first, then copy to device memory
22242229

22252230
convert_buffer.resize(ggml_nbytes(dst_tensor));
22262231
convert_tensor((void*)read_buffer.data(), tensor_storage.type, (void*)convert_buffer.data(), dst_tensor->type, (int)tensor_storage.nelements() / (int)tensor_storage.ne[0], (int)tensor_storage.ne[0]);
22272232
t1 = ggml_time_ms();
22282233
convert_time_ms.fetch_add(t1 - t0);
2229-
t0 = ggml_time_ms();
2230-
ggml_backend_tensor_set(dst_tensor, convert_buffer.data(), 0, ggml_nbytes(dst_tensor));
2231-
t1 = ggml_time_ms();
2232-
copy_to_backend_time_ms.fetch_add(t1 - t0);
2234+
2235+
{
2236+
std::lock_guard<std::mutex> lock(tensor_backend_mutex);
2237+
t0 = ggml_time_ms();
2238+
ggml_backend_tensor_set(dst_tensor, convert_buffer.data(), 0, ggml_nbytes(dst_tensor));
2239+
t1 = ggml_time_ms();
2240+
copy_to_backend_time_ms.fetch_add(t1 - t0);
2241+
}
22332242
}
22342243
}
22352244
}

0 commit comments

Comments (0)