diff --git a/llama.cpp b/llama.cpp index e74fd7234eb84..6fbfeca54dfac 100644 --- a/llama.cpp +++ b/llama.cpp @@ -7655,18 +7655,21 @@ static void llama_convert_tensor_internal( return; } - auto block_size = tensor->type == GGML_TYPE_F16 ? 1 : (size_t)ggml_blck_size(tensor->type); - auto block_size_bytes = ggml_type_size(tensor->type); + size_t block_size = tensor->type == GGML_TYPE_F16 ? 1 : (size_t)ggml_blck_size(tensor->type); + size_t block_size_bytes = ggml_type_size(tensor->type); GGML_ASSERT(nelements % block_size == 0); - auto nblocks = nelements / block_size; - auto blocks_per_thread = nblocks / nthread; - auto spare_blocks = nblocks - (blocks_per_thread * nthread); // if blocks aren't divisible by thread count - - for (auto tnum = 0, in_buff_offs = 0, out_buff_offs = 0; tnum < nthread; tnum++) { - auto thr_blocks = blocks_per_thread + (tnum == nthread - 1 ? spare_blocks : 0); // num blocks for this thread - auto thr_elems = thr_blocks * block_size; // number of elements for this thread - auto thr_block_bytes = thr_blocks * block_size_bytes; // number of input bytes for this thread + size_t nblocks = nelements / block_size; + size_t blocks_per_thread = nblocks / nthread; + size_t spare_blocks = nblocks - (blocks_per_thread * nthread); // if blocks aren't divisible by thread count + + size_t in_buff_offs = 0; + size_t out_buff_offs = 0; + + for (int tnum = 0; tnum < nthread; tnum++) { + size_t thr_blocks = blocks_per_thread + (tnum == nthread - 1 ? spare_blocks : 0); // num blocks for this thread + size_t thr_elems = thr_blocks * block_size; // number of elements for this thread + size_t thr_block_bytes = thr_blocks * block_size_bytes; // number of input bytes for this thread auto compute = [qtype] (ggml_type typ, uint8_t * inbuf, float * outbuf, int nels) { if (typ == GGML_TYPE_F16) {