@@ -19714,8 +19714,23 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
         gguf_free(c);
     }
 
-    LLAMA_LOG_INFO("%s: model size = %8.2f MB\n", __func__, total_size_org/1024.0/1024.0);
-    LLAMA_LOG_INFO("%s: quant size = %8.2f MB\n", __func__, total_size_new/1024.0/1024.0);
+    LLAMA_LOG_INFO("\n===========================================\n");
+    LLAMA_LOG_INFO("%s: model size = %8.2f B\n",   __func__, total_size_org/1.0);
+    LLAMA_LOG_INFO("%s: model size = %8.2f KB\n",  __func__, total_size_org/1000.0);
+    LLAMA_LOG_INFO("%s: model size = %8.2f KiB\n", __func__, total_size_org/1024.0);
+    LLAMA_LOG_INFO("%s: model size = %8.2f MB\n",  __func__, total_size_org/1000.0/1000.0);
+    LLAMA_LOG_INFO("%s: model size = %8.2f MiB\n", __func__, total_size_org/1024.0/1024.0);
+    LLAMA_LOG_INFO("%s: model size = %8.2f GB\n",  __func__, total_size_org/1000.0/1000.0/1000.0);
+    LLAMA_LOG_INFO("%s: model size = %8.2f GiB\n", __func__, total_size_org/1024.0/1024.0/1024.0);
+    LLAMA_LOG_INFO("===========================================\n");
+    LLAMA_LOG_INFO("%s: quant size = %8.2f B\n",   __func__, total_size_new/1.0);
+    LLAMA_LOG_INFO("%s: quant size = %8.2f KB\n",  __func__, total_size_new/1000.0);
+    LLAMA_LOG_INFO("%s: quant size = %8.2f KiB\n", __func__, total_size_new/1024.0);
+    LLAMA_LOG_INFO("%s: quant size = %8.2f MB\n",  __func__, total_size_new/1000.0/1000.0);
+    LLAMA_LOG_INFO("%s: quant size = %8.2f MiB\n", __func__, total_size_new/1024.0/1024.0);
+    LLAMA_LOG_INFO("%s: quant size = %8.2f GB\n",  __func__, total_size_new/1000.0/1000.0/1000.0);
+    LLAMA_LOG_INFO("%s: quant size = %8.2f GiB\n", __func__, total_size_new/1024.0/1024.0/1024.0);
19733+ LLAMA_LOG_INFO("===========================================");
 
     if (qs.n_fallback > 0) {
         LLAMA_LOG_WARN("%s: WARNING: %d of %d tensor(s) required fallback quantization\n",
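
The patch writes the same seven unit conversions out twice, once for the original size and once for the quantized size. A table-driven helper could print both blocks without the repetition. The sketch below is hypothetical and not part of this commit: log_size_report and the byte counts in main are made-up names and example values, and printf stands in for LLAMA_LOG_INFO so the sketch compiles on its own.

#include <cstdint>
#include <cstdio>

// Hypothetical helper (not in the patch): prints one byte count in all seven
// units the diff above logs, decimal (KB/MB/GB) and binary (KiB/MiB/GiB).
static void log_size_report(const char * label, uint64_t size) {
    static const struct { const char * unit; double div; } units[] = {
        { "B",   1.0                      },
        { "KB",  1000.0                   },
        { "KiB", 1024.0                   },
        { "MB",  1000.0 * 1000.0          },
        { "MiB", 1024.0 * 1024.0          },
        { "GB",  1000.0 * 1000.0 * 1000.0 },
        { "GiB", 1024.0 * 1024.0 * 1024.0 },
    };
    for (const auto & u : units) {
        // printf stands in for LLAMA_LOG_INFO in this standalone sketch.
        printf("%s = %8.2f %s\n", label, (double) size / u.div, u.unit);
    }
}

int main() {
    // Arbitrary example byte counts, not taken from any real model.
    log_size_report("model size", UINT64_C(13478104576));
    log_size_report("quant size", UINT64_C(3825065984));
    return 0;
}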