@@ -6859,22 +6859,68 @@ static void llm_load_print_meta(llama_model_loader & ml, llama_model & model) {
     LLAMA_LOG_INFO("%s: model type = %s\n", __func__, llama_model_type_name(model.type));
     LLAMA_LOG_INFO("%s: model ftype = %s\n", __func__, llama_model_ftype_name(model.ftype).c_str());
     if (ml.n_elements >= 1e12) {
-        LLAMA_LOG_INFO("%s: model params = %.2f T\n", __func__, ml.n_elements*1e-12);
+        LLAMA_LOG_INFO("%s: model params = %.3f T\n", __func__, ml.n_elements*1e-12);
     } else if (ml.n_elements >= 1e9) {
-        LLAMA_LOG_INFO("%s: model params = %.2f B\n", __func__, ml.n_elements*1e-9);
+        LLAMA_LOG_INFO("%s: model params = %.3f B\n", __func__, ml.n_elements*1e-9);
     } else if (ml.n_elements >= 1e6) {
-        LLAMA_LOG_INFO("%s: model params = %.2f M\n", __func__, ml.n_elements*1e-6);
+        LLAMA_LOG_INFO("%s: model params = %.3f M\n", __func__, ml.n_elements*1e-6);
     } else {
-        LLAMA_LOG_INFO("%s: model params = %.2f K\n", __func__, ml.n_elements*1e-3);
-    }
-
-    LLAMA_LOG_INFO("%s: model size = %.2f Bytes (%.2f BPW) \n", __func__, ml.n_bytes/1.0, ml.n_bytes*8.0/ml.n_elements);
-    LLAMA_LOG_INFO("%s: model size = %.2f KB (%.2f BPW) \n", __func__, ml.n_bytes/1000.0, ml.n_bytes*8.0/ml.n_elements);
-    LLAMA_LOG_INFO("%s: model size = %.2f KiB (%.2f BPW) \n", __func__, ml.n_bytes/1024.0, ml.n_bytes*8.0/ml.n_elements);
-    LLAMA_LOG_INFO("%s: model size = %.2f MB (%.2f BPW) \n", __func__, ml.n_bytes/1000.0/1000.0 , ml.n_bytes*8.0/ml.n_elements);
-    LLAMA_LOG_INFO("%s: model size = %.2f MiB (%.2f BPW) \n", __func__, ml.n_bytes/1024.0/1024.0, ml.n_bytes*8.0/ml.n_elements);
-    LLAMA_LOG_INFO("%s: model size = %.2f GB (%.2f BPW) \n", __func__, ml.n_bytes/1000.0/1000.0/1000.0, ml.n_bytes*8.0/ml.n_elements);
-    LLAMA_LOG_INFO("%s: model size = %.2f GiB (%.2f BPW) \n", __func__, ml.n_bytes/1024.0/1024.0/1024.0, ml.n_bytes*8.0/ml.n_elements);
+        LLAMA_LOG_INFO("%s: model params = %.3f K\n", __func__, ml.n_elements*1e-3);
+        // LLAMA_LOG_INFO("%s: model params = %.2f T\n", __func__, ml.n_elements*1e-12);
+    // } else if (ml.n_elements >= 1e9) {
+    //     LLAMA_LOG_INFO("%s: model params = %.2f B\n", __func__, ml.n_elements*1e-9);
+    // } else if (ml.n_elements >= 1e6) {
+    //     LLAMA_LOG_INFO("%s: model params = %.2f M\n", __func__, ml.n_elements*1e-6);
+    // } else {
+    //     LLAMA_LOG_INFO("%s: model params = %.2f K\n", __func__, ml.n_elements*1e-3);
+    }
+
+    LLAMA_LOG_INFO("%s: model size = %.2f Bytes (%.3f BPW) \n", __func__, ml.n_bytes/1.0, ml.n_bytes*8.0/ml.n_elements);
+    LLAMA_LOG_INFO("%s: model size = %.2f KB (%.3f BPW) \n", __func__, ml.n_bytes/1000.0, ml.n_bytes*8.0/ml.n_elements);
+    LLAMA_LOG_INFO("%s: model size = %.2f KiB (%.3f BPW) \n", __func__, ml.n_bytes/1024.0, ml.n_bytes*8.0/ml.n_elements);
+    LLAMA_LOG_INFO("%s: model size = %.2f MB (%.3f BPW) \n", __func__, ml.n_bytes/1000.0/1000.0 , ml.n_bytes*8.0/ml.n_elements);
+    LLAMA_LOG_INFO("%s: model size = %.2f MiB (%.3f BPW) \n", __func__, ml.n_bytes/1024.0/1024.0, ml.n_bytes*8.0/ml.n_elements);
+    LLAMA_LOG_INFO("%s: model size = %.2f GB (%.3f BPW) \n", __func__, ml.n_bytes/1000.0/1000.0/1000.0, ml.n_bytes*8.0/ml.n_elements);
+    LLAMA_LOG_INFO("%s: model size = %.2f GiB (%.3f BPW) \n", __func__, ml.n_bytes/1024.0/1024.0/1024.0, ml.n_bytes*8.0/ml.n_elements);
+
+    // if (ml.n_bytes < GiB) {
+    //     LLAMA_LOG_INFO("%s: model size = %.3f MiB (%.3f BPW) \n", __func__, ml.n_bytes/1024.0/1024.0, ml.n_bytes*8.0/ml.n_elements);
+    // } else {
+    //     LLAMA_LOG_INFO("%s: model size = %.3f GiB (%.3f BPW) \n", __func__, ml.n_bytes/1024.0/1024.0/1024.0, ml.n_bytes*8.0/ml.n_elements);
+    // }
+
+    {
+        auto n_bytes = ml.n_bytes;
+        auto n_elements = ml.n_elements;
+        auto meta_tke = ml.get_tensor_meta("token_embd.weight");
+        auto meta_out = ml.get_tensor_meta("output.weight");
+        if (meta_tke && meta_out) {
+            n_bytes -= ggml_nbytes(meta_tke);
+            n_elements -= ggml_nelements(meta_tke);
+            n_bytes -= ggml_nbytes(meta_out);
+            n_elements -= ggml_nelements(meta_out);
+
+            LLAMA_LOG_INFO("%s: repeating layers = %.2f Bytes (%.3f BPW) \n", __func__, n_bytes/1.0, n_bytes*8.0/n_elements);
+            LLAMA_LOG_INFO("%s: repeating layers = %.2f KB (%.3f BPW) \n", __func__, n_bytes/1000.0, n_bytes*8.0/n_elements);
+            LLAMA_LOG_INFO("%s: repeating layers = %.2f KiB (%.3f BPW) \n", __func__, n_bytes/1024.0, n_bytes*8.0/n_elements);
+            LLAMA_LOG_INFO("%s: repeating layers = %.2f MB (%.3f BPW) \n", __func__, n_bytes/1000.0/1000.0, n_bytes*8.0/n_elements);
+            LLAMA_LOG_INFO("%s: repeating layers = %.2f MiB (%.3f BPW) \n", __func__, n_bytes/1024.0/1024.0, n_bytes*8.0/n_elements);
+            LLAMA_LOG_INFO("%s: repeating layers = %.2f GB (%.3f BPW) \n", __func__, n_bytes/1000.0/1000.0/1000.0, n_bytes*8.0/n_elements);
+            LLAMA_LOG_INFO("%s: repeating layers = %.2f GiB (%.3f BPW) \n", __func__, n_bytes/1024.0/1024.0/1024.0, n_bytes*8.0/n_elements);
+
+            // if (n_bytes < GiB) {
+            //     LLAMA_LOG_INFO("%s: repeating layers = %.3f MiB (%.3f BPW", __func__, n_bytes/1024.0/1024.0, n_bytes*8.0/n_elements);
+            // } else {
+            //     LLAMA_LOG_INFO("%s: repeating layers = %.3f GiB (%.3f BPW", __func__, n_bytes/1024.0/1024.0/1024.0, n_bytes*8.0/n_elements);
+            // }
+
+            if (n_elements >= 1e9) {
+                LLAMA_LOG_INFO("%s: repeating layers = %.3f B parameters\n", __func__, n_elements*1e-9);
+            } else {
+                LLAMA_LOG_INFO("%s: repeating layers = %.3f M parameters\n", __func__, n_elements*1e-6);
+            }
+        }
+    }
 
     // general kv
     LLAMA_LOG_INFO("%s: general.name = %s\n", __func__, model.name.c_str());
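
The arithmetic behind these log lines: bits per weight (BPW) is `n_bytes * 8.0 / n_elements`; the decimal units (KB/MB/GB) divide by powers of 1000 while the binary units (KiB/MiB/GiB) divide by powers of 1024; and the "repeating layers" figures exclude `token_embd.weight` and `output.weight`, which quantization mixes often treat differently from the per-layer tensors. Below is a minimal standalone sketch of the same arithmetic; every byte and element count in it is a made-up value for a hypothetical ~7B model, not data from this commit.

```cpp
#include <cstdint>
#include <cstdio>

int main() {
    // Hypothetical totals for a ~7B model (illustrative only).
    uint64_t n_bytes    = 3825065984ULL; // total tensor bytes in the file
    uint64_t n_elements = 6738415616ULL; // total weight count

    // Hypothetical sizes of the two tensors excluded from "repeating layers".
    uint64_t tke_bytes = 73728000ULL,  tke_elements = 131072000ULL; // token_embd.weight
    uint64_t out_bytes = 107520000ULL, out_elements = 131072000ULL; // output.weight

    // BPW = bytes * 8 / elements; GB uses 1000^3, GiB uses 1024^3.
    printf("model size       = %.2f GB  (%.3f BPW)\n", n_bytes/1000.0/1000.0/1000.0, n_bytes*8.0/n_elements);
    printf("model size       = %.2f GiB (%.3f BPW)\n", n_bytes/1024.0/1024.0/1024.0, n_bytes*8.0/n_elements);

    // Subtract the embedding and output tensors, as the commit does via
    // ml.get_tensor_meta() / ggml_nbytes() / ggml_nelements().
    uint64_t rep_bytes    = n_bytes    - tke_bytes    - out_bytes;
    uint64_t rep_elements = n_elements - tke_elements - out_elements;
    printf("repeating layers = %.2f GiB (%.3f BPW)\n", rep_bytes/1024.0/1024.0/1024.0, rep_bytes*8.0/rep_elements);

    // Report the reduced parameter count in billions or millions.
    if (rep_elements >= 1000000000ULL) {
        printf("repeating layers = %.3f B parameters\n", rep_elements*1e-9);
    } else {
        printf("repeating layers = %.3f M parameters\n", rep_elements*1e-6);
    }
    return 0;
}
```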