 #!/usr/bin/env python3
 from __future__ import annotations

+import logging
 import argparse
 import concurrent.futures
 import enum
@@ -637,7 +638,7 @@ def __repr__(self) -> str:


 def permute(weights: NDArray, n_head: int, n_head_kv: int) -> NDArray:
-    # print("permute debug " + str(weights.shape[0]) + " x " + str(weights.shape[1]) + " nhead " + str(n_head) + " nheadkv " + str(n_kv_head))
+    # logging.info("permute debug " + str(weights.shape[0]) + " x " + str(weights.shape[1]) + " nhead " + str(n_head) + " nheadkv " + str(n_kv_head))
     if n_head_kv is not None and n_head != n_head_kv:
         n_head = n_head_kv
     return (weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:])
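For reference, this permutation undoes the interleaved rotary (real, imag) row layout that Hugging Face checkpoints use for the Q/K projections. A minimal numpy sketch, assuming the `.swapaxes(1, 2).reshape(weights.shape)` continuation that the hunk above truncates:

```python
import numpy as np

def permute(weights: np.ndarray, n_head: int, n_head_kv: int) -> np.ndarray:
    # Regroup each head's rows: (head, pair, half, ...) -> (head, half, pair, ...)
    if n_head_kv is not None and n_head != n_head_kv:
        n_head = n_head_kv
    return (weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:])
                   .swapaxes(1, 2)
                   .reshape(weights.shape))

w = np.arange(64).reshape(8, 8)           # toy: 2 heads, head_dim 4, n_embd 8
assert permute(w, 2, 2).shape == (8, 8)   # shape preserved, rows reordered per head
```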
@@ -1026,12 +1027,12 @@ def check_vocab_size(params: Params, vocab: BaseVocab, pad_vocab: bool = False)

     # Check for a vocab size mismatch
     if params.n_vocab == vocab.vocab_size:
-        print("Ignoring added_tokens.json since model matches vocab size without it.")
+        logging.warning("Ignoring added_tokens.json since model matches vocab size without it.")
         return

     if pad_vocab and params.n_vocab > vocab.vocab_size:
         pad_count = params.n_vocab - vocab.vocab_size
-        print(
+        logging.debug(
             f"Padding vocab with {pad_count} token(s) - <dummy00001> through <dummy{pad_count:05}>"
         )
         for i in range(1, pad_count + 1):
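The debug message above documents the pad-token naming scheme. A small sketch of how such names come out (the actual loop body falls outside this hunk, so the list below is purely illustrative):

```python
pad_count = 3
pad_tokens = [f"<dummy{i:05}>" for i in range(1, pad_count + 1)]
# ['<dummy00001>', '<dummy00002>', '<dummy00003>']
```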
@@ -1159,7 +1160,7 @@ def write_tensor_data(self, ftype: GGMLFileType, model: LazyModel, concurrency:
             elapsed = time.time() - start
             size = ' x '.join(f"{dim:6d}" for dim in lazy_tensor.shape)
             padi = len(str(len(model)))
-            print(
+            logging.info(
                 f"[{i + 1:{padi}d}/{len(model)}] Writing tensor {name:38s} | size {size:16} | type {lazy_tensor.data_type.name:4} | T+{int(elapsed):4}"
             )
             self.gguf.write_tensor_data(ndarray)
@@ -1274,12 +1275,12 @@ def convert_model_names(model: LazyModel, params: Params, skip_unknown: bool) ->
     # HF models permute or pack some of the tensors, so we need to undo that
     for i in itertools.count():
         if f"model.layers.{i}.self_attn.q_proj.weight" in model:
-            print(f"Permuting layer {i}")
+            logging.debug(f"Permuting layer {i}")
             tmp[f"model.layers.{i}.self_attn.q_proj.weight"] = permute_lazy(model[f"model.layers.{i}.self_attn.q_proj.weight"], params.n_head, params.n_head)
             tmp[f"model.layers.{i}.self_attn.k_proj.weight"] = permute_lazy(model[f"model.layers.{i}.self_attn.k_proj.weight"], params.n_head, params.n_head_kv)
             # tmp[f"model.layers.{i}.self_attn.v_proj.weight"] = model[f"model.layers.{i}.self_attn.v_proj.weight"]
         elif f"model.layers.{i}.self_attn.W_pack.weight" in model:
-            print(f"Unpacking and permuting layer {i}")
+            logging.debug(f"Unpacking and permuting layer {i}")
             tmp[f"model.layers.{i}.self_attn.q_proj.weight"] = permute_part_lazy(model[f"model.layers.{i}.self_attn.W_pack.weight"], 0, params.n_head, params.n_head)
             tmp[f"model.layers.{i}.self_attn.k_proj.weight"] = permute_part_lazy(model[f"model.layers.{i}.self_attn.W_pack.weight"], 1, params.n_head, params.n_head_kv)
             tmp[f"model.layers.{i}.self_attn.v_proj.weight"] = part_lazy(model[f"model.layers.{i}.self_attn.W_pack.weight"], 2)
@@ -1292,15 +1293,15 @@ def convert_model_names(model: LazyModel, params: Params, skip_unknown: bool) ->
         tensor_type, name_new = tmap.get_type_and_name(name, try_suffixes=(".weight", ".bias")) or (None, None)
         if name_new is None:
             if skip_unknown:
-                print(f"Unexpected tensor name: {name} - skipping")
+                logging.warning(f"Unexpected tensor name: {name} - skipping")
                 continue
             raise ValueError(f"Unexpected tensor name: {name}. Use --skip-unknown to ignore it (e.g. LLaVA)")

         if tensor_type in should_skip:
-            print(f"skipping tensor {name_new}")
+            logging.debug(f"skipping tensor {name_new}")
             continue

-        print(f"{name:48s} -> {name_new:40s} | {lazy_tensor.data_type.name:6s} | {lazy_tensor.shape}")
+        logging.debug(f"{name:48s} -> {name_new:40s} | {lazy_tensor.data_type.name:6s} | {lazy_tensor.shape}")
         out[name_new] = lazy_tensor

     return out
@@ -1365,7 +1366,7 @@ def load_some_model(path: Path) -> ModelPlus:
         paths = find_multifile_paths(path)
         models_plus: list[ModelPlus] = []
         for path in paths:
-            print(f"Loading model file {path}")
+            logging.info(f"Loading model file {path}")
             models_plus.append(lazy_load_file(path))

         model_plus = merge_multifile_models(models_plus)
@@ -1406,7 +1407,7 @@ def _create_vocab_by_path(self, vocab_types: list[str]) -> Vocab:
         else:
             raise FileNotFoundError(f"Could not find a tokenizer matching any of {vocab_types}")

-        print(f"Loaded vocab file {vocab.fname_tokenizer!r}, type {vocab.name!r}")
+        logging.info(f"Loaded vocab file {vocab.fname_tokenizer!r}, type {vocab.name!r}")
         return vocab

     def load_vocab(self, vocab_types: list[str] | None, model_parent_path: Path) -> tuple[BaseVocab, gguf.SpecialVocab]:
@@ -1466,8 +1467,18 @@ def main(args_in: list[str] | None = None) -> None:
     parser.add_argument("--big-endian", action="store_true", help="model is executed on big endian machine")
     parser.add_argument("--pad-vocab", action="store_true", help="add pad tokens when model vocab expects more than tokenizer metadata provides")
     parser.add_argument("--skip-unknown", action="store_true", help="skip unknown tensor names instead of failing")
+    parser.add_argument("--verbose", action="store_true", help="increase output verbosity")

     args = parser.parse_args(args_in)
+
+    if args.dump_single or args.dump:
+        # Avoid printing anything besides the dump output
+        logging.basicConfig(level=logging.CRITICAL)
+    elif args.verbose:
+        logging.basicConfig(level=logging.DEBUG)
+    else:
+        logging.basicConfig(level=logging.INFO)
+

     if args.no_vocab and args.vocab_only:
         raise ValueError("--vocab-only does not make sense with --no-vocab")
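The level selection above relies on `logging.basicConfig` configuring the root logger once, before any record is emitted, so it gates every module-level `logging.*` call in the script. A standalone illustration of the same pattern (the argument values below are only for demonstration):

```python
import argparse
import logging

parser = argparse.ArgumentParser()
parser.add_argument("--verbose", action="store_true", help="increase output verbosity")
args = parser.parse_args(["--verbose"])

# The first basicConfig() call wins; later calls are no-ops unless force=True.
logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)
logging.debug("only shown with --verbose")
logging.info("shown at INFO and below")
```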
@@ -1484,6 +1495,7 @@ def main(args_in: list[str] | None = None) -> None:
     if args.dump:
         do_dump_model(model_plus)
         return
+
     endianess = gguf.GGUFEndian.LITTLE
     if args.big_endian:
         endianess = gguf.GGUFEndian.BIG
@@ -1506,7 +1518,7 @@ def main(args_in: list[str] | None = None) -> None:
             "q8_0": GGMLFileType.MostlyQ8_0,
         }[args.outtype]

-    print(f"params = {params}")
+    logging.info(f"params = {params}")

     model_parent_path = model_plus.paths[0].parent
     vocab_path = Path(args.vocab_dir or args.model or model_parent_path)
@@ -1521,27 +1533,26 @@ def main(args_in: list[str] | None = None) -> None:
         outfile = args.outfile
         OutputFile.write_vocab_only(outfile, params, vocab, special_vocab,
                                     endianess=endianess, pad_vocab=args.pad_vocab)
-        print(f"Wrote {outfile}")
+        logging.info(f"Wrote {outfile}")
         return

     if model_plus.vocab is not None and args.vocab_dir is None and not args.no_vocab:
         vocab = model_plus.vocab

-    print(f"Vocab info: {vocab}")
-    print(f"Special vocab info: {special_vocab}")
-
+    logging.info(f"Vocab info: {vocab}")
+    logging.info(f"Special vocab info: {special_vocab}")
     model = model_plus.model
     model = convert_model_names(model, params, args.skip_unknown)
     ftype = pick_output_type(model, args.outtype)
     model = convert_to_output_type(model, ftype)
     outfile = args.outfile or default_outfile(model_plus.paths, ftype)

     params.ftype = ftype
-    print(f"Writing {outfile}, format {ftype}")
+    logging.info(f"Writing {outfile}, format {ftype}")

     OutputFile.write_all(outfile, ftype, params, model, vocab, special_vocab,
                          concurrency=args.concurrency, endianess=endianess, pad_vocab=args.pad_vocab)
-    print(f"Wrote {outfile}")
+    logging.info(f"Wrote {outfile}")


 if __name__ == '__main__':