ggml-org
diff --git a/convert-baichuan-hf-to-gguf.py b/convert-baichuan-hf-to-gguf.py
Lines changed: 1 addition & 1 deletion
diff --git a/convert-bloom-hf-to-gguf.py b/convert-bloom-hf-to-gguf.py
Lines changed: 1 addition & 1 deletion
diff --git a/convert-falcon-hf-to-gguf.py b/convert-falcon-hf-to-gguf.py
Lines changed: 1 addition & 1 deletion
diff --git a/convert-gptneox-hf-to-gguf.py b/convert-gptneox-hf-to-gguf.py
Lines changed: 1 addition & 1 deletion
diff --git a/convert-llama-ggml-to-gguf.py b/convert-llama-ggml-to-gguf.py
Lines changed: 2 additions & 22 deletions
diff --git a/convert-mpt-hf-to-gguf.py b/convert-mpt-hf-to-gguf.py
Lines changed: 1 addition & 1 deletion
diff --git a/convert-persimmon-to-gguf.py b/convert-persimmon-to-gguf.py
Lines changed: 1 addition & 1 deletion
diff --git a/convert-starcoder-hf-to-gguf.py b/convert-starcoder-hf-to-gguf.py
Lines changed: 1 addition & 1 deletion
diff --git a/convert.py b/convert.py
Lines changed: 4 additions & 6 deletions
diff --git a/examples/train-text-from-scratch/convert-train-checkpoint-to-gguf.py b/examples/train-text-from-scratch/convert-train-checkpoint-to-gguf.py
Lines changed: 1 addition & 1 deletion
@@ -16,7 +16,7 @@
 from sentencepiece import SentencePieceProcessor  # type: ignore[import]
 
 if 'NO_LOCAL_GGUF' not in os.environ:
-    sys.path.insert(1, str(Path(__file__).parent / 'gguf-py' / 'gguf'))
+    sys.path.insert(1, str(Path(__file__).parent / 'gguf-py'))
 import gguf
 
 
 
@@ -17,7 +17,7 @@
 from transformers import AutoTokenizer  # type: ignore[import]
 
 if 'NO_LOCAL_GGUF' not in os.environ:
-    sys.path.insert(1, str(Path(__file__).parent / 'gguf-py' / 'gguf'))
+    sys.path.insert(1, str(Path(__file__).parent / 'gguf-py'))
 import gguf
 
 
 
@@ -17,7 +17,7 @@
 from transformers import AutoTokenizer  # type: ignore[import]
 
 if 'NO_LOCAL_GGUF' not in os.environ:
-    sys.path.insert(1, str(Path(__file__).parent / 'gguf-py' / 'gguf'))
+    sys.path.insert(1, str(Path(__file__).parent / 'gguf-py'))
 import gguf
 
 
 
@@ -16,7 +16,7 @@
 from transformers import AutoTokenizer  # type: ignore[import]
 
 if 'NO_LOCAL_GGUF' not in os.environ:
-    sys.path.insert(1, str(Path(__file__).parent / 'gguf-py' / 'gguf'))
+    sys.path.insert(1, str(Path(__file__).parent / 'gguf-py'))
 import gguf
 
 
 
@@ -12,29 +12,9 @@
 
 import os
 if 'NO_LOCAL_GGUF' not in os.environ:
-    sys.path.insert(1, str(Path(__file__).parent / 'gguf-py' / 'gguf'))
+    sys.path.insert(1, str(Path(__file__).parent / 'gguf-py'))
 import gguf
 
-# Note: Does not support GGML_QKK_64
-QK_K = 256
-# Items here are (block size, type size)
-GGML_QUANT_SIZES = {
-    gguf.GGMLQuantizationType.F32  : (1, 4),
-    gguf.GGMLQuantizationType.F16  : (1, 2),
-    gguf.GGMLQuantizationType.Q4_0 : (32, 2 + 16),
-    gguf.GGMLQuantizationType.Q4_1 : (32, 2 + 2 + 16),
-    gguf.GGMLQuantizationType.Q5_0 : (32, 2 + 4 + 16),
-    gguf.GGMLQuantizationType.Q5_1 : (32, 2 + 2 + 4 + 16),
-    gguf.GGMLQuantizationType.Q8_0 : (32, 2 + 32),
-    gguf.GGMLQuantizationType.Q8_1 : (32, 4 + 4 + 32),
-    gguf.GGMLQuantizationType.Q2_K : (256, 2 + 2 + QK_K // 16 + QK_K // 4),
-    gguf.GGMLQuantizationType.Q3_K : (256, 2 + QK_K // 4 + QK_K // 8 + 12),
-    gguf.GGMLQuantizationType.Q4_K : (256, 2 + 2 + QK_K // 2 + 12),
-    gguf.GGMLQuantizationType.Q5_K : (256, 2 + 2 + QK_K // 2 + QK_K // 8 + 12),
-    gguf.GGMLQuantizationType.Q6_K : (256, 2 + QK_K // 2 + QK_K // 4 + QK_K // 16),
-    gguf.GGMLQuantizationType.Q8_K : (256, 4 + QK_K + QK_K // 8),
-}
-
 class GGMLFormat(IntEnum):
     GGML = 0
     GGMF = 1
@@ -125,7 +105,7 @@ def load(self, data, offset):
         (n_dims, name_len, dtype) = struct.unpack('<3I', data[offset:offset + 12])
         assert n_dims >= 0 and n_dims <= 4, f'Invalid tensor dimensions {n_dims}'
         assert name_len < 4096, 'Absurd tensor name length'
-        quant = GGML_QUANT_SIZES.get(dtype)
+        quant = gguf.GGML_QUANT_SIZES.get(dtype)
         assert quant is not None, 'Unknown tensor type'
         (blksize, tysize) = quant
         offset += 12
 
@@ -16,7 +16,7 @@
 from transformers import AutoTokenizer  # type: ignore[import]
 
 if 'NO_LOCAL_GGUF' not in os.environ:
-    sys.path.insert(1, str(Path(__file__).parent / 'gguf-py' / 'gguf'))
+    sys.path.insert(1, str(Path(__file__).parent / 'gguf-py'))
 import gguf
 
 
 
@@ -6,7 +6,7 @@
 from pathlib import Path
 from sentencepiece import SentencePieceProcessor
 if 'NO_LOCAL_GGUF' not in os.environ:
-    sys.path.insert(1, str(Path(__file__).parent / 'gguf-py' / 'gguf'))
+    sys.path.insert(1, str(Path(__file__).parent / 'gguf-py'))
 import gguf
 
 def _flatten_dict(dct, tensors, prefix=None):
 
@@ -16,7 +16,7 @@
 from transformers import AutoTokenizer  # type: ignore[import]
 
 if 'NO_LOCAL_GGUF' not in os.environ:
-    sys.path.insert(1, str(Path(__file__).parent / 'gguf-py' / 'gguf'))
+    sys.path.insert(1, str(Path(__file__).parent / 'gguf-py'))
 import gguf
 
 
 
@@ -3,11 +3,9 @@
 
 import argparse
 import concurrent.futures
-import copy
 import enum
 import faulthandler
 import functools
-import io
 import itertools
 import json
 import math
@@ -23,14 +21,14 @@
 from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor
 from dataclasses import dataclass
 from pathlib import Path
-from typing import IO, TYPE_CHECKING, Any, Callable, Generator, Iterable, Literal, Sequence, TypeVar
+from typing import IO, TYPE_CHECKING, Any, Callable, Iterable, Literal, TypeVar
 
 import numpy as np
 from sentencepiece import SentencePieceProcessor  # type: ignore[import]
 
 import os
 if 'NO_LOCAL_GGUF' not in os.environ:
-    sys.path.insert(1, str(Path(__file__).parent / 'gguf-py' / 'gguf'))
+    sys.path.insert(1, str(Path(__file__).parent / 'gguf-py'))
 import gguf
 
 if TYPE_CHECKING:
@@ -851,7 +849,7 @@ def add_meta_vocab(self, vocab: Vocab) -> None:
         elif isinstance(vocab, BpeVocab):
             self.gguf.add_tokenizer_model("gpt2")
         else:
-            raise ValueError(f'Unknown vocab type: Not BpeVocab or SentencePieceVocab')
+            raise ValueError('Unknown vocab type: Not BpeVocab or SentencePieceVocab')
         self.gguf.add_token_list(tokens)
         self.gguf.add_token_scores(scores)
         self.gguf.add_token_types(toktypes)
@@ -905,7 +903,7 @@ def maybe_do_quantize(item: tuple[DataType, NDArray]) -> NDArray:
         return dt.quantize(arr)
 
     @staticmethod
-    def write_all(fname_out: Path, ftype: GGMLFileType, params: Params, model: LazyModel, vocab: Vocab, svocab: gguf.SpecialVocab, concurrency: int = DEFAULT_CONCURRENCY, endianess=gguf.GGUFEndian.LITTLE) -> None:
+    def write_all(fname_out: Path, ftype: GGMLFileType, params: Params, model: LazyModel, vocab: Vocab, svocab: gguf.SpecialVocab, concurrency: int = DEFAULT_CONCURRENCY, endianess: gguf.GGUFEndian = gguf.GGUFEndian.LITTLE) -> None:
         check_vocab_size(params, vocab)
 
         of = OutputFile(fname_out, endianess=endianess)
 
@@ -9,7 +9,7 @@
 from pathlib import Path
 
 if 'NO_LOCAL_GGUF' not in os.environ:
-    sys.path.insert(1, str(Path(__file__).parent / '..' / '..' / 'gguf-py' / 'gguf'))
+    sys.path.insert(1, str(Path(__file__).parent / '..' / '..' / 'gguf-py'))
 import gguf
 
 # gguf constants