Skip to content

Commit b45cf0e

Browse files
dmlap and SunMarc authored
Guard against unset resolved_archive_file (#35628)
* archive_file may not be specified

  When loading a pre-trained model from a GGUF file, resolved_archive_file may not be set. Guard against that case in the safetensors availability check.

* Remap partial disk offload to cpu for GGUF files

  GGUF files don't support disk offload, so attempt to remap them to the CPU when device_map is auto. If device_map is anything else but None, raise a NotImplementedError.

* Don't remap auto device_map and raise RuntimeError

  If device_map=auto and modules are selected for disk offload, don't attempt to map them to any other device. Raise a runtime error when a GGUF model is configured to map any modules to disk.

---------

Co-authored-by: Marc Sun <57196510+SunMarc@users.noreply.github.com>
1 parent 96f01a3 commit b45cf0e

File tree

2 files changed

+50
-1
lines changed

2 files changed

+50
-1
lines changed

src/transformers/modeling_utils.py

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4267,6 +4267,12 @@ def from_pretrained(
42674267
# check if we don't have tied param in different devices
42684268
check_tied_parameters_on_same_device(tied_params, device_map)
42694269

4270+
if gguf_path and device_map is not None and "disk" in device_map.values():
4271+
raise RuntimeError(
4272+
"One or more modules is configured to be mapped to disk. Disk offload is not supported for models "
4273+
"loaded from GGUF files."
4274+
)
4275+
42704276
if from_tf:
42714277
if resolved_archive_file.endswith(".index"):
42724278
# Load from a TensorFlow 1.X checkpoint - provided by original authors
@@ -4525,7 +4531,7 @@ def _load_pretrained_model(
45254531
archive_file = (
45264532
resolved_archive_file[0] if isinstance(resolved_archive_file, (list, tuple)) else resolved_archive_file
45274533
)
4528-
is_safetensors = archive_file.endswith(".safetensors")
4534+
is_safetensors = archive_file is not None and archive_file.endswith(".safetensors")
45294535
if offload_folder is None and not is_safetensors:
45304536
raise ValueError(
45314537
"The current `device_map` had weights offloaded to the disk. Please provide an `offload_folder`"

tests/quantization/ggml/test_ggml.py

Lines changed: 43 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -219,6 +219,49 @@ def test_q6_k_fp16(self):
219219
EXPECTED_TEXT = "Hello, World!\n\nStep 3: Add"
220220
self.assertEqual(tokenizer.decode(out[0], skip_special_tokens=True), EXPECTED_TEXT)
221221

222+
def test_gguf_errors_disk_offload(self):
223+
from collections import OrderedDict
224+
225+
q2_k_gguf_model_id = self.gguf_filename.format(quant_type=QuantType.Q2_K.name)
226+
with self.assertRaises(RuntimeError):
227+
AutoModelForCausalLM.from_pretrained(
228+
self.gguf_model_id,
229+
device_map=OrderedDict(
230+
[
231+
("model.embed_tokens", "cpu"),
232+
("lm_head", "cpu"),
233+
("model.layers.0", "cpu"),
234+
("model.layers.1", "cpu"),
235+
("model.layers.2", "cpu"),
236+
("model.layers.3", "cpu"),
237+
("model.layers.4", "cpu"),
238+
("model.layers.5", "cpu"),
239+
("model.layers.6", "cpu"),
240+
("model.layers.7", "cpu"),
241+
("model.layers.8", "cpu"),
242+
("model.layers.9", "cpu"),
243+
("model.layers.10", "disk"),
244+
("model.layers.11", "disk"),
245+
("model.layers.12", "disk"),
246+
("model.layers.13", "disk"),
247+
("model.layers.14", "disk"),
248+
("model.layers.15", "disk"),
249+
("model.layers.16", "disk"),
250+
("model.layers.17", "disk"),
251+
("model.layers.18", "disk"),
252+
("model.layers.19", "disk"),
253+
("model.layers.20", "disk"),
254+
("model.layers.21", "disk"),
255+
("model.layers.22", "disk"),
256+
("model.norm", "disk"),
257+
("model.rotary_emb", "disk"),
258+
]
259+
),
260+
gguf_file=q2_k_gguf_model_id,
261+
offload_folder="offload",
262+
offload_state_dict=True,
263+
)
264+
222265

223266
@require_gguf
224267
@require_torch_gpu

0 commit comments

Comments
 (0)