huggingface · ArthurZucker · Apr 5, 2024 · Apr 3, 2024 · Apr 5, 2024
@@ -87,7 +87,7 @@ def validate_environment(self, *args, **kwargs):
                     """
                     Some modules are dispatched on the CPU or the disk. Make sure you have enough GPU RAM to fit the
                     quantized model. If you want to dispatch the model on the CPU or the disk while keeping these modules
-                    in 32-bit, you need to set `load_in_8bit_fp32_cpu_offload=True` and pass a custom `device_map` to
+                    in 32-bit, you need to set `llm_int8_enable_fp32_cpu_offload=True` and pass a custom `device_map` to
                     `from_pretrained`. Check
                     https://huggingface.co/docs/transformers/main/en/main_classes/quantization#offload-between-cpu-and-gpu
                     for more details.

@@ -87,7 +87,7 @@ def validate_environment(self, *args, **kwargs):
                     """
                     Some modules are dispatched on the CPU or the disk. Make sure you have enough GPU RAM to fit the
                     quantized model. If you want to dispatch the model on the CPU or the disk while keeping these modules
-                    in 32-bit, you need to set `load_in_8bit_fp32_cpu_offload=True` and pass a custom `device_map` to
+                    in 32-bit, you need to set `llm_int8_enable_fp32_cpu_offload=True` and pass a custom `device_map` to
                     `from_pretrained`. Check
                     https://huggingface.co/docs/transformers/main/en/main_classes/quantization#offload-between-cpu-and-gpu
                     for more details.