diff --git a/src/peft/peft_model.py b/src/peft/peft_model.py
index cb9d3bf159..c2d0275afe 100644
--- a/src/peft/peft_model.py
+++ b/src/peft/peft_model.py
@@ -31,6 +31,7 @@
 from transformers.modeling_outputs import SequenceClassifierOutput, TokenClassifierOutput
 from transformers.utils import PushToHubMixin
 
+from . import __version__
 from .tuners import (
     AdaLoraModel,
     AdaptionPromptModel,
@@ -49,7 +50,7 @@
     TaskType,
     _set_adapter,
     _set_trainable,
-    add_or_edit_model_card,
+    add_library_to_model_card,
     get_peft_model_state_dict,
     hub_file_exists,
     set_peft_model_state_dict,
@@ -128,7 +129,7 @@ def save_pretrained(self, save_directory, safe_serialization=False, **kwargs):
         if os.path.isfile(save_directory):
             raise ValueError(f"Provided path ({save_directory}) should be a directory, not a file")
         os.makedirs(save_directory, exist_ok=True)
-        add_or_edit_model_card(save_directory)
+        self.create_or_update_model_card(save_directory)
 
         for adapter_name, peft_config in self.peft_config.items():
             # save only the trainable weights
@@ -526,6 +527,46 @@ def set_adapter(self, adapter_name):
     def active_peft_config(self):
         return self.peft_config[self.active_adapter]
 
+    def create_or_update_model_card(self, output_dir):
+        """
+        Updates or create model card to include information about peft:
+        1. Adds `peft` library tag
+        2. Adds peft version
+        3. Adds quantization information if it was used
+        """
+        # Adds `peft` library tag
+        add_library_to_model_card(output_dir)
+
+        with open(os.path.join(output_dir, "README.md"), "r") as f:
+            lines = f.readlines()
+
+        quantization_config = None
+        if hasattr(self.config, "quantization_config"):
+            quantization_config = self.config.quantization_config.to_dict()
+        training_config_text = ""
+        # Adds quantization information if it was used
+        if quantization_config is not None:
+            training_config_text += "\nThe following `bitsandbytes` quantization config was used during training:\n"
+            training_config_text += "\n".join([f"- {name}: {value}" for name, value in quantization_config.items()])
+            training_config_text += "\n"
+
+        training_procedure_heading = "## Training procedure\n"
+        if training_procedure_heading in lines:
+            lines.insert(lines.index(training_procedure_heading) + 2, training_config_text)
+        else:
+            lines.append(f"{training_procedure_heading}\n{training_config_text}")
+
+        # Adds peft version
+        framework_block_heading = "### Framework versions\n"
+        if framework_block_heading in lines:
+            lines.insert(lines.index(framework_block_heading) + 2, f"- PEFT {__version__}\n")
+        else:
+            lines.append(f"{framework_block_heading}\n\n- PEFT {__version__}\n")
+
+        # write the lines back to README.md
+        with open(os.path.join(output_dir, "README.md"), "w") as f:
+            f.writelines(lines)
+
 
 class PeftModelForSequenceClassification(PeftModel):
     """
diff --git a/src/peft/utils/__init__.py b/src/peft/utils/__init__.py
index 90a93859c7..50e5da620b 100644
--- a/src/peft/utils/__init__.py
+++ b/src/peft/utils/__init__.py
@@ -27,7 +27,7 @@
     WEIGHTS_NAME,
     SAFETENSORS_WEIGHTS_NAME,
     _set_trainable,
-    add_or_edit_model_card,
+    add_library_to_model_card,
     bloom_model_postprocess_past_key_value,
     prepare_model_for_int8_training,
     prepare_model_for_kbit_training,
diff --git a/src/peft/utils/other.py b/src/peft/utils/other.py
index dc531e7d9e..971689dafb 100644
--- a/src/peft/utils/other.py
+++ b/src/peft/utils/other.py
@@ -21,7 +21,7 @@
 
 
 # Add or edit model card to have `library_name: peft`
-def add_or_edit_model_card(output_dir):
+def add_library_to_model_card(output_dir):
     if os.path.exists(os.path.join(output_dir, "README.md")):
         with open(os.path.join(output_dir, "README.md"), "r") as f:
             lines = f.readlines()