diff --git a/docs/source/en/_toctree.yml b/docs/source/en/_toctree.yml
index fd55a47cd80..2c7345d4f88 100644
--- a/docs/source/en/_toctree.yml
+++ b/docs/source/en/_toctree.yml
@@ -313,6 +313,8 @@
       title: ErnieM
     - local: model_doc/esm
       title: ESM
+    - local: model_doc/falcon
+      title: Falcon
     - local: model_doc/flan-t5
       title: FLAN-T5
     - local: model_doc/flan-ul2
diff --git a/docs/source/en/model_doc/falcon.md b/docs/source/en/model_doc/falcon.md
new file mode 100644
index 00000000000..9bf6c32a4ec
--- /dev/null
+++ b/docs/source/en/model_doc/falcon.md
@@ -0,0 +1,84 @@
+# Falcon
+
+## Overview
+
+Falcon is a class of causal decoder-only models built by [TII](https://www.tii.ae/). The largest Falcon checkpoints
+have been trained on >=1T tokens of text, with a particular emphasis on the [RefinedWeb](https://arxiv.org/abs/2306.01116)
+corpus. They are made available under the Apache 2.0 license.
+
+Falcon's architecture is modern and optimized for inference, with multi-query attention and support for efficient
+attention variants like `FlashAttention`. Both 'base' models, trained only as causal language models, and 'instruct'
+models that have received further fine-tuning are available.
+
+Falcon models are (as of 2023) some of the largest and most powerful open-source language models, and they
+consistently rank highly on the [OpenLLM leaderboard](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard).
+
+## Converting custom checkpoints
+
+Falcon models were initially added to the Hugging Face Hub as custom code checkpoints. However, Falcon is now fully
+supported in the Transformers library. If you fine-tuned a model from a custom code checkpoint, we recommend converting
+your checkpoint to the new in-library format, as this should give significant improvements to stability and
+performance, especially for generation, as well as removing the need to use `trust_remote_code=True`!
+
+You can convert custom code checkpoints to full Transformers checkpoints using the `convert_custom_code_checkpoint.py`
+script located in the
+[Falcon model directory](https://github.com/huggingface/transformers/tree/main/src/transformers/models/falcon)
+of the Transformers library. To use this script, simply call it with
+`python convert_custom_code_checkpoint.py --checkpoint_dir my_model`. This will convert your checkpoint in-place, and
+you can immediately load it from the directory afterwards with e.g. `from_pretrained()`. If your model hasn't been
+uploaded to the Hub, we recommend making a backup before attempting the conversion, just in case!
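+
+As a minimal sketch of the post-conversion workflow (assuming the converted checkpoint lives in the local `my_model`
+directory from the command above; adjust the path and generation settings for your own setup):
+
+```python
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+# After conversion, the checkpoint uses the in-library Falcon code,
+# so trust_remote_code=True is no longer required.
+tokenizer = AutoTokenizer.from_pretrained("my_model")
+model = AutoModelForCausalLM.from_pretrained("my_model")
+
+inputs = tokenizer("Falcon models are", return_tensors="pt")
+outputs = model.generate(**inputs, max_new_tokens=25)
+print(tokenizer.decode(outputs[0], skip_special_tokens=True))
+```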
+
+## FalconConfig
+
+[[autodoc]] FalconConfig
+    - all
+
+## FalconModel
+
+[[autodoc]] FalconModel
+    - forward
+
+## FalconForCausalLM
+
+[[autodoc]] FalconForCausalLM
+    - forward
+
+## FalconForSequenceClassification
+
+[[autodoc]] FalconForSequenceClassification
+    - forward
+
+## FalconForTokenClassification
+
+[[autodoc]] FalconForTokenClassification
+    - forward
+
+## FalconForQuestionAnswering
+
+[[autodoc]] FalconForQuestionAnswering
+    - forward
diff --git a/src/transformers/models/falcon/convert_custom_code_checkpoint.py b/src/transformers/models/falcon/convert_custom_code_checkpoint.py
new file mode 100644
index 00000000000..4687c49807f
--- /dev/null
+++ b/src/transformers/models/falcon/convert_custom_code_checkpoint.py
@@ -0,0 +1,73 @@
+import json
+from argparse import ArgumentParser
+from pathlib import Path
+
+
+"""
+This script converts Falcon custom code checkpoints to modern Falcon checkpoints that use code in the Transformers
+library. After conversion, performance (especially for generation) should improve and the checkpoint can be loaded
+without needing trust_remote_code=True.
+"""
+
+parser = ArgumentParser()
+parser.add_argument(
+    "--checkpoint_dir",
+    type=Path,
+    required=True,
+    help="Directory containing a custom code checkpoint to convert to a modern Falcon checkpoint.",
+)
+args = parser.parse_args()
+
+if not args.checkpoint_dir.is_dir():
+    raise ValueError("--checkpoint_dir argument should be a directory!")
+
+if (
+    not (args.checkpoint_dir / "configuration_RW.py").is_file()
+    or not (args.checkpoint_dir / "modelling_RW.py").is_file()
+):
+    raise ValueError(
+        "The model directory should contain configuration_RW.py and modelling_RW.py files! Are you sure this is a custom code checkpoint?"
+    )
+
+# The custom code files are no longer needed once the in-library Falcon implementation is used.
+(args.checkpoint_dir / "configuration_RW.py").unlink()
+(args.checkpoint_dir / "modelling_RW.py").unlink()
+
+# Rewrite config.json: point it at the in-library Falcon classes and rename legacy config keys.
+config = args.checkpoint_dir / "config.json"
+text = config.read_text()
+text = text.replace("RWForCausalLM", "FalconForCausalLM")
+text = text.replace("RefinedWebModel", "falcon")
+text = text.replace("RefinedWeb", "falcon")
+json_config = json.loads(text)
+del json_config["auto_map"]
+
+if "n_head" in json_config:
+    json_config["num_attention_heads"] = json_config.pop("n_head")
+if "n_layer" in json_config:
+    json_config["num_hidden_layers"] = json_config.pop("n_layer")
+if "n_head_kv" in json_config:
+    json_config["num_kv_heads"] = json_config.pop("n_head_kv")
+    json_config["new_decoder_architecture"] = True
+else:
+    json_config["new_decoder_architecture"] = False
+
+bos_token_id = json_config.get("bos_token_id", 1)
+eos_token_id = json_config.get("eos_token_id", 2)
+
+config.unlink()
+config.write_text(json.dumps(json_config, indent=2, sort_keys=True))
+
+# Make sure fast tokenizers only return the inputs that Falcon expects (no token_type_ids).
+tokenizer_config = args.checkpoint_dir / "tokenizer_config.json"
+if tokenizer_config.is_file():
+    text = tokenizer_config.read_text()
+    json_config = json.loads(text)
+    if json_config["tokenizer_class"] == "PreTrainedTokenizerFast":
+        json_config["model_input_names"] = ["input_ids", "attention_mask"]
+        tokenizer_config.unlink()
+        tokenizer_config.write_text(json.dumps(json_config, indent=2, sort_keys=True))
+
+# Write a minimal generation_config.json so that generate() picks up the converted special token ids.
+generation_config_path = args.checkpoint_dir / "generation_config.json"
+generation_dict = {
+    "_from_model_config": True,
+    "bos_token_id": bos_token_id,
+    "eos_token_id": eos_token_id,
+    "transformers_version": "4.33.0.dev0",
+}
+generation_config_path.write_text(json.dumps(generation_dict, indent=2, sort_keys=True))
+
+print("Done! Please double-check that the new checkpoint works as expected.")
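+
+# Optional sanity check (a sketch, not part of the original conversion flow): confirm that the rewritten
+# config now parses as a native Falcon config without trust_remote_code. This assumes the installed
+# transformers version already includes Falcon support; remove it if you want the script to stay
+# dependency-free.
+from transformers import AutoConfig
+
+converted_config = AutoConfig.from_pretrained(args.checkpoint_dir)
+if converted_config.model_type != "falcon":
+    raise ValueError(f"Expected model_type 'falcon' after conversion, got '{converted_config.model_type}'!")
+print("Config check passed: the checkpoint now uses the in-library Falcon configuration.")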