diff --git a/all_configs.json b/all_configs.json
index 4393ecd..d0dc5cb 100644
--- a/all_configs.json
+++ b/all_configs.json
@@ -1 +1 @@
-{"NousResearch/Llama-2-13b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "google/flan-t5-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "google/flan-t5-large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "t5-small": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "t5-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "PascalNotin/Tranception_Small": {"architectures": ["TranceptionLMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": 3072, "n_layer": 12, "vocab_size": 25}, "bigscience/bloom-560m": {"architectures": ["BloomForCausalLM"], "n_inner": null, "n_layer": 24, "num_attention_heads": 16, "vocab_size": 250880}, "distilgpt2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_layer": 6, "vocab_size": 50257}, "hf-internal-testing/tiny-random-gpt2": {"intermediate_size": 37, "n_embd": 32, "n_head": 4, "n_inner": null, "n_layer": 5, "vocab_size": 1000}, "tiiuae/falcon-7b": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "t5-large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "bigscience/bloomz-1b1": {"architectures": ["BloomForCausalLM"], "n_inner": null, "n_layer": 24, "num_attention_heads": 16, "vocab_size": 250880}, "gpt2-medium": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_layer": 24, "vocab_size": 50257}, "mrm8488/t5-base-finetuned-common_gen": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "lmsys/fastchat-t5-3b-v1.0": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32110}, "gpt2-xl": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1600, "n_head": 25, "n_layer": 48, "vocab_size": 50257}, "meta-llama/Llama-2-7b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "hf-internal-testing/tiny-random-t5": {"d_ff": 37, "d_model": 32, "num_heads": 4, "num_layers": 5, "vocab_size": 1103}, "EleutherAI/pythia-6.9b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "databricks/dolly-v2-3b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50280}, "hf-internal-testing/tiny-random-GPTNeoXForCausalLM": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 32, "intermediate_size": 37, "num_attention_heads": 4, "num_hidden_layers": 5, "vocab_size": 1024}, "meta-llama/Llama-2-7b-chat-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, 
"num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "microsoft/DialoGPT-medium": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_layer": 24, "vocab_size": 50257}, "google/mt5-base": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "TheBloke/Wizard-Vicuna-7B-Uncensored-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "hf-internal-testing/tiny-random-BloomModel": {"architectures": ["BloomModel"], "hidden_size": 32, "n_head": 4, "n_layer": 5, "vocab_size": 1024}, "google/flan-t5-xxl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 10240, "d_model": 4096, "num_heads": 64, "num_layers": 24, "vocab_size": 32128}, "lmsys/vicuna-7b-v1.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "huggyllama/llama-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "mrm8488/t5-base-finetuned-summarize-news": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "google/flan-t5-small": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 32128}, "tiiuae/falcon-40b-instruct": {"architectures": ["RWForCausalLM"], "hidden_size": 8192, "n_head": 128, "n_layer": 60, "vocab_size": 65024}, "ramsrigouthamg/t5_sentence_paraphraser": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "flexudy/t5-base-multi-sentence-doctor": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "lewtun/tiny-random-mt5": {"architectures": ["MT5Model"], "d_ff": 1024, "d_model": 16, "num_heads": 4, "num_layers": 2, "vocab_size": 250112}, "gpt2-large": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_layer": 36, "vocab_size": 50257}, "valhalla/t5-base-e2e-qg": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32102}, "sshleifer/tiny-gpt2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 2, "n_head": 2, "n_layer": 2, "vocab_size": 50257}, "fxmarty/tiny-llama-fast-tokenizer": {"architectures": ["LlamaForCausalLM"], "hidden_size": 16, "intermediate_size": 64, "num_attention_heads": 4, "num_hidden_layers": 2, "vocab_size": 32000}, "decapoda-research/llama-7b-hf": {"architectures": ["LLaMAForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "stabilityai/StableBeluga2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "syzymon/long_llama_3b": {"architectures": ["LongLlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "NousResearch/Llama-2-7b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, 
"num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/Llama-2-70B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "tiiuae/falcon-7b-instruct": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "google/flan-t5-xl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "meta-llama/Llama-2-13b-chat-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "petals-team/StableBeluga2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "meta-llama/Llama-2-70b-chat-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "EleutherAI/gpt-neox-20b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 6144, "intermediate_size": 24576, "num_attention_heads": 64, "num_hidden_layers": 44, "vocab_size": 50432}, "hf-internal-testing/tiny-random-GPTBigCodeForCausalLM": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 32, "n_head": 4, "n_inner": 37, "n_layer": 5, "vocab_size": 1024}, "nferruz/ProtGPT2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_inner": null, "n_layer": 36, "vocab_size": 50257}, "philschmid/flan-t5-xxl-sharded-fp16": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 10240, "d_model": 4096, "num_heads": 64, "num_layers": 24, "vocab_size": 32128}, "HuggingFaceM4/tiny-random-LlamaForCausalLM": {"architectures": ["LlamaForCausalLM"], "hidden_size": 16, "intermediate_size": 64, "num_attention_heads": 4, "num_hidden_layers": 2, "vocab_size": 32000}, "hf-internal-testing/tiny-random-BloomForCausalLM": {"architectures": ["BloomForCausalLM"], "hidden_size": 32, "n_head": 4, "n_layer": 5, "vocab_size": 1024}, "hf-internal-testing/tiny-random-GPT2LMHeadModel": {"architectures": ["GPT2LMHeadModel"], "n_embd": 32, "n_head": 4, "n_inner": 37, "n_layer": 5, "vocab_size": 1024}, "Vamsi/T5_Paraphrase_Paws": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "lmsys/vicuna-7b-v1.3": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "meta-llama/Llama-2-13b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "ybelkada/tiny-random-T5ForConditionalGeneration-calibrated": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 37, "d_model": 32, "num_heads": 4, "num_layers": 5, "vocab_size": 32100}, "prithivida/parrot_paraphraser_on_T5": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "hf-internal-testing/tiny-random-GPTBigCodeModel": {"architectures": ["GPTBigCodeModel"], "n_embd": 32, "n_head": 4, "n_inner": 37, "n_layer": 5, "vocab_size": 1024}, "hkunlp/instructor-large": {"architectures": ["T5EncoderModel"], "d_ff": 4096, "d_model": 1024, 
"num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "lmsys/vicuna-7b-v1.5": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "fabiochiu/t5-small-medium-title-generation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "TheBloke/Llama-2-7b-Chat-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "skt/kogpt2-base-v2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 51200}, "google/t5-v1_1-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "Maykeye/TinyLLama-v0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 64, "intermediate_size": 256, "num_attention_heads": 16, "num_hidden_layers": 8, "vocab_size": 32000}, "TheBloke/Llama-2-13B-chat-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "tiiuae/falcon-40b": {"architectures": ["RWForCausalLM"], "hidden_size": 8192, "n_head": 128, "n_layer": 60, "vocab_size": 65024}, "sonoisa/t5-base-japanese-title-generation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "Rostlab/prot_t5_xl_uniref50": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 16384, "d_model": 1024, "num_heads": 32, "num_layers": 24, "vocab_size": 128}, "TheBloke/vicuna-7B-v1.3-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "daryl149/llama-2-7b-chat-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "stabilityai/StableBeluga-7B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "meta-llama/Llama-2-70b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "TheBloke/MythoMax-L2-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "czurita/nsql-llama-2-7B-sharded-bf16-2GB": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "vennify/t5-base-grammar-correction": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "czearing/story-to-title": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "google/byt5-large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3840, "d_model": 1536, "num_heads": 16, "num_layers": 36, "vocab_size": 384}, "HuggingFaceH4/starchat-beta": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 6144, 
"n_head": 48, "n_inner": 24576, "n_layer": 40, "vocab_size": 49156}, "codellama/CodeLlama-34b-Instruct-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "openlm-research/open_llama_13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "optimum/t5-small": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "t5-3b": {"architectures": ["T5WithLMHeadModel"], "d_ff": 16384, "d_model": 1024, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "humarin/chatgpt_paraphraser_on_T5_base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "Gustavosta/MagicPrompt-Stable-Diffusion": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "bigscience/bloomz-7b1": {"architectures": ["BloomForCausalLM"], "n_inner": null, "n_layer": 30, "num_attention_heads": 32, "vocab_size": 250880}, "trl-internal-testing/tiny-random-GPTNeoXForCausalLM": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 32, "intermediate_size": 37, "num_attention_heads": 4, "num_hidden_layers": 5, "vocab_size": 1024}, "NousResearch/Yarn-Llama-2-7b-64k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "khhuang/zerofec-qa2claim-t5-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "h2oai/h2ogpt-gm-oasst1-en-2048-falcon-7b-v3": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "khhuang/zerofec-daqa-t5-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "declare-lab/flan-alpaca-gpt4-xl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "codellama/CodeLlama-7b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32016}, "lmsys/vicuna-13b-v1.3": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Rostlab/prot_t5_xl_half_uniref50-enc": {"architectures": ["T5EncoderModel"], "d_ff": 16384, "d_model": 1024, "num_heads": 32, "num_layers": 24, "vocab_size": 128}, "google/mt5-small": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250112}, "Salesforce/safety-flan-t5-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "patrickvonplaten/t5-tiny-random": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 256, "d_model": 64, "num_heads": 2, "num_layers": 2, "vocab_size": 32128}, "google/flan-ul2": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 16384, "d_model": 4096, "num_heads": 16, "num_layers": 32, "vocab_size": 32128}, 
"EleutherAI/pythia-70m": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 512, "intermediate_size": 2048, "num_attention_heads": 8, "num_hidden_layers": 6, "vocab_size": 50304}, "bigscience/mt0-large": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 250112}, "stevhliu/my_awesome_billsum_model": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "EleutherAI/pythia-70m-deduped": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 512, "intermediate_size": 2048, "num_attention_heads": 8, "num_hidden_layers": 6, "vocab_size": 50304}, "lmsys/vicuna-13b-v1.5": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "PAIXAI/Astrid-1B": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 8, "num_hidden_layers": 16, "vocab_size": 50304}, "Phind/Phind-CodeLlama-34B-Python-v1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "pszemraj/flan-t5-large-grammar-synthesis": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "Voicelab/vlt5-base-keywords": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 50048}, "togethercomputer/Llama-2-7B-32K-Instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "huggyllama/llama-65b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "ai-forever/ruGPT-3.5-13B": {"architectures": ["GPT2LMHeadModel"], "n_embd": 5120, "n_head": 40, "n_inner": null, "n_layer": 40, "vocab_size": 50272}, "Einmalumdiewelt/T5-Base_GNAD": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "google/t5-v1_1-xl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "Austism/chronos-hermes-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "upstage/SOLAR-0-70b-16bit": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "bigscience/bloom-7b1": {"architectures": ["BloomForCausalLM"], "hidden_size": 4096, "n_head": 32, "n_inner": null, "n_layer": 30, "vocab_size": 250880}, "nlpai-lab/kullm-polyglot-12.8b-v2": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 30080}, "codellama/CodeLlama-13b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "hf-internal-testing/tiny-random-GPT2Model": {"architectures": ["GPT2Model"], "n_embd": 32, "n_head": 4, 
"n_inner": 37, "n_layer": 5, "vocab_size": 1024}, "Gryphe/MythoMax-L2-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "openlm-research/open_llama_3b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "TheBloke/Llama-2-70B-chat-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "trl-internal-testing/dummy-GPT2-correct-vocab": {"architectures": ["GPT2LMHeadModel"], "n_embd": 32, "n_head": 4, "n_inner": 37, "n_layer": 5, "vocab_size": 50257}, "charsiu/g2p_multilingual_byT5_small_100": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3584, "d_model": 1472, "num_heads": 6, "num_layers": 12, "vocab_size": 384}, "EleutherAI/pythia-160m": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 768, "intermediate_size": 3072, "num_attention_heads": 12, "num_hidden_layers": 12, "vocab_size": 50304}, "ElnaggarLab/ankh-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 48, "vocab_size": 144}, "trl-internal-testing/tiny-random-GPT2LMHeadModel": {"architectures": ["GPT2LMHeadModel"], "n_embd": 32, "n_head": 4, "n_inner": 37, "n_layer": 5, "vocab_size": 1024}, "openlm-research/open_llama_7b_v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/OpenAssistant-Llama2-13B-Orca-v2-8K-3166-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "codellama/CodeLlama-7b-Instruct-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32016}, "WizardLM/WizardCoder-Python-34B-V1.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32001}, "pszemraj/grammar-synthesis-small": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 32128}, "TheBloke/llama-2-70b-Guanaco-QLoRA-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "openlm-research/open_llama_3b_v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "IDEA-CCNL/Wenzhong-GPT2-110M": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": 3072, "n_layer": 12, "vocab_size": 50304}, "microsoft/DialoGPT-small": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_layer": 12, "vocab_size": 50257}, "trl-internal-testing/tiny-random-BloomForCausalLM": {"architectures": ["BloomForCausalLM"], "hidden_size": 32, "n_head": 4, "n_layer": 5, "vocab_size": 1024}, "trl-internal-testing/tiny-random-T5ForConditionalGeneration": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 37, "d_model": 32, "num_heads": 4, "num_layers": 5, "vocab_size": 1302}, 
"hf-internal-testing/tiny-random-onnx-mt5": {"architectures": ["MT5Model"], "d_ff": 1024, "d_model": 16, "num_heads": 4, "num_layers": 2, "vocab_size": 250112}, "NousResearch/Llama-2-7b-chat-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "trl-internal-testing/tiny-random-MT5ForConditionalGeneration": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 256, "d_model": 64, "num_heads": 4, "num_layers": 8, "vocab_size": 5100}, "fxmarty/tiny-testing-gpt2-remote-code": {"architectures": ["GPT2CustomLMHeadModel"], "intermediate_size": 37, "n_embd": 32, "n_head": 4, "n_inner": null, "n_layer": 5, "vocab_size": 1000}, "castorini/monot5-base-msmarco-10k": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "microsoft/DialoGPT-large": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_layer": 36, "vocab_size": 50257}, "bigscience/bloomz-560m": {"architectures": ["BloomForCausalLM"], "n_inner": null, "n_layer": 24, "num_attention_heads": 16, "vocab_size": 250880}, "Open-Orca/OpenOrca-Platypus2-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "google/byt5-small": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3584, "d_model": 1472, "num_heads": 6, "num_layers": 12, "vocab_size": 384}, "bigscience/bloom-1b7": {"architectures": ["BloomForCausalLM"], "n_inner": null, "n_layer": 24, "num_attention_heads": 16, "vocab_size": 250880}, "elinas/chronos-13b-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "google/t5-efficient-tiny": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 256, "num_heads": 4, "num_layers": 4, "vocab_size": 32128}, "bigscience/bloom-1b1": {"architectures": ["BloomForCausalLM"], "n_inner": null, "n_layer": 24, "num_attention_heads": 16, "vocab_size": 250880}, "EleutherAI/polyglot-ko-1.3b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 30080}, "bigscience/bloom-3b": {"architectures": ["BloomForCausalLM"], "n_inner": null, "n_layer": 30, "num_attention_heads": 32, "vocab_size": 250880}, "TinyPixel/Llama-2-7B-bf16-sharded": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "edumunozsala/llama-2-7b-int4-python-code-20k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "yahma/llama-7b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "beomi/KoAlpaca-Polyglot-12.8B": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 30003}, "stanfordnlp/backpack-gpt2": {"architectures": ["BackpackGPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50264}, "prithivida/grammar_error_correcter_v1": {"architectures": 
["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "lvkaokao/llama2-7b-hf-chat-lora-v3": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "google/t5-v1_1-small": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 32128}, "TheBloke/gpt4-alpaca-lora_mlp-65B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "google/mt5-large": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 250112}, "EleutherAI/pythia-2.8b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50304}, "cyberagent/open-calm-7b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 52224}, "lvwerra/gpt2-imdb": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_layer": 12, "vocab_size": 50257}, "WizardLM/WizardLM-13B-V1.2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "KoboldAI/GPT-NeoX-20B-Erebus": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 6144, "intermediate_size": 24576, "num_attention_heads": 64, "num_hidden_layers": 44, "vocab_size": 50432}, "togethercomputer/RedPajama-INCITE-Instruct-3B-v1": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "aditi2222/automatic_title_generation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "shibing624/chinese-alpaca-plus-13b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 49954}, "optimum/gpt2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "togethercomputer/LLaMA-2-7B-32K": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "amazon/FalconLite": {"architectures": ["RWForCausalLM"], "hidden_size": 8192, "n_head": 128, "n_layer": 60, "vocab_size": 65040}, "EleutherAI/polyglot-ko-5.8b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 16, "num_hidden_layers": 28, "vocab_size": 30080}, "databricks/dolly-v2-7b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50280}, "snrspeaks/t5-one-line-summary": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "lmsys/vicuna-33b-v1.3": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, 
"TheBloke/OpenOrca-Platypus2-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "TheBloke/Llama-2-13B-Chat-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "sdadas/mt5-base-translator-pl-en": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250100}, "TheBloke/Llama-2-7b-chat-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "bigcode/gpt_bigcode-santacoder": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 2048, "n_head": 16, "n_inner": 8192, "n_layer": 24, "vocab_size": 49280}, "h2oai/h2ogpt-gm-oasst1-en-2048-falcon-7b-v2": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "TheBloke/airoboros-l2-70B-GPT4-2.0-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "lmsys/vicuna-13b-v1.5-16k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "bigcode/santacoder": {"architectures": ["GPT2LMHeadCustomModel"], "n_embd": 2048, "n_head": 16, "n_inner": 8192, "n_layer": 24, "vocab_size": 49280}, "togethercomputer/RedPajama-INCITE-Chat-3B-v1": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "ai-forever/mGPT": {"architectures": ["GPT2LMHeadModel"], "n_embd": 2048, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 100000}, "openlm-research/open_llama_7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "decapoda-research/llama-13b-hf": {"architectures": ["LLaMAForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "OpenAssistant/codellama-13b-oasst-sft-v10": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32032}, "rinna/bilingual-gpt-neox-4b-instruction-sft": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2816, "intermediate_size": 11264, "num_attention_heads": 22, "num_hidden_layers": 36, "vocab_size": 65536}, "KoboldAI/LLaMA2-13B-Holomax-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "MBZUAI/LaMini-T5-61M": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "google/t5-v1_1-xxl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 10240, "d_model": 4096, "num_heads": 64, "num_layers": 24, "vocab_size": 32128}, "EleutherAI/pythia-1.4b-deduped": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50304}, 
"stabilityai/StableBeluga-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "tiiuae/falcon-rw-1b": {"architectures": ["FalconForCausalLM"], "hidden_size": 2048, "num_attention_heads": 32, "num_hidden_layers": 24, "vocab_size": 50304}, "ClueAI/ChatYuan-large-v2": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "af1tang/personaGPT": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50263}, "google/t5-large-lm-adapt": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "vilsonrodrigues/falcon-7b-instruct-sharded": {"architectures": ["FalconForCausalLM"], "hidden_size": 4544, "num_attention_heads": 71, "num_hidden_layers": 32, "vocab_size": 65024}, "petals-team/falcon-rw-1b": {"architectures": ["FalconForCausalLM"], "hidden_size": 2048, "num_attention_heads": 32, "num_hidden_layers": 24, "vocab_size": 50304}, "bigscience/T0_3B": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "TheTravellingEngineer/llama2-7b-hf-guanaco": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Salesforce/codet5-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "EleutherAI/pythia-2.8b-deduped": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50304}, "The-Face-Of-Goonery/Huginn-13b-v1.2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "FredZhang7/distilgpt2-stable-diffusion-v2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 6, "vocab_size": 50257}, "WizardLM/WizardCoder-15B-V1.0": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 40, "vocab_size": 49153}, "EleutherAI/pythia-410m-deduped": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 1024, "intermediate_size": 4096, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50304}, "huggyllama/llama-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "ybelkada/falcon-7b-sharded-bf16": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "MingZhong/unieval-sum": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32100}, "NousResearch/Nous-Hermes-Llama2-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32032}, "csebuetnlp/mT5_multilingual_XLSum": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "hkunlp/instructor-xl": {"architectures": 
["T5EncoderModel"], "d_ff": 16384, "d_model": 1024, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "h2oai/h2ogpt-4096-llama2-13b-chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "HuggingFaceH4/tiny-random-LlamaForCausalLM": {"architectures": ["LlamaForCausalLM"], "hidden_size": 16, "intermediate_size": 64, "num_attention_heads": 4, "num_hidden_layers": 2, "vocab_size": 32000}, "EleutherAI/polyglot-ko-12.8b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 30080}, "databricks/dolly-v2-12b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 36, "vocab_size": 50280}, "mrm8488/t5-base-finetuned-span-sentiment-extraction": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "WizardLM/WizardLM-70B-V1.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32001}, "codellama/CodeLlama-34b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "Salesforce/codet5-base-multi-sum": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "MBZUAI/LaMini-T5-738M": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "codellama/CodeLlama-13b-Instruct-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "h2oai/h2ogpt-4096-llama2-7b-chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 36, "vocab_size": 50288}, "bigscience/bloom": {"architectures": ["BloomForCausalLM"], "n_layer": 70, "num_attention_heads": 112, "vocab_size": 250880}, "TigerResearch/tigerbot-13b-chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 60928}, "TheBloke/airoboros-l2-70B-gpt4-1.4.1-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "Austism/chronos-hermes-13b-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32032}, "snrspeaks/KeyPhraseTransformer": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "Rocketknight1/tiny-random-falcon-7b": {"architectures": ["FalconForCausalLM"], "hidden_size": 1136, "num_attention_heads": 71, "num_hidden_layers": 2, "vocab_size": 65024}, "hf-internal-testing/tiny-random-T5Model": 
{"architectures": ["T5Model"], "d_ff": 37, "d_model": 32, "num_heads": 4, "num_layers": 5, "vocab_size": 32100}, "sambanovasystems/BLOOMChat-176B-v1": {"architectures": ["BloomForCausalLM"], "hidden_size": 14336, "n_head": 112, "n_layer": 70, "vocab_size": 250880}, "huggyllama/llama-30b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "elyza/ELYZA-japanese-Llama-2-7b-instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "lcw99/t5-base-korean-text-summary": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 50358}, "it5/it5-base-news-summarization": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32103}, "uer/gpt2-chinese-cluecorpussmall": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 21128}, "t5-11b": {"architectures": ["T5WithLMHeadModel"], "d_ff": 65536, "d_model": 1024, "num_heads": 128, "num_layers": 24, "vocab_size": 32128}, "KoboldAI/LLaMA2-13B-Holomax": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/Llama-2-7B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "bigscience/bloomz-3b": {"architectures": ["BloomForCausalLM"], "n_inner": null, "n_layer": 30, "num_attention_heads": 32, "vocab_size": 250880}, "lmsys/vicuna-7b-v1.5-16k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "sonoisa/t5-base-japanese": {"architectures": ["T5Model"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "line-corporation/japanese-large-lm-3.6b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 3072, "intermediate_size": 12288, "num_attention_heads": 32, "num_hidden_layers": 30, "vocab_size": 51200}, "TheBloke/Llama-2-7B-32K-Instruct-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "EleutherAI/pythia-410m": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 1024, "intermediate_size": 4096, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50304}, "NousResearch/Llama-2-70b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "TheBloke/falcon-7b-instruct-GPTQ": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "eachadea/vicuna-13b-1.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "beomi/llama-2-ko-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 46336}, "TheBloke/falcon-40b-instruct-GPTQ": {"architectures": 
["RWForCausalLM"], "hidden_size": 8192, "n_head": 128, "n_layer": 60, "vocab_size": 65024}, "TheBloke/Llama-2-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "garage-bAInd/Platypus2-70B-instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "rajkumarrrk/gpt2-fine-tuned-on-imdb-positive-reviews": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "cerebras/Cerebras-GPT-13B": {"architectures": ["GPT2Model"], "n_embd": 5120, "n_head": 40, "n_inner": 20480, "n_layer": 40, "vocab_size": 50257}, "rinna/japanese-gpt2-medium": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": 4096, "n_layer": 24, "vocab_size": 32000}, "bigscience/T0pp": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 10240, "d_model": 4096, "num_heads": 64, "num_layers": 24, "vocab_size": 32128}, "Phind/Phind-CodeLlama-34B-v1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "beomi/kykim-gpt3-kor-small_based_on_gpt2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 42000}, "Pi3141/DialoGPT-medium-elon-3": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "EleutherAI/pythia-1b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 8, "num_hidden_layers": 16, "vocab_size": 50304}, "ai-forever/rugpt3large_based_on_gpt2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1536, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "jondurbin/airoboros-l2-13b-gpt4-m2.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "codellama/CodeLlama-13b-Python-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "AUTOMATIC/promptgen-lexart": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 6, "vocab_size": 50257}, "Salesforce/codet5-small": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32100}, "h2oai/h2ogpt-oig-oasst1-512-6_9b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "rinna/japanese-gpt-neox-3.6b-instruction-ppo": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2816, "intermediate_size": 11264, "num_attention_heads": 22, "num_hidden_layers": 36, "vocab_size": 32000}, "prithivida/informal_to_formal_styletransfer": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "TheBloke/Wizard-Vicuna-13B-Uncensored-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "matsuo-lab/weblab-10b-instruction-sft": 
{"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4864, "intermediate_size": 19456, "num_attention_heads": 38, "num_hidden_layers": 36, "vocab_size": 50277}, "succinctly/text2image-prompt-generator": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "TheBloke/Llama-2-7B-Chat-GGML": {}, "TheBloke/Llama-2-70B-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "sentence-transformers/gtr-t5-large": {"architectures": ["T5EncoderModel"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "h2oai/h2ogpt-gm-oasst1-en-2048-falcon-40b-v2": {"architectures": ["RWForCausalLM"], "hidden_size": 8192, "n_head": 128, "n_layer": 60, "vocab_size": 65024}, "togethercomputer/RedPajama-INCITE-Base-3B-v1": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "rinna/bilingual-gpt-neox-4b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2816, "intermediate_size": 11264, "num_attention_heads": 22, "num_hidden_layers": 36, "vocab_size": 65536}, "TheBloke/Llama-2-13B-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "pankajmathur/orca_mini_v3_70b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "OpenAssistant/llama2-13b-orca-8k-3319": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "TheBloke/StableBeluga-7B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "defog/sqlcoder": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 40, "vocab_size": 49152}, "WizardLM/WizardCoder-Python-13B-V1.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "stabilityai/stablelm-tuned-alpha-3b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 32, "num_hidden_layers": 16, "vocab_size": 50688}, "cyberagent/open-calm-small": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 768, "intermediate_size": 3072, "num_attention_heads": 12, "num_hidden_layers": 12, "vocab_size": 52096}, "TheBloke/Wizard-Vicuna-30B-Uncensored-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "TheBloke/WizardLM-70B-V1.0-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32001}, "bigscience/bigscience-small-testing": {"architectures": ["BloomModel"], "hidden_size": 64, "n_head": 8, "n_inner": null, "n_layer": 2, "vocab_size": 250880}, "cyberagent/open-calm-1b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 16, 
"num_hidden_layers": 24, "vocab_size": 52096}, "lamini/lamini_docs_finetuned": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 512, "intermediate_size": 2048, "num_attention_heads": 8, "num_hidden_layers": 6, "vocab_size": 50304}, "EnglishVoice/t5-base-uk-to-us-english": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "codellama/CodeLlama-7b-Python-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/WizardLM-13B-V1.2-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "EleutherAI/pythia-160m-deduped": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 768, "intermediate_size": 3072, "num_attention_heads": 12, "num_hidden_layers": 12, "vocab_size": 50304}, "jphme/Llama-2-13b-chat-german": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "EleutherAI/pythia-12b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 36, "vocab_size": 50688}, "TheBloke/WizardLM-1.0-Uncensored-Llama2-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Salesforce/codet5p-220m": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "google/mt5-xl": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 250112}, "cerebras/Cerebras-GPT-111M": {"n_inner": 3072, "n_embd": 768, "n_head": 12, "n_layer": 10, "vocab_size": 50257}, "google/t5-v1_1-large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "TheBloke/vicuna-7B-v1.5-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "chavinlo/alpaca-native": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "kimnt93/kmv-7b-03": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "NumbersStation/nsql-llama-2-7B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "cerebras/Cerebras-GPT-1.3B": {"n_inner": 8192, "n_embd": 2048, "n_head": 16, "n_layer": 24, "vocab_size": 50257}, "trl-internal-testing/tiny-T5ForConditionalGeneration-correct-vocab": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 37, "d_model": 32, "num_heads": 4, "num_layers": 5, "vocab_size": 32128}, "akreal/tiny-random-LlamaForCausalLM": {"architectures": ["LlamaForCausalLM"], "hidden_size": 16, "intermediate_size": 64, "num_attention_heads": 4, "num_hidden_layers": 2, "vocab_size": 32000}, "akreal/tiny-random-BloomForCausalLM": {"architectures": 
["BloomForCausalLM"], "hidden_size": 32, "n_head": 4, "n_layer": 5, "vocab_size": 1024}, "NousResearch/Nous-Hermes-llama-2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "ai-forever/rugpt3small_based_on_gpt2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50264}, "VMware/open-llama-7b-v2-open-instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "robertmyers/targon-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "TheBloke/Nous-Hermes-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "TheBloke/WizardLM-33B-V1.0-Uncensored-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "TheBloke/WizardLM-7B-uncensored-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "ramsrigouthamg/t5_paraphraser": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "tinkoff-ai/ruDialoGPT-medium": {"n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50261}, "OpenAssistant/falcon-7b-sft-mix-2000": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65040}, "bigcode/tiny_starcoder_py": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 768, "n_head": 12, "n_inner": 3072, "n_layer": 20, "vocab_size": 49152}, "rinna/japanese-gpt-1b": {"architectures": ["GPT2LMHeadModel"], "n_embd": 2048, "n_head": 16, "n_inner": 8192, "n_layer": 24, "vocab_size": 44928}, "TheBloke/orca_mini_v3_70B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "UBC-NLP/turjuman": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 110080}, "h2oai/h2ogpt-4096-llama2-70b-chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "Phind/Phind-CodeLlama-34B-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "elyza/ELYZA-japanese-Llama-2-7b-fast-instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 45043}, "iarfmoose/t5-base-question-generator": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32102}, "TheBloke/Llama-2-7B-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, 
"mrm8488/t5-base-finetuned-emotion": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "hkunlp/instructor-base": {"architectures": ["T5EncoderModel"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "fxmarty/onnx-tiny-random-gpt2-without-merge": {"architectures": ["GPT2Model"], "n_embd": 32, "n_head": 4, "n_inner": 37, "n_layer": 5, "vocab_size": 1024}, "fxmarty/onnx-tiny-random-gpt2-with-merge": {"architectures": ["GPT2Model"], "n_embd": 32, "n_head": 4, "n_inner": 37, "n_layer": 5, "vocab_size": 1024}, "microsoft/GODEL-v1_1-large-seq2seq": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32102}, "rinna/japanese-gpt-neox-3.6b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2816, "intermediate_size": 11264, "num_attention_heads": 22, "num_hidden_layers": 36, "vocab_size": 32000}, "cyberagent/open-calm-3b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 52224}, "eachadea/vicuna-7b-1.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "beomi/KoAlpaca-Polyglot-5.8B": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 16, "num_hidden_layers": 28, "vocab_size": 30080}, "grammarly/coedit-large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32100}, "TheBloke/Platypus2-70B-Instruct-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "codellama/CodeLlama-34b-Python-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "noamwies/llama-test-gqa-with-better-transformer": {"architectures": ["LlamaForCausalLM"], "hidden_size": 128, "intermediate_size": 344, "num_attention_heads": 8, "num_hidden_layers": 2, "vocab_size": 2000}, "bigscience/bloomz-7b1-mt": {"architectures": ["BloomForCausalLM"], "n_inner": null, "n_layer": 30, "num_attention_heads": 32, "vocab_size": 250880}, "Salesforce/codet5p-770m": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32100}, "OpenAssistant/pythia-12b-sft-v8-7k-steps": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 36, "vocab_size": 50288}, "augtoma/qCammel-70-x": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "NousResearch/Llama-2-13b-chat-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "plguillou/t5-base-fr-sum-cnndm": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32102}, "WeOpenML/PandaLM-7B-v1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 
11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "VMware/open-llama-7b-open-instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "pankajmathur/orca_mini_v3_7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "google/t5-xl-lm-adapt": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "LinkSoul/Chinese-Llama-2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "line-corporation/japanese-large-lm-3.6b-instruction-sft": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 3072, "intermediate_size": 12288, "num_attention_heads": 32, "num_hidden_layers": 30, "vocab_size": 51200}, "OpenAssistant/oasst-sft-1-pythia-12b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 36, "vocab_size": 50288}, "ehartford/WizardLM-7B-Uncensored": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "upstage/llama-30b-instruct-2048": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "cyberagent/open-calm-large": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 1536, "intermediate_size": 6144, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 52096}, "Gryphe/MythoLogic-L2-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "eenzeenee/t5-small-korean-summarization": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 50358}, "google/t5-xxl-lm-adapt": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 10240, "d_model": 4096, "num_heads": 64, "num_layers": 24, "vocab_size": 32128}, "mywateriswet/ShuanBot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "hf-internal-testing/tiny-random-bloom": {"architectures": ["BloomModel"], "hidden_size": 64, "n_head": 8, "n_inner": null, "n_layer": 2, "vocab_size": 250880}, "TheBloke/Llama-2-13B-chat-GGML": {}, "decapoda-research/llama-30b-hf": {"architectures": ["LLaMAForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "lmsys/longchat-7b-v1.5-32k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "ziqingyang/chinese-alpaca-2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 55296}, "nlpai-lab/kullm-polyglot-5.8b-v2": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 16, "num_hidden_layers": 28, "vocab_size": 30080}, "google/byt5-base": 
{"architectures": ["T5ForConditionalGeneration"], "d_ff": 3968, "d_model": 1536, "num_heads": 12, "num_layers": 18, "vocab_size": 384}, "stabilityai/stablelm-tuned-alpha-7b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 6144, "intermediate_size": 24576, "num_attention_heads": 48, "num_hidden_layers": 16, "vocab_size": 50432}, "PygmalionAI/pygmalion-1.3b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50304}, "stanford-crfm/BioMedLM": {"architectures": ["GPT2LMHeadModel"], "n_embd": 2560, "n_head": 20, "n_inner": null, "n_layer": 32, "vocab_size": 28896}, "PY007/TinyLlama-1.1B-step-50K-105b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 2048, "intermediate_size": 5632, "num_attention_heads": 32, "num_hidden_layers": 22, "vocab_size": 32000}, "georgesung/llama2_7b_chat_uncensored": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "bigscience/mt0-small": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250112}, "TheBloke/WizardCoder-15B-1.0-GPTQ": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 40, "vocab_size": 49153}, "google/t5-base-lm-adapt": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "OpenAssistant/falcon-40b-sft-top1-560": {"architectures": ["RWForCausalLM"], "hidden_size": 8192, "n_head": 128, "n_layer": 60, "vocab_size": 65040}, "TheBloke/WizardLM-30B-Uncensored-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32001}, "TheBloke/WizardCoder-Python-34B-V1.0-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32001}, "garage-bAInd/Camel-Platypus2-70B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "DeepFloyd/t5-v1_1-xxl": {"architectures": ["T5EncoderModel"], "d_ff": 10240, "d_model": 4096, "num_heads": 64, "num_layers": 24, "vocab_size": 32128}, "EleutherAI/pythia-1b-deduped": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 8, "num_hidden_layers": 16, "vocab_size": 50304}, "TheBloke/CodeLlama-7B-Instruct-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32016}, "kfkas/Llama-2-ko-7b-Chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 46336}, "valhalla/t5-small-qa-qg-hl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32102}, "FlagAlpha/Llama2-Chinese-13b-Chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Open-Orca/OpenOrcaxOpenChat-Preview2-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, 
"intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "trl-internal-testing/tiny-random-LlamaForCausalLM": {"architectures": ["LlamaForCausalLM"], "hidden_size": 16, "intermediate_size": 64, "num_attention_heads": 4, "num_hidden_layers": 2, "vocab_size": 32000}, "abhishek/llama-2-7b-hf-small-shards": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "togethercomputer/RedPajama-INCITE-7B-Base": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "Salesforce/codegen25-7b-multi": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 51200}, "fabiochiu/t5-base-tag-generation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "MBZUAI/LaMini-Flan-T5-248M": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "bigscience/bloomz-1b7": {"architectures": ["BloomForCausalLM"], "n_inner": null, "n_layer": 24, "num_attention_heads": 16, "vocab_size": 250880}, "valhalla/t5-base-qg-hl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32101}, "Wi/gptp": {"architectures": ["GPTPModel"], "n_embd": 128, "n_head": 4, "n_inner": null, "n_layer": 4, "vocab_size": 1000}, "medalpaca/medalpaca-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "yentinglin/Taiwan-LLaMa-v1.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "rinna/japanese-gpt-neox-small": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 768, "intermediate_size": 3072, "num_attention_heads": 12, "num_hidden_layers": 12, "vocab_size": 44416}, "TheBloke/llama2_7b_chat_uncensored-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "EleutherAI/pythia-1.4b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50304}, "daryl149/llama-2-13b-chat-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "flax-community/gpt-2-spanish": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "KoboldAI/LLAMA2-13B-Holodeck-1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "jondurbin/airoboros-13b-gpt4-1.4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "mrm8488/t5-base-finetuned-question-generation-ap": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, 
"num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "OpenBuddy/openbuddy-llama2-13b-v8.1-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 37632}, "EleutherAI/pythia-6.9b-deduped": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "tscholak/3vnuv1vf": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32102}, "OpenAssistant/llama2-70b-oasst-sft-v10": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32007}, "TheBloke/vicuna-13B-v1.5-16K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "OpenAssistant/falcon-7b-sft-top1-696": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65040}, "sentence-transformers/sentence-t5-large": {"architectures": ["T5EncoderModel"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "TheBloke/Nous-Hermes-Llama2-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32032}, "mesolitica/finetune-translation-t5-super-super-tiny-standard-bahasa-cased": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 512, "d_model": 128, "num_heads": 6, "num_layers": 2, "vocab_size": 32100}, "Henk717/spring-dragon": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "openchat/openchat_v3.2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "WizardLM/WizardMath-70B-V1.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32002}, "potsawee/t5-large-generation-squad-QuestionAnswer": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "TheBloke/Phind-CodeLlama-34B-v2-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "pankajmathur/orca_mini_3b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "fffrrt/ruGPT-3.5-13B-GPTQ": {"architectures": ["GPT2LMHeadModel"], "n_embd": 5120, "n_head": 40, "n_inner": null, "n_layer": 40, "vocab_size": 50272}, "kykim/gpt3-kor-small_based_on_gpt2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 42000}, "PAIXAI/Astrid-1B-CPU": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 8, "num_hidden_layers": 16, "vocab_size": 50304}, "ElnaggarLab/ankh-large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3840, "d_model": 1536, "num_heads": 16, "num_layers": 48, 
"vocab_size": 144}, "togethercomputer/RedPajama-INCITE-7B-Chat": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "ramsrigouthamg/t5_squad_v1": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "KETI-AIR/ke-t5-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 64128}, "sentence-transformers/gtr-t5-base": {"architectures": ["T5EncoderModel"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "ramsrigouthamg/t5-large-paraphraser-diverse-high-quality": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "rinna/japanese-gpt2-small": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": 3072, "n_layer": 12, "vocab_size": 32000}, "rinna/bilingual-gpt-neox-4b-instruction-ppo": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2816, "intermediate_size": 11264, "num_attention_heads": 22, "num_hidden_layers": 36, "vocab_size": 65536}, "ramsrigouthamg/t5_boolean_questions": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "philschmid/flan-t5-base-samsum": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "google/t5-small-lm-adapt": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 32128}, "matsuo-lab/weblab-10b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4864, "intermediate_size": 19456, "num_attention_heads": 38, "num_hidden_layers": 36, "vocab_size": 50304}, "stabilityai/stablecode-completion-alpha-3b-4k": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 49152}, "IDEA-CCNL/Ziya-LLaMA-7B-Reward": {"architectures": ["LlamaRewardModel"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "ichitaka/falcon-40b-instruct-8bit": {"architectures": ["RWForCausalLM"], "hidden_size": 8192, "n_head": 128, "n_layer": 60, "vocab_size": 65024}, "TheBloke/WizardCoder-Python-13B-V1.0-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "togethercomputer/Pythia-Chat-Base-7B": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "TheBloke/wizardLM-7B-HF": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "AUTOMATIC/promptgen-majinai-unsafe": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 6, "vocab_size": 50257}, "pinkmanlove/llama-7b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "lmsys/longchat-13b-16k": {"architectures": 
["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "togethercomputer/RedPajama-INCITE-7B-Instruct": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "lmsys/vicuna-13b-v1.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Salesforce/codet5-large": {"architectures": ["T5WithLMHeadModel"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32100}, "FredZhang7/anime-anything-promptgen-v2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 6, "vocab_size": 50257}, "Salesforce/xgen-7b-8k-inst": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 51200}, "jojo0217/step3_mk7": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 30008}, "EleutherAI/pythia-14m": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 128, "intermediate_size": 512, "num_attention_heads": 4, "num_hidden_layers": 6, "vocab_size": 50304}, "cerebras/Cerebras-GPT-590M": {"n_inner": 6144, "n_embd": 1536, "n_head": 12, "n_layer": 18, "vocab_size": 50257}, "dbmdz/german-gpt2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50265}, "KoboldAI/GPT-NeoX-20B-Skein": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 6144, "intermediate_size": 24576, "num_attention_heads": 64, "num_hidden_layers": 44, "vocab_size": 50432}, "beomi/polyglot-ko-12.8b-safetensors": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 30080}, "sentence-transformers/sentence-t5-base": {"architectures": ["T5EncoderModel"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "decapoda-research/llama-65b-hf": {"architectures": ["LLaMAForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "mesolitica/finetune-translation-t5-small-standard-bahasa-cased": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32100}, "marcsun13/bloom-1b7_with_lm_head": {"architectures": ["BloomForCausalLM"], "hidden_size": 2048, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 250880}, "MBZUAI/LaMini-Flan-T5-783M": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "medalpaca/medalpaca-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "JulesBelveze/t5-small-headline-generator": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32100}, "Michau/t5-base-en-generate-headline": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, 
"TheBloke/Falcon-180B-Chat-GPTQ": {"architectures": ["FalconForCausalLM"], "hidden_size": 14848, "num_attention_heads": 232, "num_hidden_layers": 80, "vocab_size": 65024}, "Salesforce/xgen-7b-8k-base": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 51200}, "ai-forever/ruT5-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "KRAFTON/KORani-v3-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "bigscience/mt0-xxl-mt": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 10240, "d_model": 4096, "num_heads": 64, "num_layers": 24, "vocab_size": 250112}, "garage-bAInd/Stable-Platypus2-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/Wizard-Vicuna-13B-Uncensored-HF": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "h2oai/h2ogpt-oasst1-512-12b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 36, "vocab_size": 50688}, "Parth/result": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "declare-lab/flan-alpaca-large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "sdadas/mt5-base-translator-en-pl": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250100}, "ziqingyang/chinese-llama-2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 55296}, "NousResearch/Nous-Hermes-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "pragmatic-programs/listener-suffix-idx-300k": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3584, "d_model": 1472, "num_heads": 6, "num_layers": 12, "vocab_size": 384}, "jinaai/jina-embedding-l-en-v1": {"architectures": ["T5EncoderModel"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "stabilityai/stablelm-base-alpha-3b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 32, "num_hidden_layers": 16, "vocab_size": 50688}, "razent/SciFive-base-Pubmed_PMC": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "uer/gpt2-chinese-poem": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 22557}, "openchat/openchat_v3.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "IDEA-CCNL/Ziya-LLaMA-13B-v1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, 
"num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 39424}, "Sao10K/Mythical-Destroyer-V2-L2-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "juierror/text-to-sql-with-table-schema": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "MingZhong/unieval-fact": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32100}, "TheBloke/vicuna-13B-v1.5-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "cerebras/Cerebras-GPT-256M": {"n_inner": 4352, "n_embd": 1088, "n_head": 17, "n_layer": 14, "vocab_size": 50257}, "declare-lab/flan-alpaca-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "ehartford/WizardLM-1.0-Uncensored-Llama2-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "aubmindlab/aragpt2-base": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 64000}, "valhalla/t5-small-e2e-qg": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32102}, "elinas/llama-7b-hf-transformers-4.29": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "lmsys/vicuna-13b-delta-v1.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "garage-bAInd/Platypus2-7B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "PKU-Alignment/beaver-7b-v1.0-cost": {"architectures": ["LlamaModelForScore"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "allenai/unifiedqa-t5-large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "JackFram/llama-160m": {"architectures": ["LlamaForCausalLM"], "hidden_size": 768, "intermediate_size": 3072, "num_attention_heads": 12, "num_hidden_layers": 12, "vocab_size": 32000}, "daryl149/llama-2-7b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "akreal/tiny-random-t5": {"d_ff": 37, "d_model": 32, "num_heads": 4, "num_layers": 5, "vocab_size": 99}, "cyberagent/open-calm-medium": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 1024, "intermediate_size": 4096, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 52096}, "The-Face-Of-Goonery/Huginn-13b-FP16": {"architectures": ["LlamaForCausalLM"], 
"hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "facebook/tart-full-flan-t5-xl": {"architectures": ["EncT5ForSequenceClassification"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "csebuetnlp/banglat5_banglaparaphrase": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "FlagAlpha/Llama2-Chinese-7b-Chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "jerryjalapeno/Llama-2-1b-0-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 32000}, "NousResearch/Redmond-Puffin-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32032}, "bigscience/bloomz": {"architectures": ["BloomForCausalLM"], "n_layer": 70, "num_attention_heads": 112, "vocab_size": 250880}, "allenai/unifiedqa-t5-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "WizardLM/WizardMath-7B-V1.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "pragmatic-programs/speaker-prefix-idx-300k": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3584, "d_model": 1472, "num_heads": 6, "num_layers": 12, "vocab_size": 384}, "TheBloke/CodeLlama-13B-Instruct-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "TheBloke/Upstage-Llama-2-70B-instruct-v2-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "pinkmanlove/llama-13b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "VietAI/envit5-translation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 50048}, "cerebras/Cerebras-GPT-2.7B": {"n_inner": 10240, "n_embd": 2560, "n_head": 32, "n_layer": 32, "vocab_size": 50257}, "Open-Orca/LlongOrca-7B-16k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32003}, "hf-internal-testing/tiny-random-T5ForConditionalGeneration": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 37, "d_model": 32, "num_heads": 4, "num_layers": 5, "vocab_size": 32100}, "juierror/flan-t5-text2sql-with-schema-v2": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32101}, "BeIR/query-gen-msmarco-t5-base-v1": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "conceptofmind/LLongMA-2-13b-16k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 
32000}, "NousResearch/Yarn-Llama-2-13b-128k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "wangrongsheng/MiniGPT-4-LLaMA": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "hf-internal-testing/tiny-random-GPT2ForSequenceClassification": {"architectures": ["GPT2ForSequenceClassification"], "n_embd": 32, "n_head": 4, "n_inner": 37, "n_layer": 5, "vocab_size": 1024}, "zenham/wail_m_e4_16h_2k": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "h2oai/h2ogpt-4096-llama2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "ai-forever/FRED-T5-large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 50364}, "FreedomIntelligence/phoenix-inst-chat-7b": {"architectures": ["BloomForCausalLM"], "hidden_size": 4096, "n_head": 32, "n_inner": null, "n_layer": 30, "vocab_size": 250880}, "castorini/monot5-base-msmarco": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "minlik/chinese-alpaca-plus-7b-merged": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 49954}, "joaogante/tiny-random-gpt2-with-generation-config": {"intermediate_size": 37, "n_embd": 32, "n_head": 4, "n_inner": null, "n_layer": 5, "vocab_size": 1000}, "neulab/gpt2-finetuned-wikitext103": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "jarradh/llama2_70b_chat_uncensored": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "TigerResearch/tigerbot-13b-base": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 60928}, "rinna/japanese-gpt-neox-3.6b-instruction-sft-v2": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2816, "intermediate_size": 11264, "num_attention_heads": 22, "num_hidden_layers": 36, "vocab_size": 32000}, "bofenghuang/vigogne-2-7b-chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/stable-vicuna-13B-HF": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "aiplanet/effi-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "jondurbin/airoboros-33b-gpt4-m2.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "TheBloke/orca_mini_v3_13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 
32000}, "HuggingFaceH4/starchat-alpha": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 40, "vocab_size": 49156}, "WizardLM/WizardMath-13B-V1.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "upstage/Llama-2-70b-instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "anushehchaudry/llama-2-tiny-random": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8, "intermediate_size": 32, "num_attention_heads": 2, "num_hidden_layers": 1, "vocab_size": 32000}, "fangloveskari/ORCA_LLaMA_70B_QLoRA": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "HyperbeeAI/Tulpar-7b-v0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/Llama-2-70B-Chat-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "csebuetnlp/mT5_m2m_crossSum_enhanced": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "TheBloke/Genz-70b-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "cerebras/Cerebras-GPT-6.7B": {"n_embd": 4096, "vocab_size": 50257, "n_layer": 32, "n_head": 32, "n_inner": 16384}, "ziqingyang/chinese-alpaca-2-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 55296}, "google/t5-small-ssm-nq": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 32128}, "EleutherAI/polyglot-ko-3.8b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 3072, "intermediate_size": 12288, "num_attention_heads": 24, "num_hidden_layers": 32, "vocab_size": 30080}, "kashif/stack-llama-2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "line-corporation/japanese-large-lm-1.7b": {"vocab_size": 51200, "n_embd": 2304, "n_layer": 24, "n_head": 24, "n_inner": 9216, "architectures": ["GPT2LMHeadModel"]}, "microsoft/codereviewer": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32216}, "TheBloke/guanaco-7B-HF": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "circulus/Llama-2-7b-orca-v1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "FlagAlpha/Atom-7B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 65000}, "Tap-M/Luna-AI-Llama2-Uncensored": {"architectures": ["LlamaForCausalLM"], "hidden_size": 
4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "K024/mt5-zh-ja-en-trimmed": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 85292}, "deep-learning-analytics/automatic-title-generation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "luodian/llama-7b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "stabilityai/stablelm-base-alpha-7b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 6144, "intermediate_size": 24576, "num_attention_heads": 48, "num_hidden_layers": 16, "vocab_size": 50432}, "OpenLemur/lemur-70b-chat-v1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32005}, "rahular/varta-t5": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 128128}, "rinna/japanese-gpt-neox-3.6b-instruction-sft": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2816, "intermediate_size": 11264, "num_attention_heads": 22, "num_hidden_layers": 36, "vocab_size": 32000}, "garage-bAInd/Platypus-30B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "WizardLM/WizardCoder-Python-7B-V1.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "chavinlo/gpt4-x-alpaca": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "sentence-transformers/gtr-t5-xl": {"architectures": ["T5EncoderModel"], "d_ff": 16384, "d_model": 1024, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "wangrongsheng/MiniGPT-4-LLaMA-7B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "EleutherAI/pythia-12b-deduped": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 36, "vocab_size": 50688}, "unicamp-dl/translation-pt-en-t5": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "bigscience/mt0-base": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "Pirr/pythia-13b-deduped-green_devil": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 36, "vocab_size": 50277}, "trl-internal-testing/tiny-random-GPT2Model": {"architectures": ["GPT2Model"], "n_embd": 32, "n_head": 4, "n_inner": 37, "n_layer": 5, "vocab_size": 1024}, "MBZUAI/LaMini-GPT-1.5B": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1600, "n_head": 25, "n_inner": null, "n_layer": 48, "vocab_size": 50258}, "Universal-NER/UniNER-7B-all": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, 
"num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/koala-13B-HF": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Rostlab/prot_t5_xl_bfd": {"architectures": ["T5WithLMHeadModel"], "d_ff": 16384, "d_model": 1024, "num_heads": 32, "num_layers": 24, "vocab_size": 128}, "Voicelab/trurl-2-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "explosion-testing/llama2-kv-sharing": {"architectures": ["LlamaForCausalLM"], "hidden_size": 256, "intermediate_size": 512, "num_attention_heads": 4, "num_hidden_layers": 5, "vocab_size": 1024}, "inpars/monot5-3b-inpars-v2-nq-promptagator": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 16384, "d_model": 1024, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "upstage/llama-65b-instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "microsoft/CodeGPT-small-py": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_layer": 12, "vocab_size": 50001}, "VietAI/vit5-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 36096}, "TheBloke/CodeUp-Llama-2-13B-Chat-HF-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/CodeLlama-34B-Instruct-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "elyza/ELYZA-japanese-Llama-2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "jondurbin/airoboros-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/CodeLlama-7B-Python-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "FlagAlpha/Llama2-Chinese-13b-Chat-4bit": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/CodeLlama-13B-Python-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Enoch/llama-65b-hf": {"architectures": ["LLaMAForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "chargoddard/platypus-2-22b-relora": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 40, "vocab_size": 32000}, "togethercomputer/GPT-NeoXT-Chat-Base-20B": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 6144, "intermediate_size": 24576, "num_attention_heads": 64, "num_hidden_layers": 44, "vocab_size": 50432}, "porkorbeef/Llama-2-13b-sf": {"architectures": ["LlamaModel"], "hidden_size": 5120, "intermediate_size": 13824, 
"num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "ehartford/Wizard-Vicuna-13B-Uncensored": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "doas/test5": {"architectures": ["LlamaModel"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "klosax/open_llama_3b_350bt_preview": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "Writer/camel-5b-hf": {"architectures": ["GPT2LMHeadModel"], "n_embd": 4096, "n_head": 32, "n_inner": 16384, "n_layer": 24, "vocab_size": 50258}, "Filosofas/DialoGPT-medium-PALPATINE2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "nomic-ai/gpt4all-falcon": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "reciprocate/llama2-7b-gsm8k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/CodeLlama-13B-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "pankajmathur/orca_mini_v3_13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "projecte-aina/aguila-7b": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 50257}, "TheBloke/WizardLM-13B-V1.1-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "MBZUAI/LaMini-GPT-124M": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50258}, "google/mt5-xxl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 10240, "d_model": 4096, "num_heads": 64, "num_layers": 24, "vocab_size": 250112}, "MaRiOrOsSi/t5-base-finetuned-question-answering": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "satvikag/chatbot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "LMFlow/Robin-7b-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "daryl149/llama-2-70b-chat-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "acrastt/Puma-3B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "TheBloke/orca_mini_v3_7B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "taeminlee/kogpt2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_layer": 12, "vocab_size": 50000}, 
"NousResearch/Llama-2-70b-chat-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "rinna/japanese-gpt2-xsmall": {"architectures": ["GPT2LMHeadModel"], "n_embd": 512, "n_head": 8, "n_inner": 2304, "n_layer": 6, "vocab_size": 32000}, "ziqingyang/chinese-llama-2-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 55296}, "hf-internal-testing/tiny-random-t5-v1.1": {"d_ff": 37, "d_model": 32, "num_heads": 4, "num_layers": 5, "vocab_size": 1103}, "pankajmathur/Lima_Unchained_70b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "chargoddard/llama2-22b-blocktriangular": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/CodeLlama-7B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32016}, "BeIR/query-gen-msmarco-t5-large-v1": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "TheBloke/CodeLlama-13B-Instruct-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "acrastt/Marx-3B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "PygmalionAI/pygmalion-2-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "shibing624/chinese-alpaca-plus-7b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 49954}, "TheBloke/OpenOrcaxOpenChat-Preview2-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "syzymon/long_llama_3b_instruct": {"architectures": ["LongLlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "bofenghuang/vigogne-2-7b-instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Gustavosta/MagicPrompt-Dalle": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "muchad/idt5-qa-qg": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 30002}, "TheBloke/vicuna-13b-v1.3.0-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TigerResearch/tigerbot-13b-base-v1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 60928}, 
"ehartford/WizardLM-13B-Uncensored": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "clibrain/Llama-2-7b-ft-instruct-es": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "google/t5_xxl_true_nli_mixture": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 65536, "d_model": 1024, "num_heads": 128, "num_layers": 24, "vocab_size": 32128}, "unikei/t5-base-split-and-rephrase": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "microsoft/Promptist": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_layer": 12, "vocab_size": 50257}, "stas/mt5-tiny-random": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 256, "d_model": 64, "num_heads": 4, "num_layers": 8, "vocab_size": 5100}, "AIDC-ai-business/Luban-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "microsoft/GODEL-v1_1-base-seq2seq": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32102}, "CalderaAI/30B-Lazarus": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "acrastt/Marx-3B-V2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "h2oai/h2ogpt-4096-llama2-70b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "ajibawa-2023/scarlett-33b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "jondurbin/airoboros-l2-70b-2.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "rubentito/vt5-base-spdocvqa": {"architectures": ["HF_VT5"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "aisquared/dlite-v2-774m": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_inner": null, "n_layer": 36, "vocab_size": 50260}, "elyza/ELYZA-japanese-Llama-2-7b-fast": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 45043}, "quantumaikr/llama-2-70b-fb16-korean": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "TheBloke/CodeLlama-34B-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "microsoft/DialogRPT-updown": {"architectures": ["GPT2ForSequenceClassification"], "n_embd": 1024, "n_head": 16, "n_layer": 24, "vocab_size": 50257}, "TheBloke/CodeLlama-34B-Instruct-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, 
"num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "garage-bAInd/Platypus2-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "trl-internal-testing/tiny-BloomForCausalLM-correct-vocab": {"architectures": ["BloomForCausalLM"], "hidden_size": 32, "n_head": 4, "n_layer": 5, "vocab_size": 250880}, "TheBloke/Llama-2-7B-GGML": {}, "TheBloke/Wizard-Vicuna-7B-Uncensored-HF": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "wenge-research/yayi-7b-llama2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32005}, "coffeeee/nsfw-story-generator2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "jondurbin/airoboros-33b-gpt4-2.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "totally-not-an-llm/EverythingLM-13b-16k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "datificate/gpt2-small-spanish": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "mrm8488/t5-base-finetuned-wikiSQL": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "bofenghuang/vigogne-2-13b-instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "OpenAssistant/stablelm-7b-sft-v7-epoch-3": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 6144, "intermediate_size": 24576, "num_attention_heads": 48, "num_hidden_layers": 16, "vocab_size": 50288}, "bhenrym14/airoboros-33b-gpt4-1.4.1-lxctx-PI-16384-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "flozi00/codellama-34b-german-assistant-v1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "WizardLM/WizardCoder-1B-V1.0": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 2048, "n_head": 16, "n_inner": 8192, "n_layer": 24, "vocab_size": 49153}, "upstage/llama-30b-instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "ehartford/dolphin-llama2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Open-Orca/LlongOrca-13B-16k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32004}, "NousResearch/Nous-Hermes-Llama2-70b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32001}, 
"ml6team/mt5-small-german-query-generation": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250112}, "bigscience/mt0-xxl": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 10240, "d_model": 4096, "num_heads": 64, "num_layers": 24, "vocab_size": 250112}, "EleutherAI/pythia-2.8b-deduped-v0": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50304}, "TheBloke/wizardLM-7B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "conceptofmind/LLongMA-2-7b-16k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "lmsys/vicuna-7b-delta-v1.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "bofenghuang/vigogne-7b-instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "csebuetnlp/banglat5_nmt_en_bn": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "trl-internal-testing/tiny-random-T5Model": {"architectures": ["T5Model"], "d_ff": 37, "d_model": 32, "num_heads": 4, "num_layers": 5, "vocab_size": 1302}, "OpenBuddy/openbuddy-llama2-70b-v10.1-bf16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 37632}, "TheBloke/wizard-vicuna-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "JosephusCheung/Guanaco": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "openchat/opencoderplus": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 40, "vocab_size": 49153}, "jacobmorrison/tk-instruct-large-lora-experiments": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32100}, "PygmalionAI/metharme-1.3b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50304}, "TheBloke/orca_mini_13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "EleutherAI/pythia-70m-v0": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 512, "intermediate_size": 2048, "num_attention_heads": 8, "num_hidden_layers": 6, "vocab_size": 50304}, "project-baize/baize-v2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "line-corporation/japanese-large-lm-1.7b-instruction-sft": {"architectures": ["GPT2LMHeadModel"], "n_embd": 2304, "n_head": 24, "n_inner": 9216, "n_layer": 24, "vocab_size": 51200}, 
"TheBloke/WizardLM-13B-V1-0-Uncensored-SuperHOT-8K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/llama-2-70b-Guanaco-QLoRA-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "MBZUAI/LaMini-Flan-T5-77M": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 32128}, "csebuetnlp/banglat5_nmt_bn_en": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "Trelis/Llama-2-7b-chat-hf-function-calling-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "ehartford/Wizard-Vicuna-7B-Uncensored": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "llSourcell/medllama2_7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "WizardLM/WizardLM-13B-V1.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "Gryphe/MythoMix-L2-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/StableBeluga2-70B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "VietAI/vit5-large-vietnews-summarization": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 36096}, "adasnew/t5-small-xsum": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "Intel/t5-small-xsum-int8-dynamic": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32100}, "daspartho/prompt-extend": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 52000}, "EleutherAI/pythia-160m-v0": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 768, "intermediate_size": 3072, "num_attention_heads": 12, "num_hidden_layers": 12, "vocab_size": 50304}, "Ar4ikov/gpt2-650k-stable-diffusion-prompt-generator": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "ehartford/WizardLM-Uncensored-Falcon-7b": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65025}, "CobraMamba/mamba-gpt-3b-v3": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "TheBloke/llama2_70b_chat_uncensored-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, 
"ai-forever/FRED-T5-1.7B": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1536, "num_heads": 24, "num_layers": 24, "vocab_size": 50364}, "MBZUAI/LaMini-Cerebras-590M": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1536, "n_head": 12, "n_inner": 6144, "n_layer": 18, "vocab_size": 50258}, "mrm8488/llama-2-coder-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "guardrail/llama-2-7b-guanaco-instruct-sharded": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "rinna/bilingual-gpt-neox-4b-8k": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2816, "intermediate_size": 11264, "num_attention_heads": 22, "num_hidden_layers": 36, "vocab_size": 65536}, "mrm8488/falcoder-7b": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "circulus/Llama-2-13b-orca-v1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "allenai/tk-instruct-3b-def": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32100}, "pierreguillou/gpt2-small-portuguese": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_layer": 12, "vocab_size": 50257}, "junelee/wizard-vicuna-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "castorini/monot5-3b-msmarco-10k": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 16384, "d_model": 1024, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "TheBloke/Llama-2-70B-Chat-GGML": {}, "TheBloke/CodeLlama-7B-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32016}, "yeontaek/llama-2-13B-ensemble-v5": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "ybelkada/flan-t5-xl-sharded-bf16": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "WizardLM/WizardCoder-3B-V1.0": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 2816, "n_head": 22, "n_inner": 11264, "n_layer": 36, "vocab_size": 49153}, "Langboat/mengzi-t5-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "MBZUAI/LaMini-GPT-774M": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_inner": null, "n_layer": 36, "vocab_size": 50258}, "ToddGoldfarb/Cadet-Tiny": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "TigerResearch/tigerbot-7b-base": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 60928}, "UrukHan/t5-russian-spell": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, 
"LinkSoul/Chinese-Llama-2-7b-4bit": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/Vicuna-13B-CoT-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "EleutherAI/pythia-1.4b-v0": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50304}, "MayaPH/GodziLLa2-70B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "TheBloke/wizardLM-13B-1.0-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "Gryphe/MythoBoros-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "abacusai/Giraffe-v2-13b-32k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "jondurbin/airoboros-l2-13b-gpt4-1.4.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "razent/SciFive-base-Pubmed": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "TehVenom/Pygmalion-13b-Merged": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "garage-bAInd/SuperPlatty-30B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "jondurbin/airoboros-l2-7b-gpt4-2.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Rostlab/ProstT5": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 16384, "d_model": 1024, "num_heads": 32, "num_layers": 24, "vocab_size": 150}, "TheBloke/guanaco-7B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "JackFram/llama-68m": {"architectures": ["LlamaForCausalLM"], "hidden_size": 768, "intermediate_size": 3072, "num_attention_heads": 12, "num_hidden_layers": 2, "vocab_size": 32000}, "MBZUAI/LaMini-Cerebras-111M": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": 3072, "n_layer": 10, "vocab_size": 50258}, "ehartford/Wizard-Vicuna-30B-Uncensored": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "stockmark/gpt-neox-japanese-1.4b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50000}, "TheBloke/MythoMax-L2-13B-GGML": {}, "MBZUAI/LaMini-Cerebras-256M": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1088, "n_head": 17, "n_inner": 
4352, "n_layer": 14, "vocab_size": 50258}, "jondurbin/airoboros-l2-13b-gpt4-2.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "The-Face-Of-Goonery/Chronos-Beluga-v2-13bfp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "lmqg/t5-base-squad-qag": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32101}, "Voicelab/trurl-2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "ehartford/Samantha-1.11-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "clibrain/Llama-2-13b-ft-instruct-es": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "deepse/CodeUp-Llama-2-13b-chat-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "mrm8488/t5-base-finetuned-sarcasm-twitter": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "ToolBench/ToolLLaMA-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "marella/gpt-2-ggml": {}, "Henk717/airochronos-33B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "stanford-crfm/alias-gpt2-small-x21": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "WizardLM/WizardLM-30B-V1.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32001}, "timdettmers/guanaco-33b-merged": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "xkianteb/alg_ppo_separate_lr_1e-6_n_epochs_10_v_epochs_10_kl_target_1.0_clip_range_0.2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "TheBloke/wizard-mega-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "bigscience/mt0-xl": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 250112}, "luffycodes/nash-vicuna-13b-v1dot5-ep2-w-rag-w-simple": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "h2oai/h2ogpt-oig-oasst1-256-6_9b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, 
"fabiochiu/t5-base-medium-title-generation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "OpenAssistant/falcon-40b-sft-mix-1226": {"architectures": ["RWForCausalLM"], "hidden_size": 8192, "n_head": 128, "n_layer": 60, "vocab_size": 65040}, "Writer/palmyra-base": {"architectures": ["GPT2LMHeadModel"], "n_embd": 4096, "n_head": 32, "n_inner": 16384, "n_layer": 24, "vocab_size": 50257}, "TheBloke/llama-2-70b-Guanaco-QLoRA-GGML": {}, "Rostlab/prot_t5_base_mt_uniref50": {"architectures": ["T5WithLMHeadModel"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 256}, "Lajonbot/Llama-2-13b-hf-instruct-pl-lora_unload": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "WizardLM/WizardLM-7B-V1.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "pankajmathur/orca_mini_7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "yhyhy3/open_llama_7b_v2_med_instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "NousResearch/CodeLlama-7b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32016}, "OpenBuddy/openbuddy-llama2-13b-v11.1-bf16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 37632}, "hf-internal-testing/tiny-random-GPT2ForQuestionAnswering": {"architectures": ["GPT2ForQuestionAnswering"], "n_embd": 32, "n_head": 4, "n_inner": 37, "n_layer": 5, "vocab_size": 1024}, "explosion-testing/llama2-fewer-kv-heads": {"architectures": ["LlamaForCausalLM"], "hidden_size": 256, "intermediate_size": 512, "num_attention_heads": 4, "num_hidden_layers": 5, "vocab_size": 1024}, "hetpandya/t5-base-tapaco": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "PygmalionAI/pygmalion-2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "mrm8488/t5-base-finetuned-imdb-sentiment": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "explosion-testing/falcon-test": {"architectures": ["FalconForCausalLM"], "hidden_size": 32, "num_attention_heads": 4, "num_hidden_layers": 5, "vocab_size": 1024}, "ehartford/WizardLM-33B-V1.0-Uncensored": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "TheBloke/StableBeluga-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/WizardLM-Uncensored-SuperCOT-StoryTelling-30B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, 
"num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32001}, "jinaai/jina-embedding-s-en-v1": {"architectures": ["T5EncoderModel"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "FelixChao/vicuna-33b-coder": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "TheBloke/llama-30b-supercot-SuperHOT-8K-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "quantumaikr/llama-2-70b-fb16-orca-chat-10k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "TheBloke/airoboros-l2-13B-gpt4-1.4.1-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "EleutherAI/pythia-31m": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 256, "intermediate_size": 1024, "num_attention_heads": 8, "num_hidden_layers": 6, "vocab_size": 50304}, "hf-internal-testing/tiny-random-GPT2ForTokenClassification": {"architectures": ["GPT2ForTokenClassification"], "n_embd": 32, "n_head": 4, "n_inner": 37, "n_layer": 5, "vocab_size": 1024}, "jondurbin/airoboros-l2-70b-gpt4-1.4.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "kimsan0622/gpt2-medium": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 64007}, "TheBloke/EverythingLM-13B-16K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Linly-AI/Chinese-LLaMA-2-13B-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 40076}, "BlackSamorez/rudialogpt3_medium_based_on_gpt2_2ch": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "EleutherAI/pythia-2.8b-v0": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50304}, "TheBloke/llama-2-7B-Guanaco-QLoRA-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "google/byt5-xl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 6720, "d_model": 2560, "num_heads": 32, "num_layers": 36, "vocab_size": 384}, "TheBloke/wizard-vicuna-13B-HF": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TehVenom/Pygmalion-Vicuna-1.1-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "openaccess-ai-collective/wizard-mega-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "jondurbin/airoboros-l2-7b-gpt4-m2.0": 
{"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "openchat/openchat_v3.2_super": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "openaccess-ai-collective/manticore-13b-chat-pyg": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Neko-Institute-of-Science/pygmalion-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "unicamp-dl/ptt5-small-portuguese-vocab": {"architectures": ["T5WithLMHeadModel"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "hf-internal-testing/tiny-random-T5ForQuestionAnswering": {"architectures": ["T5ForQuestionAnswering"], "d_ff": 37, "d_model": 32, "num_heads": 4, "num_layers": 5, "vocab_size": 32100}, "microsoft/CodeGPT-small-java-adaptedGPT2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50260}, "unicamp-dl/ptt5-base-portuguese-vocab": {"architectures": ["T5WithLMHeadModel"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "Fredithefish/ScarletPajama-3B-HF": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "hf-internal-testing/tiny-random-T5ForSequenceClassification": {"architectures": ["T5ForSequenceClassification"], "d_ff": 37, "d_model": 32, "num_heads": 4, "num_layers": 5, "vocab_size": 32100}, "TheBloke/Nous-Hermes-Llama-2-7B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "calvindoingstuff/DialoGPT-medium-luffy": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "lvkaokao/llama2-7b-hf-chat-lora-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "skt/ko-gpt-trinity-1.2B-v0.5": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1920, "n_head": 16, "n_inner": 7680, "n_layer": 24, "vocab_size": 51200}, "saibo/llama-1B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 2, "vocab_size": 32000}, "vonjack/Qwen-LLaMAfied-HFTok-7B-Chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 151936}, "TheBloke/CodeLlama-34B-Python-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "GAIR/rst-all-11b": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 65536, "d_model": 1024, "num_heads": 128, "num_layers": 24, "vocab_size": 32128}, "GeorgiaTechResearchInstitute/starcoder-gpteacher-code-instruct": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 40, "vocab_size": 49153}, "jondurbin/airoboros-13b": 
{"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "aisquared/dlite-v2-1_5b": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1600, "n_head": 25, "n_inner": null, "n_layer": 48, "vocab_size": 50260}, "aiassociates/t5-small-grammar-correction-german": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "asi/gpt-fr-cased-small": {"n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50000}, "csebuetnlp/mT5_m2o_chinese_simplified_crossSum": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "openthaigpt/openthaigpt-1.0.0-alpha-7b-chat-ckpt-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "jondurbin/airoboros-l2-13b-2.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "sentence-transformers/sentence-t5-xl": {"architectures": ["T5EncoderModel"], "d_ff": 16384, "d_model": 1024, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "OpenBuddy/openbuddy-openllama-3b-v10-bf16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 37120}, "TheBloke/guanaco-33B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "h2oai/h2ogpt-oasst1-512-20b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 6144, "intermediate_size": 24576, "num_attention_heads": 64, "num_hidden_layers": 44, "vocab_size": 50432}, "Open-Orca/OpenOrca-Preview1-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "WizardLM/WizardLM-13B-V1.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "garage-bAInd/Camel-Platypus2-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "wxjiao/alpaca-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "TheBloke/Wizard-Vicuna-13B-Uncensored-SuperHOT-8K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "FelixChao/vicuna-7B-chemical": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Arc53/docsgpt-14b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "OpenAssistant/llama2-13b-megacode2-oasst": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32007}, 
"TheBloke/Lemur-70B-Chat-v1-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32005}, "EleutherAI/pythia-6.9b-v0": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "grimpep/L2-MythoMax22b-instruct-Falseblock": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 40, "vocab_size": 32000}, "Austism/chronos-hermes-13b-v2-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32032}, "UBC-NLP/AraT5v2-base-1024": {"d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 110208}, "fireballoon/baichuan-vicuna-chinese-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 64000}, "abeja/gpt2-large-japanese": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_inner": null, "n_layer": 36, "vocab_size": 32000}, "TheBloke/Airoboros-L2-70B-2.1-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "vicgalle/gpt2-alpaca-gpt4": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50260}, "flax-community/gpt2-small-indonesian": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "TheBloke/Nous-Hermes-13B-SuperHOT-8K-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "imone/LLaMA2_13B_with_EOT_token": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "Corianas/111m": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": 3072, "n_layer": 10, "vocab_size": 50257}, "The-Face-Of-Goonery/Huginn-v3-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "ehartford/Samantha-1.11-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "heegyu/WizardVicuna-3B-0719": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "acrastt/Griffin-3B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "totally-not-an-llm/EverythingLM-13b-V2-16k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "ikala/bloom-zh-3b-chat": {"architectures": ["BloomForCausalLM"], "hidden_size": 2560, "n_head": 32, "n_inner": null, "n_layer": 30, "vocab_size": 250688}, "Gryphe/MythoLogic-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 
5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "AlekseyKorshuk/vicuna-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "flax-community/gpt2-medium-persian": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50000}, "ehartford/samantha-1.1-llama-33b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "garage-bAInd/Platypus2-70B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "OpenLemur/lemur-70b-v1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32024}, "ausboss/llama-30b-supercot": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "jondurbin/airoboros-l2-70b-gpt4-m2.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "lmqg/mt5-small-koquad-qg-ae": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "TheBloke/OpenAssistant-SFT-7-Llama-30B-HF": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32016}, "TheBloke/h2ogpt-gm-oasst1-en-2048-falcon-40b-v2-GPTQ": {"architectures": ["RWForCausalLM"], "hidden_size": 8192, "n_head": 128, "n_layer": 60, "vocab_size": 65024}, "GOAT-AI/GOAT-7B-Community": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "h2oai/h2ogpt-gm-oasst1-en-1024-20b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 6144, "intermediate_size": 24576, "num_attention_heads": 64, "num_hidden_layers": 44, "vocab_size": 50432}, "beaugogh/pythia-1.4b-deduped-sharegpt": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50280}, "amurshak/llama-2-7b-miniguanaco": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "psyche/kollama2-7b-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "IlyaGusev/fred_t5_ru_turbo_alpaca": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1536, "num_heads": 24, "num_layers": 24, "vocab_size": 50365}, "potsawee/t5-large-generation-race-Distractor": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "heegyu/WizardVicuna-Uncensored-3B-0719": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, 
"TheBloke/openchat_v2_openorca_preview-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "CalderaAI/13B-Legerdemain-L2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "SebastianSchramm/Cerebras-GPT-111M-instruction": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": 3072, "n_layer": 10, "vocab_size": 50258}, "Mikael110/llama-2-7b-guanaco-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Locutusque/gpt2-large-conversational": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_inner": null, "n_layer": 36, "vocab_size": 50260}, "CalderaAI/13B-Ouroboros": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "chaoyi-wu/MedLLaMA_13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "YeungNLP/firefly-llama2-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "garage-bAInd/GPlatty-30B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "pankajmathur/orca_mini_v2_13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "pankajmathur/model_007_13b_v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "chargoddard/Chronorctypus-Limarobormes-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "timdettmers/guanaco-65b-merged": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "digitous/13B-HyperMantis": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "ckiplab/gpt2-base-chinese": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 21128}, "ehartford/dolphin-llama-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "jphme/orca_mini_v2_ger_7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "malhajar/Platypus2-70B-instruct-4bit-gptq": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "heegyu/WizardVicuna-open-llama-3b-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, 
"intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "pankajmathur/model_007": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "vicgalle/gpt2-alpaca": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50260}, "h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-7b-preview-300bt-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "stabilityai/stablecode-completion-alpha-3b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 49152}, "aisquared/dlite-v2-355m": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50260}, "google/byt5-xxl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 12352, "d_model": 4672, "num_heads": 64, "num_layers": 36, "vocab_size": 384}, "ehartford/Samantha-1.11-CodeLlama-34b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "h2oai/h2ogpt-gm-oasst1-multilang-1024-20b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 6144, "intermediate_size": 24576, "num_attention_heads": 64, "num_hidden_layers": 44, "vocab_size": 50432}, "TheBloke/koala-7B-HF": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "ehartford/WizardLM-30B-Uncensored": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32001}, "clibrain/Llama-2-ft-instruct-es": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "quantumaikr/llama-2-70b-fb16-guanaco-1k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "psyche/kogpt": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1536, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 32002}, "wenge-research/yayi-7b": {"architectures": ["BloomForCausalLM"], "hidden_size": 4096, "n_head": 32, "n_inner": null, "n_layer": 30, "vocab_size": 250684}, "Aspik101/WizardVicuna-Uncensored-3B-instruct-PL-lora_unload": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "rombodawg/LosslessMegaCoder-llama2-7b-mini": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32007}, "TurkuNLP/gpt3-finnish-medium": {"architectures": ["BloomModel"], "hidden_size": 1024, "n_head": 16, "n_layer": 24, "vocab_size": 131072}, "pankajmathur/orca_mini_13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Mikael110/llama-2-13b-guanaco-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, 
"intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "totally-not-an-llm/PuddleJumper-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "jondurbin/airoboros-13b-gpt4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "CobraMamba/mamba-gpt-3b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "zarakiquemparte/zarablend-l2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Locutusque/gpt2-conversational-or-qa": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50262}, "frank098/Wizard-Vicuna-13B-juniper": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "jondurbin/airoboros-gpt-3.5-turbo-100k-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "jondurbin/airoboros-33b-gpt4-1.4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "jondurbin/airoboros-l2-70b-gpt4-2.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "MBZUAI/LaMini-Cerebras-1.3B": {"architectures": ["GPT2LMHeadModel"], "n_embd": 2048, "n_head": 16, "n_inner": 8192, "n_layer": 24, "vocab_size": 50258}, "h2oai/h2ogpt-research-oasst1-llama-65b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "nkpz/llama2-22b-daydreamer-v3": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 40, "vocab_size": 32000}, "Aspik101/trurl-2-13b-pl-instruct_unload": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "OpenAssistant/pythia-12b-pre-v8-12.5k-steps": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 36, "vocab_size": 50288}, "breadlicker45/dough-instruct-base-001": {"architectures": ["LlamaForCausalLM"], "hidden_size": 768, "intermediate_size": 3072, "num_attention_heads": 12, "num_hidden_layers": 12, "vocab_size": 50402}, "OpenBuddy/openbuddy-llama-30b-v7.1-bf16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 37632}, "andreaskoepf/llama2-13b-megacode2_min100": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32007}, "ehartford/Samantha-1.11-70b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, 
"intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "flax-community/t5-recipe-generation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "BreadAi/PM_modelV2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1088, "n_head": 17, "n_inner": 4352, "n_layer": 14, "vocab_size": 50257}, "minlik/chinese-alpaca-33b-merged": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 49954}, "jordiclive/Llama-2-70b-oasst-1-200": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32016}, "Lajonbot/tableBeluga-7B-instruct-pl-lora_unload": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "sia-ai/llama-2-7b-1-percent-open-orca-1000-steps-v0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "h2oai/h2ogpt-gm-oasst1-en-1024-12b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 36, "vocab_size": 50688}, "jondurbin/airoboros-33b-gpt4-1.2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "openchat/openchat_8192": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "TaylorAI/Flash-Llama-3B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "yeontaek/llama-2-13B-ensemble-v1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Kirili4ik/ruDialoGpt3-medium-finetuned-telegram": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "WangZeJun/bloom-820m-chat": {"architectures": ["BloomForCausalLM"], "hidden_size": 1536, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 46145}, "4bit/Llama-2-70b-chat-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "chargoddard/llama2-22b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 40, "vocab_size": 32000}, "augtoma/qCammel-13": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "NlpHUST/gpt2-vietnamese": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "Monero/Manticore-13b-Chat-Pyg-Guanaco": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "NousResearch/CodeLlama-34b-hf": {"architectures": 
["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "aisquared/dlite-v2-124m": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50260}, "pankajmathur/orca_mini_v2_7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "The-Face-Of-Goonery/Huginn-22b-Prototype": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 40, "vocab_size": 32000}, "DevaMalla/llama7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "openaccess-ai-collective/manticore-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "nkpz/llama2-22b-chat-wizard-uncensored": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 40, "vocab_size": 32000}, "davzoku/cria-llama2-7b-v1.3": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TaylorAI/Flash-Llama-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Fredithefish/ReasonixPajama-3B-HF": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "TheBloke/Platypus-30B-SuperHOT-8K-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "LoupGarou/WizardCoder-Guanaco-15B-V1.1": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 40, "vocab_size": 49153}, "TheBloke/guanaco-65B-HF": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "hakurei/lotus-12B": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 36, "vocab_size": 50688}, "bofenghuang/vigogne-33b-instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "grimpep/llama2-22B-GPLATTY": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 40, "vocab_size": 32000}, "concedo/Pythia-70M-ChatSalad": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 512, "intermediate_size": 2048, "num_attention_heads": 8, "num_hidden_layers": 6, "vocab_size": 50278}, "rombodawg/LosslessMegaCoder-llama2-13b-mini": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32007}, "TaylorAI/Flash-Llama-7B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, 
"num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/chronos-wizardlm-uc-scot-st-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "OpenBuddy/openbuddy-llama-65b-v8-bf16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 37632}, "ajibawa-2023/scarlett-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/medalpaca-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "elinas/chronos-33b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "OpenBuddy/openbuddy-atom-13b-v9-bf16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 49953}, "OpenAssistant/pythia-12b-sft-v8-rlhf-2k-steps": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 36, "vocab_size": 50282}, "TheTravellingEngineer/llama2-7b-chat-hf-v4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Ejafa/vicuna_7B_vanilla_1.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "yulan-team/YuLan-Chat-2-13b-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 51200}, "huashiyiqike/testmodel": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": 3072, "n_layer": 10, "vocab_size": 50257}, "TheBloke/WizardLM-30B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32001}, "notstoic/PygmalionCoT-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/Vicuna-33B-1-3-SuperHOT-8K-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "FelixChao/vicuna-7B-physics": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/tulu-30B-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32001}, "jondurbin/airoboros-65b-gpt4-1.4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "uukuguy/speechless-llama2-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, 
"num_hidden_layers": 40, "vocab_size": 32000}, "digitous/13B-Chimera": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "jondurbin/airoboros-7b-gpt4-1.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "grimpep/llama2-28B-Airo03": {"architectures": ["LlamaForCausalLM"], "hidden_size": 7296, "intermediate_size": 22016, "num_attention_heads": 57, "num_hidden_layers": 40, "vocab_size": 32000}, "ehartford/CodeLlama-34b-Instruct-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "YeungNLP/firefly-ziya-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 39424}, "TheTravellingEngineer/bloom-560m-RLHF-v2": {"architectures": ["BloomForCausalLM"], "hidden_size": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 250880}, "TheTravellingEngineer/llama2-7b-chat-hf-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "uukuguy/speechless-hermes-coig-lite-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "BreadAi/gpt-Youtube": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 768, "intermediate_size": 3072, "num_attention_heads": 12, "num_hidden_layers": 12, "vocab_size": 50304}, "Aspik101/llama-30b-instruct-2048-PL-lora": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "beaugogh/Llama2-13b-sharegpt4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "gaodrew/gaodrew-gorgonzola-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "OpenBuddy/openbuddy-llama2-13b-v11-bf16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 37632}, "TheBloke/guanaco-13B-HF": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/WizardLM-13B-V1-1-SuperHOT-8K-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "NousResearch/CodeLlama-13b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "BreadAi/MusePy-1-2": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 512, "intermediate_size": 2048, "num_attention_heads": 8, "num_hidden_layers": 6, "vocab_size": 50304}, "jondurbin/airoboros-33b-gpt4-1.3": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, 
"vocab_size": 32000}, "YeungNLP/firefly-bloom-7b1": {"architectures": ["BloomForCausalLM"], "hidden_size": 4096, "n_head": 32, "n_inner": null, "n_layer": 30, "vocab_size": 250880}, "grimpep/llama2-22b-wizard_vicuna": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 40, "vocab_size": 32000}, "Fredithefish/Guanaco-3B-Uncensored": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "digitous/Alpacino13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "mncai/SGPT-1.3B-insurance-epoch10": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 30080}, "TheTravellingEngineer/llama2-7b-chat-hf-dpo": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "yeontaek/Platypus2xOpenOrca-13B-LoRa": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "TheBloke/gpt4-alpaca-lora-30b-HF": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "bhenrym14/airophin-13b-pntk-16k-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/Kimiko-13B-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "porkorbeef/Llama-2-13b-12_153950": {"architectures": ["LlamaModel"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "PSanni/Deer-3b": {"architectures": ["BloomForCausalLM"], "hidden_size": 2560, "n_head": 32, "n_inner": null, "n_layer": 30, "vocab_size": 250683}, "IGeniusDev/llama13B-quant8-testv1-openorca-customdataset": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Neko-Institute-of-Science/metharme-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "alibidaran/medical_transcription_generator": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "Panchovix/airoboros-33b-gpt4-1.2-SuperHOT-8k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "digitous/Alpacino30b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "lgaalves/gpt2-dolly": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "TaylorAI/FLAN-Llama-7B-2_Llama2-7B-Flash_868_full_model": {"architectures": 
["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "zarakiquemparte/zarafusionex-1.1-l2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "OpenAssistant/pythia-12b-sft-v8-2.5k-steps": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 36, "vocab_size": 50288}, "TheBloke/airoboros-13B-HF": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/robin-33B-v2-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "Aspik101/trurl-2-7b-pl-instruct_unload": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "llama-anon/petra-13b-instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "TFLai/gpt2-turkish-uncased": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "health360/Healix-3B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "TheBloke/Mythalion-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-7b-preview-300bt": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "pe-nlp/llama-2-13b-vicuna-wizard": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "yeontaek/Platypus2-13B-QLoRa": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "acrastt/OmegLLaMA-3B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "jslin09/bloom-560m-finetuned-fraud": {"architectures": ["BloomForCausalLM"], "hidden_size": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 250880}, "YeungNLP/firefly-bloom-2b6-v2": {"architectures": ["BloomForCausalLM"], "hidden_size": 2560, "n_head": 32, "n_inner": null, "n_layer": 30, "vocab_size": 46145}, "xzuyn/LLaMa-1-MedicWizard-7B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "Azure99/blossom-v2-3b": {"architectures": ["BloomForCausalLM"], "hidden_size": 2560, "n_head": 32, "n_inner": null, "n_layer": 30, "vocab_size": 250880}, "TheBloke/Airoboros-L2-13B-2.1-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, 
"vocab_size": 32000}, "MetaIX/GPT4-X-Alpasta-30b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32016}, "PocketDoc/Dans-PersonalityEngine-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "vicgalle/alpaca-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Corianas/590m": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1536, "n_head": 12, "n_inner": 6144, "n_layer": 18, "vocab_size": 50257}, "OpenBuddy/openbuddy-openllama-13b-v7-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 38656}, "gywy/llama2-13b-chinese-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 49376}, "Corianas/Quokka_590m": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1536, "n_head": 12, "n_inner": 6144, "n_layer": 18, "vocab_size": 50260}, "aisquared/dlite-v1-355m": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50260}, "aisquared/dlite-v1-774m": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_inner": null, "n_layer": 36, "vocab_size": 50260}, "Fredithefish/RedPajama-INCITE-Chat-3B-Instruction-Tuning-with-GPT-4": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "project-baize/baize-v2-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/Project-Baize-v2-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "FabbriSimo01/GPT_Large_Quantized": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_inner": null, "n_layer": 36, "vocab_size": 50257}, "ajibawa-2023/carl-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Azure99/blossom-v1-3b": {"architectures": ["BloomForCausalLM"], "hidden_size": 2560, "n_head": 32, "n_inner": null, "n_layer": 30, "vocab_size": 250880}, "Aspik101/30B-Lazarus-instruct-PL-lora_unload": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "yeontaek/Platypus2xOpenOrca-13B-IA3": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "Enno-Ai/ennodata-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "FabbriSimo01/Cerebras_1.3b_Quantized": {"architectures": ["GPT2LMHeadModel"], "n_embd": 2048, "n_head": 16, "n_inner": 8192, "n_layer": 24, "vocab_size": 50257}, "migtissera/Synthia-7B": {"architectures": ["LlamaForCausalLM"], 
"hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "pe-nlp/llama-2-13b-platypus-vicuna-wizard": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "yeontaek/Platypus2xOpenOrca-13B-IA3-ensemble": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "Corianas/1.3b": {"architectures": ["GPT2LMHeadModel"], "n_embd": 2048, "n_head": 16, "n_inner": 8192, "n_layer": 24, "vocab_size": 50257}, "Rachneet/gpt2-xl-alpaca": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1600, "n_head": 25, "n_inner": null, "n_layer": 48, "vocab_size": 50257}, "Aeala/VicUnlocked-alpaca-30b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "TheBloke/VicUnlocked-30B-LoRA-HF": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "Monero/WizardLM-Uncensored-SuperCOT-StoryTelling-30b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32001}, "bavest/fin-llama-33b-merged": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "openchat/openchat_v2_w": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "FabbriSimo01/Bloom_1b_Quantized": {"architectures": ["BloomForCausalLM"], "hidden_size": 1536, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 250880}, "Aspik101/tulu-7b-instruct-pl-lora_unload": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "TheTravellingEngineer/llama2-7b-chat-hf-v3": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "yeontaek/llama-2-70b-IA3-guanaco": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "Kunhao/pile-7b-250b-tokens": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 49152}, "yeontaek/llama-2-13b-QLoRA": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "yeontaek/llama-2-13b-Beluga-QLoRA": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "yeontaek/Platypus2xOpenOrca-13B-IA3-v3": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "ausboss/llama7b-wizardlm-unfiltered": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, 
"num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/h2ogpt-oasst1-512-30B-HF": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "bofenghuang/vigogne-7b-chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "NYTK/PULI-GPTrio": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 150016}, "LLMs/WizardLM-30B-V1.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32001}, "openaccess-ai-collective/minotaur-13b-fixed": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheTravellingEngineer/bloom-1b1-RLHF-v2": {"architectures": ["BloomForCausalLM"], "hidden_size": 1536, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 250880}, "yeontaek/Platypus2xOpenOrca-13B-IA3-v4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "BreadAi/DiscordPy": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1088, "n_head": 17, "n_inner": 4352, "n_layer": 14, "vocab_size": 50257}, "TehVenom/oasst-sft-6-llama-33b-xor-MERGED-16bit": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32016}, "gaodrew/gaodrew-llama-30b-instruct-2048-Open-Platypus-100steps": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "yeontaek/Platypus2xOpenOrca-13B-IA3-v2.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "yeontaek/Platypus2xOpenOrca-13B-LoRa-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "dvruette/oasst-pythia-12b-6000-steps": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 36, "vocab_size": 50288}, "LoupGarou/WizardCoder-Guanaco-15B-V1.0": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 40, "vocab_size": 49153}, "KnutJaegersberg/gpt-2-xl-EvolInstruct": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1600, "n_head": 25, "n_inner": null, "n_layer": 48, "vocab_size": 50257}, "Lajonbot/WizardLM-13B-V1.2-PL-lora_unload": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "yeontaek/Platypus2-13B-IA3": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "zarakiquemparte/zaraxe-l2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, 
"BreadAi/gpt-YA-1-1_70M": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 512, "intermediate_size": 2048, "num_attention_heads": 8, "num_hidden_layers": 6, "vocab_size": 50304}, "dvruette/oasst-pythia-12b-reference": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 36, "vocab_size": 50288}, "Panchovix/WizardLM-33B-V1.0-Uncensored-SuperHOT-8k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "titan087/OpenLlama13B-Guanaco": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "w601sxs/b1ade-1b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 8, "num_hidden_layers": 16, "vocab_size": 50304}, "Andron00e/YetAnother_Open-Llama-3B-LoRA": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "quantumaikr/QuantumLM": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "jondurbin/airoboros-65b-gpt4-2.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "Aspik101/llama-30b-2048-instruct-PL-lora_unload": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "yeontaek/Platypus2-13B-LoRa": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "zarakiquemparte/zarafusionix-l2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "huggingtweets/gladosystem": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "eachadea/legacy-vicuna-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "Aeala/GPT4-x-AlpacaDente2-30b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32016}, "shibing624/chinese-llama-plus-13b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 49953}, "euclaise/gpt-neox-122m-minipile-digits": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 768, "intermediate_size": 3072, "num_attention_heads": 12, "num_hidden_layers": 12, "vocab_size": 48262}, "TheBloke/UltraLM-13B-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "lvkaokao/llama2-7b-hf-instruction-lora": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "BreadAi/StoryPy": 
{"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 512, "intermediate_size": 2048, "num_attention_heads": 8, "num_hidden_layers": 6, "vocab_size": 50304}, "dvruette/oasst-pythia-12b-flash-attn-5000-steps": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 36, "vocab_size": 50288}, "aisquared/dlite-v1-124m": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "ewof/koishi-instruct-3b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "h2oai/h2ogpt-gm-oasst1-en-1024-open-llama-7b-preview-400bt": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/airoboros-7b-gpt4-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "jondurbin/airoboros-13b-gpt4-1.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/tulu-13B-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "yhyhy3/med-orca-instruct-33b": {"architectures": ["LlamaModel"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "heegyu/LIMA-13b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "abhishek/llama2guanacotest": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "heegyu/LIMA2-13b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Corianas/Quokka_256m": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1088, "n_head": 17, "n_inner": 4352, "n_layer": 14, "vocab_size": 50260}, "golaxy/gogpt-560m": {"architectures": ["BloomForCausalLM"], "hidden_size": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 250880}, "OptimalScale/robin-7b-v2-delta": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "bofenghuang/vigogne-13b-chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "klosax/pythia-160m-deduped-step92k-193bt": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 768, "intermediate_size": 3072, "num_attention_heads": 12, "num_hidden_layers": 12, "vocab_size": 50304}, "golaxy/gogpt2-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 68420}, "YeungNLP/firefly-llama2-13b-v1.2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 
32000}, "WhoTookMyAmogusNickname/NewHope_HF_not_official": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "ehartford/CodeLlama-34b-Python-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "KnutJaegersberg/megatron-GPT-2-345m-EvolInstruct": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": 4096, "n_layer": 24, "vocab_size": 50257}, "Aeala/Alpaca-elina-65b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "Monero/WizardLM-30B-Uncensored-Guanaco-SuperCOT-30b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32001}, "csitfun/llama-7b-logicot": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "OptimalScale/robin-65b-v2-delta": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "LLMs/WizardLM-13B-V1.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "CobraMamba/mamba-gpt-3b-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "aisquared/dlite-v1-1_5b": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1600, "n_head": 25, "n_inner": null, "n_layer": 48, "vocab_size": 50257}, "nthngdy/pythia-owt2-70m-100k": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 512, "intermediate_size": 2048, "num_attention_heads": 8, "num_hidden_layers": 6, "vocab_size": 50304}, "LLMs/AlpacaGPT4-7B-elina": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Yhyu13/oasst-rlhf-2-llama-30b-7k-steps-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32006}, "jondurbin/airoboros-7b-gpt4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "grantprice/Cerebras-GPT-590M-finetuned-DND": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1536, "n_head": 12, "n_inner": 6144, "n_layer": 18, "vocab_size": 50257}, "TheBloke/robin-13B-v2-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/robin-65b-v2-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "FPHam/Free_Sydney_13b_HF": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32032}, "acrastt/RedPajama-INCITE-Chat-Instruct-3B-V1": {"architectures": ["GPTNeoXForCausalLM"], 
"hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "jondurbin/airoboros-65b-gpt4-m2.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "heegyu/LIMA2-7b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "BreadAi/MuseCan": {"architectures": ["GPT2LMHeadModel"], "n_embd": 960, "n_head": 15, "n_inner": 9, "n_layer": 5, "vocab_size": 50304}, "ausboss/llama-13b-supercot": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "openaccess-ai-collective/manticore-30b-chat-pyg-alpha": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "OptimalScale/robin-13b-v2-delta": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "quantumaikr/llama-2-7b-hf-guanaco-1k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Fredithefish/RedPajama-INCITE-Chat-3B-ShareGPT-11K": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "CalderaAI/13B-BlueMethod": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "SaylorTwift/gpt2_test": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_layer": 12, "vocab_size": 50257}, "WeOpenML/PandaLM-Alpaca-7B-v1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "WeOpenML/Alpaca-7B-v1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "sumo43/lora_moe_7b_baseline": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "wenge-research/yayi-13b-llama2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32005}, "golaxy/gowizardlm": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 68420}, "abhiramtirumala/DialoGPT-sarcastic-medium": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "Corianas/Quokka_2.7b": {"architectures": ["GPT2LMHeadModel"], "n_embd": 2560, "n_head": 32, "n_inner": 10240, "n_layer": 32, "vocab_size": 50260}, "Corianas/256_5epoch": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1088, "n_head": 17, "n_inner": 4352, "n_layer": 14, "vocab_size": 50257}, "dvruette/llama-13b-pretrained": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, 
"num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "TheBloke/alpaca-lora-65B-HF": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "ashercn97/giraffe-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Aspik101/Vicuzard-30B-Uncensored-instruct-PL-lora_unload": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32001}, "TheBloke/dromedary-65b-lora-HF": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "Yhyu13/chimera-inst-chat-13b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "ehartford/based-30b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "concedo/Vicuzard-30B-Uncensored": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32001}, "64bits/LexPodLM-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "MayaPH/GodziLLa-30B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "Aspik101/vicuna-7b-v1.3-instruct-pl-lora_unload": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "julianweng/Llama-2-7b-chat-orcah": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "heegyu/RedTulu-Uncensored-3B-0719": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "Aspik101/Llama-2-7b-hf-instruct-pl-lora_unload": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "quantumaikr/QuantumLM-70B-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "BreadAi/gpt-YA-1-1_160M": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 768, "intermediate_size": 3072, "num_attention_heads": 12, "num_hidden_layers": 12, "vocab_size": 50304}, "dvruette/oasst-pythia-12b-pretrained-sft": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 36, "vocab_size": 50288}, "Aeala/GPT4-x-AlpacaDente-30b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32016}, "TehVenom/Pygmalion_AlpacaLora-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, 
"intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "LLMs/Stable-Vicuna-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "quantumaikr/open_llama_7b_hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Aeala/GPT4-x-Alpasta-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "Fredithefish/CrimsonPajama": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "openaccess-ai-collective/hippogriff-30b-chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "camel-ai/CAMEL-13B-Role-Playing-Data": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/landmark-attention-llama7b-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32002}, "TheBloke/robin-33B-v2-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "TheBloke/GPlatty-30B-SuperHOT-8K-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "TheBloke/Chinese-Alpaca-33B-SuperHOT-8K-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 49954}, "TheBloke/CAMEL-33B-Combined-Data-SuperHOT-8K-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "klosax/open_llama_13b_600bt_preview": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Aspik101/Nous-Hermes-13b-pl-lora_unload": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "jondurbin/airoboros-l2-7b-gpt4-1.4.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "YeungNLP/firefly-llama-30b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "ashercn97/manatee-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "lizhuang144/starcoder_mirror": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 40, "vocab_size": 49152}, "Aspik101/vicuna-13b-v1.5-PL-lora_unload": {"architectures": ["LlamaForCausalLM"], 
"hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Aspik101/Redmond-Puffin-13B-instruct-PL-lora_unload": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32032}, "Aspik101/StableBeluga-13B-instruct-PL-lora_unload": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "RoversX/llama-2-7b-hf-small-shards-Samantha-V1-SFT": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Corianas/Quokka_1.3b": {"architectures": ["GPT2LMHeadModel"], "n_embd": 2048, "n_head": 16, "n_inner": 8192, "n_layer": 24, "vocab_size": 50260}, "nthngdy/pythia-owt2-70m-50k": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 512, "intermediate_size": 2048, "num_attention_heads": 8, "num_hidden_layers": 6, "vocab_size": 50304}, "danielhanchen/open_llama_3b_600bt_preview": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "TheBloke/VicUnlocked-alpaca-65B-QLoRA-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "IDEA-CCNL/Ziya-LLaMA-13B-Pretrain-v1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 39424}, "kevinpro/Vicuna-13B-CoT": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "wahaha1987/llama_7b_sharegpt94k_fastchat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "openaccess-ai-collective/minotaur-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/tulu-7B-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "golaxy/gogpt-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 68420}, "Aeala/Enterredaas-33b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "kingbri/chronolima-airo-grad-l2-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheTravellingEngineer/bloom-560m-RLHF": {"architectures": ["BloomForCausalLM"], "hidden_size": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 250880}, "HWERI/Llama2-7b-sharegpt4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "l3utterfly/llama2-7b-layla": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, 
"intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "yeontaek/llama-2-13b-Guanaco-QLoRA": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "duliadotio/dulia-13b-8k-alpha": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "yeontaek/llama-2-13B-ensemble-v3": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "dvruette/oasst-gpt-neox-20b-3000-steps": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 6144, "intermediate_size": 24576, "num_attention_heads": 64, "num_hidden_layers": 44, "vocab_size": 50288}, "dvruette/oasst-gpt-neox-20b-1000-steps": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 6144, "intermediate_size": 24576, "num_attention_heads": 64, "num_hidden_layers": 44, "vocab_size": 50288}, "huggingtweets/jerma985": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "Dampish/Dante-2.8B": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "TheBloke/Planner-7B-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "klosax/pythia-70m-deduped-step44k-92bt": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 512, "intermediate_size": 2048, "num_attention_heads": 8, "num_hidden_layers": 6, "vocab_size": 50304}, "klosax/open_llama_7b_400bt_preview": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "golaxy/gogpt2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 68420}, "Lajonbot/Llama-2-7b-chat-hf-instruct-pl-lora_unload": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheTravellingEngineer/llama2-7b-chat-hf-guanaco": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Lajonbot/vicuna-7b-v1.5-PL-lora_unload": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "kingbri/airolima-chronos-grad-l2-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "yeontaek/llama-2-70B-ensemble-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "dvruette/oasst-llama-13b-2-epochs": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "dvruette/llama-13b-pretrained-sft-epoch-1": {"architectures": ["LlamaForCausalLM"], 
"hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "dvruette/llama-13b-pretrained-dropout": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "hakurei/instruct-12b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 36, "vocab_size": 50277}, "dvruette/gpt-neox-20b-full-precision": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 6144, "intermediate_size": 24576, "num_attention_heads": 64, "num_hidden_layers": 44, "vocab_size": 50288}, "Monero/WizardLM-13b-OpenAssistant-Uncensored": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "Yhyu13/llama-30B-hf-openassitant": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "jondurbin/airoboros-65b-gpt4-1.2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "camel-ai/CAMEL-33B-Combined-Data": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "MBZUAI/bactrian-x-llama-13b-merged": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "dsvv-cair/alpaca-cleaned-llama-30b-bf16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "YeungNLP/firefly-llama-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "YeungNLP/firefly-llama-13b-v1.2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "heegyu/WizardVicuna2-13b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "dvruette/oasst-llama-13b-1000-steps": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "dvruette/llama-13b-pretrained-sft-do2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "pillowtalks-ai/delta13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "illuin/test-custom-llama": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "MrNJK/gpt2-xl-sft": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1600, "n_head": 25, "n_inner": null, "n_layer": 48, "vocab_size": 50257}, "PocketDoc/Dans-PileOfSets-Mk1-llama-13b-merged": {"architectures": ["LlamaForCausalLM"], 
"hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "jondurbin/airoboros-65b-gpt4-1.3": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "bhenrym14/airoboros-33b-gpt4-1.4.1-PI-8192-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "frank098/WizardLM_13B_juniper": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "golaxy/goims": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 68420}, "dvruette/oasst-pythia-6.9b-4000-steps": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50288}, "mncai/chatdoctor": {"architectures": ["LLaMAForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "wannaphong/openthaigpt-0.1.0-beta-full-model_for_open_llm_leaderboard": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "golaxy/gogpt-3b-bloom": {"architectures": ["BloomForCausalLM"], "hidden_size": 2560, "n_head": 32, "n_inner": null, "n_layer": 30, "vocab_size": 250880}, "golaxy/gogpt-7b-bloom": {"architectures": ["BloomForCausalLM"], "hidden_size": 4096, "n_head": 32, "n_inner": null, "n_layer": 30, "vocab_size": 250880}, "jondurbin/airoboros-33b-gpt4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "jondurbin/airoboros-13b-gpt4-1.2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "jondurbin/airoboros-7b-gpt4-1.2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "jondurbin/airoboros-13b-gpt4-1.3": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "jondurbin/airoboros-7b-gpt4-1.4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "jondurbin/airoboros-13b-gpt4-1.4-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "jondurbin/airoboros-7b-gpt4-1.4.1-qlora": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "frank098/orca_mini_3b_juniper": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "Lajonbot/vicuna-13b-v1.3-PL-lora_unload": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, 
"intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "jxhong/CAlign-alpaca-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "quantumaikr/KoreanLM-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "keyfan/vicuna-chinese-replication-v1.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 49953}, "jondurbin/airoboros-7b-gpt4-1.3": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "jerryjalapeno/nart-100k-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "xzuyn/Alpacino-SuperCOT-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "wahaha1987/llama_13b_sharegpt94k_fastchat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "quantumaikr/QuantumLM-7B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Undi95/ReMM-SLERP-L2-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "huggingtweets/bladeecity-jerma985": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "pszemraj/pythia-6.9b-HC3": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "CalderaAI/30B-Epsilon": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "TFLai/OpenOrca-Platypus2-13B-QLoRA-0.80-epoch": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "alpindale/pygmalion-instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "jondurbin/airoboros-c34b-2.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "abacaj/starcoderbase-1b-sft": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 2048, "n_head": 16, "n_inner": 8192, "n_layer": 24, "vocab_size": 49153}, "bongchoi/test-llama2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TinyPixel/lima-test": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, 
"quantumaikr/llama-2-70B-instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "yeontaek/llama-2-13B-ensemble-v4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "cointegrated/rut5-base-absum": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 30000}, "pankajmathur/model_420_preview": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "Fredithefish/Guanaco-3B-Uncensored-v2": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "yeontaek/llama-2-70B-ensemble-v4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "Writer/palmyra-large": {"architectures": ["GPT2LMHeadModel"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 44, "vocab_size": 50257}, "RobbeD/OpenLlama-Platypus-3B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "TFLai/OrcaMini-Platypus2-13B-QLoRA-0.80-epoch": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "NobodyExistsOnTheInternet/PuffedConvo13bLoraE4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Sao10K/Medusa-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/Manticore-13B-Chat-Pyg-Guanaco-SuperHOT-8K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TFLai/Nous-Hermes-Platypus2-13B-QLoRA-0.80-epoch": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32032}, "dhmeltzer/llama-7b-SFT_eli5_wiki65k_1024_r_64_alpha_16_merged": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TFLai/MythoMix-Platypus2-13B-QLoRA-0.80-epoch": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "chargoddard/llama-2-34b-uncode": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "zarakiquemparte/zaraxls-l2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TFLai/Stable-Platypus2-13B-QLoRA-0.80-epoch": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, 
"num_hidden_layers": 40, "vocab_size": 32000}, "Danielbrdz/Barcenas-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "alan-turing-institute/mt5-large-finetuned-mnli-xtreme-xnli": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 250112}, "TFLai/Limarp-Platypus2-13B-QLoRA-0.80-epoch": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TFLai/PuddleJumper-Platypus2-13B-QLoRA-0.80-epoch": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "The-Face-Of-Goonery/Huginn-13b-v4.5": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "jondurbin/airoboros-l2-7b-2.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "allenai/unifiedqa-v2-t5-large-1363200": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "TheBloke/OpenAssistant-Llama2-13B-Orca-8K-3319-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "synapsoft/Llama-2-7b-hf-flan2022-1.2M": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "yeontaek/Platypus2-13B-LoRa-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "KES/T5-KES": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "robowaifudev/megatron-gpt2-345m": {"vocab_size": 50257, "n_embd": 1024, "n_layer": 24, "n_head": 16, "n_inner": 4096, "architectures": ["GPT2LMHeadModel"]}, "Sao10K/Mythical-Destroyer-L2-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "CHIH-HUNG/llama-2-13b-dolphin_20w": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "The-Face-Of-Goonery/Huginn-13b-V4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "haining/scientific_abstract_simplification": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "ChanonUtupon/openthaigpt-merge-lora-llama-2-7B-3470k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 56554}, "chaoyi-wu/PMC_LLAMA_7B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 
32000}, "CHIH-HUNG/llama-2-13b-OpenOrca_5w": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "clibrain/lince-zero": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "TheBloke/Project-Baize-v2-7B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "uukuguy/speechless-codellama-platypus-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "CHIH-HUNG/llama-2-13b-dolphin_5w": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "yeontaek/airoboros-2.1-llama-2-13B-QLoRa": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "uukuguy/speechless-llama2-luban-orca-platypus-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Norquinal/llama-2-7b-claude-chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TFLai/Luban-Platypus2-13B-QLora-0.80-epoch": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "conceptofmind/Open-LLongMA-3b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "Norquinal/llama-2-7b-claude-chat-rp": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "lgaalves/llama-2-7b-hf_open-platypus": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "ziqingyang/chinese-llama-2-7b-16k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 55296}, "yeontaek/llama-2-13B-ensemble-v6": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "yeontaek/llama-2-70B-ensemble-v7": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "ubikpt/t5-small-finetuned-cnn": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "rajkumarrrk/t5-base-fine-tuned-on-cnn-dm": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "google/t5-efficient-xl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 16384, "d_model": 1024, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "TFLai/Airboros2.1-Platypus2-13B-QLora-0.80-epoch": 
{"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "dhmeltzer/llama-7b-SFT_ds_eli5_1024_r_64_alpha_16_merged": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "h2oai/h2ogpt-4096-llama2-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "dhmeltzer/llama-7b-SFT_ds_wiki65k_1024_r_64_alpha_16_merged": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TFLai/Ensemble5-Platypus2-13B-QLora-0.80-epoch": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "TFLai/Athena-Platypus2-13B-QLora-0.80-epoch": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "4bit/Llama-2-7b-chat-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TFLai/MythicalDestroyerV2-Platypus2-13B-QLora-0.80-epoch": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TFLai/OpenOrcaPlatypus2-Platypus2-13B-QLora-0.80-epoch": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "Salesforce/codegen25-7b-mono": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 51200}, "Sao10K/Stheno-L2-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "yeontaek/WizardCoder-Python-13B-LoRa": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "uukuguy/speechless-orca-platypus-coig-lite-2k-0.6e-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "casperhansen/vicuna-7b-v1.5-awq": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "luffycodes/nash-vicuna-33b-v1dot3-ep2-w-rag-w-simple": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "CHIH-HUNG/llama-2-13b-OpenOrca_20w": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "google/t5-efficient-small": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "TheBloke/orca_mini_v2_7B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, 
"intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "tianyil1/denas-llama2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Sao10K/Stheno-Inverted-L2-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "junelee/ko_vicuna_7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Seungyoun/codellama-7b-instruct-pad": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32025}, "TheBloke/Kimiko-v2-13B-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "uukuguy/speechless-orca-platypus-coig-lite-4k-0.5e-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "uukuguy/speechless-orca-platypus-coig-lite-4k-0.6e-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "Undi95/UndiMix-v1-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "h2oai/h2ogpt-gm-oasst1-en-2048-falcon-7b": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "yeontaek/llama-2-70B-ensemble-v6": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "TheBloke/vicuna-13B-v1.5-16K-GGML": {}, "KnutJaegersberg/black_goo_recipe_a": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "PKU-Alignment/beaver-7b-v1.0-reward": {"architectures": ["LlamaModelForScore"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "KnutJaegersberg/black_goo_recipe_b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "lgaalves/gpt2_open-platypus": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "cointegrated/rut5-base-multitask": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 30000}, "h2oai/h2ogpt-gm-oasst1-multilang-2048-falcon-7b": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "Cheng98/llama-160m": {"architectures": ["LlamaForCausalLM"], "hidden_size": 768, "intermediate_size": 3072, "num_attention_heads": 12, "num_hidden_layers": 12, "vocab_size": 32000}, "Andron00e/YetAnother_Open-Llama-3B-LoRA-OpenOrca": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 
26, "vocab_size": 32000}, "lgaalves/gpt2_guanaco-dolly-platypus": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "gagan3012/k2t-base": {"architectures": ["T5WithLMHeadModel"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "marcchew/Platypus-2-7B-LaMini-14K": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "lgaalves/gpt2_platypus-dolly-guanaco": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "czearing/article-title-generator": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "luffycodes/mcq-vicuna-13b-v1.5": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Universal-NER/UniNER-7B-definition": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Meli/GPT2-Prompt": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50259}, "s-nlp/ruT5-base-detox": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "cointegrated/rut5-base-paraphraser": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 30000}, "DevaMalla/llama7b_alpaca_bf16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "Universal-NER/UniNER-7B-type": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/starchat-beta-GPTQ": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 40, "vocab_size": 49156}, "bigscience/sgpt-bloom-7b1-msmarco": {"architectures": ["BloomForCausalLM"], "n_inner": null, "n_layer": 30, "num_attention_heads": 32, "vocab_size": 250682}, "4bit/Llama-2-13b-chat-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "uukuguy/speechless-llama2-hermes-orca-platypus-wizardlm-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "ClueAI/PromptCLUE-base-v1-5": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "budecosystem/genz-13b-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/LlongOrca-13B-16K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32004}, "ozcangundes/mt5-multitask-qa-qg-turkish": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 
512, "num_heads": 6, "num_layers": 8, "vocab_size": 250102}, "EleutherAI/pythia-410m-v0": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 1024, "intermediate_size": 4096, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50304}, "CHIH-HUNG/llama-2-13b-FINETUNE2_3w": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "sonoisa/t5-base-japanese-v1.1": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "bolbolzaban/gpt2-persian": {"n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 25000, "architectures": ["GPT2LMHeadModel"]}, "google/t5-large-ssm": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "DeepPavlov/rudialogpt3_medium_based_on_gpt2_v2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "Mikivis/xuanxuan": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "uukuguy/speechless-llama2-hermes-orca-platypus-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "KnutJaegersberg/black_goo_recipe_c": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "beaugogh/Llama2-7b-sharegpt4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Salesforce/codet5p-770m-py": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32100}, "codefuse-ai/CodeFuse-CodeLlama-34B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "AUTOMATIC/promptgen-majinai-safe": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 6, "vocab_size": 50257}, "reciprocate/shepherd-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Devio/test-22B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 40, "vocab_size": 32000}, "acrastt/Bean-3B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "TheBloke/L2-MythoMax22b-Instruct-Falseblock-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 40, "vocab_size": 32000}, "vihangd/smartplat-3b-v1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "jinaai/jina-embedding-b-en-v1": {"architectures": ["T5EncoderModel"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "yahma/llama-13b-hf": {"architectures": 
["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "uukuguy/speechless-codellama-orca-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "CHIH-HUNG/llama-2-13b-FINETUNE1_17w": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "VMware/open-llama-13b-open-instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "ToolBench/ToolLLaMA-2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "luffycodes/mcq-hal-vicuna-13b-v1.5": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "TheBloke/BigTranslate-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 53613}, "PeanutJar/LLaMa-2-PeanutButter_v18_A-7B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "openbmb/UltraLM-65b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "Mikivis/gpt2-large-lora-sft": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_inner": null, "n_layer": 36, "vocab_size": 50257}, "Devio/test-3b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 16, "vocab_size": 32000}, "akhooli/gpt2-small-arabic": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_layer": 12, "vocab_size": 50257}, "Rardilit/Panther_v1": {"architectures": ["LLaMAForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "ValiantLabs/ShiningValiant": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "Devio/test100": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 40, "vocab_size": 32000}, "Devio/testC": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/Chronoboros-33B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "TheBloke/Pygmalion-13B-SuperHOT-8K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "vihangd/smartplat-3b-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, 
"laituan245/t5-v1_1-small-smiles2caption-ft-from-pretrained-c4": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 32128}, "4bit/Llama-2-7b-Chat-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "CHIH-HUNG/llama-2-13b-FINETUNE2_3w-gate_up_down_proj": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "CHIH-HUNG/llama-2-13b-FINETUNE2_3w-q_k_v_o_proj": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/vicuna-33B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "Devio/test-1400": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/gpt4-alpaca-lora-30B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "notstoic/pygmalion-13b-4bit-128g": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "NousResearch/Yarn-Llama-2-7b-128k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Den4ikAI/FRED-T5-LARGE_text_qa": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 50364}, "valhalla/t5-base-qa-qg-hl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32102}, "Undi95/ReMM-L2-13B-v1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/Zarablend-L2-7B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "KnutJaegersberg/black_goo_recipe_d": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "ckip-joint/bloom-1b1-zh": {"architectures": ["BloomModel"], "hidden_size": 1536, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 250880}, "seonglae/llama-2-13b-chat-hf-gptq": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Trelis/Llama-2-7b-chat-hf-sharded-bf16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "KnutJaegersberg/LLongMA-3b-LIMA": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "h2oai/h2ogpt-gm-oasst1-en-xgen-7b-8k": {"architectures": ["LlamaForCausalLM"], 
"hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 51200}, "PeanutJar/LLaMa-2-PeanutButter_v18_B-7B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "ehartford/WizardLM-1.0-Uncensored-CodeLlama-34b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "uukuguy/speechless-codellama-orca-platypus-13b-0.10e": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "DeepESP/gpt2-spanish": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "paust/pko-flan-t5-large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 50358}, "ThomasNLG/t5-qa_squad2neg-en": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "PharMolix/BioMedGPT-LM-7B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "eenzeenee/t5-base-korean-summarization": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 50358}, "porkorbeef/Llama-2-13b-public": {"architectures": ["LlamaModel"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/WizardLM-Uncensored-Falcon-7B-GPTQ": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65025}, "dahara1/weblab-10b-instruction-sft-GPTQ": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4864, "intermediate_size": 19456, "num_attention_heads": 38, "num_hidden_layers": 36, "vocab_size": 50277}, "CHIH-HUNG/llama-2-13b-FINETUNE2_TEST_2.2w": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "mrm8488/t5-small-finetuned-emotion": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "gurgutan/saiga2-13b-4bit": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "IlyaGusev/rut5_base_sum_gazeta": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 30000}, "TheBloke/Llama-2-13B-German-Assistant-v4-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 37632}, "gaodrew/OpenOrca-Platypus2-13B-thera-1250": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "minlik/chinese-llama-7b-merged": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 49953}, 
"TheBloke/Stable-Platypus2-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/Luna-AI-Llama2-Uncensored-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "allenai/t5-small-squad2-question-generation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "microsoft/bloom-deepspeed-inference-fp16": {"architectures": ["BloomModel"], "n_layer": 70, "num_attention_heads": 112, "vocab_size": 250880}, "csebuetnlp/banglat5": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "potsawee/t5-large-generation-race-QuestionAnswer": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "grammarly/coedit-xl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32100}, "Narrativaai/bloom-560m-finetuned-totto-table-to-text": {"architectures": ["BloomForCausalLM"], "hidden_size": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 250880}, "jjaaaww/posi_13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "IDEA-CCNL/Randeng-T5-784M-MultiTask-Chinese": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32596}, "Undi95/Nous-Hermes-13B-Code": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32032}, "paust/pko-t5-large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 50358}, "learnanything/llama-7b-huggingface": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "weiren119/Taiwan-LLaMa-v1.0-4bits-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "ml6team/keyphrase-generation-t5-small-inspec": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32100}, "TheBloke/CodeLlama-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "Undi95/MLewd-L2-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "tscholak/cxmefzzi": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 16384, "d_model": 1024, "num_heads": 32, "num_layers": 24, "vocab_size": 32102}, "Gaivoronsky/ruGPT-3.5-13B-8bit": {"architectures": ["GPT2LMHeadModel"], "n_embd": 5120, "n_head": 40, "n_inner": null, "n_layer": 40, "vocab_size": 50272}, "SatoruDano/llama-2-7b-finetuned_v1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, 
"intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "ClueAI/PromptCLUE-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "uukuguy/speechless-codellama-orca-airoboros-13b-0.10e": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "anonymous-german-nlp/german-gpt2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_layer": 12, "vocab_size": 52000}, "fxmarty/gpt2-tiny-onnx": {"intermediate_size": 37, "n_embd": 32, "n_head": 4, "n_inner": null, "n_layer": 5, "vocab_size": 1000}, "prakharz/DIAL-FLANT5-XL": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32100}, "h2oai/h2ogpt-oasst1-falcon-40b": {"architectures": ["RWForCausalLM"], "hidden_size": 8192, "n_head": 128, "n_layer": 60, "vocab_size": 65024}, "synapsoft/Llama-2-7b-chat-hf-flan2022-1.2M": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Undi95/ReMM-L2-13B-PIPPA": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "CHIH-HUNG/llama-2-13b-FINETUNE1_17w-gate_up_down_proj": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Fredithefish/Guanaco-7B-Uncensored": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "conceptofmind/Yarn-Llama-2-13b-128k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Undi95/LewdEngine": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/CodeLlama-7B-Instruct-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32016}, "CHIH-HUNG/llama-2-13b-Open_Platypus_and_ccp_2.6w": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "CHIH-HUNG/llama-2-13b-FINETUNE1_17w-q_k_v_o_proj": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "jondurbin/airoboros-33b-2.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "Salesforce/codet5p-220m-py": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "Danielbrdz/CodeBarcenas-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "SJ-Ray/Re-Punctuate": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, 
"d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "EasthShin/Youth_Chatbot_Kogpt2-base": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 51200}, "ThomasNLG/t5-qg_squad1-en": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "EleutherAI/pythia-160m-deduped-v0": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 768, "intermediate_size": 3072, "num_attention_heads": 12, "num_hidden_layers": 12, "vocab_size": 50304}, "MBZUAI/LaMini-T5-223M": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "HooshvareLab/gpt2-fa": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 42001}, "TFLai/Nova-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "conceptofmind/LLongMA-2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TDC2023/trojan-base-pythia-1.4b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50304}, "phpaiola/ptt5-base-summ-xlsum": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "TFLai/SpeechlessV1-Nova-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/stablecode-instruct-alpha-3b-GPTQ": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 49152}, "stanford-crfm/music-small-800k": {"vocab_size": 55028, "n_embd": 768, "n_layer": 12, "n_head": 12, "n_inner": null, "architectures": null}, "TFLai/EnsembleV5-Nova-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "declare-lab/flan-alpaca-xl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "jpwahle/t5-large-word-sense-disambiguation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "lizhuang144/flan-t5-large-factual-sg": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "DKYoon/mt5-base-lm-adapt": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "TheBloke/guanaco-65B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "Salesforce/codegen25-7b-instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 51200}, "bigscience-data/sgpt-bloom-1b7-nli": {"architectures": ["BloomModel"], 
"n_inner": null, "n_layer": 24, "num_attention_heads": 16, "vocab_size": 250880}, "TurkuNLP/gpt3-finnish-small": {"architectures": ["BloomModel"], "hidden_size": 768, "n_head": 12, "n_layer": 12, "vocab_size": 131072}, "jordiclive/flan-t5-3b-summarizer": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "marblyso/DialoGPT-small-what-the-fuck": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "retrieva-jp/t5-small-short": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 32128}, "codeparrot/codeparrot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1600, "n_head": 25, "n_inner": null, "n_layer": 48, "vocab_size": 32768}, "openthaigpt/openthaigpt-1.0.0-beta-7b-chat-ckpt-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 56554}, "Rocketknight1/falcon-rw-1b": {"architectures": ["FalconForCausalLM"], "hidden_size": 2048, "num_attention_heads": 32, "num_hidden_layers": 24, "vocab_size": 50304}, "TaylorAI/Flash-Llama-30M-20001": {"architectures": ["LlamaForCausalLM"], "hidden_size": 384, "intermediate_size": 1024, "num_attention_heads": 12, "num_hidden_layers": 4, "vocab_size": 32000}, "castorini/t5-base-canard": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "r3dhummingbird/DialoGPT-medium-joshua": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "IDEA-CCNL/Wenzhong2.0-GPT2-110M-BertTokenizer-chinese": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": 3072, "n_layer": 12, "vocab_size": 21133}, "TigerResearch/tigerbot-13b-chat-8bit": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 60928}, "pranavpsv/gpt2-genre-story-generator": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_layer": 12, "vocab_size": 50266}, "Photolens/llama-2-7b-langchain-chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "ck46/t5-base-hotpot-qa-qg": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32102}, "castorini/monot5-small-msmarco-10k": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "yujiepan/llama-2-tiny-random": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8, "intermediate_size": 32, "num_attention_heads": 2, "num_hidden_layers": 1, "vocab_size": 32000}, "castorini/doc2query-t5-base-msmarco": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "oliverguhr/spelling-correction-multilingual-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "allenai/unifiedqa-t5-11b": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 65536, "d_model": 1024, "num_heads": 128, "num_layers": 24, 
"vocab_size": 32128}, "TheBloke/CodeLlama-34B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "snorkelai/sdnet": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "SiberiaSoft/SiberianFRED-T5-XL": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1536, "num_heads": 24, "num_layers": 24, "vocab_size": 50365}, "sultan/ArabicT5-Base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 20, "vocab_size": 32000}, "nikaashpuri/gpt-expt-sp-v3-K-600-MA-Mac-actions-kmeans-v16": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 1913}, "TheBloke/Yarn-Llama-2-13B-128K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "allenai/cosmo-xl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "flax-community/gpt2-bengali": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "EleutherAI/pythia-410m-deduped-v0": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 1024, "intermediate_size": 4096, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50304}, "Writer/palmyra-small": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": 3072, "n_layer": 12, "vocab_size": 50257}, "LukasStankevicius/t5-base-lithuanian-news-summaries-175": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "laituan245/molt5-large-caption2smiles": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "google/ul2": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 16384, "d_model": 4096, "num_heads": 16, "num_layers": 32, "vocab_size": 32128}, "Suva/uptag-keyphrase-model": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "TheBloke/orca_mini_7B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TusharJoshi89/title-generator": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "juierror/flan-t5-text2sql-with-schema": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "hf-tiny-model-private/tiny-random-T5ForConditionalGeneration": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 37, "d_model": 32, "num_heads": 4, "num_layers": 5, "vocab_size": 32100}, "stacked-summaries/flan-t5-large-stacked-samsum-1024": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "TheBloke/WizardLM-33B-V1-0-Uncensored-SuperHOT-8K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, 
"num_hidden_layers": 60, "vocab_size": 32000}, "persiannlp/mt5-base-parsinlu-opus-translation_fa_en": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "gurgutan/ruGPT-13B-4bit": {"architectures": ["GPT2LMHeadModel"], "n_embd": 5120, "n_head": 40, "n_inner": null, "n_layer": 40, "vocab_size": 50272}, "TheBloke/upstage-llama-30b-instruct-2048-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "sdadas/polish-gpt2-medium": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": 4096, "n_layer": 24, "vocab_size": 51200}, "aubmindlab/aragpt2-medium": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 64000}, "SEBIS/code_trans_t5_large_source_code_summarization_python_multitask_finetune": {"architectures": ["T5Model"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "maximxls/text-normalization-ru-terrible": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 256, "num_heads": 4, "num_layers": 3, "vocab_size": 5120}, "TheBloke/llama-2-13B-Guanaco-QLoRA-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "ziqingyang/chinese-alpaca-2-13b-16k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 55296}, "KETI-AIR/ke-t5-large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 64128}, "ibm/qcpg-sentences": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32158}, "tiiuae/falcon-rw-7b": {"architectures": ["FalconForCausalLM"], "hidden_size": 4096, "num_attention_heads": 64, "num_hidden_layers": 36, "vocab_size": 65024}, "timdettmers/guanaco-13b-merged": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "h2oai/h2ogpt-oig-oasst1-falcon-40b": {"architectures": ["RWForCausalLM"], "hidden_size": 8192, "n_head": 128, "n_layer": 60, "vocab_size": 65024}, "coffeeee/nsfw-story-generator": {"architectures": ["GPT2Model"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "zpn/llama-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "crumb/bloom-560m-RLHF-SD2-prompter-aesthetic": {"architectures": ["BloomForCausalLM"], "hidden_size": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 250880}, "kalpeshk2011/dipper-paraphraser-xxl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 10240, "d_model": 4096, "num_heads": 64, "num_layers": 24, "vocab_size": 32128}, "TheBloke/WizardLM-13B-V1.0-Uncensored-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/WizardLM-13B-V1-1-SuperHOT-8K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, 
"num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "allenai/unifiedqa-t5-small": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "oliverguhr/spelling-correction-german-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "ThomasSimonini/t5-end2end-question-generation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32101}, "asi/gpt-fr-cased-base": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1792, "n_head": 14, "n_inner": null, "n_layer": 24, "vocab_size": 50000}, "lora-x/backpack-gpt2": {"architectures": ["BackpackGPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50264}, "TheBloke/Vigogne-2-13B-Instruct-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "ai-forever/ruT5-large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "ml6team/keyphrase-generation-t5-small-openkp": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32100}, "mrm8488/t5-base-finetuned-e2m-intent": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "nikaashpuri/gpt-expt-sp-v3-K-600-MA-Mac-actions-kmeans-v14": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 1902}, "TheBloke/Marx-3b-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "TheBloke/Dolphin-Llama2-7B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "oscorrea/scores-falcon40b-sm-merged": {"architectures": ["RWForCausalLM"], "hidden_size": 8192, "n_head": 128, "n_layer": 60, "vocab_size": 65024}, "lmqg/t5-small-squad-qag": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32101}, "ehartford/WizardLM-Uncensored-Falcon-40b": {"architectures": ["RWForCausalLM"], "hidden_size": 8192, "n_head": 128, "n_layer": 60, "vocab_size": 65025}, "persiannlp/mt5-base-parsinlu-sentiment-analysis": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "VietAI/vit5-large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 36100}, "thanathorn/mt5-cpe-kmutt-thai-sentence-sum": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "Blackroot/Hermes-Kimiko-13B-f16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32032}, "CarperAI/stable-vicuna-13b-delta": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, 
"num_hidden_layers": 40, "vocab_size": 32001}, "j5ng/kullm-12.8b-GPTQ-8bit": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 30080}, "TheBloke/ReMM-SLERP-L2-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Weni/WeniGPT-L-70": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "valhalla/t5-small-qg-hl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32102}, "retrieva-jp/t5-xl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "TheBloke/Wizard-Vicuna-30B-Superhot-8K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "openllmplayground/openalpaca_3b_600bt_preview": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "ArmelR/starcoder-gradio-v0": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 40, "vocab_size": 49152}, "chanind/frame-semantic-transformer-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "akreal/tiny-random-gpt2": {"intermediate_size": 37, "n_embd": 32, "n_head": 4, "n_inner": null, "n_layer": 5, "vocab_size": 99}, "Neko-Institute-of-Science/LLaMA-7B-HF": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Writer/palmyra-med-20b": {"architectures": ["GPT2LMHeadModel"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 44, "vocab_size": 50259}, "SiberiaSoft/SiberianPersonaFred": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1536, "num_heads": 24, "num_layers": 24, "vocab_size": 50364}, "mrm8488/spanish-gpt2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "unicamp-dl/translation-en-pt-t5": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "OFA-Sys/gsm8k-rft-llama7b-u13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "liuhaotian/LLaVA-13b-delta-v0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32003}, "huggingface/falcon-40b-gptq": {"architectures": ["RWForCausalLM"], "hidden_size": 8192, "n_head": 128, "n_layer": 60, "vocab_size": 65024}, "Ravi07bec/llama-qlora-65b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "PKU-Alignment/alpaca-7b-reproduced": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, 
"num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "Unbabel/gec-t5_small": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "TheBloke/Speechless-Llama2-Hermes-Orca-Platypus-WizardLM-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "MIIB-NLP/Arabic-question-generation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 110080}, "google/t5-large-ssm-nq": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "stanford-crfm/arwen-gpt2-medium-x21": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "sentence-transformers/gtr-t5-xxl": {"architectures": ["T5EncoderModel"], "d_ff": 65536, "d_model": 1024, "num_heads": 128, "num_layers": 24, "vocab_size": 32128}, "TheBloke/Nous-Hermes-Llama2-70B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32001}, "paust/pko-t5-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 50358}, "allenai/tk-instruct-11b-def": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 65536, "d_model": 1024, "num_heads": 128, "num_layers": 24, "vocab_size": 32128}, "amphora/FinABSA": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32105}, "TurkuNLP/gpt3-finnish-13B": {"architectures": ["BloomModel"], "hidden_size": 5120, "n_head": 40, "n_layer": 40, "vocab_size": 131072}, "PAIXAI/Astrid-LLama-7B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Aalaa/opt-125m-wikitext2": {"architectures": ["OPTForCausalLM"], "hidden_size": 768, "num_attention_heads": 12, "num_hidden_layers": 12, "vocab_size": 50272}, "hf-internal-testing/tiny-random-GPTNeoXForQuestionAnswering": {"architectures": ["GPTNeoXForQuestionAnswering"], "hidden_size": 32, "intermediate_size": 37, "num_attention_heads": 4, "num_hidden_layers": 5, "vocab_size": 1024}, "cssupport/t5-small-awesome-text-to-sql": {"vocab_size": 32128, "d_model": 512, "d_ff": 2048, "num_layers": 6, "num_heads": 8, "architectures": ["T5ForConditionalGeneration"]}, "TheBloke/MythoMix-L2-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "conceptofmind/Hermes-LLongMA-2-13b-8k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "lysandre/arxiv-nlp": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_layer": 12, "vocab_size": 50257}, "Pcik/DialoGPT-medium-Kirby": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "PY007/SLM_1-4B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 2048, "intermediate_size": 5632, "num_attention_heads": 16, "num_hidden_layers": 24, 
"vocab_size": 50432}, "ceshine/t5-paraphrase-paws-msrp-opinosis": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "microsoft/bloom-deepspeed-inference-int8": {"architectures": ["BloomModel"], "n_layer": 70, "num_attention_heads": 112, "vocab_size": 250880}, "TheBloke/PuddleJumper-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "gorilla-llm/gorilla-falcon-7b-hf-v0": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "TheBloke/starcoder-GPTQ": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 40, "vocab_size": 49152}, "lmsys/longchat-7b-16k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "DAMO-NLP-MT/polylm-1.7b": {"architectures": ["GPT2LMHeadModel"], "n_embd": 2048, "n_head": 16, "n_inner": 8192, "n_layer": 24, "vocab_size": 256000}, "Salesforce/xgen-7b-4k-base": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 51200}, "DAMO-NLP-MT/polylm-13b": {"architectures": ["PolyLMHeadModel"], "n_embd": 5120, "n_head": 40, "n_inner": 20480, "n_layer": 40, "vocab_size": 256000}, "dbddv01/gpt2-french-small": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "EleutherAI/pythia-70m-deduped-v0": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 512, "intermediate_size": 2048, "num_attention_heads": 8, "num_hidden_layers": 6, "vocab_size": 50304}, "algolet/mt5-base-chinese-qg": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250101}, "hf-internal-testing/tiny-random-BloomForQuestionAnswering": {"architectures": ["BloomForQuestionAnswering"], "hidden_size": 32, "n_head": 4, "n_layer": 5, "vocab_size": 1024}, "hf-internal-testing/tiny-random-BloomForTokenClassification": {"architectures": ["BloomForTokenClassification"], "hidden_size": 32, "n_head": 4, "n_layer": 5, "vocab_size": 1024}, "flax-community/t5-base-cnn-dm": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "hf-internal-testing/tiny-random-BloomForSequenceClassification": {"architectures": ["BloomForSequenceClassification"], "hidden_size": 32, "n_head": 4, "n_layer": 5, "vocab_size": 1024}, "tau/t5-v1_1-large-rss": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "TheBloke/airoboros-l2-13b-gpt4-m2.0-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "hf-internal-testing/tiny-random-GPTNeoXForSequenceClassification": {"architectures": ["GPTNeoXForSequenceClassification"], "hidden_size": 32, "intermediate_size": 37, "num_attention_heads": 4, "num_hidden_layers": 5, "vocab_size": 1024}, "allegro/plt5-small": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 50048}, 
"TheBloke/stable-vicuna-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "hf-internal-testing/tiny-random-GPTNeoXForTokenClassification": {"architectures": ["GPTNeoXForTokenClassification"], "hidden_size": 32, "intermediate_size": 37, "num_attention_heads": 4, "num_hidden_layers": 5, "vocab_size": 1024}, "TheBloke/WizardLM-7B-V1-0-Uncensored-SuperHOT-8K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "HuggingFaceH4/tiny-random-LlamaForSequenceClassification": {"architectures": ["LlamaForSequenceClassification"], "hidden_size": 16, "intermediate_size": 64, "num_attention_heads": 4, "num_hidden_layers": 2, "vocab_size": 32000}, "hf-internal-testing/tiny-random-GPTNeoXModel": {"architectures": ["GPTNeoXModel"], "hidden_size": 32, "intermediate_size": 37, "num_attention_heads": 4, "num_hidden_layers": 5, "vocab_size": 1024}, "IlyaGusev/rut5_base_headline_gen_telegram": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 30000}, "lgaalves/gpt2_camel_physics-platypus": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "lightonai/alfred-40b-0723": {"architectures": ["RWForCausalLM"], "hidden_size": 8192, "n_head": 128, "n_layer": 60, "vocab_size": 65024}, "KETI-AIR/ke-t5-small": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 64128}, "ibm/regen-disambiguation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "vihangd/smartplat-3b-v3": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "TheBloke/OpenBuddy-Llama2-13B-v11.1-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 37632}, "BlinksFly/Harry_Potter-Ai": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "conceptofmind/Yarn-Llama-2-7b-128k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "liujch1998/vera": {"architectures": ["T5EncoderModel"], "d_ff": 10240, "d_model": 4096, "num_heads": 64, "num_layers": 24, "vocab_size": 32128}, "kaist-ai/CoT-T5-11B": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 10240, "d_model": 4096, "num_heads": 64, "num_layers": 24, "vocab_size": 32128}, "lintang/t5-v1_1-base-flan": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "sentence-transformers/sentence-t5-xxl": {"architectures": ["T5EncoderModel"], "d_ff": 65536, "d_model": 1024, "num_heads": 128, "num_layers": 24, "vocab_size": 32128}, "TheBloke/vicuna-7B-v1.5-16K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "retrieva-jp/t5-large-long": {"architectures": 
["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "retrieva-jp/t5-base-long": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "upstage/SOLAR-0-70b-8bit": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "jerteh/gpt2-vrabac": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 49152}, "Parth/boolean": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "hf-internal-testing/tiny-random-GPTBigCodeForSequenceClassification": {"architectures": ["GPTBigCodeForSequenceClassification"], "n_embd": 32, "n_head": 4, "n_inner": 37, "n_layer": 5, "vocab_size": 1024}, "hf-internal-testing/tiny-random-GPTBigCodeForTokenClassification": {"architectures": ["GPTBigCodeForTokenClassification"], "n_embd": 32, "n_head": 4, "n_inner": 37, "n_layer": 5, "vocab_size": 1024}, "megagonlabs/t5-base-japanese-web": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32000}, "MisguidedKerbal/DialoGPT-kerbalV3": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "praeclarum/cuneiform": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "uw-hai/polyjuice": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "reciprocate/tiny-llama": {"architectures": ["LlamaForCausalLM"], "hidden_size": 64, "intermediate_size": 64, "num_attention_heads": 1, "num_hidden_layers": 1, "vocab_size": 32000}, "luqh/ClinicalT5-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "stanford-crfm/celebrimbor-gpt2-medium-x81": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "TheBloke/CodeLlama-13B-Python-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "laituan245/molt5-large-smiles2caption": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "TurkuNLP/gpt3-finnish-8B": {"architectures": ["BloomModel"], "hidden_size": 4096, "n_head": 32, "n_layer": 32, "vocab_size": 131072}, "NeuML/t5-small-txtsql": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "malteos/bloom-6b4-clp-german": {"hidden_size": 4096, "n_head": 32, "n_layer": 30, "vocab_size": 50304}, "GT4SD/multitask-text-and-chemistry-t5-base-augm": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "allenai/open-instruct-stanford-alpaca-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "CarperAI/randomwalks": {"architectures": 
["GPT2LMHeadModel"], "n_embd": 144, "n_head": 6, "n_inner": null, "n_layer": 6, "vocab_size": 23}, "unicamp-dl/mt5-13b-mmarco-100k": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 10240, "d_model": 4096, "num_heads": 64, "num_layers": 24, "vocab_size": 250112}, "lmqg/t5-small-squad-qg-ae": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32101}, "naltukhov/joke-generator-rus-t5": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "hf-internal-testing/tiny-random-UMT5Model": {"architectures": ["UMT5Model"], "d_ff": 37, "d_model": 32, "num_heads": 4, "num_layers": 5, "vocab_size": 256300}, "rentcarsAI/falcon-7b-codegenerator-qlora-merged": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "panggi/t5-base-indonesian-summarization-cased": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "hf-internal-testing/tiny-random-UMT5ForQuestionAnswering": {"architectures": ["UMT5ForQuestionAnswering"], "d_ff": 37, "d_model": 32, "num_heads": 4, "num_layers": 5, "vocab_size": 256300}, "UBC-NLP/AraT5-base": {"d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 110080}, "kmewhort/stable-diffusion-prompt-bolster": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 52000}, "TheBloke/Llama-2-13B-GGML": {}, "gaussalgo/T5-LM-Large-text2sql-spider": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "DAMO-NLP-MT/polylm-multialpaca-13b": {"architectures": ["GPT2LMHeadModel"], "n_embd": 5120, "n_head": 40, "n_inner": 20480, "n_layer": 40, "vocab_size": 256000}, "hf-internal-testing/tiny-random-UMT5ForSequenceClassification": {"architectures": ["UMT5ForSequenceClassification"], "d_ff": 37, "d_model": 32, "num_heads": 4, "num_layers": 5, "vocab_size": 256300}, "tinkoff-ai/ruDialoGPT-small": {"n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50261}, "indonesian-nlp/gpt2-medium-indonesian": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "Salesforce/mixqg-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "EleutherAI/pythia-1b-v0": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 8, "num_hidden_layers": 16, "vocab_size": 50304}, "NinedayWang/PolyCoder-2.7B": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50304}, "zanchat/falcon-1b": {"architectures": ["RWForCausalLM"], "hidden_size": 2048, "n_head": 32, "n_layer": 24, "vocab_size": 50304}, "Goodnoway/DialoGPT-nerbalV2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "crumb/llama2-7b-shard-bf16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "sagawa/ReactionT5-retrosynthesis": {"architectures": ["T5ForConditionalGeneration"], 
"d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 268}, "DKYoon/mt5-large-lm-adapt": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 250112}, "lintang/t5-v1_1-xl-flan": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "castorini/monot5-large-msmarco-10k": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "Ichsan2895/Merak-7B-v3": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "stanford-crfm/caprica-gpt2-small-x81": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "vicgalle/gpt2-open-instruct-v1": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50260}, "philschmid/llama-2-7b-instruction-generator": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "aubmindlab/aragpt2-large": {"architectures": ["GPT2LMHeadModel"], "intermediate_size": 5120, "n_embd": 1280, "n_head": 20, "n_inner": null, "n_layer": 36, "vocab_size": 64000}, "NonzeroCornet34/DialoGPT-small-philbot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "Trelis/Llama-2-7b-chat-hf-sharded-bf16-5GB": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "deep-learning-analytics/wikihow-t5-small": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "JDBN/t5-base-fr-qg-fquad": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32102}, "stanford-crfm/durin-gpt2-medium-x343": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "abjbpi/Dwight_Schrute": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "Spico/Humback-Myx": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "T-Systems-onsite/mt5-small-sum-de-en-v2": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250100}, "kaiyuy/leandojo-lean3-tacgen-byt5-small": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3584, "d_model": 1472, "num_heads": 6, "num_layers": 12, "vocab_size": 384}, "pinkmanlove/llama-33b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "lintang/t5-v1_1-large-flan": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "Naseej/noon-7b": {"architectures": ["BloomForCausalLM"], "hidden_size": 4096, "n_head": 32, "n_inner": null, "n_layer": 30, 
"vocab_size": 250880}, "chizhikchi/sci-five-radsum23": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32101}, "impyadav/GPT2-FineTuned-Hinglish-Song-Generation": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "elinas/llama-13b-hf-transformers-4.29": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/GodziLLa2-70B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "TheBloke/Llama-2-70B-OASST-1-200-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32016}, "jacobmorrison/tk-instruct-base-lora-experiments": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-3b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "ingen51/DialoGPT-medium-GPT4": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "cointegrated/rut5-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 30000}, "PocketDoc/Dans-CreepingSenseOfDoom": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "tsmatz/mt5_summarize_japanese": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250112}, "domenicrosati/QA2D-t5-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "gorkemgoknar/gpt2chatbotenglish": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_layer": 12, "vocab_size": 50262}, "DeliveryBoy/DiabloGPT-medium-Kurisu": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "philschmid/instruct-igel-001": {"architectures": ["BloomForCausalLM"], "hidden_size": 4096, "n_head": 32, "n_layer": 30, "vocab_size": 50304}, "xDAN2099/xDAN_13B_Zh_Base": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 60928}, "codeparrot/codeparrot-small": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 32768}, "paust/pko-t5-small": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 50358}, "flozi00/Llama-2-13b-german-assistant-v4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 37632}, "doc2query/msmarco-t5-base-v1": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, 
"microsoft/DialogRPT-depth": {"architectures": ["GPT2ForSequenceClassification"], "n_embd": 1024, "n_head": 16, "n_layer": 24, "vocab_size": 50257}, "nomic-ai/gpt4all-13b-snoozy": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "NousResearch/Yarn-Llama-2-13b-64k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "mrm8488/t5-base-e2e-question-generation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "postbot/gpt2-medium-emailgen": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "vanilladucky/Friends_chatting_bot_redefined": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "TheBloke/LlongOrca-7B-16K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32003}, "mutamuta/DialoGPT-spongebob-small": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "Ar4ikov/gpt2-medium-650k-stable-diffusion-prompt-generator": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "TheBloke/HermesLimaRP-L2-7B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "clibrain/Llama-2-7b-ft-instruct-es-gptq-4bit": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/Yarn-Llama-2-7B-128K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "lmqg/mt5-small-jaquad-qg-ae": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "allenai/tk-instruct-base-def-pos": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "davidkim205/komt-Llama-2-7b-chat-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "tangy0/llama-2-7b-dtlpy_v0.4chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TigerResearch/tigerbot-70b-base": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 60928}, "hadifar/eventextraction": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "TintinMeimei/NousResearch-Llama-2-7b-chat-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, 
"TheBloke/airoboros-l2-13b-gpt4-2.0-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Nekochu/Llama-2-13B-fp16-french": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "minhtoan/t5-translation-vietnamese-nom": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 300, "num_heads": 8, "num_layers": 6, "vocab_size": 30100}, "BELLE-2/BELLE-Llama2-13B-chat-0.4M": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "bigscience/T0": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 10240, "d_model": 4096, "num_heads": 64, "num_layers": 24, "vocab_size": 32128}, "andreaskoepf/pythia-1.4b-gpt4all-pretrain": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50288}, "Salesforce/codet5-base-codexglue-clone": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "Chae/scottbot_med": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "TheBloke/LLaMA-7b-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "sagard21/python-code-explainer": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32100}, "stanfordnlp/SteamSHP-flan-t5-xl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "MarinHinawa/DialoGPT-medium-Ene": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "aiautomationlab/german-news-title-gen-mt5": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "TheBloke/vicuna-13B-1.1-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/Chronos-Hermes-13B-SuperHOT-8K-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "microsoft/DialogRPT-human-vs-machine": {"architectures": ["GPT2ForSequenceClassification"], "n_embd": 1024, "n_head": 16, "n_layer": 24, "vocab_size": 50257}, "uer/gpt2-distil-chinese-cluecorpussmall": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 6, "vocab_size": 21128}, "h2oai/h2ogpt-gm-oasst1-en-2048-falcon-40b-v1": {"architectures": ["RWForCausalLM"], "hidden_size": 8192, "n_head": 128, "n_layer": 60, "vocab_size": 65024}, "davidkim205/komt-Llama-2-13b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "ibm/qcpg-questions": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, 
"num_layers": 12, "vocab_size": 32155}, "gavin124/gpt2-finetuned-cnn-summarization-v2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50260}, "hogru/MolReactGen-GuacaMol-Molecules": {"architectures": ["GPT2LMHeadModel"], "n_embd": 144, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 93}, "stanford-crfm/darkmatter-gpt2-small-x343": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "conceptofmind/Yarn-Llama-2-7b-64k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Radicalkiddo/DialoGPT-small-Radical": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "Ninja5000/DialoGPT-medium-HarryPotter": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "theblackcat102/alpaca-title-generator-mt0-large": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 250112}, "transfaeries/Twilight-Sparkle-GPT": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "TheBloke/Vigogne-2-7B-Instruct-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "markofhope/DialoGPT-medium-HarringtonBot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "seeksery/DialoGPT-calig3": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "beomi/kcgpt2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 55000}, "vilm/vietcuna-3b": {"architectures": ["BloomForCausalLM"], "hidden_size": 2560, "n_head": 32, "n_inner": null, "n_layer": 30, "vocab_size": 250880}, "IDEA-CCNL/Randeng-T5-784M": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32596}, "vwxyzjn/starcoderbase-triviaqa": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 40, "vocab_size": 49152}, "IDEA-CCNL/Wenzhong2.0-GPT2-3.5B-chinese": {"architectures": ["GPT2LMHeadModel"], "n_embd": 3072, "n_head": 32, "n_inner": 12288, "n_layer": 30, "vocab_size": 50304}, "TheBloke/Llama-2-7b-Chat-GGUF": {}, "MingZhong/unieval-dialog": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32100}, "OpenAssistant/falcon-40b-megacode2-oasst": {"architectures": ["FalconForCausalLM"], "hidden_size": 8192, "num_attention_heads": 128, "num_hidden_layers": 60, "vocab_size": 65152}, "axiong/PMC_LLaMA_13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "codeparrot/codeparrot-small-multi": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": 3072, "n_layer": 12, "vocab_size": 32768}, "EleutherAI/pythia-6.9b-deduped-v0": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, 
"intermediate_size": 16384, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "Riiid/sheep-duck-llama-2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "alibaba-pai/pai-bloom-1b1-text2prompt-sd": {"architectures": ["BloomForCausalLM"], "hidden_size": 1536, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 250880}, "TheBloke/Chronos-Beluga-v2-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "malmarjeh/t5-arabic-text-summarization": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 110080}, "GarfExit/DialogGPT-medium-707": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "audreycl/DialoGPT-RPF": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "florentiino/DialoGPT-small-harrypotter": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "JazzyLucas/DialoGPT-small-TonyStark": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "marblyso/DialoGPT-medium-marina": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "polandball/GPT-Polen": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "GarrisonBot/DialoGPT-medium-herbertgarrison": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "XuYipei/kw-cutegpt-13b-ift": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 49954}, "TheBloke/Pygmalion-7B-SuperHOT-8K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "timothykim04/DialoGPT-medium-harrypotter": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "allegro/plt5-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 50048}, "lengoctuong/gpt2-finetuned-wikitext2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "PyaeSoneK/Fine_Tuned_Pythia_smallest_140_legal": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 1024, "intermediate_size": 4096, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50304}, "psyche/KoT5-paraphrase-generation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "microsoft/DialogRPT-width": {"architectures": ["GPT2ForSequenceClassification"], "n_embd": 1024, "n_head": 16, "n_layer": 24, "vocab_size": 50257}, "Dahoas/pythia-1B-static-sft": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 16, "num_hidden_layers": 24, 
"vocab_size": 50304}, "jerteh/gpt2-orao": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_inner": null, "n_layer": 36, "vocab_size": 49152}, "TheBloke/LosslessMegaCoder-Llama2-13B-Mini-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32007}, "Ngao/DialoGPT-small-ngao": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "4i-ai/Llama-2-7b-alpaca-es": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "asifhugs/open_llama_7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "RajuKandasamy/tamillama_tiny_30m": {"architectures": ["LlamaForCausalLM"], "hidden_size": 256, "intermediate_size": 786, "num_attention_heads": 8, "num_hidden_layers": 16, "vocab_size": 32000}, "stabilityai/StableBeluga1-Delta": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "Linly-AI/Chinese-LLaMA-2-7B-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 40076}, "flax-community/gpt2-base-thai": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "shalomma/llama-7b-embeddings": {"architectures": ["LLaMAForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/llama2-7b-chat-codeCherryPop-qLoRA-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "KhanAdeeb/model-tony-stark": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "spy24/autonlp-UK-to-US-600416931": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "DKYoon/mt5-small-lm-adapt": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250112}, "TheBloke/Llama-2-70B-GGML": {}, "TheBloke/model_007-70B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "abhi-8/DialoGPT-medium-Joshua-twevy": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "camenduru/MiniGPT4-7B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "paripi/Malishka": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "SiberiaSoft/SiberianPersonaFred_large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 50364}, "Alred/t5-small-finetuned-summarization-cnn": {"architectures": ["T5ForConditionalGeneration"], 
"d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "Leomas/DialoGPT-medium-Leomas": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "TehVenom/Pygmalion-7b-Merged-Safetensors": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "marblyso/DialoGPT-medium-pearl": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "lmqg/mt5-small-dequad-qg-ae": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "TheBloke/WizardLM-Uncensored-Falcon-40B-GPTQ": {"architectures": ["RWForCausalLM"], "hidden_size": 8192, "n_head": 128, "n_layer": 60, "vocab_size": 65025}, "NlpHUST/t5-small-vi-summarization": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250112}, "Elucia/Diluc_Bot_1.3": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "h2oai/h2ogpt-16k-codellama-34b-instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "microsoft/CodeGPT-small-java": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_layer": 12, "vocab_size": 52000}, "Starry/COUNTNARC": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "OpenMEDLab/PULSE-7bv5": {"architectures": ["BloomForCausalLM"], "hidden_size": 4096, "n_head": 32, "n_inner": null, "n_layer": 30, "vocab_size": 250880}, "marblyso/DialoGPT-medium-aubrey": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "Ashypaws/DialoGPT-medium-Ashybot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "YTTD/DialoGPT-medium-sou": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "marblyso/DialoGPT-medium-hero": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "Trelis/Llama-2-7b-chat-hf-function-calling-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "NousResearch/CodeLlama-7b-hf-flash": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32016}, "TheBloke/CodeLlama-34B-Python-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "musabgultekin/functionary-7b-v1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "amasand/gpt2-imdb-pos-ppo": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "bigscience/bloomz-7b1-p3": {"architectures": ["BloomForCausalLM"], 
"n_inner": null, "n_layer": 30, "num_attention_heads": 32, "vocab_size": 250880}, "rirv938/wizard-vicuna-13b-uncensored-awq-4bit-g128": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "marblyso/DialoGPT-medium-marblesbagel": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "vilm/vietcuna-7b-v3": {"architectures": ["BloomForCausalLM"], "hidden_size": 4096, "n_head": 32, "n_inner": null, "n_layer": 30, "vocab_size": 250880}, "stas/t5-very-small-random": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 256, "d_model": 64, "num_heads": 4, "num_layers": 8, "vocab_size": 32128}, "KeLiu/Title-Gen": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "vampiregirl/DialoGPT-medium-lennoxram": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "sharpbai/Llama-2-7b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "sam2ai/openllama_odia_3b_base": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "lmqg/mt5-small-esquad-qg-ae": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "stanfordnlp/SteamSHP-flan-t5-large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "allenai/tulu-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "JNDankwah/DialoGPT-small-ThorCB": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "lmqg/mt5-small-ruquad-qg": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "lmqg/mt5-small-ruquad-qg-ae": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "Dinocroth/DialoGPT-medium-Trevor-PhilipsV2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "Speedemon/jake-peralta-ai": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "chanind/frame-semantic-transformer-small": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "stanford-crfm/music-medium-800k": {"vocab_size": 55028, "n_embd": 1024, "n_layer": 24, "n_head": 16, "n_inner": null, "architectures": null}, "h2oai/h2ogpt-16k-codellama-7b-instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32016}, "TheBloke/Pygmalion-2-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, 
"huggingface-course/codeparrot-ds": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50000}, "KakoSi/AcciGPT-smol": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "lmqg/mt5-small-itquad-qg-ae": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "EggsInAJar/DialoGPT-small-MerrickBot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "razent/SciFive-large-Pubmed": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "armandnlp/gpt2-TOD_finetuned_SGD": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50271}, "RuterNorway/Llama-2-13b-chat-norwegian": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "AIDC-ai-business/Marcoroni-7B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "deep-learning-analytics/GrammarCorrector": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "redrussianarmy/gpt2-turkish-cased": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "lmqg/mt5-small-frquad-qg-ae": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "psyche/KoT5-summarization": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "loitran/DialoGPT-medium-peppapig": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "openchat/openchat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "saikatc/NatGen": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "Coderhuynin/DialoGPT-large-TonyStark": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "declare-lab/flan-sharegpt-xl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "TheBloke/Chronos-Hermes-13B-SuperHOT-8K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "usvsnsp/pythia-6.9b-rm-full-hh-rlhf": {"architectures": ["GPTNeoXForSequenceClassification"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50277}, "yujiepan/llama-2-tiny-3layers-random": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8, "intermediate_size": 32, "num_attention_heads": 2, "num_hidden_layers": 3, "vocab_size": 32000}, "allenai/unifiedqa-v2-t5-3b-1363200": {"architectures": 
["T5ForConditionalGeneration"], "d_ff": 16384, "d_model": 1024, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "gsarti/it5-small": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 32103}, "simple2312/DialoGPT-Ellie": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "kashif/llama-7b_stack-exchange_RM_peft-adapter-merged": {"architectures": ["LlamaForSequenceClassification"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "larryvrh/mt5-translation-ja_zh": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 250112}, "j5ng/et5-typos-corrector": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 45100}, "vilsonrodrigues/falcon-7b-sharded": {"architectures": ["FalconForCausalLM"], "hidden_size": 4544, "num_attention_heads": 71, "num_hidden_layers": 32, "vocab_size": 65024}, "felinecity/ScaraBot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "persiannlp/mt5-base-parsinlu-translation_en_fa": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "Jonesy/HomersNightOut": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "conceptofmind/LLongMA-2-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/LoKuS-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "shibing624/mengzi-t5-base-chinese-correction": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "Lamia/DialoGPT-small-Sundrop": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "Blizzchor/DialoGPT-medium-gamora": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "jlsalty9999/DialoGPT-medium-Riddle": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "uer/gpt2-chinese-lyric": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 21128}, "LMFlow/Full-Robin-7b-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "llm-book/t5-base-long-livedoor-news-corpus": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "nuggster/DialoGPT-small-ianbot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "Tristan/gpt2_reward_summarization": {"architectures": ["GPT2ForSequenceClassification"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, 
"MysteriousAmazon/DialoGPT-medium-freddy": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "wdidfau/Pygmalion-13b-Landmark-Attention-Merged": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "kaiyuy/leandojo-lean3-retriever-byt5-small": {"architectures": ["T5EncoderModel"], "d_ff": 3584, "d_model": 1472, "num_heads": 6, "num_layers": 12, "vocab_size": 384}, "kz919/ntk_scaled_open_llama_3b_32k": {"architectures": ["NTKScaledLlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "abhi-8/DialoGPT-medium-Rick": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "polymath707/llama-2-13b-miniguanaco": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Langboat/bloom-389m-zh": {"architectures": ["BloomForCausalLM"], "hidden_size": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 42437}, "Techcs002/DialoGPT-medium-AboTalkTest": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "AIDC-ai-business/Marcoroni-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "ybelkada/t5-3b-sharded": {"architectures": ["T5WithLMHeadModel"], "d_ff": 16384, "d_model": 1024, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "benjamin/gerpt2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "abhi-8/DialoGPT-medium-Michael": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "cahya/gpt2-small-indonesian-522M": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "marianna13/flan-t5-base-summarization": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "Lakoc/fisher_dec_6_layers": {"architectures": ["GPT2Model"], "n_embd": 512, "n_head": 4, "n_inner": null, "n_layer": 6, "vocab_size": 5000}, "simple2312/DialoGPT-nayeon": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "sjrhuschlee/flan-t5-base-squad2": {"architectures": ["T5ForQuestionAnswering"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "eqhylxx/full-vicuna-160m": {"architectures": ["LlamaForCausalLM"], "hidden_size": 768, "intermediate_size": 3072, "num_attention_heads": 12, "num_hidden_layers": 12, "vocab_size": 32000}, "Ashypaws/DialoGPT-medium-Kitaibot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "TheBloke/Wizard-Vicuna-7B-Uncensored-SuperHOT-8K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "NHStudios/DialoGPT-small-jake": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, 
"n_inner": null, "n_layer": 12, "vocab_size": 50257}, "ZipperXYZ/DialoGPT-medium-TheWorldMachineExpressive2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "IIC/mt5-spanish-mlsum": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "mattymchen/gense-base-plus": {"architectures": ["T5Model"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32101}, "DAMO-NLP/SeqGPT-560M": {"architectures": ["BloomForCausalLM"], "hidden_size": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 250880}, "AMHR/T5-for-Adversarial-Paraphrasing": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "Blizzchor/DialoGPT-medium-HarryBotter": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "brianveebee/DialoGPT-medium-bender": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "YTTD/DialoGPT-medium-keiji": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "anon8231489123/gpt4-x-alpaca-13b-native-4bit-128g": {"architectures": ["LLaMAForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "Pcik/DialoGPT-medium-Dante": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "mHossain/bangla-para-v3-500000": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "TheBloke/Llama-2-7B-GGUF": {}, "diwas7777/HarryBot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "seduerr/t5-small-pytorch": {"architectures": ["T5WithLMHeadModel"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "felinecity/DioloGPT-small-KaeyaBot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "lmsys/vicuna-7b-delta-v0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "inu-ai/dolly-japanese-gpt-1b": {"architectures": ["GPT2LMHeadModel"], "n_embd": 2048, "n_head": 16, "n_inner": 8192, "n_layer": 24, "vocab_size": 44928}, "TheBloke/Vicuna-33B-1-3-SuperHOT-8K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "Dahoas/pythia-125M-static-sft": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 768, "intermediate_size": 3072, "num_attention_heads": 12, "num_hidden_layers": 12, "vocab_size": 50304}, "Blizzchor/DialoGPT-medium-QuillLord": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "beomi/KoAlpaca-llama-1-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "patrickNLP/Graphix-3B": {"architectures": ["Model"], "d_ff": 
16384, "d_model": 1024, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "Starry/HELLORUKAS": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "keans/DialoGPT-small-highjacker": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "DoesNoPro/DialoGPT-small-RaidenG": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "ThatSkyFox/DialoGPT-medium-whatsapp": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "EnterNameBros/Senko-san-medium-scl": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "TheBloke/CodeLlama-7B-Python-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "mrm8488/t5-small-finetuned-quora-for-paraphrasing": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "NonzeroCornet34/DialoGPT-small-hansolo": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "d0rj/rut5-base-summ": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "CAIRE-CedarsSinai/falcon-7b-qlora-chat-support-bot-faq-alzkb-version-2": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "el-profesor/code_t5": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "Soumyajit1008/DialoGPT-small-harryPotterssen": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "malteos/bloom-1b5-clp-german": {"architectures": ["BloomForCausalLM"], "hidden_size": 2048, "n_head": 16, "n_layer": 24, "vocab_size": 50304}, "yesuns/DialoGPT-small-yesun": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "Stevo/DiagloGPT-medium-spamton": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "Vision-CAIR/vicuna-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "TheBloke/airoboros-33B-gpt4-1-4-SuperHOT-8K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "tanishqvashisht/DialoGPT-small-Joshua": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "TurkuNLP/gpt3-finnish-3B": {"architectures": ["BloomModel"], "hidden_size": 2560, "n_head": 32, "n_layer": 32, "vocab_size": 131072}, "lizhuang144/flan-t5-base-VG-factual-sg": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "TheBloke/Athena-v1-GGUF": {}, "xxyyy123/test-28b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 
13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "pastlecry/DialoGPT-small-harrypotter": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "DiscordRequestsAPI/NurDeeps-Bot-2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "channashi/DialoGPT-small-rocket": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "ritog/bangla-gpt2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "TheBloke/Redmond-Puffin-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32032}, "Shakerlicious/DialoGPT-small-raquelbot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "lmqg/mt5-base-jaquad-qag": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250101}, "anon8231489123/vicuna-13b-GPTQ-4bit-128g": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "jacobmorrison/tk-instruct-small-lora-experiments": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 32100}, "TheBloke/open-llama-13b-open-instruct-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "cedpsam/chatbot_fr": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "Photolens/llama-2-13b-langchain-chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "avinashshrangee/DialoGPT-small-Ricky": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "YeungNLP/firefly-llama2-7b-pretrain": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 55296}, "efederici/it5-efficient-small-fanpage": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 32, "vocab_size": 32100}, "saikiranmaddukuri/chat_to_sql0.17": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "TheBloke/Llama2-28B-Air03-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 7296, "intermediate_size": 22016, "num_attention_heads": 57, "num_hidden_layers": 40, "vocab_size": 32000}, "crodri/falcon_aguila_meteocat": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 50257}, "Narsil/starcoder-gptq": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 40, "vocab_size": 49152}, "CobraMamba/mamba-gpt-3b-v4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, 
"num_hidden_layers": 26, "vocab_size": 32000}, "YeungNLP/firefly-llama2-13b-pretrain": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 55296}, "TheBloke/airoboros-l2-7b-gpt4-1.4.1-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "DecafNosebleed/DialoGPT-small-ScaraBot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "yazdipour/text-to-sparql-t5-small-qald9": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "ClassCat/gpt2-base-french": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50000}, "TheBloke/airoboros-33B-GPT4-m2.0-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "quantumaikr/KoreanLM-1.5b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 1024, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "toyfreak/DialoGPT-small-addy": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "spursyy/mT5_multilingual_XLSum_rust": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "lengoctuong/gpt2-finetuned-chatbot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50261}, "KnutJaegersberg/megatron-gpt2-345m-evol_instruct_v2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": 4096, "n_layer": 24, "vocab_size": 50257}, "zkdtckk/falcon40-instruct-qlora-tta-v1": {"architectures": ["RWForCausalLM"], "hidden_size": 8192, "n_head": 128, "n_layer": 60, "vocab_size": 65024}, "TheBloke/Nous-Hermes-13B-SuperHOT-8K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "TheBloke/Nous-Hermes-Llama2-GGML": {}, "IkariDev/Athena-v1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/WizardLM-7B-V1.0-Uncensored-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/llama-2-13B-German-Assistant-v2-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "cahya/gpt2-large-indonesian-522M": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_inner": null, "n_layer": 36, "vocab_size": 50257}, "VietAI/envit5-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 50048}, "kam1run/DialoGPT-large-kami": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_inner": null, "n_layer": 36, "vocab_size": 50257}, "uukuguy/speechless-codellama-dolphin-orca-platypus-13b": 
{"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "aluserhuggingface/DialoGPT-small-harrypotter": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "TheBloke/gpt4-x-vicuna-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "Pcik/DialoGPT-medium-Ruby": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "TheBloke/LLaMA-30b-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "sdadas/polish-gpt2-small": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": 3072, "n_layer": 12, "vocab_size": 51200}, "ahxt/llama2_xs_460M_experimental": {"architectures": ["LlamaForCausalLM"], "hidden_size": 1024, "intermediate_size": 4096, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50304}, "lemon234071/t5-base-Chinese": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 35364}, "4bit/pyg-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "squarelike/Gugugo-koen-1.3B-V1.0": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 30080}, "lvwerra/t5-imdb": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "psymon/KoLlama2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Maxwere/DiabloGPT-medium-maxbot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "nafisehNik/mt5-persian-summary": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250112}, "nams/nams-bot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "lmqg/mt5-small-esquad-qag": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "mattbit/gpt2wb": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "ghazikhanihamed/TooT-PLM-P2S": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 48, "vocab_size": 144}, "lonewanderer27/YoshinoriBot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "VinVanGogh/Llama-2-7b-Aixiety-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "GroNLP/gpt2-medium-italian-embeddings": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, 
"vocab_size": 30001}, "IDEA-CCNL/Randeng-T5-784M-QA-Chinese": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32601}, "kingbri/airo-llongma-2-13B-16k-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "lvwerra/starcoderbase-gsm8k": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 40, "vocab_size": 49152}, "mofawzy/gpt2-arabic-sentence-generator": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50000}, "lmqg/mt5-small-itquad-qag": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "sharpbai/Llama-2-13b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "lomahony/eleuther-pythia70m-hh-sft": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 512, "intermediate_size": 2048, "num_attention_heads": 8, "num_hidden_layers": 6, "vocab_size": 50304}, "Salesforce/codet5-large-ntp-py": {"architectures": ["T5WithLMHeadModel"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32100}, "TheBloke/Samantha-1.11-CodeLlama-34B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "Lenza/DialoGPT-medium-Kobayashi": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "davidviriato/DialoGPT-small-joshua": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "Shakerlicious/DialoGPT-small-descentbot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "TurkuNLP/gpt3-finnish-xl": {"architectures": ["BloomModel"], "hidden_size": 2064, "n_head": 24, "n_layer": 24, "vocab_size": 131072}, "TheBloke/starcoderplus-GPTQ": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 40, "vocab_size": 49152}, "TheBloke/Airoboros-L2-7B-2.1-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Mikivis/gpt2-large-lora-sft1": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_inner": null, "n_layer": 36, "vocab_size": 50257}, "gagan3012/k2t": {"architectures": ["T5WithLMHeadModel"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "MerlynMind/merlyn-education-safety": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 36, "vocab_size": 50688}, "explosion-testing/refined-web-model-test": {"architectures": ["RWForCausalLM"], "hidden_size": 32, "n_head": 4, "n_layer": 5, "vocab_size": 1024}, "explosion-testing/falcon-no-parallel-attn-test": {"architectures": ["RWForCausalLM"], "hidden_size": 32, "n_head": 4, "n_layer": 5, "vocab_size": 1024}, "Marxav/frpron": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, 
"vocab_size": 268}, "AmbricJohnson5888/claura": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "TheBloke/CodeLlama-7B-Instruct-GGUF": {}, "felinecity/DioloGPT-small-LisaBot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "lmqg/mt5-small-frquad-qag": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "RobiKenobi/DialoGPT-medium-pete": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "TheBloke/Vicuna-13B-CoT-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/airoboros-33B-gpt4-1.4-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "SEBIS/code_trans_t5_base_code_documentation_generation_java_multitask": {"architectures": ["T5Model"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "retrieva-jp/t5-base-medium": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "elinas/chronos-13b-4bit": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "abhinavkulkarni/meta-llama-Llama-2-7b-chat-hf-w4-g128-awq": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/Luban-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "uer/t5-base-chinese-cluecorpussmall": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 21228}, "ClueAI/ChatYuan-large-v1": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "helenai/gpt2-ov": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "fireballoon/baichuan-vicuna-chinese-7b-gptq": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 64000}, "Trelis/Llama-2-7b-chat-hf-hosted-inference-8bit": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Starry/KARENTRIES": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "umm-maybe/SportsFanGhost": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "TheBloke/airoboros-13B-gpt4-1.4-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TabbyML/StarCoder-1B": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 2048, 
"n_head": 16, "n_inner": 8192, "n_layer": 24, "vocab_size": 49152}, "TFLai/Nova-13B-50-step": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "Mikivis/gpt2-large-lora-sft2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_inner": null, "n_layer": 36, "vocab_size": 50257}, "CHIH-HUNG/llama-2-13b-Open_Platypus_and_ccp_2.6w-3_epoch": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "BlackSamorez/falcon-40b-tiny-testing": {"architectures": ["RWForCausalLM"], "hidden_size": 256, "n_head": 4, "n_layer": 2, "vocab_size": 65024}, "Rocketknight1/tiny-random-falcon-40b": {"architectures": ["FalconForCausalLM"], "hidden_size": 1024, "num_attention_heads": 128, "num_hidden_layers": 2, "vocab_size": 65024}, "TheBloke/WizardLM-1.0-Uncensored-Llama2-13B-GGML": {}, "TheBloke/Zarafusionex-1.1-L2-7B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "lmqg/t5-large-squad-qg": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32101}, "casperhansen/falcon-7b-awq": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "Azure99/blossom-v2-llama2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "DeepESP/gpt2-spanish-medium": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "StudentLLM/Alpagasus-2-13b-QLoRA-merged": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Weni/WeniGPT": {"architectures": ["RWForCausalLM"], "hidden_size": 8192, "n_head": 128, "n_layer": 60, "vocab_size": 65024}, "niicovila/llama-v2-tst-law": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Undi95/CreativityEngine": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "DB13067/Peterbot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "EleutherAI/pythia-12b-v0": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 36, "vocab_size": 50688}, "allenai/tulu-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "TheBloke/airoboros-l2-13b-gpt4-m2.0-GGML": {}, "TheBloke/Griffin-3B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "imthanhlv/vigpt2medium": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "m3hrdadfi/gpt2-persian-qa": {"architectures": 
["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50000}, "TheBloke/MythoMax-L2-Kimiko-v2-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "CHIH-HUNG/llama-2-13b-FINETUNE1_17w-r16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "ppn/DialoGPT-small-harrypotter": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "lmqg/mt5-base-ruquad-qag": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250101}, "TheBloke/Firefly-Llama2-13B-v1.2-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "persiannlp/mt5-large-parsinlu-opus-translation_fa_en": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 250112}, "simple2312/DialoGPT-Twice": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "declare-lab/flan-alpaca-xxl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 10240, "d_model": 4096, "num_heads": 64, "num_layers": 24, "vocab_size": 32128}, "ChanceFocus/finma-7b-nlp": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "osunlp/attrscore-flan-t5-xl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "likenneth/honest_llama2_chat_7B": {"architectures": ["LLaMAForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Hugherinit/hi": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 32119}, "vaibhav9/GPT2-qa": {"architectures": ["GPT2ModelForQuestionAnswering"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "st3rl4nce/t5-small-finetuned-pubmed": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "uonlp/okapi-ro-llama": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "ThomasNLG/t5-weighter_cnndm-en": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32100}, "google/t5-11b-ssm-tqa": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 65536, "d_model": 1024, "num_heads": 128, "num_layers": 24, "vocab_size": 32128}, "lizhuang144/flan-t5-small-VG-factual-sg": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 32128}, "hyunjae/skt-kogpt2-kullm-v2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 51200}, "Mirage-Studio/llama-gaan-2-7b-chat-hf-dutch": {"architectures": ["LlamaForCausalLM"], "hidden_size": 
4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/LosslessMegaCoder-Llama2-7B-Mini-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32007}, "lmqg/t5-small-squad-qg": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32101}, "castorini/doc2query-t5-large-msmarco": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "TheBloke/manticore-13b-chat-pyg-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "22h/open-cabrita3b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 52000}, "alzoubi36/priva_t5-small": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "TheBloke/vicuna-7B-v0-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "TheBloke/falcon-7b-instruct-GGML": {}, "Rozi05/QuoteVibes_Model_Trained": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "Tidum/DialoGPT-large-Michael": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "valhalla/t5-small-qg-prepend": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32102}, "lmqg/t5-large-squad-qag": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32101}, "abhiramtirumala/DialoGPT-sarcastic": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "mindrage/Manticore-13B-Chat-Pyg-Guanaco-GGML": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Salesforce/dialogstudio-t5-base-v1.0": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32103}, "allenai/unifiedqa-v2-t5-base-1363200": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "kleinay/qanom-seq2seq-model-joint": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32101}, "puugz/DialoGPT-small-spiderman": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "UrukHan/t5-russian-summarization": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, 
"HuggingFaceH4/tiny-random-LlamaForSeqClass": {"architectures": ["LlamaForSequenceClassification"], "hidden_size": 16, "intermediate_size": 64, "num_attention_heads": 4, "num_hidden_layers": 2, "vocab_size": 32000}, "JosephusCheung/Qwen-LLaMAfied-7B-Chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 151936}, "Abzu/orca-mini-v3-70b-gptq-q4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "wnic00/t5-small-finetune-bilingual-summarization": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32100}, "ChukSamuels/DialoGPT-small-Dr.FauciBot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "macavaney/doc2query-t5-base-msmarco": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "nlp-waseda/comet-t5-base-japanese": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32000}, "stjiris/t5-portuguese-legal-summarization": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "Icaruas/V2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "imxly/t5-pegasus": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 50000}, "stefan-it/german-gpt2-larger": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50265}, "noahkim/KoT5_news_summarization": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 50358}, "hoskinson-center/proofGPT-v0.1": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50304}, "TheBloke/WizardMath-7B-V1.0-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "FieldSu/distil_student_24": {"architectures": ["RWForCausalLM"], "hidden_size": 1136, "n_head": 71, "n_layer": 8, "vocab_size": 65024}, "shyamsn97/Mario-GPT2-700-context-length": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 6, "vocab_size": 50257}, "dgnk007/eagle": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "sharpbai/Llama-2-7b-chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "jackyv/DialoGPT-small-pinocchio": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "felinecity/DioloGPT-small-KaeyaBot2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "toyfreak/DialoGPT-small-shy": 
{"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "chavinlo/alpaca-13b": {"architectures": ["LLaMAForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "TehVenom/Pygmalion-7b-4bit-GPTQ-Safetensors": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "georgesung/open_llama_7b_qlora_uncensored": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "ostorc/rick-sanchez-chatbot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "polymath707/llama-2-7b-miniguanaco": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "KBlueLeaf/guanaco-7b-leh-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/Chronos-Hermes-13B-v2-GGML": {}, "approach0/mathy-vicuna-13B-FFT-phase2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "gorilla-llm/gorilla-7b-hf-delta-v1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "j5ng/kullm-5.8b-GPTQ-8bit": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 16, "num_hidden_layers": 28, "vocab_size": 30080}, "bitadin/checkpoint-230167": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "NekoPunchBBB/Llama2-13b-hf-Open-Platypus-QLoRA-att": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "mrm8488/t5-small-finetuned-wikiSQL": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "ozcangundes/T5-base-for-BioQA": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "AriakimTaiyo/gpt2-chat": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_layer": 36, "vocab_size": 50257}, "TheBloke/WizardLM-13B-V1.2-GGML": {}, "TheBloke/Trurl-2-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "ccore/opt-125-smart-test": {"architectures": ["OPTForCausalLM"], "hidden_size": 768, "num_attention_heads": 12, "num_hidden_layers": 12, "vocab_size": 50272}, "James-WYang/BigTranslate": {"architectures": ["LLaMAForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 53613}, "Trelis/Llama-2-7b-chat-hf-function-calling": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, 
"vocab_size": 32000}, "Wikidepia/IndoT5-base-paraphrase": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "csebuetnlp/mT5_m2m_crossSum": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "seanmor5/tiny-llama-test": {"architectures": ["LlamaForCausalLM"], "hidden_size": 32, "intermediate_size": 64, "num_attention_heads": 2, "num_hidden_layers": 2, "vocab_size": 32000}, "explosion-testing/refined-web-model-new-decoder-test": {"architectures": ["RWModel"], "hidden_size": 256, "n_head": 4, "n_layer": 5, "vocab_size": 1024}, "jondurbin/airocoder-34b-2.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "lmqg/t5-base-squad-qg": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32101}, "PORTULAN/gervasio-ptpt-base": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 8, "num_hidden_layers": 16, "vocab_size": 50304}, "UWB-AIR/barticzech-1.0": {"architectures": ["MBartForConditionalGeneration"], "d_model": 1024, "num_hidden_layers": 12, "vocab_size": 50265}, "TokenBender/llama2-7b-chat-hf-codeCherryPop-qLoRA-merged": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Voicelab/trurl-2-7b-8bit": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "TheBloke/Llama-2-13B-chat-GGUF": {}, "VietAI/vit5-base-vietnews-summarization": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 36096}, "lmqg/t5-small-squad-ae": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32101}, "retrieva-jp/t5-base-short": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "grammarly/coedit-xxl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 10240, "d_model": 4096, "num_heads": 64, "num_layers": 24, "vocab_size": 32100}, "heack/HeackMT5-ZhSum100k": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "TheBloke/LLaMA-13b-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TFMC/ELYZA-japanese-Llama-2-7b-instruct-GPTQ-4bit-64g": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "mxmax/Chinese_Chat_T5_Base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "elinas/chronos-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "kajdun/iubaris-13b-v3_GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, 
"intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "jmeadows17/MathT5-large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32104}, "TheBloke/Kimiko-7B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "nlp-waseda/gpt2-small-japanese": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 32000}, "rshrott/description-together-ai": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "noah-ai/mt5-base-question-generation-vi": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "AI4PD/ZymCTRL": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_inner": null, "n_layer": 36, "vocab_size": 458}, "bitadin/gpt-4-long-titles-v2-flan-t5-base-llm-12": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "shorthillsai/flan-t5-large-absa": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "TheBloke/CodeLlama-13B-oasst-sft-v10-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32032}, "prithivida/active_to_passive_styletransfer": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "lcw99/t5-large-korean-text-summary": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 50358}, "EleutherAI/pythia-1.4b-deduped-v0": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50304}, "sdadas/polish-gpt2-large": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_inner": 5120, "n_layer": 36, "vocab_size": 51200}, "uonlp/okapi-vi-bloom": {"architectures": ["BloomForCausalLM"], "hidden_size": 4096, "n_head": 32, "n_inner": null, "n_layer": 30, "vocab_size": 250880}, "cenkersisman/gpt2-turkish-900m": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "IlyaGusev/rugpt_large_turbo_instructed": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1536, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50258}, "Waterhorse/chessgpt-base-v1": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "jondurbin/spicyboros-13b-2.2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "echarlaix/t5-small-openvino": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "mrm8488/santacoder-finetuned-the-stack-bash-shell": {"architectures": ["GPT2LMHeadCustomModel"], "n_embd": 
2048, "n_head": 16, "n_inner": 8192, "n_layer": 24, "vocab_size": 49280}, "ckip-joint/bloom-3b-zh": {"architectures": ["BloomForCausalLM"], "hidden_size": 2560, "n_head": 32, "n_inner": null, "n_layer": 30, "vocab_size": 250880}, "Dawnstarhunter/DialoGPT-medium-Eveline": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "lmqg/t5-base-squad-ae": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32101}, "mesolitica/finetune-translation-t5-small-standard-bahasa-cased-v2": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32100}, "liuhaotian/LLaVA-7b-delta-v0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32004}, "yzhuang/autotree_llama_small_snxor_l1_2_vit": {"architectures": ["LlamaForAutoTree"], "hidden_size": 768, "intermediate_size": 3072, "num_attention_heads": 12, "num_hidden_layers": 6, "vocab_size": 32000}, "mrm8488/t5-base-finetuned-wikiSQL-sql-to-en": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "aleksickx/llama-7b-hf": {"architectures": ["LLaMAForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "yongzx/pythia-70m-sft-hh": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 512, "intermediate_size": 2048, "num_attention_heads": 8, "num_hidden_layers": 6, "vocab_size": 50304}, "sonoisa/t5-base-english-japanese": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "BramVanroy/Llama-2-13b-chat-dutch": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Bhuvana/t5-base-spellchecker": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "PlanTL-GOB-ES/gpt2-base-bne": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_layer": 12, "vocab_size": 50261}, "lmqg/mt5-small-jaquad-qg": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "Mirage-Studio/llama-gaan-2-7b-chat-hf-dutch-epoch-5": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "microsoft/DialogRPT-human-vs-rand": {"architectures": ["GPT2ForSequenceClassification"], "n_embd": 1024, "n_head": 16, "n_layer": 24, "vocab_size": 50257}, "aubmindlab/aragpt2-mega": {"architectures": ["GPT2LMHeadModel"], "intermediate_size": 6144, "n_embd": 1536, "n_head": 24, "n_inner": null, "n_layer": 48, "vocab_size": 64000}, "liyuesen/druggpt": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 53083}, "conceptofmind/Hermes-LLongMA-2-7b-8k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/scarlett-33B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, 
"intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "TheBloke/EverythingLM-13b-V2-16K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "sartmis1/starcoder-v2-openapi-special-tokens": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 40, "vocab_size": 49152}, "TheBloke/Phind-CodeLlama-34B-v1-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "TheBloke/Yarn-Llama-2-7B-64K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/Dolphin-Llama-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "kfkas/Legal-Llama-2-ko-7b-Chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 46336}, "Ichsan2895/Merak-7B-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "allenai/unifiedqa-v2-t5-base-1251000": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "sagawa/ReactionT5-product-prediction": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 268}, "lmqg/mt5-small-jaquad-ae": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "Narrativa/mT5-base-finetuned-tydiQA-xqa": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "allenai/macaw-large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "gagan3012/k2t-new": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "google/t5-efficient-tiny-nl2": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 256, "num_heads": 4, "num_layers": 2, "vocab_size": 32128}, "sam2ai/open_llama_3b_odia_gptq_128_4bit": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "lmqg/mt5-small-dequad-qg": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "mrm8488/mT5-small-finetuned-tydiqa-for-xqa": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250112}, "zjunlp/knowlm-13b-zhixi": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "h2oai/h2ogpt-16k-codellama-13b-instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, 
"num_hidden_layers": 40, "vocab_size": 32016}, "mymusise/gpt2-medium-chinese": {"architectures": ["TFGPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 8021}, "ai-forever/mGPT-13B": {"architectures": ["GPT2LMHeadModel"], "n_embd": 5120, "n_head": 40, "n_inner": null, "n_layer": 40, "vocab_size": 100000}, "TinaLiHF/fined-tuned-T5small": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "TheBloke/airoboros-l2-7B-gpt4-2.0-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Mihakram/AraT5-base-question-generation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 110080}, "fjungstedt/t5-criteria-text-to-json": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "luqh/ClinicalT5-large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "h2oai/h2ogpt-16k-codellama-13b-python": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "masakhane/afri-mt5-base": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "YeungNLP/bloom-1b4-zh": {"architectures": ["BloomForCausalLM"], "n_inner": null, "n_layer": 24, "num_attention_heads": 16, "vocab_size": 46145}, "shekharchatterjee/temp-model-174": {}, "TheBloke/Kimiko-v2-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "jeffwan/vicuna-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "kz919/ntk_scaled_open_llama_13b_32k": {"architectures": ["NTKScaledLlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "lmqg/t5-base-squad-qg-ae": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32101}, "r3dhummingbird/DialoGPT-small-harrypotter": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "camenduru/MiniGPT4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "TheBloke/open-llama-7b-open-instruct-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "MoinFaisal/Llama-2-7b-chat-finetune": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/CodeLlama-13B-Instruct-GGUF": {}, "fbellame/llama2-pdf-to-quizz-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 
32000}, "fractalego/fact-checking": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "michelecafagna26/gpt2-medium-finetuned-sst2-sentiment": {"architectures": ["GPT2ForSequenceClassification"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "TheBloke/WizardLM-Uncensored-SuperCOT-StoryTelling-30B-SuperHOT-8K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32001}, "TheBloke/Airoboros-7B-GPT4-1-4-SuperHOT-8K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/Airoboros-L2-70B-GPT4-m2.0-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "Arc53/docsgpt-7b-falcon": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "alenusch/mt5large-ruparaphraser": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 250112}, "ApoTro/slovak-t5-small": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 32100}, "microsoft/dolly-v2-7b-olive-optimized": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50280}, "huggingtweets/gordonramsay": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "prithivida/formal_to_informal_styletransfer": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "model-attribution-challenge/gpt2-xl": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1600, "n_head": 25, "n_layer": 48, "vocab_size": 50257}, "saiful9379/Bangla_GPT2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 33391}, "deepse/CodeUp-Llama-2-7b-chat-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "ziqingyang/chinese-llama-2-13b-16k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 55296}, "ChandlerU11/t5_fine": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "TheBloke/Guanaco-3B-Uncensored-v2-GPTQ": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "mamiksik/T5-commit-message-generation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32104}, "conceptofmind/Yarn-Llama-2-13b-64k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "mesolitica/llama-13b-hf-16384-fpf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 
5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Sao10K/Stheno-1.2-L2-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "gsarti/it5-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32103}, "Den4ikAI/FRED-T5-XL-interpreter": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1536, "num_heads": 24, "num_layers": 24, "vocab_size": 50364}, "TheBloke/WizardCoder-Guanaco-15B-V1.1-GPTQ": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 40, "vocab_size": 49153}, "seonglae/llama-2-7b-chat-hf-gptq": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/llama2_7b_chat_uncensored-GGML": {}, "ecosumit/gpt-model": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "allegro/plt5-large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 50048}, "cointegrated/rut5-small": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 20100}, "it5/it5-large-question-answering": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32103}, "tscholak/1zha5ono": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32102}, "optible/unifiedqa-t5-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "CleverShovel/falcon-7b-instruct-sharded-bf16": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "TheBloke/Pygmalion-13B-SuperHOT-8K-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "BlackSamorez/llama-2-tiny-testing": {"architectures": ["LlamaForCausalLM"], "hidden_size": 128, "intermediate_size": 11008, "num_attention_heads": 8, "num_hidden_layers": 2, "vocab_size": 2000}, "ianagra/Llama-2-7b-ALLM-virtual-sales-assistant": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "quantumaikr/KoreanLM-3B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 2048, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "quantumaikr/llama-2-70B-chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "Deniskin/gpt3_medium": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1536, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50264}, "ozcangundes/mt5-small-turkish-summarization": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250112}, "EleutherAI/pythia-1b-deduped-v0": 
{"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 8, "num_hidden_layers": 16, "vocab_size": 50304}, "flozi00/Llama-2-7b-german-assistant-v3": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Mikivis/gpt2-large-lora-stf4": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_inner": null, "n_layer": 36, "vocab_size": 50257}, "AK270802/DialoGPT-small-harrypotter": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "EleutherAI/pythia-12b-deduped-v0": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 36, "vocab_size": 50688}, "EricPeter/Llama-2-multilingual": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/Pygmalion-2-7B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "miguelvictor/python-gpt2-large": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_layer": 36, "vocab_size": 50257}, "h2oai/h2ogpt-16k-codellama-7b-python": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32016}, "ammarinjtkrbh/llama-2-7b-food-search": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "GroNLP/gpt2-small-dutch": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 40000}, "pszemraj/opt-350m-email-generation": {"architectures": ["OPTForCausalLM"], "hidden_size": 1024, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50265}, "caffsean/t5-small-finetuned-keyword-to-text-generation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "lmqg/mt5-small-dequad-ae": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "yuyijiong/T5-large-sentiment-analysis-Chinese-MultiTask": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32596}, "sonoisa/t5-qiita-title-generation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "YeungNLP/firefly-bloom-1b4": {"architectures": ["BloomForCausalLM"], "hidden_size": 2048, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 46145}, "samwit/koala-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/Vicuna-13B-1-3-SuperHOT-8K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Photolens/OpenOrcaxOpenChat-2-13b-langchain-chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 
13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "Ichsan2895/Merak-7B-v1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "flozi00/Llama-2-7b-german-assistant-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "ss1612/loki-chat": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "OpenBuddy/openbuddy-falcon-7b-v5-fp16": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 70144}, "wellecks/llmstep-mathlib4-pythia2.8b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50278}, "dariolopez/llama-2-7b-oasst1-es": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/WizardLM-1.0-Uncensored-CodeLlama-34B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "brad1141/gpt2-finetuned-comp2": {"architectures": ["GPT2ForTokenClassification"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "TheBloke/chronos-hermes-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "lizhuang144/flan-t5-large-VG-factual-sg": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "nivos/pythia-410m-deduped-finetuned-final-activity-text-10epoch": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 1024, "intermediate_size": 4096, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50304}, "HamidRezaAttar/gpt2-product-description-generator": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "TheBloke/ORCA_LLaMA_70B_QLoRA-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "lmsys/vicuna-13b-delta-v0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "jacobmorrison/tk-instruct-xl-lora-experiments": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32100}, "GroNLP/gpt2-small-italian": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 30001}, "yihsuan/mt5_chinese_small": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250100}, "YTTD/DialoGPT-medium-souv2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "arubenruben/ptt5-portuguese-cnn-dailymail-azure-pt-pt": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, 
"num_layers": 6, "vocab_size": 32128}, "localmodels/Llama-2-7B-Chat-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "lgaalves/llama-2-13b-chat-platypus": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "it5/it5-large-question-generation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32103}, "psyche/KoT5": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "TheBloke/Llama2-70B-OASST-SFT-v10-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32007}, "deepparag/Aeona": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "lmqg/mt5-small-koquad-qg": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "lmqg/mt5-small-esquad-qg": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "NinedayWang/PolyCoder-0.4B": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 1024, "intermediate_size": 4096, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50304}, "ConvLab/t5-small-nlu-multiwoz21": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32100}, "SIC98/GPT2-python-code-generator": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "lmqg/mt5-small-itquad-qg": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "kaiyuy/leandojo-lean4-tacgen-byt5-small": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3584, "d_model": 1472, "num_heads": 6, "num_layers": 12, "vocab_size": 384}, "usvsnsp/pythia-6.9b-sft": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "PlanTL-GOB-ES/gpt2-large-bne": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_layer": 36, "vocab_size": 50261}, "jordiclive/flan-t5-11b-summarizer-filtered": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 10240, "d_model": 4096, "num_heads": 64, "num_layers": 24, "vocab_size": 32128}, "Jordine/scpoo": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "behnamsh/gpt2_camel_physics": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "lmqg/mt5-small-esquad-ae": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "MerlynMind/merlyn-education-teacher-assistant": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 36, "vocab_size": 50688}, "mesolitica/llama-7b-hf-16384-fpf": {"architectures": 
["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "MatthisHoules/rat-t5-qdmr-grounded-with-db": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "persiannlp/mt5-small-parsinlu-qqp-query-paraphrasing": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250112}, "lmqg/mt5-small-koquad-ae": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "lmqg/mt5-small-itquad-ae": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "openthaigpt/openthaigpt-gpt2-instructgpt-poc-0.0.4": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50268}, "ChanceFocus/finma-7b-full": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "vivekraina/Llama-2-7b-hf-8bit": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "dpml/vicuna_mt_450s": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "burberg92/resume_summary": {"architectures": ["BloomForCausalLM"], "hidden_size": 2048, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 250880}, "Monero/Pygmalion-Metharme-7b-4bit-TopScore": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Icaruas/7bill8k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32002}, "dahara1/ELYZA-japanese-Llama-2-7b-fast-instruct-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 45043}, "TheBloke/Yarn-Llama-2-13B-64K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "prithivida/passive_to_active_styletransfer": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "lmqg/mt5-small-frquad-qg": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "JamesStratford/PLord-bot-DialoGPT-medium": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "yizhangliu/prompt-extend": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 52000}, "lmqg/mt5-small-frquad-ae": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "Beltenebros/DialoGPT-small-PerionOfGaul": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 
50257}, "sominw/rel23_conll": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "mncai/SGPT-5.8B-wiki-mirae-bank_securities-epoch5": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 16, "num_hidden_layers": 28, "vocab_size": 30080}, "MickyMike/VulRepair": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32105}, "ybelkada/t5-11b-sharded": {"architectures": ["T5WithLMHeadModel"], "d_ff": 65536, "d_model": 1024, "num_heads": 128, "num_layers": 24, "vocab_size": 32128}, "Einmalumdiewelt/T5-Base_GNAD_MaxSamples": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "digitous/13B-HyperMantis_GPTQ_4bit-128g": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "weqweasdas/hh_rlhf_rm_open_llama_3b": {"architectures": ["LlamaForSequenceClassification"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "TheBloke/WizardMath-13B-V1.0-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "ziqingyang/chinese-alpaca-2-7b-16k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 55296}, "valhalla/t5-base-squad": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "ELiRF/mt5-base-dacsa-es": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "abhitopia/question-answer-generation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32102}, "TurkuNLP/gpt3-finnish-large": {"architectures": ["BloomModel"], "hidden_size": 1536, "n_head": 16, "n_layer": 24, "vocab_size": 131072}, "Abyss-fyf/DialoGPT-small-discord": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "TheBloke/OpenOrca-Platypus2-13B-GGML": {}, "TheBloke/Airoboros-L2-7B-2.1-GGUF": {}, "huggingtweets/googleai": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "it5/it5-base-question-answering": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32103}, "woodmtaylor/DialoGPT-medium-Heej": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "kimdwan/t5-base-korean-summarize-LOGAN": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 50358}, "Narrativa/mT5-base-finetuned-tydiQA-question-generation": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "huggingtweets/normmacdonald": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, 
"n_inner": null, "n_layer": 12, "vocab_size": 50257}, "r3dhummingbird/DialoGPT-medium-neku": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "yhavinga/t5-v1.1-base-dutch-cnn-test": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32103}, "kennethhendricks/DialoGPT-medium-jared-hendricks-gen1": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "retrieva-jp/t5-small-long": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 32128}, "TheBloke/Vigogne-2-7B-Chat-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TigerResearch/tigerbot-7b-chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 60928}, "Fredithefish/Guanaco-13B-Uncensored": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "consciousAI/question-answering-generative-t5-v1-base-s-q-c": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "TheBloke/open-llama-7B-v2-open-instruct-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "mosama/Llama-2-Medical-Merged-LoRA": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "bullmount/quanIta_t5": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32103}, "YeungNLP/bloomz-396m-zh": {"architectures": ["BloomForCausalLM"], "hidden_size": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 46145}, "GreenBitAI/LLaMA-7B-2bit": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "chgk13/decicoder-1b-openvino-int8": {"architectures": ["DeciCoderForCausalLM"], "hidden_size": 2048, "intermediate_size": 5888, "num_attention_heads": 32, "num_hidden_layers": 20, "vocab_size": 49152}, "bigscience/bloomz-mt": {"architectures": ["BloomForCausalLM"], "n_layer": 70, "num_attention_heads": 112, "vocab_size": 250880}, "LarkAI/codet5p-770m_nl2sql_oig": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32100}, "Linly-AI/Chinese-Falcon-7B": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 90046}, "ckip-joint/bloom-3b-zh-instruct": {"architectures": ["BloomForCausalLM"], "hidden_size": 2560, "n_head": 32, "n_inner": null, "n_layer": 30, "vocab_size": 250880}, "sgr23/llama2-fine-tuned-dolly-15k-dto": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "edbeeching/gpt2-imdb": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 
12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "cardiffnlp/flan-t5-small-tweet-emotion": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 32128}, "TheBloke/airoboros-7B-gpt4-1.4-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/CodeLlama-7B-GGUF": {}, "TheBloke/Airoboros-c34B-2.1-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "byeongal/Ko-DialoGPT": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 51200}, "ismaelfaro/gpt2-poems.en": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "tuner007/t5_abs_qa": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "kennethhendricks/DialoGPT-medium-PowPowGaming": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "DunnBC22/flan-t5-base-text_summarization_data": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "zarakiquemparte/hermeslimarp-l2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "MagicLEMP/llamavocat_13B_mixed_16K": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "4bit/ELYZA-japanese-Llama-2-7b-instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "EnglishVoice/t5-base-us-to-uk-english": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "devanshipatel/t5-gec-english-125k": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "helloollel/vicuna-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "nferroukhi/WizardLM-Uncensored-Falcon-7b-sharded-bf16": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65025}, "dacorvo/tiny-random-gpt2-neuronx": {"intermediate_size": 37, "n_embd": 32, "n_head": 4, "n_inner": null, "n_layer": 5, "vocab_size": 1000}, "JamesStratford/Pidrow-bot-DialoGPT-Large-Feb2023": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_inner": null, "n_layer": 36, "vocab_size": 50257}, "tsuyuan/Llama-2-7b-unit": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 41218}, "OFA-Sys/gsm8k-rft-llama7b2-u13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "uer/gpt2-chinese-ancient": {"architectures": 
["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 25370}, "YTTD/DialoGPT-medium-safv3": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "Neko-Institute-of-Science/LLaMA-65B-HF": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "TheBloke/Spicyboros-13B-2.2-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "IDEA-CCNL/Randeng-T5-77M-MultiTask-Chinese": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 32596}, "coreml-projects/Llama-2-7b-chat-coreml": {"architectures": ["LlamaForCausalLM"], "vocab_size": 32000}, "oscorrea/scores-lince-sm": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "morzecrew/FRED-T5-RefinedPersonaChat": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1536, "num_heads": 24, "num_layers": 24, "vocab_size": 50364}, "anjakuzev/harry_7": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/Mythalion-13B-GGUF": {}, "Kryptone/monikAI": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "TheBloke/Luna-AI-Llama2-Uncensored-GGML": {}, "mlabonne/llama-2-7b-miniguanaco": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/Hermes-LLongMA-2-7B-8K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "zlsl/l_erotic_kink_chat": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1536, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50259}, "Sao10K/Stheno-Inverted-1.2-L2-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "castorini/duot5-base-msmarco": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "mrm8488/t5-base-finetuned-qasc": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "entropy/gpt2_zinc_87m": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 2707}, "MarkyMarx/DialoGPT-medium-jimmybot2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "stefan-it/secret-gpt2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "Narrativa/byt5-base-tweet-hate-detection": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3968, "d_model": 1536, "num_heads": 12, "num_layers": 18, "vocab_size": 384}, "nicholasKluge/Aira-2-124M": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50261}, 
"TheBloke/Samantha-1.11-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "castorini/monot5-large-msmarco": {"architectures": ["T5Model"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "PoloHuggingface/French_grammar_error_corrector": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32102}, "cambridgeltl/magic_mscoco": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50259}, "Gatozu35/tortoise-tts": {"architectures": ["GPT2InferenceModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 30, "vocab_size": 604}, "abacusai/Giraffe-v1-delta-13b-scaled-16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "flozi00/Llama-2-13B-german-assistant-v3-4bit-autogptq": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "HAERAE-HUB/tulu_13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "doc2query/msmarco-14langs-mt5-base-v1": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "Maciel/T5Corrector-base-v2": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "vilm/vietcuna-3b-v2": {"architectures": ["BloomForCausalLM"], "hidden_size": 2560, "n_head": 32, "n_inner": null, "n_layer": 30, "vocab_size": 250880}, "TitanML/ct2-int8-falcon-7b-instruct": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "ybelkada/llama-7b-GPTQ-test": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "h2oai/h2ogpt-16k-codellama-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32016}, "TigerResearch/tigerbot-70b-chat-v1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 60928}, "Supiri/t5-base-conversation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "msterbentz/t5-base-break-high": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "igorktech/rut5-small-chit-chat-intelligent": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 20100}, "kuleshov/llama-7b-4bit": {"architectures": ["LLaMAForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "hipnologo/gpt2-imdb-finetune": {"architectures": ["GPT2ForSequenceClassification"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, 
"vocab_size": 50257}, "qwopqwop/danbooru-llama-gptq": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "t-dai-con/gpt-fine-tuned-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/Platypus2-70B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "KETI-AIR/ke-t5-base-ko": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 64128}, "doc2query/all-t5-base-v1": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "GT4SD/multitask-text-and-chemistry-t5-base-standard": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "uer/gpt2-medium-chinese-cluecorpussmall": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 21128}, "UBC-NLP/AraT5-base-title-generation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 110080}, "dsivakumar/text2sql": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "power-greg/super-fast-llm": {"architectures": ["GPT2LMHeadModel"], "n_embd": 2048, "n_head": 16, "n_inner": 2048, "n_layer": 4, "vocab_size": 2048}, "AlexWortega/instruct_rugptMedium": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50263}, "hiyouga/Llama-2-Chinese-13b-chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "frank098/llama2-13b-8k-vnf-virtualization": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "OFA-Sys/gsm8k-rft-llama7b-sample100": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "EnterNameBros/Senko-ai-medium": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "PeanutJar/LLaMa-2-PeanutButter_v19_R8-7B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Sao10K/Medusa-1.1-L2-7B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "ChrisVCB/DialoGPT-medium-cmjs": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "indonesian-nlp/gpt2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "persiannlp/mt5-small-parsinlu-squad-reading-comprehension": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 
6, "num_layers": 8, "vocab_size": 250112}, "stmnk/codet5-small-code-summarization-python": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32100}, "emozilla/LLongMA-2-13b-16k-flash": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "bigscience/bloom-petals": {"architectures": ["BloomForCausalLM"], "hidden_size": 14336, "n_head": 112, "n_layer": 70, "vocab_size": 250880}, "procesaur/gpt2-srlat": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 52000}, "ashwinR/CodeExplainer": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32100}, "Chirayu/nl2pandas": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "OpenBuddy/openbuddy-falcon-7b-v6-bf16": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 70144}, "swbaek/tulu_65b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32001}, "huggingtweets/wallstreetbets": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "Sultannn/gpt2-ft-id-puisi": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 35000}, "sonoisa/sentence-t5-base-ja-mean-tokens": {"architectures": ["T5Model"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "sdadas/polish-gpt2-xl": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1600, "n_head": 25, "n_inner": 6400, "n_layer": 48, "vocab_size": 51200}, "sjrhuschlee/flan-t5-large-squad2": {"architectures": ["T5ForQuestionAnswering"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "Hnabil/t5-address-standardizer": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "Gryphe/MythoLogic-Mini-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/Athena-v1-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Undi95/MythoMax-L2-Kimiko-v2-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "magnifi/llama-augmented-contextual-2-epoch-6-merged": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "doc2query/msmarco-chinese-mt5-base-v1": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "Sakuna/t5_grammar_checker": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "Dahoas/pythia-1B-response-full-static-sft": {"architectures": ["GPTNeoXForCausalLM"], 
"hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50304}, "localmodels/Vicuna-7B-v1.3-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Sao10K/Stheno-1.1-L2-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "mlabonne/drllama-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "IlyaGusev/rugpt3medium_sum_gazeta": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "describeai/gemini": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "mojians/E2E-QA-Mining": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32102}, "TheBloke/Wizard-Vicuna-13B-Uncensored-SuperHOT-8K-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "dnagpt/human_gpt2-v1": {"architectures": ["GPT2Model"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 25000}, "heegyu/WizardVicuna-Uncensored-pythia-160m-deduped": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 768, "intermediate_size": 3072, "num_attention_heads": 12, "num_hidden_layers": 12, "vocab_size": 50304}, "maximuslee07/llama-2-7b-rockwell": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "DylanJHJ/fidt5-base-nq": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "laituan245/molt5-large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "DancingIguana/music-generation": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 6, "vocab_size": 25000}, "Qiliang/flan-t5-large-summarization-finetuned-xsum": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "TheBloke/Vicuna-7B-CoT-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "hpcaitech/openmoe-base": {"architectures": ["OpenMoeForCausalLM"], "hidden_size": 768, "intermediate_size": 2048, "num_attention_heads": 12, "num_hidden_layers": 12, "vocab_size": 256384}, "CalderaAI/13B-Thorns-l2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32032}, "CHIH-HUNG/llama-2-13b-FINETUNE1_17w-r4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "IlyaGusev/rugpt_medium_turbo_instructed": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, 
"vocab_size": 50258}, "pankajmathur/orca_alpaca_3b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "TheBloke/Wizard-Vicuna-7B-Uncensored-SuperHOT-8K-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "abacusai/Giraffe-v1-delta-13b-scaled-4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/Huginn-v3-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "bloom-testing/test-bloomd-350m-main": {"architectures": ["BloomModel"], "n_inner": null, "n_layer": 24, "num_attention_heads": 16, "vocab_size": 250880}, "AI-Sweden/gpt-sw3-356m": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": 4096, "n_layer": 24, "vocab_size": 64000}, "raymondho/DialoGPT-small-harry": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "TheBloke/airochronos-33B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "TheBloke/OpenChat_v3.2-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "ahnyeonchan/OpenOrca-AYT-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "stanford-crfm/expanse-gpt2-small-x777": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "doc2query/msmarco-german-mt5-base-v1": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "ku-nlp/gpt2-medium-japanese-char": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 6000}, "llm-blender/gen_fuser_3b": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32100}, "lomahony/eleuther-pythia2.8b-hh-sft": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50304}, "TheBloke/Llama2-22B-GPLATTY-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 40, "vocab_size": 32000}, "grammarly/coedit-xl-composite": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32100}, "imuncomfortable/DiabloGPT-small-CocoAtarashi": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "kaiyuy/leandojo-lean3-retriever-tacgen-byt5-small": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3584, "d_model": 1472, "num_heads": 6, "num_layers": 12, "vocab_size": 384}, "michaelwzhu/Chinese-LlaMA2-13B-chat": {"architectures": 
["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 55296}, "Xenova/llama2.c-stories110M": {"architectures": ["LlamaForCausalLM"], "hidden_size": 768, "intermediate_size": 2048, "num_attention_heads": 12, "num_hidden_layers": 12, "vocab_size": 32000}, "Youngwoo9/T5_Pyeongsan": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 50358}, "literallywood/DialoGPT-small-ekansh": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "jondurbin/spicyboros-7b-2.2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "indobenchmark/indogpt": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_layer": 12, "vocab_size": 40005}, "it5/it5-efficient-small-el32-news-summarization": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 32, "vocab_size": 32100}, "mesolitica/finetune-translation-t5-base-standard-bahasa-cased-v2": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "Den4ikAI/FRED-T5-XL_instructor": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1536, "num_heads": 24, "num_layers": 24, "vocab_size": 50365}, "mlabonne/gpt2-GPTQ-4bit": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "steerapi/Llama-2-7b-chat-hf-onnx": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Langboat/bloom-1b4-zh": {"architectures": ["BloomForCausalLM"], "hidden_size": 2048, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 46145}, "neulab/docprompting-codet5-python-doc-retriever": {"architectures": ["BERTScorerForCL"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "AI-Sweden/gpt-sw3-20b": {"architectures": ["GPT2LMHeadModel"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 44, "vocab_size": 64000}, "syndi-models/article-title-generator": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "vgaraujov/Dummy5": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "TFLai/Orca-Nova-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "allenai/tk-instruct-11b-def-pos": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 65536, "d_model": 1024, "num_heads": 128, "num_layers": 24, "vocab_size": 32128}, "aspis/gpt2-genre-story-generation": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50267}, "lcw99/t5-base-korean-paraphrase": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 50358}, "Celestinian/TopicGPT": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_inner": null, "n_layer": 36, "vocab_size": 50257}, 
"TheBloke/Redmond-Hermes-Coder-GPTQ": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 40, "vocab_size": 49153}, "TheBloke/h2ogpt-gm-oasst1-en-2048-falcon-7b-v3-GPTQ": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "abhinavkulkarni/meta-llama-Llama-2-13b-chat-hf-w4-g128-awq": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "jypppp/llama-2-7b-manual_GPT_ver2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/Llama-2-7B-32K-Instruct-GGML": {}, "TheBloke/Yarn-Llama-2-7B-128K-GGML": {}, "quantumaikr/KoreanLM-llama-2-7B-finetuned": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 46336}, "google/t5-xl-ssm-nq": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "nikokons/gpt2-greek": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_layer": 12, "vocab_size": 22000}, "NYTK/PULI-GPT-3SX": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50048}, "Futyn-Maker/rugpt3small_based_on_gpt2-finetuned_teachers_quotes_small": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50264}, "localmodels/Llama-2-13B-Chat-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "SebastianSchramm/UniNER-7B-all-GPTQ-4bit-128g-actorder_True": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/Airoboros-L2-70B-2.1-Creative-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "casperhansen/vicuna-7b-v1.5-awq-gemv": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "IDEA-CCNL/Wenzhong-GPT2-3.5B": {"architectures": ["GPT2LMHeadModel"], "n_embd": 3072, "n_head": 32, "n_inner": 12288, "n_layer": 30, "vocab_size": 50304}, "antoinelouis/belgpt2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_layer": 12, "vocab_size": 50257}, "atkh6673/DialoGPT-small-trump": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "huggingface-course/mt5-small-finetuned-amazon-en-es": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250112}, "malteos/gpt2-xl-wechsel-german": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1600, "n_head": 25, "n_inner": 6400, "n_layer": 48, "vocab_size": 50304}, "KES/caribe-capitalise": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, 
"pszemraj/flan-t5-large-instruct-dolly_hhrlhf": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "Tanmay09516/StableBeluga-7B-sharded-bf16-5GB": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "abhinavkulkarni/codellama-CodeLlama-7b-Python-hf-w4-g128-awq": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/Spicyboros-7B-2.2-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "huggingtweets/elonmusk": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "BelleGroup/BELLE-7B-2M": {"architectures": ["BloomModel"], "n_inner": null, "n_layer": 30, "num_attention_heads": 32, "vocab_size": 250880}, "snoop2head/Gomoku-GPT2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 256, "n_head": 4, "n_inner": null, "n_layer": 4, "vocab_size": 404}, "AnimusOG/pygmalion-7b-4bit-128g-cuda-2048Token": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/airoboros-l2-7B-gpt4-m2.0-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Juniplayground/Mist_LLaMA-2-7B-1024_V3": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "DataLinguistic/DataLinguistic-34B-V1.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32001}, "erikycd/chatbot_hadita": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50262}, "medicalai/ClinicalGPT-base-zh": {"architectures": ["BloomForCausalLM"], "hidden_size": 4096, "n_head": 32, "n_inner": null, "n_layer": 30, "vocab_size": 250880}, "TheBloke/orca_mini_v2_13b-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "NIRVANA/T5_academic_paraphraser": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "josmunpen/mt5-small-spanish-summarization": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250112}, "shahp7575/gpt2-horoscopes": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50260}, "yihsuan/best_model_0427_small_long": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250100}, "malteos/bloom-6b4-clp-german-oasst-v0.1": {"architectures": ["BloomForCausalLM"], "hidden_size": 4096, "n_head": 32, "n_layer": 30, "vocab_size": 50272}, "openllmplayground/openalpaca_7b_700bt_preview": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, 
"intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Gaivoronsky/ruGPT-3.5-13B-fp16": {"architectures": ["GPT2LMHeadModel"], "n_embd": 5120, "n_head": 40, "n_inner": null, "n_layer": 40, "vocab_size": 50272}, "universeTBD/astrollama": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "gorkemgoknar/gpt2-small-turkish": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "huggingtweets/joejoinerr": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "Prarabdha/T5-Transformer-RickBot": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "beomi/kollama-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 52000}, "mohammadtaghizadeh/flan-t5-base-imdb-text-classification": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "nicholasKluge/Aira-Instruct-774M": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_inner": null, "n_layer": 36, "vocab_size": 50259}, "bhenrym14/airoboros-7b-gpt4-1.4.1-lxctx-PI-16384-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Alireza1044/michael_bert_lm": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 6, "vocab_size": 50257}, "shibing624/gpt2-dialogbot-base-chinese": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 10, "vocab_size": 13317}, "mesolitica/finetune-summarization-ms-t5-base-standard-bahasa-cased": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "lmqg/flan-t5-large-squad-qg": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32101}, "circulus/alpaca-7b": {"architectures": ["LlaMAForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "reeducator/vicuna-13b-free": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "flozi00/Llama-2-13b-german-assistant-v6-4bit-autogptq": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 37632}, "SasnayaLetovka/tinkoff-zhientaev-model": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50262}, "mesolitica/t5-base-standard-bahasa-cased": {"architectures": ["T5Model"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "EllyPony/flutterbot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "pszemraj/flan-t5-xl-grammar-synthesis": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, 
"d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "jinxuewen/vicuna-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "fireballoon/baichuan-llama-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 64000}, "TheBloke/Vicuna-7B-v1-3-SuperHOT-8K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "scural/arxiv_model": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "Undi95/CodeEngine": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Codexister/DialoGPT-medium-KafkaBotV1": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "google/t5-xxl-ssm-nq": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 10240, "d_model": 4096, "num_heads": 64, "num_layers": 24, "vocab_size": 32128}, "uer/gpt2-chinese-couplet": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 21128}, "nicholasKluge/Aira-Instruct-355M": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50259}, "HIT-SCIR/huozi-7b-sft": {"architectures": ["BloomForCausalLM"], "hidden_size": 4096, "n_head": 32, "n_inner": null, "n_layer": 30, "vocab_size": 250880}, "NousResearch/CodeLlama-13b-Instruct-hf-flash": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "Enno-Ai/vigogne2-enno-13b-sft-lora-4bit": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "sonoisa/t5-base-japanese-article-generation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "Kyrmasch/t5-kazakh-qa": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 18947}, "TheBloke/airoboros-13b-gpt4-1.4-SuperHOT-8K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/Kimiko-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "arya555/vicuna-7b-v1.5-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Geo/gpt2_custom_c_q_and_a": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "laituan245/molt5-small-smiles2caption": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 32128}, "cloudqi/cqi_brain_memory_summarizer_large_pt_v0": {"architectures": 
["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "ybelkada/bloom-1b7-8bit": {"architectures": ["BloomForCausalLM"], "hidden_size": 2048, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 250880}, "snipaid/snip-igel-500-v2-adapter-merged": {"architectures": ["BloomForCausalLM"], "hidden_size": 4096, "n_head": 32, "n_layer": 30, "vocab_size": 50304}, "TabbyML/SantaCoder-1B": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 2048, "n_head": 16, "n_inner": 8192, "n_layer": 24, "vocab_size": 49280}, "TheBloke/Guanaco-33B-SuperHOT-8K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "hanseokhyeon/kullm-polyglot-5.8b-v2-GPTQ": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 16, "num_hidden_layers": 28, "vocab_size": 30080}, "CAIRE-CedarsSinai/falcon-7b-qlora-chat-support-bot-faq-alzkb-version-1": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "pranavpsv/genre-story-generator-v2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_layer": 12, "vocab_size": 50266}, "nandakishormpai/t5-small-machine-articles-tag-generation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "ITG/DialoGPT-medium-spanish-chitchat": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "4bit/falcon-7b-instruct-GPTQ": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "OpenBuddy/openbuddy-openllama-7b-v5-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 38449}, "papahawk/keya-560m": {"architectures": ["BloomForCausalLM"], "n_inner": null, "n_layer": 24, "num_attention_heads": 16, "vocab_size": 250880}, "abhinavkulkarni/tiiuae-falcon-40b-instruct-w4-g128-awq": {"architectures": ["RWForCausalLM"], "hidden_size": 8192, "n_head": 128, "n_layer": 60, "vocab_size": 65024}, "funstoryai/immersiveL-exp": {"architectures": ["BloomForCausalLM"], "hidden_size": 1536, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 250880}, "Benson/llama-2-7b-miniguanaco-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "clancystudios/DialoGPT-medium-Morty": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "huggingtweets/realdonaldtrump": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "charanhu/text_to_sql_2": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32102}, "beomi/kollama-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 52000}, "IDEA-CCNL/Ziya-LLaMA-13B-v1.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 
39424}, "nicholasKluge/Aira-Instruct-PT-1B7": {"architectures": ["BloomForCausalLM"], "hidden_size": 2048, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 250683}, "TheBloke/Llama2-22B-Daydreamer-v3-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 40, "vocab_size": 32000}, "yongzx/pythia-160m-sft-hh": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 768, "intermediate_size": 3072, "num_attention_heads": 12, "num_hidden_layers": 12, "vocab_size": 50304}, "h2oai/h2ogpt-16k-codellama-34b-python": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "nedima68/author_articles_GPT2_textgen_TR": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 52001}, "IronChef/MascotAI_Open_LLaMA_FINAL": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "unionai/pythia-1B-deduped-wikipedia-8bit": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "Chirayu/nl2cql": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "TheBloke/Nous-Puffin-70B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "TheBloke/Llama-2-70B-Orca-200k-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "TheBloke/Llama-2-70B-chat-GGUF": {}, "sartmis1/CodeLlama-34b-instruct-openapi": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "flax-community/bengali-t5-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32103}, "csebuetnlp/mT5_m2o_hindi_crossSum": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "huggingtweets/fabrizioromano": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "yshen99/ZhiGuoLiZheng-GPT2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 21128}, "malalejandra/putinspeaks": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "Intel/fid_flan_t5_base_nq": {"architectures": ["FusionInDecoderForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "sjrhuschlee/flan-t5-base-mnli": {"architectures": ["T5ForSequenceClassification"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "TheBloke/Codegen25-7B-mono-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 51200}, "frank098/starcoder-vyatta": {"architectures": 
["GPTBigCodeForCausalLM"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 40, "vocab_size": 49152}, "Xenova/llama2.c-stories42M": {"architectures": ["LlamaForCausalLM"], "hidden_size": 512, "intermediate_size": 1376, "num_attention_heads": 8, "num_hidden_layers": 8, "vocab_size": 32000}, "flozi00/Llama-2-13b-german-assistant-v5": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 37632}, "Andrei-Alex/Fine-Tuned-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/vicuna-7B-1.1-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "sharpbai/alpaca-7b-merged": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "Clakmann/t5-base-Clakmann-thesis": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "RuterNorway/Llama-2-13b-chat-norwegian-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "osieosie/bloom-560m-4bit": {"architectures": ["BloomForCausalLM"], "hidden_size": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 250880}, "paulowoicho/t5-podcast-summarisation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "liujch1998/rainier-large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "gsdas/qct5": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "nicholasKluge/Aira-Instruct-1B5": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1600, "n_head": 25, "n_inner": null, "n_layer": 48, "vocab_size": 50259}, "kajdun/iubaris-13b-v3_GGML": {}, "csebuetnlp/mT5_m2o_english_crossSum": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "dehio/german-qg-t5-quad": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32101}, "emil2000/dialogpt-for-french-language": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "khalidsaifullaah/bengali-lyricist-gpt2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "thinhda/chatbot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "Finnish-NLP/llama-7b-finnish": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 64256}, "ehartford/WizardLM-7B-V1.0-Uncensored": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, 
"TheBloke/WizardCoder-Guanaco-15B-V1.0-GPTQ": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 40, "vocab_size": 49153}, "DUOMO-Lab/TransGPT-v0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 49954}, "TheBloke/Platypus2-70B-Instruct-GGUF": {}, "lmqg/t5-large-squad-qg-ae": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32101}, "rubentito/hivt5-base-mpdocvqa": {"architectures": ["HiVT5"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "cosimoiaia/Loquace-70m": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 512, "intermediate_size": 2048, "num_attention_heads": 8, "num_hidden_layers": 6, "vocab_size": 50304}, "metamyth/jennyNew": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "AlexWortega/LLama2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "totally-not-an-llm/AlpacaCielo2-7b-8k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/orca_mini_v3_7B-GGML": {}, "zjunlp/knowlm-13b-base-v1.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "ticoAg/gpt2-tigerbot-pt-zh": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "akshat3492/mT5": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250112}, "TheBloke/Falcon-180B-Chat-GGUF": {}, "unicamp-dl/mt5-base-mmarco-v2": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "malteos/gpt2-wechsel-german-ds-meg": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": 3072, "n_layer": 12, "vocab_size": 50304}, "phpaiola/ptt5-base-summ-temario": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "mesolitica/finetune-translation-t5-super-tiny-standard-bahasa-cased": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 256, "num_heads": 6, "num_layers": 2, "vocab_size": 32100}, "ademfatnassi/bonjourGPT-small": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "pr1me/llama2_13b_eros_instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32032}, "Xenova/llama2.c-stories15M": {"architectures": ["LlamaForCausalLM"], "hidden_size": 288, "intermediate_size": 768, "num_attention_heads": 6, "num_hidden_layers": 6, "vocab_size": 32000}, "sekarmulyani/gpt2-ulasan-beauty-products-gen": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "akhooli/gpt2-small-arabic-poetry": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, 
"n_head": 12, "n_layer": 12, "vocab_size": 50257}, "mrm8488/spanish-t5-small-sqac-for-qa": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32103}, "flozi00/falcon-7b-german-assistant-v2": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65040}, "TheBloke/llama-2-13B-chat-limarp-v2-merged-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "ticoAg/gpt2-tiger-sft-zh": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "thiagomf/Llama-2-7b-hf-sharded-bf16-1GB": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "THUMT/mGPT": {"architectures": ["GPT2LMHeadModel"], "vocab_size": 250100, "n_embd": 1024, "n_layer": 24, "n_head": 16, "n_inner": 4096}, "lmqg/flan-t5-base-squad-qg": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32101}, "h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-7b-preview-700bt": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/Phind-CodeLlama-34B-Python-v1-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "arogov/llama2_13b_chat_uncensored": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "ai-forever/mGPT-1.3B-bulgarian": {"architectures": ["GPT2LMHeadModel"], "n_embd": 2048, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 100000}, "davesoma/SageBeluga13": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "pssubitha/llama-2-7b-sales-force-chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "PyaeSoneK/pythia_70m_legalQA": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 512, "intermediate_size": 2048, "num_attention_heads": 8, "num_hidden_layers": 6, "vocab_size": 50304}, "hidude562/OpenMusenet-2.1-L": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50259}, "abeiler/huggingface-goatLora-goatV9-testData-morePushes": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "abinayam/gpt-2-tamil": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "persiannlp/mt5-base-parsinlu-squad-reading-comprehension": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "pierreguillou/t5-base-qa-squad-v1.1-portuguese": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, 
"lchaloupsky/czech-gpt2-oscar": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_layer": 12, "vocab_size": 50257}, "OpenHust/viet-gpt2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "tiansz/ChatYuan-7B-merge": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "voidful/llama-v2-unit-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 8195}, "taaredikahan23/Llama-2-7b-chat-finetune": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "deutsche-telekom/mt5-small-sum-de-en-v1": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250100}, "hetpandya/t5-small-tapaco": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "sunhao666/chi-sum2": {"architectures": ["T5Model"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 21228}, "smartik/mt5-small-finetuned-gec-0.2": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250112}, "PORTULAN/gervasio-ptbr-base": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 8, "num_hidden_layers": 16, "vocab_size": 50304}, "clibrain/Llama-2-13b-ft-instruct-es-gptq-4bit": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "soketlabs/bhasha-7b-2k-hi": {"architectures": ["MPTForCausalLM"], "d_model": 4096, "vocab_size": 61772}, "codefuse-ai/CodeFuse-13B": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 100831}, "Sentdex/GPyT": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 52000}, "it5/it5-large-news-summarization": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32103}, "FredZhang7/distilgpt2-stable-diffusion": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 6, "vocab_size": 50257}, "Rostlab/ProstT5_fp16": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 16384, "d_model": 1024, "num_heads": 32, "num_layers": 24, "vocab_size": 150}, "approach0/mathy-vicuna-13B-FFT": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "lighteternal/gpt2-finetuned-greek": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "stanford-crfm/battlestar-gpt2-small-x49": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "stacked-summaries/flan-t5-small-stacked-samsum-1024": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, 
"num_layers": 8, "vocab_size": 32128}, "TigerResearch/tigerbot-7b-base-v1": {"architectures": ["BloomForCausalLM"], "hidden_size": 4096, "n_head": 32, "n_inner": null, "n_layer": 30, "vocab_size": 250680}, "Chang-Su/llama-2-13b-chat-ko": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 39478}, "Clakmann/t5-base-Clakmann-thesis-epoch10": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "yekaraoglann/results": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "bitadin/gpt-4-medium-titles-v2-flan-t5-base-llm-6": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "google/t5_11b_trueteacher_and_anli": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 65536, "d_model": 1024, "num_heads": 128, "num_layers": 24, "vocab_size": 32128}, "TaylorAI/Flash-Llama-30M": {"architectures": ["LlamaForCausalLM"], "hidden_size": 384, "intermediate_size": 1024, "num_attention_heads": 12, "num_hidden_layers": 4, "vocab_size": 32000}, "flax-community/t5-base-wikisplit": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "razent/SciFive-large-Pubmed_PMC": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "inkoziev/rugpt_chitchat": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1536, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50258}, "lomahony/eleuther-pythia410m-hh-sft": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 1024, "intermediate_size": 4096, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50304}, "TheBloke/Vicuna-13B-v1.3-German-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "emozilla/LLongMA-2-13b-storysummarizer": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32032}, "yongzx/pythia-1b-sft-hh": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 8, "num_hidden_layers": 16, "vocab_size": 50304}, "TheBloke/airoboros-13b-gpt4-1.4-SuperHOT-8K-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "heegyu/llama-2-ko-7b-chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 46336}, "flozi00/Llama-2-7b-german-assistant-v3-4bit-autogptq": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "zarakiquemparte/zararp-l2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Sao10K/Stheno-1.3-L2-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 
40, "num_hidden_layers": 40, "vocab_size": 32000}, "TsinghuaAI/CPM-Generate": {"architectures": ["GPT2LMHeadModel"], "n_embd": 2560, "n_head": 32, "n_inner": null, "n_layer": 32, "vocab_size": 30000}, "AlexWortega/instruct_rugptlarge": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1536, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50263}, "tatsu-lab/alpaca-7b-wdiff": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "microsoft/phi-1_5": {"architectures": ["MixFormerSequentialForCausalLM"], "n_embd": 2048, "n_head": 32, "n_inner": null, "n_layer": 24, "vocab_size": 51200}, "Deci/DeciLM-6b": {"architectures": ["DeciLMForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Deci/DeciLM-6b-instruct": {"architectures": ["DeciLMForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "baichuan-inc/Baichuan2-13B-Chat": {"architectures": ["BaichuanForCausalLM"], "hidden_size": 5120, "intermediate_size": 13696, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 125696}, "microsoft/phi-1": {"architectures": ["MixFormerSequentialForCausalLM"], "n_embd": 2048, "n_head": 32, "n_inner": null, "n_layer": 24, "vocab_size": 51200}, "Deci/DeciCoder-1b": {"architectures": ["DeciCoderForCausalLM"], "hidden_size": 2048, "intermediate_size": 5888, "num_attention_heads": 32, "num_hidden_layers": 20, "vocab_size": 49152}, "baichuan-inc/Baichuan2-7B-Chat": {"architectures": ["BaichuanForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 125696}, "CofeAI/FLM-101B": {"n_embd": 10240, "n_head": 80, "n_inner": 40960, "n_layer": 80, "vocab_size": 100352}, "jphme/phi-1_5_Wizard_Vicuna_uncensored": {"architectures": ["MixFormerSequentialForCausalLM"], "n_embd": 2048, "n_head": 32, "n_inner": null, "n_layer": 24, "vocab_size": 50304}, "ehartford/samantha-phi": {"architectures": ["MixFormerSequentialForCausalLM"], "n_embd": 2048, "n_head": 32, "n_inner": null, "n_layer": 24, "vocab_size": 50304}, "teknium/Phi-Hermes-1.3B": {"architectures": ["MixFormerSequentialForCausalLM"], "n_embd": 2048, "n_head": 32, "n_inner": null, "n_layer": 24, "vocab_size": 50304}, "baichuan-inc/Baichuan2-13B-Chat-4bits": {"architectures": ["BaichuanForCausalLM"], "hidden_size": 5120, "intermediate_size": 13696, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 125696}, "teknium/Puffin-Phi-v2": {"architectures": ["MixFormerSequentialForCausalLM"], "n_embd": 2048, "n_head": 32, "n_inner": null, "n_layer": 24, "vocab_size": 50304}, "AIDC-ai-business/Marcoroni-70B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "PY007/TinyLlama-1.1B-Chat-v0.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 2048, "intermediate_size": 5632, "num_attention_heads": 32, "num_hidden_layers": 22, "vocab_size": 32001}, "SkunkworksAI/PlatyPhi-1.5B": {"architectures": ["MixFormerSequentialForCausalLM"], "n_embd": 2048, "n_head": 32, "n_inner": null, "n_layer": 24, "vocab_size": 50304}, "gpt2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_layer": 12, "vocab_size": 50257}, "ibm/MoLM-700M-8B": {"architectures": 
["ModuleFormerForCausalLM"], "n_embd": 1024, "n_head": 1, "n_layer": 48, "vocab_size": 50295}, "tangger/Qwen-7B-Chat": {"architectures": ["QWenLMHeadModel"], "hidden_size": 4096, "intermediate_size": 22016, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 151936}, "baichuan-inc/Baichuan2-7B-Chat-4bits": {"architectures": ["BaichuanForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 125696}, "lyogavin/Anima-7B-100K": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "baichuan-inc/Baichuan2-13B-Base": {"architectures": ["BaichuanForCausalLM"], "hidden_size": 5120, "intermediate_size": 13696, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 125696}, "baichuan-inc/Baichuan-7B": {"architectures": ["BaiChuanForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 64000}, "TheBloke/MLewd-L2-Chat-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}} \ No newline at end of file +{"NousResearch/Llama-2-13b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "google/flan-t5-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "google/flan-t5-large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "t5-small": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "t5-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "PascalNotin/Tranception_Small": {"architectures": ["TranceptionLMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": 3072, "n_layer": 12, "vocab_size": 25}, "bigscience/bloom-560m": {"architectures": ["BloomForCausalLM"], "n_inner": null, "n_layer": 24, "num_attention_heads": 16, "vocab_size": 250880}, "distilgpt2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_layer": 6, "vocab_size": 50257}, "hf-internal-testing/tiny-random-gpt2": {"intermediate_size": 37, "n_embd": 32, "n_head": 4, "n_inner": null, "n_layer": 5, "vocab_size": 1000}, "tiiuae/falcon-7b": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "t5-large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "bigscience/bloomz-1b1": {"architectures": ["BloomForCausalLM"], "n_inner": null, "n_layer": 24, "num_attention_heads": 16, "vocab_size": 250880}, "gpt2-medium": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_layer": 24, "vocab_size": 50257}, "mrm8488/t5-base-finetuned-common_gen": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "lmsys/fastchat-t5-3b-v1.0": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, 
"vocab_size": 32110}, "gpt2-xl": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1600, "n_head": 25, "n_layer": 48, "vocab_size": 50257}, "meta-llama/Llama-2-7b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "hf-internal-testing/tiny-random-t5": {"d_ff": 37, "d_model": 32, "num_heads": 4, "num_layers": 5, "vocab_size": 1103}, "EleutherAI/pythia-6.9b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "databricks/dolly-v2-3b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50280}, "hf-internal-testing/tiny-random-GPTNeoXForCausalLM": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 32, "intermediate_size": 37, "num_attention_heads": 4, "num_hidden_layers": 5, "vocab_size": 1024}, "meta-llama/Llama-2-7b-chat-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "microsoft/DialoGPT-medium": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_layer": 24, "vocab_size": 50257}, "google/mt5-base": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "TheBloke/Wizard-Vicuna-7B-Uncensored-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "hf-internal-testing/tiny-random-BloomModel": {"architectures": ["BloomModel"], "hidden_size": 32, "n_head": 4, "n_layer": 5, "vocab_size": 1024}, "google/flan-t5-xxl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 10240, "d_model": 4096, "num_heads": 64, "num_layers": 24, "vocab_size": 32128}, "lmsys/vicuna-7b-v1.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "huggyllama/llama-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "mrm8488/t5-base-finetuned-summarize-news": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "google/flan-t5-small": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 32128}, "tiiuae/falcon-40b-instruct": {"architectures": ["RWForCausalLM"], "hidden_size": 8192, "n_head": 128, "n_layer": 60, "vocab_size": 65024}, "ramsrigouthamg/t5_sentence_paraphraser": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "flexudy/t5-base-multi-sentence-doctor": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "lewtun/tiny-random-mt5": {"architectures": ["MT5Model"], "d_ff": 1024, "d_model": 16, "num_heads": 4, "num_layers": 2, "vocab_size": 250112}, "gpt2-large": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_layer": 36, "vocab_size": 50257}, "valhalla/t5-base-e2e-qg": 
{"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32102}, "sshleifer/tiny-gpt2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 2, "n_head": 2, "n_layer": 2, "vocab_size": 50257}, "fxmarty/tiny-llama-fast-tokenizer": {"architectures": ["LlamaForCausalLM"], "hidden_size": 16, "intermediate_size": 64, "num_attention_heads": 4, "num_hidden_layers": 2, "vocab_size": 32000}, "decapoda-research/llama-7b-hf": {"architectures": ["LLaMAForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "stabilityai/StableBeluga2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "syzymon/long_llama_3b": {"architectures": ["LongLlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "NousResearch/Llama-2-7b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/Llama-2-70B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "tiiuae/falcon-7b-instruct": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "google/flan-t5-xl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "meta-llama/Llama-2-13b-chat-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "petals-team/StableBeluga2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "meta-llama/Llama-2-70b-chat-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "EleutherAI/gpt-neox-20b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 6144, "intermediate_size": 24576, "num_attention_heads": 64, "num_hidden_layers": 44, "vocab_size": 50432}, "hf-internal-testing/tiny-random-GPTBigCodeForCausalLM": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 32, "n_head": 4, "n_inner": 37, "n_layer": 5, "vocab_size": 1024}, "nferruz/ProtGPT2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_inner": null, "n_layer": 36, "vocab_size": 50257}, "philschmid/flan-t5-xxl-sharded-fp16": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 10240, "d_model": 4096, "num_heads": 64, "num_layers": 24, "vocab_size": 32128}, "HuggingFaceM4/tiny-random-LlamaForCausalLM": {"architectures": ["LlamaForCausalLM"], "hidden_size": 16, "intermediate_size": 64, "num_attention_heads": 4, "num_hidden_layers": 2, "vocab_size": 32000}, "hf-internal-testing/tiny-random-BloomForCausalLM": {"architectures": ["BloomForCausalLM"], "hidden_size": 32, "n_head": 4, "n_layer": 5, "vocab_size": 1024}, "hf-internal-testing/tiny-random-GPT2LMHeadModel": {"architectures": ["GPT2LMHeadModel"], "n_embd": 32, "n_head": 4, "n_inner": 37, "n_layer": 5, "vocab_size": 1024}, "Vamsi/T5_Paraphrase_Paws": 
{"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "lmsys/vicuna-7b-v1.3": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "meta-llama/Llama-2-13b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "ybelkada/tiny-random-T5ForConditionalGeneration-calibrated": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 37, "d_model": 32, "num_heads": 4, "num_layers": 5, "vocab_size": 32100}, "prithivida/parrot_paraphraser_on_T5": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "hf-internal-testing/tiny-random-GPTBigCodeModel": {"architectures": ["GPTBigCodeModel"], "n_embd": 32, "n_head": 4, "n_inner": 37, "n_layer": 5, "vocab_size": 1024}, "hkunlp/instructor-large": {"architectures": ["T5EncoderModel"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "lmsys/vicuna-7b-v1.5": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "fabiochiu/t5-small-medium-title-generation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "TheBloke/Llama-2-7b-Chat-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "skt/kogpt2-base-v2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 51200}, "google/t5-v1_1-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "Maykeye/TinyLLama-v0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 64, "intermediate_size": 256, "num_attention_heads": 16, "num_hidden_layers": 8, "vocab_size": 32000}, "TheBloke/Llama-2-13B-chat-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "tiiuae/falcon-40b": {"architectures": ["RWForCausalLM"], "hidden_size": 8192, "n_head": 128, "n_layer": 60, "vocab_size": 65024}, "sonoisa/t5-base-japanese-title-generation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "Rostlab/prot_t5_xl_uniref50": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 16384, "d_model": 1024, "num_heads": 32, "num_layers": 24, "vocab_size": 128}, "TheBloke/vicuna-7B-v1.3-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "daryl149/llama-2-7b-chat-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "stabilityai/StableBeluga-7B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "meta-llama/Llama-2-70b-hf": 
{"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "TheBloke/MythoMax-L2-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "czurita/nsql-llama-2-7B-sharded-bf16-2GB": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "vennify/t5-base-grammar-correction": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "czearing/story-to-title": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "google/byt5-large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3840, "d_model": 1536, "num_heads": 16, "num_layers": 36, "vocab_size": 384}, "HuggingFaceH4/starchat-beta": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 40, "vocab_size": 49156}, "codellama/CodeLlama-34b-Instruct-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "openlm-research/open_llama_13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "optimum/t5-small": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "t5-3b": {"architectures": ["T5WithLMHeadModel"], "d_ff": 16384, "d_model": 1024, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "humarin/chatgpt_paraphraser_on_T5_base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "Gustavosta/MagicPrompt-Stable-Diffusion": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "bigscience/bloomz-7b1": {"architectures": ["BloomForCausalLM"], "n_inner": null, "n_layer": 30, "num_attention_heads": 32, "vocab_size": 250880}, "trl-internal-testing/tiny-random-GPTNeoXForCausalLM": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 32, "intermediate_size": 37, "num_attention_heads": 4, "num_hidden_layers": 5, "vocab_size": 1024}, "NousResearch/Yarn-Llama-2-7b-64k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "khhuang/zerofec-qa2claim-t5-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "h2oai/h2ogpt-gm-oasst1-en-2048-falcon-7b-v3": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "khhuang/zerofec-daqa-t5-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "declare-lab/flan-alpaca-gpt4-xl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "codellama/CodeLlama-7b-hf": {"architectures": 
["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32016}, "lmsys/vicuna-13b-v1.3": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Rostlab/prot_t5_xl_half_uniref50-enc": {"architectures": ["T5EncoderModel"], "d_ff": 16384, "d_model": 1024, "num_heads": 32, "num_layers": 24, "vocab_size": 128}, "google/mt5-small": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250112}, "Salesforce/safety-flan-t5-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "patrickvonplaten/t5-tiny-random": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 256, "d_model": 64, "num_heads": 2, "num_layers": 2, "vocab_size": 32128}, "google/flan-ul2": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 16384, "d_model": 4096, "num_heads": 16, "num_layers": 32, "vocab_size": 32128}, "EleutherAI/pythia-70m": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 512, "intermediate_size": 2048, "num_attention_heads": 8, "num_hidden_layers": 6, "vocab_size": 50304}, "bigscience/mt0-large": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 250112}, "stevhliu/my_awesome_billsum_model": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "EleutherAI/pythia-70m-deduped": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 512, "intermediate_size": 2048, "num_attention_heads": 8, "num_hidden_layers": 6, "vocab_size": 50304}, "lmsys/vicuna-13b-v1.5": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "PAIXAI/Astrid-1B": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 8, "num_hidden_layers": 16, "vocab_size": 50304}, "Phind/Phind-CodeLlama-34B-Python-v1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "pszemraj/flan-t5-large-grammar-synthesis": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "Voicelab/vlt5-base-keywords": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 50048}, "togethercomputer/Llama-2-7B-32K-Instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "huggyllama/llama-65b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "ai-forever/ruGPT-3.5-13B": {"architectures": ["GPT2LMHeadModel"], "n_embd": 5120, "n_head": 40, "n_inner": null, "n_layer": 40, "vocab_size": 50272}, "Einmalumdiewelt/T5-Base_GNAD": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "google/t5-v1_1-xl": {"architectures": 
["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "Austism/chronos-hermes-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "upstage/SOLAR-0-70b-16bit": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "bigscience/bloom-7b1": {"architectures": ["BloomForCausalLM"], "hidden_size": 4096, "n_head": 32, "n_inner": null, "n_layer": 30, "vocab_size": 250880}, "nlpai-lab/kullm-polyglot-12.8b-v2": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 30080}, "codellama/CodeLlama-13b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "hf-internal-testing/tiny-random-GPT2Model": {"architectures": ["GPT2Model"], "n_embd": 32, "n_head": 4, "n_inner": 37, "n_layer": 5, "vocab_size": 1024}, "Gryphe/MythoMax-L2-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "openlm-research/open_llama_3b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "TheBloke/Llama-2-70B-chat-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "trl-internal-testing/dummy-GPT2-correct-vocab": {"architectures": ["GPT2LMHeadModel"], "n_embd": 32, "n_head": 4, "n_inner": 37, "n_layer": 5, "vocab_size": 50257}, "charsiu/g2p_multilingual_byT5_small_100": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3584, "d_model": 1472, "num_heads": 6, "num_layers": 12, "vocab_size": 384}, "EleutherAI/pythia-160m": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 768, "intermediate_size": 3072, "num_attention_heads": 12, "num_hidden_layers": 12, "vocab_size": 50304}, "ElnaggarLab/ankh-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 48, "vocab_size": 144}, "trl-internal-testing/tiny-random-GPT2LMHeadModel": {"architectures": ["GPT2LMHeadModel"], "n_embd": 32, "n_head": 4, "n_inner": 37, "n_layer": 5, "vocab_size": 1024}, "openlm-research/open_llama_7b_v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/OpenAssistant-Llama2-13B-Orca-v2-8K-3166-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "codellama/CodeLlama-7b-Instruct-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32016}, "WizardLM/WizardCoder-Python-34B-V1.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32001}, "pszemraj/grammar-synthesis-small": {"architectures": 
["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 32128}, "TheBloke/llama-2-70b-Guanaco-QLoRA-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "openlm-research/open_llama_3b_v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "IDEA-CCNL/Wenzhong-GPT2-110M": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": 3072, "n_layer": 12, "vocab_size": 50304}, "microsoft/DialoGPT-small": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_layer": 12, "vocab_size": 50257}, "trl-internal-testing/tiny-random-BloomForCausalLM": {"architectures": ["BloomForCausalLM"], "hidden_size": 32, "n_head": 4, "n_layer": 5, "vocab_size": 1024}, "trl-internal-testing/tiny-random-T5ForConditionalGeneration": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 37, "d_model": 32, "num_heads": 4, "num_layers": 5, "vocab_size": 1302}, "hf-internal-testing/tiny-random-onnx-mt5": {"architectures": ["MT5Model"], "d_ff": 1024, "d_model": 16, "num_heads": 4, "num_layers": 2, "vocab_size": 250112}, "NousResearch/Llama-2-7b-chat-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "trl-internal-testing/tiny-random-MT5ForConditionalGeneration": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 256, "d_model": 64, "num_heads": 4, "num_layers": 8, "vocab_size": 5100}, "fxmarty/tiny-testing-gpt2-remote-code": {"architectures": ["GPT2CustomLMHeadModel"], "intermediate_size": 37, "n_embd": 32, "n_head": 4, "n_inner": null, "n_layer": 5, "vocab_size": 1000}, "castorini/monot5-base-msmarco-10k": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "microsoft/DialoGPT-large": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_layer": 36, "vocab_size": 50257}, "bigscience/bloomz-560m": {"architectures": ["BloomForCausalLM"], "n_inner": null, "n_layer": 24, "num_attention_heads": 16, "vocab_size": 250880}, "Open-Orca/OpenOrca-Platypus2-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "google/byt5-small": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3584, "d_model": 1472, "num_heads": 6, "num_layers": 12, "vocab_size": 384}, "bigscience/bloom-1b7": {"architectures": ["BloomForCausalLM"], "n_inner": null, "n_layer": 24, "num_attention_heads": 16, "vocab_size": 250880}, "elinas/chronos-13b-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "google/t5-efficient-tiny": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 256, "num_heads": 4, "num_layers": 4, "vocab_size": 32128}, "bigscience/bloom-1b1": {"architectures": ["BloomForCausalLM"], "n_inner": null, "n_layer": 24, "num_attention_heads": 16, "vocab_size": 250880}, "EleutherAI/polyglot-ko-1.3b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 
30080}, "bigscience/bloom-3b": {"architectures": ["BloomForCausalLM"], "n_inner": null, "n_layer": 30, "num_attention_heads": 32, "vocab_size": 250880}, "TinyPixel/Llama-2-7B-bf16-sharded": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "edumunozsala/llama-2-7b-int4-python-code-20k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "yahma/llama-7b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "beomi/KoAlpaca-Polyglot-12.8B": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 30003}, "stanfordnlp/backpack-gpt2": {"architectures": ["BackpackGPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50264}, "prithivida/grammar_error_correcter_v1": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "lvkaokao/llama2-7b-hf-chat-lora-v3": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "google/t5-v1_1-small": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 32128}, "TheBloke/gpt4-alpaca-lora_mlp-65B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "google/mt5-large": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 250112}, "EleutherAI/pythia-2.8b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50304}, "cyberagent/open-calm-7b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 52224}, "lvwerra/gpt2-imdb": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_layer": 12, "vocab_size": 50257}, "WizardLM/WizardLM-13B-V1.2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "KoboldAI/GPT-NeoX-20B-Erebus": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 6144, "intermediate_size": 24576, "num_attention_heads": 64, "num_hidden_layers": 44, "vocab_size": 50432}, "togethercomputer/RedPajama-INCITE-Instruct-3B-v1": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "aditi2222/automatic_title_generation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "shibing624/chinese-alpaca-plus-13b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 49954}, "optimum/gpt2": {"architectures": 
["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "togethercomputer/LLaMA-2-7B-32K": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "amazon/FalconLite": {"architectures": ["RWForCausalLM"], "hidden_size": 8192, "n_head": 128, "n_layer": 60, "vocab_size": 65040}, "EleutherAI/polyglot-ko-5.8b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 16, "num_hidden_layers": 28, "vocab_size": 30080}, "databricks/dolly-v2-7b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50280}, "snrspeaks/t5-one-line-summary": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "lmsys/vicuna-33b-v1.3": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "TheBloke/OpenOrca-Platypus2-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "TheBloke/Llama-2-13B-Chat-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "sdadas/mt5-base-translator-pl-en": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250100}, "TheBloke/Llama-2-7b-chat-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "bigcode/gpt_bigcode-santacoder": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 2048, "n_head": 16, "n_inner": 8192, "n_layer": 24, "vocab_size": 49280}, "h2oai/h2ogpt-gm-oasst1-en-2048-falcon-7b-v2": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "TheBloke/airoboros-l2-70B-GPT4-2.0-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "lmsys/vicuna-13b-v1.5-16k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "bigcode/santacoder": {"architectures": ["GPT2LMHeadCustomModel"], "n_embd": 2048, "n_head": 16, "n_inner": 8192, "n_layer": 24, "vocab_size": 49280}, "togethercomputer/RedPajama-INCITE-Chat-3B-v1": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "ai-forever/mGPT": {"architectures": ["GPT2LMHeadModel"], "n_embd": 2048, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 100000}, "openlm-research/open_llama_7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "decapoda-research/llama-13b-hf": {"architectures": ["LLaMAForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, 
"vocab_size": 32000}, "OpenAssistant/codellama-13b-oasst-sft-v10": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32032}, "rinna/bilingual-gpt-neox-4b-instruction-sft": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2816, "intermediate_size": 11264, "num_attention_heads": 22, "num_hidden_layers": 36, "vocab_size": 65536}, "KoboldAI/LLaMA2-13B-Holomax-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "MBZUAI/LaMini-T5-61M": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "google/t5-v1_1-xxl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 10240, "d_model": 4096, "num_heads": 64, "num_layers": 24, "vocab_size": 32128}, "EleutherAI/pythia-1.4b-deduped": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50304}, "stabilityai/StableBeluga-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "tiiuae/falcon-rw-1b": {"architectures": ["FalconForCausalLM"], "hidden_size": 2048, "num_attention_heads": 32, "num_hidden_layers": 24, "vocab_size": 50304}, "ClueAI/ChatYuan-large-v2": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "af1tang/personaGPT": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50263}, "google/t5-large-lm-adapt": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "vilsonrodrigues/falcon-7b-instruct-sharded": {"architectures": ["FalconForCausalLM"], "hidden_size": 4544, "num_attention_heads": 71, "num_hidden_layers": 32, "vocab_size": 65024}, "petals-team/falcon-rw-1b": {"architectures": ["FalconForCausalLM"], "hidden_size": 2048, "num_attention_heads": 32, "num_hidden_layers": 24, "vocab_size": 50304}, "bigscience/T0_3B": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "TheTravellingEngineer/llama2-7b-hf-guanaco": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Salesforce/codet5-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "EleutherAI/pythia-2.8b-deduped": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50304}, "The-Face-Of-Goonery/Huginn-13b-v1.2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "FredZhang7/distilgpt2-stable-diffusion-v2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 6, "vocab_size": 50257}, "WizardLM/WizardCoder-15B-V1.0": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 6144, "n_head": 48, "n_inner": 
24576, "n_layer": 40, "vocab_size": 49153}, "EleutherAI/pythia-410m-deduped": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 1024, "intermediate_size": 4096, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50304}, "huggyllama/llama-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "ybelkada/falcon-7b-sharded-bf16": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "MingZhong/unieval-sum": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32100}, "NousResearch/Nous-Hermes-Llama2-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32032}, "csebuetnlp/mT5_multilingual_XLSum": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "hkunlp/instructor-xl": {"architectures": ["T5EncoderModel"], "d_ff": 16384, "d_model": 1024, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "h2oai/h2ogpt-4096-llama2-13b-chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "HuggingFaceH4/tiny-random-LlamaForCausalLM": {"architectures": ["LlamaForCausalLM"], "hidden_size": 16, "intermediate_size": 64, "num_attention_heads": 4, "num_hidden_layers": 2, "vocab_size": 32000}, "EleutherAI/polyglot-ko-12.8b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 30080}, "databricks/dolly-v2-12b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 36, "vocab_size": 50280}, "mrm8488/t5-base-finetuned-span-sentiment-extraction": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "WizardLM/WizardLM-70B-V1.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32001}, "codellama/CodeLlama-34b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "Salesforce/codet5-base-multi-sum": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "MBZUAI/LaMini-T5-738M": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "codellama/CodeLlama-13b-Instruct-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "h2oai/h2ogpt-4096-llama2-7b-chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, 
"num_hidden_layers": 36, "vocab_size": 50288}, "bigscience/bloom": {"architectures": ["BloomForCausalLM"], "n_layer": 70, "num_attention_heads": 112, "vocab_size": 250880}, "TigerResearch/tigerbot-13b-chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 60928}, "TheBloke/airoboros-l2-70B-gpt4-1.4.1-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "Austism/chronos-hermes-13b-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32032}, "snrspeaks/KeyPhraseTransformer": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "Rocketknight1/tiny-random-falcon-7b": {"architectures": ["FalconForCausalLM"], "hidden_size": 1136, "num_attention_heads": 71, "num_hidden_layers": 2, "vocab_size": 65024}, "hf-internal-testing/tiny-random-T5Model": {"architectures": ["T5Model"], "d_ff": 37, "d_model": 32, "num_heads": 4, "num_layers": 5, "vocab_size": 32100}, "sambanovasystems/BLOOMChat-176B-v1": {"architectures": ["BloomForCausalLM"], "hidden_size": 14336, "n_head": 112, "n_layer": 70, "vocab_size": 250880}, "huggyllama/llama-30b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "elyza/ELYZA-japanese-Llama-2-7b-instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "lcw99/t5-base-korean-text-summary": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 50358}, "it5/it5-base-news-summarization": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32103}, "uer/gpt2-chinese-cluecorpussmall": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 21128}, "t5-11b": {"architectures": ["T5WithLMHeadModel"], "d_ff": 65536, "d_model": 1024, "num_heads": 128, "num_layers": 24, "vocab_size": 32128}, "KoboldAI/LLaMA2-13B-Holomax": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/Llama-2-7B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "bigscience/bloomz-3b": {"architectures": ["BloomForCausalLM"], "n_inner": null, "n_layer": 30, "num_attention_heads": 32, "vocab_size": 250880}, "lmsys/vicuna-7b-v1.5-16k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "sonoisa/t5-base-japanese": {"architectures": ["T5Model"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "line-corporation/japanese-large-lm-3.6b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 3072, "intermediate_size": 12288, "num_attention_heads": 32, "num_hidden_layers": 30, "vocab_size": 51200}, 
"TheBloke/Llama-2-7B-32K-Instruct-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "EleutherAI/pythia-410m": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 1024, "intermediate_size": 4096, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50304}, "NousResearch/Llama-2-70b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "TheBloke/falcon-7b-instruct-GPTQ": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "eachadea/vicuna-13b-1.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "beomi/llama-2-ko-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 46336}, "TheBloke/falcon-40b-instruct-GPTQ": {"architectures": ["RWForCausalLM"], "hidden_size": 8192, "n_head": 128, "n_layer": 60, "vocab_size": 65024}, "TheBloke/Llama-2-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "garage-bAInd/Platypus2-70B-instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "rajkumarrrk/gpt2-fine-tuned-on-imdb-positive-reviews": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "cerebras/Cerebras-GPT-13B": {"architectures": ["GPT2Model"], "n_embd": 5120, "n_head": 40, "n_inner": 20480, "n_layer": 40, "vocab_size": 50257}, "rinna/japanese-gpt2-medium": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": 4096, "n_layer": 24, "vocab_size": 32000}, "bigscience/T0pp": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 10240, "d_model": 4096, "num_heads": 64, "num_layers": 24, "vocab_size": 32128}, "Phind/Phind-CodeLlama-34B-v1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "beomi/kykim-gpt3-kor-small_based_on_gpt2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 42000}, "Pi3141/DialoGPT-medium-elon-3": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "EleutherAI/pythia-1b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 8, "num_hidden_layers": 16, "vocab_size": 50304}, "ai-forever/rugpt3large_based_on_gpt2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1536, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "jondurbin/airoboros-l2-13b-gpt4-m2.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "codellama/CodeLlama-13b-Python-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 
32000}, "AUTOMATIC/promptgen-lexart": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 6, "vocab_size": 50257}, "Salesforce/codet5-small": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32100}, "h2oai/h2ogpt-oig-oasst1-512-6_9b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "rinna/japanese-gpt-neox-3.6b-instruction-ppo": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2816, "intermediate_size": 11264, "num_attention_heads": 22, "num_hidden_layers": 36, "vocab_size": 32000}, "prithivida/informal_to_formal_styletransfer": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "TheBloke/Wizard-Vicuna-13B-Uncensored-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "matsuo-lab/weblab-10b-instruction-sft": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4864, "intermediate_size": 19456, "num_attention_heads": 38, "num_hidden_layers": 36, "vocab_size": 50277}, "succinctly/text2image-prompt-generator": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "TheBloke/Llama-2-7B-Chat-GGML": {}, "TheBloke/Llama-2-70B-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "sentence-transformers/gtr-t5-large": {"architectures": ["T5EncoderModel"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "h2oai/h2ogpt-gm-oasst1-en-2048-falcon-40b-v2": {"architectures": ["RWForCausalLM"], "hidden_size": 8192, "n_head": 128, "n_layer": 60, "vocab_size": 65024}, "togethercomputer/RedPajama-INCITE-Base-3B-v1": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "rinna/bilingual-gpt-neox-4b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2816, "intermediate_size": 11264, "num_attention_heads": 22, "num_hidden_layers": 36, "vocab_size": 65536}, "TheBloke/Llama-2-13B-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "pankajmathur/orca_mini_v3_70b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "OpenAssistant/llama2-13b-orca-8k-3319": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "TheBloke/StableBeluga-7B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "defog/sqlcoder": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 40, "vocab_size": 49152}, "WizardLM/WizardCoder-Python-13B-V1.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, 
"num_hidden_layers": 40, "vocab_size": 32001}, "stabilityai/stablelm-tuned-alpha-3b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 32, "num_hidden_layers": 16, "vocab_size": 50688}, "cyberagent/open-calm-small": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 768, "intermediate_size": 3072, "num_attention_heads": 12, "num_hidden_layers": 12, "vocab_size": 52096}, "TheBloke/Wizard-Vicuna-30B-Uncensored-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "TheBloke/WizardLM-70B-V1.0-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32001}, "bigscience/bigscience-small-testing": {"architectures": ["BloomModel"], "hidden_size": 64, "n_head": 8, "n_inner": null, "n_layer": 2, "vocab_size": 250880}, "cyberagent/open-calm-1b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 52096}, "lamini/lamini_docs_finetuned": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 512, "intermediate_size": 2048, "num_attention_heads": 8, "num_hidden_layers": 6, "vocab_size": 50304}, "EnglishVoice/t5-base-uk-to-us-english": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "codellama/CodeLlama-7b-Python-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/WizardLM-13B-V1.2-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "EleutherAI/pythia-160m-deduped": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 768, "intermediate_size": 3072, "num_attention_heads": 12, "num_hidden_layers": 12, "vocab_size": 50304}, "jphme/Llama-2-13b-chat-german": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "EleutherAI/pythia-12b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 36, "vocab_size": 50688}, "TheBloke/WizardLM-1.0-Uncensored-Llama2-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Salesforce/codet5p-220m": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "google/mt5-xl": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 250112}, "cerebras/Cerebras-GPT-111M": {"n_inner": 3072, "n_embd": 768, "n_head": 12, "n_layer": 10, "vocab_size": 50257}, "google/t5-v1_1-large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "TheBloke/vicuna-7B-v1.5-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 
32000}, "chavinlo/alpaca-native": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "kimnt93/kmv-7b-03": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "NumbersStation/nsql-llama-2-7B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "cerebras/Cerebras-GPT-1.3B": {"n_inner": 8192, "n_embd": 2048, "n_head": 16, "n_layer": 24, "vocab_size": 50257}, "trl-internal-testing/tiny-T5ForConditionalGeneration-correct-vocab": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 37, "d_model": 32, "num_heads": 4, "num_layers": 5, "vocab_size": 32128}, "akreal/tiny-random-LlamaForCausalLM": {"architectures": ["LlamaForCausalLM"], "hidden_size": 16, "intermediate_size": 64, "num_attention_heads": 4, "num_hidden_layers": 2, "vocab_size": 32000}, "akreal/tiny-random-BloomForCausalLM": {"architectures": ["BloomForCausalLM"], "hidden_size": 32, "n_head": 4, "n_layer": 5, "vocab_size": 1024}, "NousResearch/Nous-Hermes-llama-2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "ai-forever/rugpt3small_based_on_gpt2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50264}, "VMware/open-llama-7b-v2-open-instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "robertmyers/targon-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "TheBloke/Nous-Hermes-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "TheBloke/WizardLM-33B-V1.0-Uncensored-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "TheBloke/WizardLM-7B-uncensored-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "ramsrigouthamg/t5_paraphraser": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "tinkoff-ai/ruDialoGPT-medium": {"n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50261}, "OpenAssistant/falcon-7b-sft-mix-2000": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65040}, "bigcode/tiny_starcoder_py": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 768, "n_head": 12, "n_inner": 3072, "n_layer": 20, "vocab_size": 49152}, "rinna/japanese-gpt-1b": {"architectures": ["GPT2LMHeadModel"], "n_embd": 2048, "n_head": 16, "n_inner": 8192, "n_layer": 24, "vocab_size": 44928}, "TheBloke/orca_mini_v3_70B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, 
"UBC-NLP/turjuman": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 110080}, "h2oai/h2ogpt-4096-llama2-70b-chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "Phind/Phind-CodeLlama-34B-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "elyza/ELYZA-japanese-Llama-2-7b-fast-instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 45043}, "iarfmoose/t5-base-question-generator": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32102}, "TheBloke/Llama-2-7B-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "mrm8488/t5-base-finetuned-emotion": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "hkunlp/instructor-base": {"architectures": ["T5EncoderModel"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "fxmarty/onnx-tiny-random-gpt2-without-merge": {"architectures": ["GPT2Model"], "n_embd": 32, "n_head": 4, "n_inner": 37, "n_layer": 5, "vocab_size": 1024}, "fxmarty/onnx-tiny-random-gpt2-with-merge": {"architectures": ["GPT2Model"], "n_embd": 32, "n_head": 4, "n_inner": 37, "n_layer": 5, "vocab_size": 1024}, "microsoft/GODEL-v1_1-large-seq2seq": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32102}, "rinna/japanese-gpt-neox-3.6b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2816, "intermediate_size": 11264, "num_attention_heads": 22, "num_hidden_layers": 36, "vocab_size": 32000}, "cyberagent/open-calm-3b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 52224}, "eachadea/vicuna-7b-1.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "beomi/KoAlpaca-Polyglot-5.8B": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 16, "num_hidden_layers": 28, "vocab_size": 30080}, "grammarly/coedit-large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32100}, "TheBloke/Platypus2-70B-Instruct-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "codellama/CodeLlama-34b-Python-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "noamwies/llama-test-gqa-with-better-transformer": {"architectures": ["LlamaForCausalLM"], "hidden_size": 128, "intermediate_size": 344, "num_attention_heads": 8, "num_hidden_layers": 2, "vocab_size": 2000}, "bigscience/bloomz-7b1-mt": {"architectures": 
["BloomForCausalLM"], "n_inner": null, "n_layer": 30, "num_attention_heads": 32, "vocab_size": 250880}, "Salesforce/codet5p-770m": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32100}, "OpenAssistant/pythia-12b-sft-v8-7k-steps": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 36, "vocab_size": 50288}, "augtoma/qCammel-70-x": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "NousResearch/Llama-2-13b-chat-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "plguillou/t5-base-fr-sum-cnndm": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32102}, "WeOpenML/PandaLM-7B-v1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "VMware/open-llama-7b-open-instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "pankajmathur/orca_mini_v3_7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "google/t5-xl-lm-adapt": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "LinkSoul/Chinese-Llama-2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "line-corporation/japanese-large-lm-3.6b-instruction-sft": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 3072, "intermediate_size": 12288, "num_attention_heads": 32, "num_hidden_layers": 30, "vocab_size": 51200}, "OpenAssistant/oasst-sft-1-pythia-12b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 36, "vocab_size": 50288}, "ehartford/WizardLM-7B-Uncensored": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "upstage/llama-30b-instruct-2048": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "cyberagent/open-calm-large": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 1536, "intermediate_size": 6144, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 52096}, "Gryphe/MythoLogic-L2-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "eenzeenee/t5-small-korean-summarization": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 50358}, "google/t5-xxl-lm-adapt": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 10240, "d_model": 4096, "num_heads": 64, "num_layers": 24, "vocab_size": 32128}, 
"mywateriswet/ShuanBot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "hf-internal-testing/tiny-random-bloom": {"architectures": ["BloomModel"], "hidden_size": 64, "n_head": 8, "n_inner": null, "n_layer": 2, "vocab_size": 250880}, "TheBloke/Llama-2-13B-chat-GGML": {}, "decapoda-research/llama-30b-hf": {"architectures": ["LLaMAForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "lmsys/longchat-7b-v1.5-32k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "ziqingyang/chinese-alpaca-2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 55296}, "nlpai-lab/kullm-polyglot-5.8b-v2": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 16, "num_hidden_layers": 28, "vocab_size": 30080}, "google/byt5-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3968, "d_model": 1536, "num_heads": 12, "num_layers": 18, "vocab_size": 384}, "stabilityai/stablelm-tuned-alpha-7b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 6144, "intermediate_size": 24576, "num_attention_heads": 48, "num_hidden_layers": 16, "vocab_size": 50432}, "PygmalionAI/pygmalion-1.3b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50304}, "stanford-crfm/BioMedLM": {"architectures": ["GPT2LMHeadModel"], "n_embd": 2560, "n_head": 20, "n_inner": null, "n_layer": 32, "vocab_size": 28896}, "PY007/TinyLlama-1.1B-step-50K-105b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 2048, "intermediate_size": 5632, "num_attention_heads": 32, "num_hidden_layers": 22, "vocab_size": 32000}, "georgesung/llama2_7b_chat_uncensored": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "bigscience/mt0-small": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250112}, "TheBloke/WizardCoder-15B-1.0-GPTQ": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 40, "vocab_size": 49153}, "google/t5-base-lm-adapt": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "OpenAssistant/falcon-40b-sft-top1-560": {"architectures": ["RWForCausalLM"], "hidden_size": 8192, "n_head": 128, "n_layer": 60, "vocab_size": 65040}, "TheBloke/WizardLM-30B-Uncensored-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32001}, "TheBloke/WizardCoder-Python-34B-V1.0-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32001}, "garage-bAInd/Camel-Platypus2-70B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "DeepFloyd/t5-v1_1-xxl": {"architectures": 
["T5EncoderModel"], "d_ff": 10240, "d_model": 4096, "num_heads": 64, "num_layers": 24, "vocab_size": 32128}, "EleutherAI/pythia-1b-deduped": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 8, "num_hidden_layers": 16, "vocab_size": 50304}, "TheBloke/CodeLlama-7B-Instruct-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32016}, "kfkas/Llama-2-ko-7b-Chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 46336}, "valhalla/t5-small-qa-qg-hl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32102}, "FlagAlpha/Llama2-Chinese-13b-Chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Open-Orca/OpenOrcaxOpenChat-Preview2-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "trl-internal-testing/tiny-random-LlamaForCausalLM": {"architectures": ["LlamaForCausalLM"], "hidden_size": 16, "intermediate_size": 64, "num_attention_heads": 4, "num_hidden_layers": 2, "vocab_size": 32000}, "abhishek/llama-2-7b-hf-small-shards": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "togethercomputer/RedPajama-INCITE-7B-Base": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "Salesforce/codegen25-7b-multi": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 51200}, "fabiochiu/t5-base-tag-generation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "MBZUAI/LaMini-Flan-T5-248M": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "bigscience/bloomz-1b7": {"architectures": ["BloomForCausalLM"], "n_inner": null, "n_layer": 24, "num_attention_heads": 16, "vocab_size": 250880}, "valhalla/t5-base-qg-hl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32101}, "Wi/gptp": {"architectures": ["GPTPModel"], "n_embd": 128, "n_head": 4, "n_inner": null, "n_layer": 4, "vocab_size": 1000}, "medalpaca/medalpaca-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "yentinglin/Taiwan-LLaMa-v1.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "rinna/japanese-gpt-neox-small": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 768, "intermediate_size": 3072, "num_attention_heads": 12, "num_hidden_layers": 12, "vocab_size": 44416}, "TheBloke/llama2_7b_chat_uncensored-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, 
"intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "EleutherAI/pythia-1.4b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50304}, "daryl149/llama-2-13b-chat-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "flax-community/gpt-2-spanish": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "KoboldAI/LLAMA2-13B-Holodeck-1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "jondurbin/airoboros-13b-gpt4-1.4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "mrm8488/t5-base-finetuned-question-generation-ap": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "OpenBuddy/openbuddy-llama2-13b-v8.1-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 37632}, "EleutherAI/pythia-6.9b-deduped": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "tscholak/3vnuv1vf": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32102}, "OpenAssistant/llama2-70b-oasst-sft-v10": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32007}, "TheBloke/vicuna-13B-v1.5-16K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "OpenAssistant/falcon-7b-sft-top1-696": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65040}, "sentence-transformers/sentence-t5-large": {"architectures": ["T5EncoderModel"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "TheBloke/Nous-Hermes-Llama2-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32032}, "mesolitica/finetune-translation-t5-super-super-tiny-standard-bahasa-cased": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 512, "d_model": 128, "num_heads": 6, "num_layers": 2, "vocab_size": 32100}, "Henk717/spring-dragon": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "openchat/openchat_v3.2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "WizardLM/WizardMath-70B-V1.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32002}, "potsawee/t5-large-generation-squad-QuestionAnswer": 
{"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "TheBloke/Phind-CodeLlama-34B-v2-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "pankajmathur/orca_mini_3b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "fffrrt/ruGPT-3.5-13B-GPTQ": {"architectures": ["GPT2LMHeadModel"], "n_embd": 5120, "n_head": 40, "n_inner": null, "n_layer": 40, "vocab_size": 50272}, "kykim/gpt3-kor-small_based_on_gpt2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 42000}, "PAIXAI/Astrid-1B-CPU": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 8, "num_hidden_layers": 16, "vocab_size": 50304}, "ElnaggarLab/ankh-large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3840, "d_model": 1536, "num_heads": 16, "num_layers": 48, "vocab_size": 144}, "togethercomputer/RedPajama-INCITE-7B-Chat": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "ramsrigouthamg/t5_squad_v1": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "KETI-AIR/ke-t5-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 64128}, "sentence-transformers/gtr-t5-base": {"architectures": ["T5EncoderModel"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "ramsrigouthamg/t5-large-paraphraser-diverse-high-quality": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "rinna/japanese-gpt2-small": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": 3072, "n_layer": 12, "vocab_size": 32000}, "rinna/bilingual-gpt-neox-4b-instruction-ppo": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2816, "intermediate_size": 11264, "num_attention_heads": 22, "num_hidden_layers": 36, "vocab_size": 65536}, "ramsrigouthamg/t5_boolean_questions": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "philschmid/flan-t5-base-samsum": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "google/t5-small-lm-adapt": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 32128}, "matsuo-lab/weblab-10b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4864, "intermediate_size": 19456, "num_attention_heads": 38, "num_hidden_layers": 36, "vocab_size": 50304}, "stabilityai/stablecode-completion-alpha-3b-4k": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 49152}, "IDEA-CCNL/Ziya-LLaMA-7B-Reward": {"architectures": ["LlamaRewardModel"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, 
"vocab_size": 32001}, "ichitaka/falcon-40b-instruct-8bit": {"architectures": ["RWForCausalLM"], "hidden_size": 8192, "n_head": 128, "n_layer": 60, "vocab_size": 65024}, "TheBloke/WizardCoder-Python-13B-V1.0-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "togethercomputer/Pythia-Chat-Base-7B": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "TheBloke/wizardLM-7B-HF": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "AUTOMATIC/promptgen-majinai-unsafe": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 6, "vocab_size": 50257}, "pinkmanlove/llama-7b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "lmsys/longchat-13b-16k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "togethercomputer/RedPajama-INCITE-7B-Instruct": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "lmsys/vicuna-13b-v1.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Salesforce/codet5-large": {"architectures": ["T5WithLMHeadModel"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32100}, "FredZhang7/anime-anything-promptgen-v2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 6, "vocab_size": 50257}, "Salesforce/xgen-7b-8k-inst": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 51200}, "jojo0217/step3_mk7": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 30008}, "EleutherAI/pythia-14m": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 128, "intermediate_size": 512, "num_attention_heads": 4, "num_hidden_layers": 6, "vocab_size": 50304}, "cerebras/Cerebras-GPT-590M": {"n_inner": 6144, "n_embd": 1536, "n_head": 12, "n_layer": 18, "vocab_size": 50257}, "dbmdz/german-gpt2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50265}, "KoboldAI/GPT-NeoX-20B-Skein": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 6144, "intermediate_size": 24576, "num_attention_heads": 64, "num_hidden_layers": 44, "vocab_size": 50432}, "beomi/polyglot-ko-12.8b-safetensors": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 30080}, "sentence-transformers/sentence-t5-base": {"architectures": ["T5EncoderModel"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "decapoda-research/llama-65b-hf": {"architectures": ["LLaMAForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, 
"num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "mesolitica/finetune-translation-t5-small-standard-bahasa-cased": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32100}, "marcsun13/bloom-1b7_with_lm_head": {"architectures": ["BloomForCausalLM"], "hidden_size": 2048, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 250880}, "MBZUAI/LaMini-Flan-T5-783M": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "medalpaca/medalpaca-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "JulesBelveze/t5-small-headline-generator": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32100}, "Michau/t5-base-en-generate-headline": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "TheBloke/Falcon-180B-Chat-GPTQ": {"architectures": ["FalconForCausalLM"], "hidden_size": 14848, "num_attention_heads": 232, "num_hidden_layers": 80, "vocab_size": 65024}, "Salesforce/xgen-7b-8k-base": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 51200}, "ai-forever/ruT5-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "KRAFTON/KORani-v3-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "bigscience/mt0-xxl-mt": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 10240, "d_model": 4096, "num_heads": 64, "num_layers": 24, "vocab_size": 250112}, "garage-bAInd/Stable-Platypus2-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/Wizard-Vicuna-13B-Uncensored-HF": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "h2oai/h2ogpt-oasst1-512-12b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 36, "vocab_size": 50688}, "Parth/result": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "declare-lab/flan-alpaca-large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "sdadas/mt5-base-translator-en-pl": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250100}, "ziqingyang/chinese-llama-2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 55296}, "NousResearch/Nous-Hermes-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, 
"pragmatic-programs/listener-suffix-idx-300k": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3584, "d_model": 1472, "num_heads": 6, "num_layers": 12, "vocab_size": 384}, "jinaai/jina-embedding-l-en-v1": {"architectures": ["T5EncoderModel"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "stabilityai/stablelm-base-alpha-3b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 32, "num_hidden_layers": 16, "vocab_size": 50688}, "razent/SciFive-base-Pubmed_PMC": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "uer/gpt2-chinese-poem": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 22557}, "openchat/openchat_v3.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "IDEA-CCNL/Ziya-LLaMA-13B-v1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 39424}, "Sao10K/Mythical-Destroyer-V2-L2-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "juierror/text-to-sql-with-table-schema": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "MingZhong/unieval-fact": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32100}, "TheBloke/vicuna-13B-v1.5-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "cerebras/Cerebras-GPT-256M": {"n_inner": 4352, "n_embd": 1088, "n_head": 17, "n_layer": 14, "vocab_size": 50257}, "declare-lab/flan-alpaca-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "ehartford/WizardLM-1.0-Uncensored-Llama2-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "aubmindlab/aragpt2-base": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 64000}, "valhalla/t5-small-e2e-qg": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32102}, "elinas/llama-7b-hf-transformers-4.29": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "lmsys/vicuna-13b-delta-v1.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "garage-bAInd/Platypus2-7B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "PKU-Alignment/beaver-7b-v1.0-cost": {"architectures": ["LlamaModelForScore"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, 
"num_hidden_layers": 32, "vocab_size": 32001}, "allenai/unifiedqa-t5-large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "JackFram/llama-160m": {"architectures": ["LlamaForCausalLM"], "hidden_size": 768, "intermediate_size": 3072, "num_attention_heads": 12, "num_hidden_layers": 12, "vocab_size": 32000}, "daryl149/llama-2-7b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "akreal/tiny-random-t5": {"d_ff": 37, "d_model": 32, "num_heads": 4, "num_layers": 5, "vocab_size": 99}, "cyberagent/open-calm-medium": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 1024, "intermediate_size": 4096, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 52096}, "The-Face-Of-Goonery/Huginn-13b-FP16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "facebook/tart-full-flan-t5-xl": {"architectures": ["EncT5ForSequenceClassification"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "csebuetnlp/banglat5_banglaparaphrase": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "FlagAlpha/Llama2-Chinese-7b-Chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "jerryjalapeno/Llama-2-1b-0-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 32000}, "NousResearch/Redmond-Puffin-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32032}, "bigscience/bloomz": {"architectures": ["BloomForCausalLM"], "n_layer": 70, "num_attention_heads": 112, "vocab_size": 250880}, "allenai/unifiedqa-t5-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "WizardLM/WizardMath-7B-V1.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "pragmatic-programs/speaker-prefix-idx-300k": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3584, "d_model": 1472, "num_heads": 6, "num_layers": 12, "vocab_size": 384}, "TheBloke/CodeLlama-13B-Instruct-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "TheBloke/Upstage-Llama-2-70B-instruct-v2-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "pinkmanlove/llama-13b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "VietAI/envit5-translation": 
{"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 50048}, "cerebras/Cerebras-GPT-2.7B": {"n_inner": 10240, "n_embd": 2560, "n_head": 32, "n_layer": 32, "vocab_size": 50257}, "Open-Orca/LlongOrca-7B-16k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32003}, "hf-internal-testing/tiny-random-T5ForConditionalGeneration": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 37, "d_model": 32, "num_heads": 4, "num_layers": 5, "vocab_size": 32100}, "juierror/flan-t5-text2sql-with-schema-v2": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32101}, "BeIR/query-gen-msmarco-t5-base-v1": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "conceptofmind/LLongMA-2-13b-16k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "NousResearch/Yarn-Llama-2-13b-128k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "wangrongsheng/MiniGPT-4-LLaMA": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "hf-internal-testing/tiny-random-GPT2ForSequenceClassification": {"architectures": ["GPT2ForSequenceClassification"], "n_embd": 32, "n_head": 4, "n_inner": 37, "n_layer": 5, "vocab_size": 1024}, "zenham/wail_m_e4_16h_2k": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "h2oai/h2ogpt-4096-llama2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "ai-forever/FRED-T5-large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 50364}, "FreedomIntelligence/phoenix-inst-chat-7b": {"architectures": ["BloomForCausalLM"], "hidden_size": 4096, "n_head": 32, "n_inner": null, "n_layer": 30, "vocab_size": 250880}, "castorini/monot5-base-msmarco": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "minlik/chinese-alpaca-plus-7b-merged": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 49954}, "joaogante/tiny-random-gpt2-with-generation-config": {"intermediate_size": 37, "n_embd": 32, "n_head": 4, "n_inner": null, "n_layer": 5, "vocab_size": 1000}, "neulab/gpt2-finetuned-wikitext103": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "jarradh/llama2_70b_chat_uncensored": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "TigerResearch/tigerbot-13b-base": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 
60928}, "rinna/japanese-gpt-neox-3.6b-instruction-sft-v2": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2816, "intermediate_size": 11264, "num_attention_heads": 22, "num_hidden_layers": 36, "vocab_size": 32000}, "bofenghuang/vigogne-2-7b-chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/stable-vicuna-13B-HF": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "aiplanet/effi-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "jondurbin/airoboros-33b-gpt4-m2.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "TheBloke/orca_mini_v3_13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "HuggingFaceH4/starchat-alpha": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 40, "vocab_size": 49156}, "WizardLM/WizardMath-13B-V1.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "upstage/Llama-2-70b-instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "anushehchaudry/llama-2-tiny-random": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8, "intermediate_size": 32, "num_attention_heads": 2, "num_hidden_layers": 1, "vocab_size": 32000}, "fangloveskari/ORCA_LLaMA_70B_QLoRA": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "HyperbeeAI/Tulpar-7b-v0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/Llama-2-70B-Chat-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "csebuetnlp/mT5_m2m_crossSum_enhanced": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "TheBloke/Genz-70b-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "cerebras/Cerebras-GPT-6.7B": {"n_embd": 4096, "vocab_size": 50257, "n_layer": 32, "n_head": 32, "n_inner": 16384}, "ziqingyang/chinese-alpaca-2-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 55296}, "google/t5-small-ssm-nq": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 32128}, "EleutherAI/polyglot-ko-3.8b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 3072, "intermediate_size": 12288, "num_attention_heads": 24, "num_hidden_layers": 32, 
"vocab_size": 30080}, "kashif/stack-llama-2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "line-corporation/japanese-large-lm-1.7b": {"vocab_size": 51200, "n_embd": 2304, "n_layer": 24, "n_head": 24, "n_inner": 9216, "architectures": ["GPT2LMHeadModel"]}, "microsoft/codereviewer": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32216}, "TheBloke/guanaco-7B-HF": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "circulus/Llama-2-7b-orca-v1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "FlagAlpha/Atom-7B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 65000}, "Tap-M/Luna-AI-Llama2-Uncensored": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "K024/mt5-zh-ja-en-trimmed": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 85292}, "deep-learning-analytics/automatic-title-generation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "luodian/llama-7b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "stabilityai/stablelm-base-alpha-7b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 6144, "intermediate_size": 24576, "num_attention_heads": 48, "num_hidden_layers": 16, "vocab_size": 50432}, "OpenLemur/lemur-70b-chat-v1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32005}, "rahular/varta-t5": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 128128}, "rinna/japanese-gpt-neox-3.6b-instruction-sft": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2816, "intermediate_size": 11264, "num_attention_heads": 22, "num_hidden_layers": 36, "vocab_size": 32000}, "garage-bAInd/Platypus-30B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "WizardLM/WizardCoder-Python-7B-V1.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "chavinlo/gpt4-x-alpaca": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "sentence-transformers/gtr-t5-xl": {"architectures": ["T5EncoderModel"], "d_ff": 16384, "d_model": 1024, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "wangrongsheng/MiniGPT-4-LLaMA-7B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 
32001}, "EleutherAI/pythia-12b-deduped": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 36, "vocab_size": 50688}, "unicamp-dl/translation-pt-en-t5": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "bigscience/mt0-base": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "Pirr/pythia-13b-deduped-green_devil": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 36, "vocab_size": 50277}, "trl-internal-testing/tiny-random-GPT2Model": {"architectures": ["GPT2Model"], "n_embd": 32, "n_head": 4, "n_inner": 37, "n_layer": 5, "vocab_size": 1024}, "MBZUAI/LaMini-GPT-1.5B": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1600, "n_head": 25, "n_inner": null, "n_layer": 48, "vocab_size": 50258}, "Universal-NER/UniNER-7B-all": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/koala-13B-HF": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Rostlab/prot_t5_xl_bfd": {"architectures": ["T5WithLMHeadModel"], "d_ff": 16384, "d_model": 1024, "num_heads": 32, "num_layers": 24, "vocab_size": 128}, "Voicelab/trurl-2-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "explosion-testing/llama2-kv-sharing": {"architectures": ["LlamaForCausalLM"], "hidden_size": 256, "intermediate_size": 512, "num_attention_heads": 4, "num_hidden_layers": 5, "vocab_size": 1024}, "inpars/monot5-3b-inpars-v2-nq-promptagator": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 16384, "d_model": 1024, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "upstage/llama-65b-instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "microsoft/CodeGPT-small-py": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_layer": 12, "vocab_size": 50001}, "VietAI/vit5-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 36096}, "TheBloke/CodeUp-Llama-2-13B-Chat-HF-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/CodeLlama-34B-Instruct-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "elyza/ELYZA-japanese-Llama-2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "jondurbin/airoboros-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/CodeLlama-7B-Python-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 
11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "FlagAlpha/Llama2-Chinese-13b-Chat-4bit": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/CodeLlama-13B-Python-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Enoch/llama-65b-hf": {"architectures": ["LLaMAForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "chargoddard/platypus-2-22b-relora": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 40, "vocab_size": 32000}, "togethercomputer/GPT-NeoXT-Chat-Base-20B": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 6144, "intermediate_size": 24576, "num_attention_heads": 64, "num_hidden_layers": 44, "vocab_size": 50432}, "porkorbeef/Llama-2-13b-sf": {"architectures": ["LlamaModel"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "ehartford/Wizard-Vicuna-13B-Uncensored": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "doas/test5": {"architectures": ["LlamaModel"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "klosax/open_llama_3b_350bt_preview": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "Writer/camel-5b-hf": {"architectures": ["GPT2LMHeadModel"], "n_embd": 4096, "n_head": 32, "n_inner": 16384, "n_layer": 24, "vocab_size": 50258}, "Filosofas/DialoGPT-medium-PALPATINE2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "nomic-ai/gpt4all-falcon": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "reciprocate/llama2-7b-gsm8k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/CodeLlama-13B-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "pankajmathur/orca_mini_v3_13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "projecte-aina/aguila-7b": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 50257}, "TheBloke/WizardLM-13B-V1.1-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "MBZUAI/LaMini-GPT-124M": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50258}, "google/mt5-xxl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 10240, "d_model": 4096, "num_heads": 64, "num_layers": 24, "vocab_size": 250112}, 
"MaRiOrOsSi/t5-base-finetuned-question-answering": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "satvikag/chatbot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "LMFlow/Robin-7b-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "daryl149/llama-2-70b-chat-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "acrastt/Puma-3B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "TheBloke/orca_mini_v3_7B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "taeminlee/kogpt2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_layer": 12, "vocab_size": 50000}, "NousResearch/Llama-2-70b-chat-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "rinna/japanese-gpt2-xsmall": {"architectures": ["GPT2LMHeadModel"], "n_embd": 512, "n_head": 8, "n_inner": 2304, "n_layer": 6, "vocab_size": 32000}, "ziqingyang/chinese-llama-2-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 55296}, "hf-internal-testing/tiny-random-t5-v1.1": {"d_ff": 37, "d_model": 32, "num_heads": 4, "num_layers": 5, "vocab_size": 1103}, "pankajmathur/Lima_Unchained_70b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "chargoddard/llama2-22b-blocktriangular": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/CodeLlama-7B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32016}, "BeIR/query-gen-msmarco-t5-large-v1": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "TheBloke/CodeLlama-13B-Instruct-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "acrastt/Marx-3B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "PygmalionAI/pygmalion-2-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "shibing624/chinese-alpaca-plus-7b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 49954}, "TheBloke/OpenOrcaxOpenChat-Preview2-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, 
"intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "syzymon/long_llama_3b_instruct": {"architectures": ["LongLlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "bofenghuang/vigogne-2-7b-instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Gustavosta/MagicPrompt-Dalle": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "muchad/idt5-qa-qg": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 30002}, "TheBloke/vicuna-13b-v1.3.0-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TigerResearch/tigerbot-13b-base-v1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 60928}, "ehartford/WizardLM-13B-Uncensored": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "clibrain/Llama-2-7b-ft-instruct-es": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "google/t5_xxl_true_nli_mixture": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 65536, "d_model": 1024, "num_heads": 128, "num_layers": 24, "vocab_size": 32128}, "unikei/t5-base-split-and-rephrase": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "microsoft/Promptist": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_layer": 12, "vocab_size": 50257}, "stas/mt5-tiny-random": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 256, "d_model": 64, "num_heads": 4, "num_layers": 8, "vocab_size": 5100}, "AIDC-ai-business/Luban-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "microsoft/GODEL-v1_1-base-seq2seq": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32102}, "CalderaAI/30B-Lazarus": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "acrastt/Marx-3B-V2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "h2oai/h2ogpt-4096-llama2-70b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "ajibawa-2023/scarlett-33b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "jondurbin/airoboros-l2-70b-2.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, 
"vocab_size": 32000}, "rubentito/vt5-base-spdocvqa": {"architectures": ["HF_VT5"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "aisquared/dlite-v2-774m": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_inner": null, "n_layer": 36, "vocab_size": 50260}, "elyza/ELYZA-japanese-Llama-2-7b-fast": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 45043}, "quantumaikr/llama-2-70b-fb16-korean": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "TheBloke/CodeLlama-34B-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "microsoft/DialogRPT-updown": {"architectures": ["GPT2ForSequenceClassification"], "n_embd": 1024, "n_head": 16, "n_layer": 24, "vocab_size": 50257}, "TheBloke/CodeLlama-34B-Instruct-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "garage-bAInd/Platypus2-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "trl-internal-testing/tiny-BloomForCausalLM-correct-vocab": {"architectures": ["BloomForCausalLM"], "hidden_size": 32, "n_head": 4, "n_layer": 5, "vocab_size": 250880}, "TheBloke/Llama-2-7B-GGML": {}, "TheBloke/Wizard-Vicuna-7B-Uncensored-HF": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "wenge-research/yayi-7b-llama2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32005}, "coffeeee/nsfw-story-generator2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "jondurbin/airoboros-33b-gpt4-2.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "totally-not-an-llm/EverythingLM-13b-16k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "datificate/gpt2-small-spanish": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "mrm8488/t5-base-finetuned-wikiSQL": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "bofenghuang/vigogne-2-13b-instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "OpenAssistant/stablelm-7b-sft-v7-epoch-3": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 6144, "intermediate_size": 24576, "num_attention_heads": 48, "num_hidden_layers": 16, "vocab_size": 50288}, "bhenrym14/airoboros-33b-gpt4-1.4.1-lxctx-PI-16384-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, 
"num_hidden_layers": 60, "vocab_size": 32000}, "flozi00/codellama-34b-german-assistant-v1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "WizardLM/WizardCoder-1B-V1.0": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 2048, "n_head": 16, "n_inner": 8192, "n_layer": 24, "vocab_size": 49153}, "upstage/llama-30b-instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "ehartford/dolphin-llama2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Open-Orca/LlongOrca-13B-16k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32004}, "NousResearch/Nous-Hermes-Llama2-70b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32001}, "ml6team/mt5-small-german-query-generation": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250112}, "bigscience/mt0-xxl": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 10240, "d_model": 4096, "num_heads": 64, "num_layers": 24, "vocab_size": 250112}, "EleutherAI/pythia-2.8b-deduped-v0": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50304}, "TheBloke/wizardLM-7B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "conceptofmind/LLongMA-2-7b-16k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "lmsys/vicuna-7b-delta-v1.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "bofenghuang/vigogne-7b-instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "csebuetnlp/banglat5_nmt_en_bn": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "trl-internal-testing/tiny-random-T5Model": {"architectures": ["T5Model"], "d_ff": 37, "d_model": 32, "num_heads": 4, "num_layers": 5, "vocab_size": 1302}, "OpenBuddy/openbuddy-llama2-70b-v10.1-bf16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 37632}, "TheBloke/wizard-vicuna-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "JosephusCheung/Guanaco": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "openchat/opencoderplus": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 6144, "n_head": 48, 
"n_inner": 24576, "n_layer": 40, "vocab_size": 49153}, "jacobmorrison/tk-instruct-large-lora-experiments": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32100}, "PygmalionAI/metharme-1.3b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50304}, "TheBloke/orca_mini_13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "EleutherAI/pythia-70m-v0": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 512, "intermediate_size": 2048, "num_attention_heads": 8, "num_hidden_layers": 6, "vocab_size": 50304}, "project-baize/baize-v2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "line-corporation/japanese-large-lm-1.7b-instruction-sft": {"architectures": ["GPT2LMHeadModel"], "n_embd": 2304, "n_head": 24, "n_inner": 9216, "n_layer": 24, "vocab_size": 51200}, "TheBloke/WizardLM-13B-V1-0-Uncensored-SuperHOT-8K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/llama-2-70b-Guanaco-QLoRA-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "MBZUAI/LaMini-Flan-T5-77M": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 32128}, "csebuetnlp/banglat5_nmt_bn_en": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "Trelis/Llama-2-7b-chat-hf-function-calling-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "ehartford/Wizard-Vicuna-7B-Uncensored": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "llSourcell/medllama2_7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "WizardLM/WizardLM-13B-V1.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "Gryphe/MythoMix-L2-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/StableBeluga2-70B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "VietAI/vit5-large-vietnews-summarization": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 36096}, "adasnew/t5-small-xsum": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "Intel/t5-small-xsum-int8-dynamic": {"architectures": 
["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32100}, "daspartho/prompt-extend": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 52000}, "EleutherAI/pythia-160m-v0": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 768, "intermediate_size": 3072, "num_attention_heads": 12, "num_hidden_layers": 12, "vocab_size": 50304}, "Ar4ikov/gpt2-650k-stable-diffusion-prompt-generator": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "ehartford/WizardLM-Uncensored-Falcon-7b": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65025}, "CobraMamba/mamba-gpt-3b-v3": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "TheBloke/llama2_70b_chat_uncensored-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "ai-forever/FRED-T5-1.7B": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1536, "num_heads": 24, "num_layers": 24, "vocab_size": 50364}, "MBZUAI/LaMini-Cerebras-590M": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1536, "n_head": 12, "n_inner": 6144, "n_layer": 18, "vocab_size": 50258}, "mrm8488/llama-2-coder-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "guardrail/llama-2-7b-guanaco-instruct-sharded": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "rinna/bilingual-gpt-neox-4b-8k": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2816, "intermediate_size": 11264, "num_attention_heads": 22, "num_hidden_layers": 36, "vocab_size": 65536}, "mrm8488/falcoder-7b": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "circulus/Llama-2-13b-orca-v1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "allenai/tk-instruct-3b-def": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32100}, "pierreguillou/gpt2-small-portuguese": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_layer": 12, "vocab_size": 50257}, "junelee/wizard-vicuna-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "castorini/monot5-3b-msmarco-10k": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 16384, "d_model": 1024, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "TheBloke/Llama-2-70B-Chat-GGML": {}, "TheBloke/CodeLlama-7B-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32016}, "yeontaek/llama-2-13B-ensemble-v5": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, 
"ybelkada/flan-t5-xl-sharded-bf16": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "WizardLM/WizardCoder-3B-V1.0": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 2816, "n_head": 22, "n_inner": 11264, "n_layer": 36, "vocab_size": 49153}, "Langboat/mengzi-t5-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "MBZUAI/LaMini-GPT-774M": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_inner": null, "n_layer": 36, "vocab_size": 50258}, "ToddGoldfarb/Cadet-Tiny": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "TigerResearch/tigerbot-7b-base": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 60928}, "UrukHan/t5-russian-spell": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "LinkSoul/Chinese-Llama-2-7b-4bit": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/Vicuna-13B-CoT-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "EleutherAI/pythia-1.4b-v0": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50304}, "MayaPH/GodziLLa2-70B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "TheBloke/wizardLM-13B-1.0-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "Gryphe/MythoBoros-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "abacusai/Giraffe-v2-13b-32k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "jondurbin/airoboros-l2-13b-gpt4-1.4.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "razent/SciFive-base-Pubmed": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "TehVenom/Pygmalion-13b-Merged": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "garage-bAInd/SuperPlatty-30B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "jondurbin/airoboros-l2-7b-gpt4-2.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Rostlab/ProstT5": {"architectures": 
["T5ForConditionalGeneration"], "d_ff": 16384, "d_model": 1024, "num_heads": 32, "num_layers": 24, "vocab_size": 150}, "TheBloke/guanaco-7B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "JackFram/llama-68m": {"architectures": ["LlamaForCausalLM"], "hidden_size": 768, "intermediate_size": 3072, "num_attention_heads": 12, "num_hidden_layers": 2, "vocab_size": 32000}, "MBZUAI/LaMini-Cerebras-111M": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": 3072, "n_layer": 10, "vocab_size": 50258}, "ehartford/Wizard-Vicuna-30B-Uncensored": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "stockmark/gpt-neox-japanese-1.4b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50000}, "TheBloke/MythoMax-L2-13B-GGML": {}, "MBZUAI/LaMini-Cerebras-256M": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1088, "n_head": 17, "n_inner": 4352, "n_layer": 14, "vocab_size": 50258}, "jondurbin/airoboros-l2-13b-gpt4-2.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "The-Face-Of-Goonery/Chronos-Beluga-v2-13bfp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "lmqg/t5-base-squad-qag": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32101}, "Voicelab/trurl-2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "ehartford/Samantha-1.11-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "clibrain/Llama-2-13b-ft-instruct-es": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "deepse/CodeUp-Llama-2-13b-chat-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "mrm8488/t5-base-finetuned-sarcasm-twitter": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "ToolBench/ToolLLaMA-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "marella/gpt-2-ggml": {}, "Henk717/airochronos-33B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "stanford-crfm/alias-gpt2-small-x21": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "WizardLM/WizardLM-30B-V1.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32001}, 
"timdettmers/guanaco-33b-merged": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "xkianteb/alg_ppo_separate_lr_1e-6_n_epochs_10_v_epochs_10_kl_target_1.0_clip_range_0.2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "TheBloke/wizard-mega-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "bigscience/mt0-xl": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 250112}, "luffycodes/nash-vicuna-13b-v1dot5-ep2-w-rag-w-simple": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "h2oai/h2ogpt-oig-oasst1-256-6_9b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "fabiochiu/t5-base-medium-title-generation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "OpenAssistant/falcon-40b-sft-mix-1226": {"architectures": ["RWForCausalLM"], "hidden_size": 8192, "n_head": 128, "n_layer": 60, "vocab_size": 65040}, "Writer/palmyra-base": {"architectures": ["GPT2LMHeadModel"], "n_embd": 4096, "n_head": 32, "n_inner": 16384, "n_layer": 24, "vocab_size": 50257}, "TheBloke/llama-2-70b-Guanaco-QLoRA-GGML": {}, "Rostlab/prot_t5_base_mt_uniref50": {"architectures": ["T5WithLMHeadModel"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 256}, "Lajonbot/Llama-2-13b-hf-instruct-pl-lora_unload": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "WizardLM/WizardLM-7B-V1.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "pankajmathur/orca_mini_7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "yhyhy3/open_llama_7b_v2_med_instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "NousResearch/CodeLlama-7b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32016}, "OpenBuddy/openbuddy-llama2-13b-v11.1-bf16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 37632}, "hf-internal-testing/tiny-random-GPT2ForQuestionAnswering": {"architectures": ["GPT2ForQuestionAnswering"], "n_embd": 32, "n_head": 4, "n_inner": 37, "n_layer": 5, "vocab_size": 1024}, "explosion-testing/llama2-fewer-kv-heads": {"architectures": ["LlamaForCausalLM"], "hidden_size": 256, "intermediate_size": 512, "num_attention_heads": 4, "num_hidden_layers": 5, "vocab_size": 1024}, "hetpandya/t5-base-tapaco": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 
3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "PygmalionAI/pygmalion-2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "mrm8488/t5-base-finetuned-imdb-sentiment": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "explosion-testing/falcon-test": {"architectures": ["FalconForCausalLM"], "hidden_size": 32, "num_attention_heads": 4, "num_hidden_layers": 5, "vocab_size": 1024}, "ehartford/WizardLM-33B-V1.0-Uncensored": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "TheBloke/StableBeluga-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/WizardLM-Uncensored-SuperCOT-StoryTelling-30B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32001}, "jinaai/jina-embedding-s-en-v1": {"architectures": ["T5EncoderModel"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "FelixChao/vicuna-33b-coder": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "TheBloke/llama-30b-supercot-SuperHOT-8K-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "quantumaikr/llama-2-70b-fb16-orca-chat-10k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "TheBloke/airoboros-l2-13B-gpt4-1.4.1-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "EleutherAI/pythia-31m": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 256, "intermediate_size": 1024, "num_attention_heads": 8, "num_hidden_layers": 6, "vocab_size": 50304}, "hf-internal-testing/tiny-random-GPT2ForTokenClassification": {"architectures": ["GPT2ForTokenClassification"], "n_embd": 32, "n_head": 4, "n_inner": 37, "n_layer": 5, "vocab_size": 1024}, "jondurbin/airoboros-l2-70b-gpt4-1.4.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "kimsan0622/gpt2-medium": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 64007}, "TheBloke/EverythingLM-13B-16K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Linly-AI/Chinese-LLaMA-2-13B-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 40076}, "BlackSamorez/rudialogpt3_medium_based_on_gpt2_2ch": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "EleutherAI/pythia-2.8b-v0": 
{"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50304}, "TheBloke/llama-2-7B-Guanaco-QLoRA-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "google/byt5-xl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 6720, "d_model": 2560, "num_heads": 32, "num_layers": 36, "vocab_size": 384}, "TheBloke/wizard-vicuna-13B-HF": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TehVenom/Pygmalion-Vicuna-1.1-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "openaccess-ai-collective/wizard-mega-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "jondurbin/airoboros-l2-7b-gpt4-m2.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "openchat/openchat_v3.2_super": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "openaccess-ai-collective/manticore-13b-chat-pyg": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Neko-Institute-of-Science/pygmalion-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "unicamp-dl/ptt5-small-portuguese-vocab": {"architectures": ["T5WithLMHeadModel"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "hf-internal-testing/tiny-random-T5ForQuestionAnswering": {"architectures": ["T5ForQuestionAnswering"], "d_ff": 37, "d_model": 32, "num_heads": 4, "num_layers": 5, "vocab_size": 32100}, "microsoft/CodeGPT-small-java-adaptedGPT2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50260}, "unicamp-dl/ptt5-base-portuguese-vocab": {"architectures": ["T5WithLMHeadModel"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "Fredithefish/ScarletPajama-3B-HF": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "hf-internal-testing/tiny-random-T5ForSequenceClassification": {"architectures": ["T5ForSequenceClassification"], "d_ff": 37, "d_model": 32, "num_heads": 4, "num_layers": 5, "vocab_size": 32100}, "TheBloke/Nous-Hermes-Llama-2-7B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "calvindoingstuff/DialoGPT-medium-luffy": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "lvkaokao/llama2-7b-hf-chat-lora-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, 
"num_hidden_layers": 32, "vocab_size": 32000}, "skt/ko-gpt-trinity-1.2B-v0.5": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1920, "n_head": 16, "n_inner": 7680, "n_layer": 24, "vocab_size": 51200}, "saibo/llama-1B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 2, "vocab_size": 32000}, "vonjack/Qwen-LLaMAfied-HFTok-7B-Chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 151936}, "TheBloke/CodeLlama-34B-Python-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "GAIR/rst-all-11b": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 65536, "d_model": 1024, "num_heads": 128, "num_layers": 24, "vocab_size": 32128}, "GeorgiaTechResearchInstitute/starcoder-gpteacher-code-instruct": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 40, "vocab_size": 49153}, "jondurbin/airoboros-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "aisquared/dlite-v2-1_5b": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1600, "n_head": 25, "n_inner": null, "n_layer": 48, "vocab_size": 50260}, "aiassociates/t5-small-grammar-correction-german": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "asi/gpt-fr-cased-small": {"n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50000}, "csebuetnlp/mT5_m2o_chinese_simplified_crossSum": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "openthaigpt/openthaigpt-1.0.0-alpha-7b-chat-ckpt-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "jondurbin/airoboros-l2-13b-2.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "sentence-transformers/sentence-t5-xl": {"architectures": ["T5EncoderModel"], "d_ff": 16384, "d_model": 1024, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "OpenBuddy/openbuddy-openllama-3b-v10-bf16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 37120}, "TheBloke/guanaco-33B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "h2oai/h2ogpt-oasst1-512-20b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 6144, "intermediate_size": 24576, "num_attention_heads": 64, "num_hidden_layers": 44, "vocab_size": 50432}, "Open-Orca/OpenOrca-Preview1-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "WizardLM/WizardLM-13B-V1.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, 
"garage-bAInd/Camel-Platypus2-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "wxjiao/alpaca-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "TheBloke/Wizard-Vicuna-13B-Uncensored-SuperHOT-8K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "FelixChao/vicuna-7B-chemical": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Arc53/docsgpt-14b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "OpenAssistant/llama2-13b-megacode2-oasst": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32007}, "TheBloke/Lemur-70B-Chat-v1-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32005}, "EleutherAI/pythia-6.9b-v0": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "grimpep/L2-MythoMax22b-instruct-Falseblock": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 40, "vocab_size": 32000}, "Austism/chronos-hermes-13b-v2-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32032}, "UBC-NLP/AraT5v2-base-1024": {"d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 110208}, "fireballoon/baichuan-vicuna-chinese-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 64000}, "abeja/gpt2-large-japanese": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_inner": null, "n_layer": 36, "vocab_size": 32000}, "TheBloke/Airoboros-L2-70B-2.1-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "vicgalle/gpt2-alpaca-gpt4": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50260}, "flax-community/gpt2-small-indonesian": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "TheBloke/Nous-Hermes-13B-SuperHOT-8K-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "imone/LLaMA2_13B_with_EOT_token": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "Corianas/111m": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": 3072, "n_layer": 10, "vocab_size": 50257}, 
"The-Face-Of-Goonery/Huginn-v3-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "ehartford/Samantha-1.11-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "heegyu/WizardVicuna-3B-0719": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "acrastt/Griffin-3B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "totally-not-an-llm/EverythingLM-13b-V2-16k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "ikala/bloom-zh-3b-chat": {"architectures": ["BloomForCausalLM"], "hidden_size": 2560, "n_head": 32, "n_inner": null, "n_layer": 30, "vocab_size": 250688}, "Gryphe/MythoLogic-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "AlekseyKorshuk/vicuna-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "flax-community/gpt2-medium-persian": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50000}, "ehartford/samantha-1.1-llama-33b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "garage-bAInd/Platypus2-70B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "OpenLemur/lemur-70b-v1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32024}, "ausboss/llama-30b-supercot": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "jondurbin/airoboros-l2-70b-gpt4-m2.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "lmqg/mt5-small-koquad-qg-ae": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "TheBloke/OpenAssistant-SFT-7-Llama-30B-HF": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32016}, "TheBloke/h2ogpt-gm-oasst1-en-2048-falcon-40b-v2-GPTQ": {"architectures": ["RWForCausalLM"], "hidden_size": 8192, "n_head": 128, "n_layer": 60, "vocab_size": 65024}, "GOAT-AI/GOAT-7B-Community": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "h2oai/h2ogpt-gm-oasst1-en-1024-20b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 6144, "intermediate_size": 24576, "num_attention_heads": 64, 
"num_hidden_layers": 44, "vocab_size": 50432}, "beaugogh/pythia-1.4b-deduped-sharegpt": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50280}, "amurshak/llama-2-7b-miniguanaco": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "psyche/kollama2-7b-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "IlyaGusev/fred_t5_ru_turbo_alpaca": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1536, "num_heads": 24, "num_layers": 24, "vocab_size": 50365}, "potsawee/t5-large-generation-race-Distractor": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "heegyu/WizardVicuna-Uncensored-3B-0719": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "TheBloke/openchat_v2_openorca_preview-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "CalderaAI/13B-Legerdemain-L2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "SebastianSchramm/Cerebras-GPT-111M-instruction": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": 3072, "n_layer": 10, "vocab_size": 50258}, "Mikael110/llama-2-7b-guanaco-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Locutusque/gpt2-large-conversational": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_inner": null, "n_layer": 36, "vocab_size": 50260}, "CalderaAI/13B-Ouroboros": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "chaoyi-wu/MedLLaMA_13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "YeungNLP/firefly-llama2-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "garage-bAInd/GPlatty-30B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "pankajmathur/orca_mini_v2_13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "pankajmathur/model_007_13b_v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "chargoddard/Chronorctypus-Limarobormes-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "timdettmers/guanaco-65b-merged": 
{"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "digitous/13B-HyperMantis": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "ckiplab/gpt2-base-chinese": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 21128}, "ehartford/dolphin-llama-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "jphme/orca_mini_v2_ger_7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "malhajar/Platypus2-70B-instruct-4bit-gptq": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "heegyu/WizardVicuna-open-llama-3b-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "pankajmathur/model_007": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "vicgalle/gpt2-alpaca": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50260}, "h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-7b-preview-300bt-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "stabilityai/stablecode-completion-alpha-3b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 49152}, "aisquared/dlite-v2-355m": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50260}, "google/byt5-xxl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 12352, "d_model": 4672, "num_heads": 64, "num_layers": 36, "vocab_size": 384}, "ehartford/Samantha-1.11-CodeLlama-34b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "h2oai/h2ogpt-gm-oasst1-multilang-1024-20b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 6144, "intermediate_size": 24576, "num_attention_heads": 64, "num_hidden_layers": 44, "vocab_size": 50432}, "TheBloke/koala-7B-HF": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "ehartford/WizardLM-30B-Uncensored": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32001}, "clibrain/Llama-2-ft-instruct-es": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "quantumaikr/llama-2-70b-fb16-guanaco-1k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, 
"num_hidden_layers": 80, "vocab_size": 32000}, "psyche/kogpt": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1536, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 32002}, "wenge-research/yayi-7b": {"architectures": ["BloomForCausalLM"], "hidden_size": 4096, "n_head": 32, "n_inner": null, "n_layer": 30, "vocab_size": 250684}, "Aspik101/WizardVicuna-Uncensored-3B-instruct-PL-lora_unload": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "rombodawg/LosslessMegaCoder-llama2-7b-mini": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32007}, "TurkuNLP/gpt3-finnish-medium": {"architectures": ["BloomModel"], "hidden_size": 1024, "n_head": 16, "n_layer": 24, "vocab_size": 131072}, "pankajmathur/orca_mini_13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Mikael110/llama-2-13b-guanaco-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "totally-not-an-llm/PuddleJumper-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "jondurbin/airoboros-13b-gpt4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "CobraMamba/mamba-gpt-3b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "zarakiquemparte/zarablend-l2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Locutusque/gpt2-conversational-or-qa": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50262}, "frank098/Wizard-Vicuna-13B-juniper": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "jondurbin/airoboros-gpt-3.5-turbo-100k-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "jondurbin/airoboros-33b-gpt4-1.4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "jondurbin/airoboros-l2-70b-gpt4-2.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "MBZUAI/LaMini-Cerebras-1.3B": {"architectures": ["GPT2LMHeadModel"], "n_embd": 2048, "n_head": 16, "n_inner": 8192, "n_layer": 24, "vocab_size": 50258}, "h2oai/h2ogpt-research-oasst1-llama-65b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "nkpz/llama2-22b-daydreamer-v3": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, 
"num_attention_heads": 52, "num_hidden_layers": 40, "vocab_size": 32000}, "Aspik101/trurl-2-13b-pl-instruct_unload": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "OpenAssistant/pythia-12b-pre-v8-12.5k-steps": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 36, "vocab_size": 50288}, "breadlicker45/dough-instruct-base-001": {"architectures": ["LlamaForCausalLM"], "hidden_size": 768, "intermediate_size": 3072, "num_attention_heads": 12, "num_hidden_layers": 12, "vocab_size": 50402}, "OpenBuddy/openbuddy-llama-30b-v7.1-bf16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 37632}, "andreaskoepf/llama2-13b-megacode2_min100": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32007}, "ehartford/Samantha-1.11-70b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "flax-community/t5-recipe-generation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "BreadAi/PM_modelV2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1088, "n_head": 17, "n_inner": 4352, "n_layer": 14, "vocab_size": 50257}, "minlik/chinese-alpaca-33b-merged": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 49954}, "jordiclive/Llama-2-70b-oasst-1-200": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32016}, "Lajonbot/tableBeluga-7B-instruct-pl-lora_unload": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "sia-ai/llama-2-7b-1-percent-open-orca-1000-steps-v0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "h2oai/h2ogpt-gm-oasst1-en-1024-12b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 36, "vocab_size": 50688}, "jondurbin/airoboros-33b-gpt4-1.2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "openchat/openchat_8192": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "TaylorAI/Flash-Llama-3B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "yeontaek/llama-2-13B-ensemble-v1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Kirili4ik/ruDialoGpt3-medium-finetuned-telegram": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, 
"n_inner": null, "n_layer": 24, "vocab_size": 50257}, "WangZeJun/bloom-820m-chat": {"architectures": ["BloomForCausalLM"], "hidden_size": 1536, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 46145}, "4bit/Llama-2-70b-chat-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "chargoddard/llama2-22b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 40, "vocab_size": 32000}, "augtoma/qCammel-13": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "NlpHUST/gpt2-vietnamese": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "Monero/Manticore-13b-Chat-Pyg-Guanaco": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "NousResearch/CodeLlama-34b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "aisquared/dlite-v2-124m": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50260}, "pankajmathur/orca_mini_v2_7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "The-Face-Of-Goonery/Huginn-22b-Prototype": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 40, "vocab_size": 32000}, "DevaMalla/llama7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "openaccess-ai-collective/manticore-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "nkpz/llama2-22b-chat-wizard-uncensored": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 40, "vocab_size": 32000}, "davzoku/cria-llama2-7b-v1.3": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TaylorAI/Flash-Llama-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Fredithefish/ReasonixPajama-3B-HF": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "TheBloke/Platypus-30B-SuperHOT-8K-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "LoupGarou/WizardCoder-Guanaco-15B-V1.1": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 40, "vocab_size": 49153}, "TheBloke/guanaco-65B-HF": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, 
"num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "hakurei/lotus-12B": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 36, "vocab_size": 50688}, "bofenghuang/vigogne-33b-instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "grimpep/llama2-22B-GPLATTY": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 40, "vocab_size": 32000}, "concedo/Pythia-70M-ChatSalad": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 512, "intermediate_size": 2048, "num_attention_heads": 8, "num_hidden_layers": 6, "vocab_size": 50278}, "rombodawg/LosslessMegaCoder-llama2-13b-mini": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32007}, "TaylorAI/Flash-Llama-7B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/chronos-wizardlm-uc-scot-st-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "OpenBuddy/openbuddy-llama-65b-v8-bf16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 37632}, "ajibawa-2023/scarlett-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/medalpaca-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "elinas/chronos-33b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "OpenBuddy/openbuddy-atom-13b-v9-bf16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 49953}, "OpenAssistant/pythia-12b-sft-v8-rlhf-2k-steps": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 36, "vocab_size": 50282}, "TheTravellingEngineer/llama2-7b-chat-hf-v4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Ejafa/vicuna_7B_vanilla_1.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "yulan-team/YuLan-Chat-2-13b-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 51200}, "huashiyiqike/testmodel": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": 3072, "n_layer": 10, "vocab_size": 50257}, "TheBloke/WizardLM-30B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, 
"num_hidden_layers": 60, "vocab_size": 32001}, "notstoic/PygmalionCoT-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/Vicuna-33B-1-3-SuperHOT-8K-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "FelixChao/vicuna-7B-physics": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/tulu-30B-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32001}, "jondurbin/airoboros-65b-gpt4-1.4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "uukuguy/speechless-llama2-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "digitous/13B-Chimera": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "jondurbin/airoboros-7b-gpt4-1.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "grimpep/llama2-28B-Airo03": {"architectures": ["LlamaForCausalLM"], "hidden_size": 7296, "intermediate_size": 22016, "num_attention_heads": 57, "num_hidden_layers": 40, "vocab_size": 32000}, "ehartford/CodeLlama-34b-Instruct-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "YeungNLP/firefly-ziya-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 39424}, "TheTravellingEngineer/bloom-560m-RLHF-v2": {"architectures": ["BloomForCausalLM"], "hidden_size": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 250880}, "TheTravellingEngineer/llama2-7b-chat-hf-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "uukuguy/speechless-hermes-coig-lite-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "BreadAi/gpt-Youtube": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 768, "intermediate_size": 3072, "num_attention_heads": 12, "num_hidden_layers": 12, "vocab_size": 50304}, "Aspik101/llama-30b-instruct-2048-PL-lora": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "beaugogh/Llama2-13b-sharegpt4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "gaodrew/gaodrew-gorgonzola-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, 
"vocab_size": 32001}, "OpenBuddy/openbuddy-llama2-13b-v11-bf16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 37632}, "TheBloke/guanaco-13B-HF": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/WizardLM-13B-V1-1-SuperHOT-8K-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "NousResearch/CodeLlama-13b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "BreadAi/MusePy-1-2": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 512, "intermediate_size": 2048, "num_attention_heads": 8, "num_hidden_layers": 6, "vocab_size": 50304}, "jondurbin/airoboros-33b-gpt4-1.3": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "YeungNLP/firefly-bloom-7b1": {"architectures": ["BloomForCausalLM"], "hidden_size": 4096, "n_head": 32, "n_inner": null, "n_layer": 30, "vocab_size": 250880}, "grimpep/llama2-22b-wizard_vicuna": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 40, "vocab_size": 32000}, "Fredithefish/Guanaco-3B-Uncensored": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "digitous/Alpacino13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "mncai/SGPT-1.3B-insurance-epoch10": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 30080}, "TheTravellingEngineer/llama2-7b-chat-hf-dpo": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "yeontaek/Platypus2xOpenOrca-13B-LoRa": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "TheBloke/gpt4-alpaca-lora-30b-HF": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "bhenrym14/airophin-13b-pntk-16k-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/Kimiko-13B-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "porkorbeef/Llama-2-13b-12_153950": {"architectures": ["LlamaModel"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "PSanni/Deer-3b": {"architectures": ["BloomForCausalLM"], "hidden_size": 2560, "n_head": 32, "n_inner": null, "n_layer": 30, "vocab_size": 250683}, 
"IGeniusDev/llama13B-quant8-testv1-openorca-customdataset": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Neko-Institute-of-Science/metharme-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "alibidaran/medical_transcription_generator": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "Panchovix/airoboros-33b-gpt4-1.2-SuperHOT-8k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "digitous/Alpacino30b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "lgaalves/gpt2-dolly": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "TaylorAI/FLAN-Llama-7B-2_Llama2-7B-Flash_868_full_model": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "zarakiquemparte/zarafusionex-1.1-l2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "OpenAssistant/pythia-12b-sft-v8-2.5k-steps": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 36, "vocab_size": 50288}, "TheBloke/airoboros-13B-HF": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/robin-33B-v2-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "Aspik101/trurl-2-7b-pl-instruct_unload": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "llama-anon/petra-13b-instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "TFLai/gpt2-turkish-uncased": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "health360/Healix-3B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "TheBloke/Mythalion-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-7b-preview-300bt": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "pe-nlp/llama-2-13b-vicuna-wizard": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "yeontaek/Platypus2-13B-QLoRa": 
{"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "acrastt/OmegLLaMA-3B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "jslin09/bloom-560m-finetuned-fraud": {"architectures": ["BloomForCausalLM"], "hidden_size": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 250880}, "YeungNLP/firefly-bloom-2b6-v2": {"architectures": ["BloomForCausalLM"], "hidden_size": 2560, "n_head": 32, "n_inner": null, "n_layer": 30, "vocab_size": 46145}, "xzuyn/LLaMa-1-MedicWizard-7B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "Azure99/blossom-v2-3b": {"architectures": ["BloomForCausalLM"], "hidden_size": 2560, "n_head": 32, "n_inner": null, "n_layer": 30, "vocab_size": 250880}, "TheBloke/Airoboros-L2-13B-2.1-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "MetaIX/GPT4-X-Alpasta-30b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32016}, "PocketDoc/Dans-PersonalityEngine-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "vicgalle/alpaca-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Corianas/590m": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1536, "n_head": 12, "n_inner": 6144, "n_layer": 18, "vocab_size": 50257}, "OpenBuddy/openbuddy-openllama-13b-v7-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 38656}, "gywy/llama2-13b-chinese-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 49376}, "Corianas/Quokka_590m": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1536, "n_head": 12, "n_inner": 6144, "n_layer": 18, "vocab_size": 50260}, "aisquared/dlite-v1-355m": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50260}, "aisquared/dlite-v1-774m": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_inner": null, "n_layer": 36, "vocab_size": 50260}, "Fredithefish/RedPajama-INCITE-Chat-3B-Instruction-Tuning-with-GPT-4": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "project-baize/baize-v2-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/Project-Baize-v2-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "FabbriSimo01/GPT_Large_Quantized": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_inner": null, 
"n_layer": 36, "vocab_size": 50257}, "ajibawa-2023/carl-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Azure99/blossom-v1-3b": {"architectures": ["BloomForCausalLM"], "hidden_size": 2560, "n_head": 32, "n_inner": null, "n_layer": 30, "vocab_size": 250880}, "Aspik101/30B-Lazarus-instruct-PL-lora_unload": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "yeontaek/Platypus2xOpenOrca-13B-IA3": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "Enno-Ai/ennodata-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "FabbriSimo01/Cerebras_1.3b_Quantized": {"architectures": ["GPT2LMHeadModel"], "n_embd": 2048, "n_head": 16, "n_inner": 8192, "n_layer": 24, "vocab_size": 50257}, "migtissera/Synthia-7B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "pe-nlp/llama-2-13b-platypus-vicuna-wizard": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "yeontaek/Platypus2xOpenOrca-13B-IA3-ensemble": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "Corianas/1.3b": {"architectures": ["GPT2LMHeadModel"], "n_embd": 2048, "n_head": 16, "n_inner": 8192, "n_layer": 24, "vocab_size": 50257}, "Rachneet/gpt2-xl-alpaca": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1600, "n_head": 25, "n_inner": null, "n_layer": 48, "vocab_size": 50257}, "Aeala/VicUnlocked-alpaca-30b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "TheBloke/VicUnlocked-30B-LoRA-HF": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "Monero/WizardLM-Uncensored-SuperCOT-StoryTelling-30b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32001}, "bavest/fin-llama-33b-merged": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "openchat/openchat_v2_w": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "FabbriSimo01/Bloom_1b_Quantized": {"architectures": ["BloomForCausalLM"], "hidden_size": 1536, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 250880}, "Aspik101/tulu-7b-instruct-pl-lora_unload": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "TheTravellingEngineer/llama2-7b-chat-hf-v3": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, 
"num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "yeontaek/llama-2-70b-IA3-guanaco": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "Kunhao/pile-7b-250b-tokens": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 49152}, "yeontaek/llama-2-13b-QLoRA": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "yeontaek/llama-2-13b-Beluga-QLoRA": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "yeontaek/Platypus2xOpenOrca-13B-IA3-v3": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "ausboss/llama7b-wizardlm-unfiltered": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/h2ogpt-oasst1-512-30B-HF": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "bofenghuang/vigogne-7b-chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "NYTK/PULI-GPTrio": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 150016}, "LLMs/WizardLM-30B-V1.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32001}, "openaccess-ai-collective/minotaur-13b-fixed": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheTravellingEngineer/bloom-1b1-RLHF-v2": {"architectures": ["BloomForCausalLM"], "hidden_size": 1536, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 250880}, "yeontaek/Platypus2xOpenOrca-13B-IA3-v4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "BreadAi/DiscordPy": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1088, "n_head": 17, "n_inner": 4352, "n_layer": 14, "vocab_size": 50257}, "TehVenom/oasst-sft-6-llama-33b-xor-MERGED-16bit": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32016}, "gaodrew/gaodrew-llama-30b-instruct-2048-Open-Platypus-100steps": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "yeontaek/Platypus2xOpenOrca-13B-IA3-v2.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "yeontaek/Platypus2xOpenOrca-13B-LoRa-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, 
"num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "dvruette/oasst-pythia-12b-6000-steps": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 36, "vocab_size": 50288}, "LoupGarou/WizardCoder-Guanaco-15B-V1.0": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 40, "vocab_size": 49153}, "KnutJaegersberg/gpt-2-xl-EvolInstruct": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1600, "n_head": 25, "n_inner": null, "n_layer": 48, "vocab_size": 50257}, "Lajonbot/WizardLM-13B-V1.2-PL-lora_unload": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "yeontaek/Platypus2-13B-IA3": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "zarakiquemparte/zaraxe-l2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "BreadAi/gpt-YA-1-1_70M": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 512, "intermediate_size": 2048, "num_attention_heads": 8, "num_hidden_layers": 6, "vocab_size": 50304}, "dvruette/oasst-pythia-12b-reference": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 36, "vocab_size": 50288}, "Panchovix/WizardLM-33B-V1.0-Uncensored-SuperHOT-8k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "titan087/OpenLlama13B-Guanaco": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "w601sxs/b1ade-1b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 8, "num_hidden_layers": 16, "vocab_size": 50304}, "Andron00e/YetAnother_Open-Llama-3B-LoRA": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "quantumaikr/QuantumLM": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "jondurbin/airoboros-65b-gpt4-2.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "Aspik101/llama-30b-2048-instruct-PL-lora_unload": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "yeontaek/Platypus2-13B-LoRa": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "zarakiquemparte/zarafusionix-l2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "huggingtweets/gladosystem": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, 
"eachadea/legacy-vicuna-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "Aeala/GPT4-x-AlpacaDente2-30b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32016}, "shibing624/chinese-llama-plus-13b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 49953}, "euclaise/gpt-neox-122m-minipile-digits": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 768, "intermediate_size": 3072, "num_attention_heads": 12, "num_hidden_layers": 12, "vocab_size": 48262}, "TheBloke/UltraLM-13B-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "lvkaokao/llama2-7b-hf-instruction-lora": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "BreadAi/StoryPy": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 512, "intermediate_size": 2048, "num_attention_heads": 8, "num_hidden_layers": 6, "vocab_size": 50304}, "dvruette/oasst-pythia-12b-flash-attn-5000-steps": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 36, "vocab_size": 50288}, "aisquared/dlite-v1-124m": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "ewof/koishi-instruct-3b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "h2oai/h2ogpt-gm-oasst1-en-1024-open-llama-7b-preview-400bt": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/airoboros-7b-gpt4-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "jondurbin/airoboros-13b-gpt4-1.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/tulu-13B-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "yhyhy3/med-orca-instruct-33b": {"architectures": ["LlamaModel"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "heegyu/LIMA-13b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "abhishek/llama2guanacotest": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "heegyu/LIMA2-13b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Corianas/Quokka_256m": {"architectures": 
["GPT2LMHeadModel"], "n_embd": 1088, "n_head": 17, "n_inner": 4352, "n_layer": 14, "vocab_size": 50260}, "golaxy/gogpt-560m": {"architectures": ["BloomForCausalLM"], "hidden_size": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 250880}, "OptimalScale/robin-7b-v2-delta": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "bofenghuang/vigogne-13b-chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "klosax/pythia-160m-deduped-step92k-193bt": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 768, "intermediate_size": 3072, "num_attention_heads": 12, "num_hidden_layers": 12, "vocab_size": 50304}, "golaxy/gogpt2-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 68420}, "YeungNLP/firefly-llama2-13b-v1.2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "WhoTookMyAmogusNickname/NewHope_HF_not_official": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "ehartford/CodeLlama-34b-Python-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "KnutJaegersberg/megatron-GPT-2-345m-EvolInstruct": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": 4096, "n_layer": 24, "vocab_size": 50257}, "Aeala/Alpaca-elina-65b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "Monero/WizardLM-30B-Uncensored-Guanaco-SuperCOT-30b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32001}, "csitfun/llama-7b-logicot": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "OptimalScale/robin-65b-v2-delta": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "LLMs/WizardLM-13B-V1.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "CobraMamba/mamba-gpt-3b-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "aisquared/dlite-v1-1_5b": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1600, "n_head": 25, "n_inner": null, "n_layer": 48, "vocab_size": 50257}, "nthngdy/pythia-owt2-70m-100k": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 512, "intermediate_size": 2048, "num_attention_heads": 8, "num_hidden_layers": 6, "vocab_size": 50304}, "LLMs/AlpacaGPT4-7B-elina": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, 
"Yhyu13/oasst-rlhf-2-llama-30b-7k-steps-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32006}, "jondurbin/airoboros-7b-gpt4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "grantprice/Cerebras-GPT-590M-finetuned-DND": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1536, "n_head": 12, "n_inner": 6144, "n_layer": 18, "vocab_size": 50257}, "TheBloke/robin-13B-v2-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/robin-65b-v2-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "FPHam/Free_Sydney_13b_HF": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32032}, "acrastt/RedPajama-INCITE-Chat-Instruct-3B-V1": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "jondurbin/airoboros-65b-gpt4-m2.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "heegyu/LIMA2-7b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "BreadAi/MuseCan": {"architectures": ["GPT2LMHeadModel"], "n_embd": 960, "n_head": 15, "n_inner": 9, "n_layer": 5, "vocab_size": 50304}, "ausboss/llama-13b-supercot": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "openaccess-ai-collective/manticore-30b-chat-pyg-alpha": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "OptimalScale/robin-13b-v2-delta": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "quantumaikr/llama-2-7b-hf-guanaco-1k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Fredithefish/RedPajama-INCITE-Chat-3B-ShareGPT-11K": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "CalderaAI/13B-BlueMethod": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "SaylorTwift/gpt2_test": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_layer": 12, "vocab_size": 50257}, "WeOpenML/PandaLM-Alpaca-7B-v1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "WeOpenML/Alpaca-7B-v1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, 
"num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "sumo43/lora_moe_7b_baseline": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "wenge-research/yayi-13b-llama2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32005}, "golaxy/gowizardlm": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 68420}, "abhiramtirumala/DialoGPT-sarcastic-medium": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "Corianas/Quokka_2.7b": {"architectures": ["GPT2LMHeadModel"], "n_embd": 2560, "n_head": 32, "n_inner": 10240, "n_layer": 32, "vocab_size": 50260}, "Corianas/256_5epoch": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1088, "n_head": 17, "n_inner": 4352, "n_layer": 14, "vocab_size": 50257}, "dvruette/llama-13b-pretrained": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "TheBloke/alpaca-lora-65B-HF": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "ashercn97/giraffe-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Aspik101/Vicuzard-30B-Uncensored-instruct-PL-lora_unload": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32001}, "TheBloke/dromedary-65b-lora-HF": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "Yhyu13/chimera-inst-chat-13b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "ehartford/based-30b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "concedo/Vicuzard-30B-Uncensored": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32001}, "64bits/LexPodLM-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "MayaPH/GodziLLa-30B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "Aspik101/vicuna-7b-v1.3-instruct-pl-lora_unload": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "julianweng/Llama-2-7b-chat-orcah": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "heegyu/RedTulu-Uncensored-3B-0719": {"architectures": ["GPTNeoXForCausalLM"], 
"hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "Aspik101/Llama-2-7b-hf-instruct-pl-lora_unload": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "quantumaikr/QuantumLM-70B-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "BreadAi/gpt-YA-1-1_160M": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 768, "intermediate_size": 3072, "num_attention_heads": 12, "num_hidden_layers": 12, "vocab_size": 50304}, "dvruette/oasst-pythia-12b-pretrained-sft": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 36, "vocab_size": 50288}, "Aeala/GPT4-x-AlpacaDente-30b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32016}, "TehVenom/Pygmalion_AlpacaLora-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "LLMs/Stable-Vicuna-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "quantumaikr/open_llama_7b_hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Aeala/GPT4-x-Alpasta-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "Fredithefish/CrimsonPajama": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "openaccess-ai-collective/hippogriff-30b-chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "camel-ai/CAMEL-13B-Role-Playing-Data": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/landmark-attention-llama7b-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32002}, "TheBloke/robin-33B-v2-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "TheBloke/GPlatty-30B-SuperHOT-8K-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "TheBloke/Chinese-Alpaca-33B-SuperHOT-8K-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 49954}, "TheBloke/CAMEL-33B-Combined-Data-SuperHOT-8K-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, 
"klosax/open_llama_13b_600bt_preview": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Aspik101/Nous-Hermes-13b-pl-lora_unload": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "jondurbin/airoboros-l2-7b-gpt4-1.4.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "YeungNLP/firefly-llama-30b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "ashercn97/manatee-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "lizhuang144/starcoder_mirror": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 40, "vocab_size": 49152}, "Aspik101/vicuna-13b-v1.5-PL-lora_unload": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Aspik101/Redmond-Puffin-13B-instruct-PL-lora_unload": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32032}, "Aspik101/StableBeluga-13B-instruct-PL-lora_unload": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "RoversX/llama-2-7b-hf-small-shards-Samantha-V1-SFT": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Corianas/Quokka_1.3b": {"architectures": ["GPT2LMHeadModel"], "n_embd": 2048, "n_head": 16, "n_inner": 8192, "n_layer": 24, "vocab_size": 50260}, "nthngdy/pythia-owt2-70m-50k": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 512, "intermediate_size": 2048, "num_attention_heads": 8, "num_hidden_layers": 6, "vocab_size": 50304}, "danielhanchen/open_llama_3b_600bt_preview": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "TheBloke/VicUnlocked-alpaca-65B-QLoRA-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "IDEA-CCNL/Ziya-LLaMA-13B-Pretrain-v1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 39424}, "kevinpro/Vicuna-13B-CoT": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "wahaha1987/llama_7b_sharegpt94k_fastchat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "openaccess-ai-collective/minotaur-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, 
"vocab_size": 32000}, "TheBloke/tulu-7B-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "golaxy/gogpt-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 68420}, "Aeala/Enterredaas-33b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "kingbri/chronolima-airo-grad-l2-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheTravellingEngineer/bloom-560m-RLHF": {"architectures": ["BloomForCausalLM"], "hidden_size": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 250880}, "HWERI/Llama2-7b-sharegpt4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "l3utterfly/llama2-7b-layla": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "yeontaek/llama-2-13b-Guanaco-QLoRA": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "duliadotio/dulia-13b-8k-alpha": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "yeontaek/llama-2-13B-ensemble-v3": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "dvruette/oasst-gpt-neox-20b-3000-steps": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 6144, "intermediate_size": 24576, "num_attention_heads": 64, "num_hidden_layers": 44, "vocab_size": 50288}, "dvruette/oasst-gpt-neox-20b-1000-steps": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 6144, "intermediate_size": 24576, "num_attention_heads": 64, "num_hidden_layers": 44, "vocab_size": 50288}, "huggingtweets/jerma985": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "Dampish/Dante-2.8B": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "TheBloke/Planner-7B-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "klosax/pythia-70m-deduped-step44k-92bt": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 512, "intermediate_size": 2048, "num_attention_heads": 8, "num_hidden_layers": 6, "vocab_size": 50304}, "klosax/open_llama_7b_400bt_preview": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "golaxy/gogpt2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 68420}, "Lajonbot/Llama-2-7b-chat-hf-instruct-pl-lora_unload": {"architectures": 
["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheTravellingEngineer/llama2-7b-chat-hf-guanaco": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Lajonbot/vicuna-7b-v1.5-PL-lora_unload": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "kingbri/airolima-chronos-grad-l2-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "yeontaek/llama-2-70B-ensemble-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "dvruette/oasst-llama-13b-2-epochs": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "dvruette/llama-13b-pretrained-sft-epoch-1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "dvruette/llama-13b-pretrained-dropout": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "hakurei/instruct-12b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 36, "vocab_size": 50277}, "dvruette/gpt-neox-20b-full-precision": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 6144, "intermediate_size": 24576, "num_attention_heads": 64, "num_hidden_layers": 44, "vocab_size": 50288}, "Monero/WizardLM-13b-OpenAssistant-Uncensored": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "Yhyu13/llama-30B-hf-openassitant": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "jondurbin/airoboros-65b-gpt4-1.2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "camel-ai/CAMEL-33B-Combined-Data": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "MBZUAI/bactrian-x-llama-13b-merged": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "dsvv-cair/alpaca-cleaned-llama-30b-bf16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "YeungNLP/firefly-llama-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "YeungNLP/firefly-llama-13b-v1.2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 
40, "vocab_size": 32000}, "heegyu/WizardVicuna2-13b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "dvruette/oasst-llama-13b-1000-steps": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "dvruette/llama-13b-pretrained-sft-do2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "pillowtalks-ai/delta13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "illuin/test-custom-llama": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "MrNJK/gpt2-xl-sft": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1600, "n_head": 25, "n_inner": null, "n_layer": 48, "vocab_size": 50257}, "PocketDoc/Dans-PileOfSets-Mk1-llama-13b-merged": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "jondurbin/airoboros-65b-gpt4-1.3": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "bhenrym14/airoboros-33b-gpt4-1.4.1-PI-8192-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "frank098/WizardLM_13B_juniper": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "golaxy/goims": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 68420}, "dvruette/oasst-pythia-6.9b-4000-steps": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50288}, "mncai/chatdoctor": {"architectures": ["LLaMAForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "wannaphong/openthaigpt-0.1.0-beta-full-model_for_open_llm_leaderboard": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "golaxy/gogpt-3b-bloom": {"architectures": ["BloomForCausalLM"], "hidden_size": 2560, "n_head": 32, "n_inner": null, "n_layer": 30, "vocab_size": 250880}, "golaxy/gogpt-7b-bloom": {"architectures": ["BloomForCausalLM"], "hidden_size": 4096, "n_head": 32, "n_inner": null, "n_layer": 30, "vocab_size": 250880}, "jondurbin/airoboros-33b-gpt4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "jondurbin/airoboros-13b-gpt4-1.2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "jondurbin/airoboros-7b-gpt4-1.2": {"architectures": 
["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "jondurbin/airoboros-13b-gpt4-1.3": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "jondurbin/airoboros-7b-gpt4-1.4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "jondurbin/airoboros-13b-gpt4-1.4-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "jondurbin/airoboros-7b-gpt4-1.4.1-qlora": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "frank098/orca_mini_3b_juniper": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "Lajonbot/vicuna-13b-v1.3-PL-lora_unload": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "jxhong/CAlign-alpaca-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "quantumaikr/KoreanLM-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "keyfan/vicuna-chinese-replication-v1.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 49953}, "jondurbin/airoboros-7b-gpt4-1.3": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "jerryjalapeno/nart-100k-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "xzuyn/Alpacino-SuperCOT-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "wahaha1987/llama_13b_sharegpt94k_fastchat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "quantumaikr/QuantumLM-7B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Undi95/ReMM-SLERP-L2-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "huggingtweets/bladeecity-jerma985": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "pszemraj/pythia-6.9b-HC3": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "CalderaAI/30B-Epsilon": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, 
"intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "TFLai/OpenOrca-Platypus2-13B-QLoRA-0.80-epoch": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "alpindale/pygmalion-instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "jondurbin/airoboros-c34b-2.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "abacaj/starcoderbase-1b-sft": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 2048, "n_head": 16, "n_inner": 8192, "n_layer": 24, "vocab_size": 49153}, "bongchoi/test-llama2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TinyPixel/lima-test": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "quantumaikr/llama-2-70B-instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "yeontaek/llama-2-13B-ensemble-v4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "cointegrated/rut5-base-absum": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 30000}, "pankajmathur/model_420_preview": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "Fredithefish/Guanaco-3B-Uncensored-v2": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "yeontaek/llama-2-70B-ensemble-v4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "Writer/palmyra-large": {"architectures": ["GPT2LMHeadModel"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 44, "vocab_size": 50257}, "RobbeD/OpenLlama-Platypus-3B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "TFLai/OrcaMini-Platypus2-13B-QLoRA-0.80-epoch": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "NobodyExistsOnTheInternet/PuffedConvo13bLoraE4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Sao10K/Medusa-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/Manticore-13B-Chat-Pyg-Guanaco-SuperHOT-8K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, 
"num_hidden_layers": 40, "vocab_size": 32000}, "TFLai/Nous-Hermes-Platypus2-13B-QLoRA-0.80-epoch": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32032}, "dhmeltzer/llama-7b-SFT_eli5_wiki65k_1024_r_64_alpha_16_merged": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TFLai/MythoMix-Platypus2-13B-QLoRA-0.80-epoch": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "chargoddard/llama-2-34b-uncode": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "zarakiquemparte/zaraxls-l2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TFLai/Stable-Platypus2-13B-QLoRA-0.80-epoch": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Danielbrdz/Barcenas-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "alan-turing-institute/mt5-large-finetuned-mnli-xtreme-xnli": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 250112}, "TFLai/Limarp-Platypus2-13B-QLoRA-0.80-epoch": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TFLai/PuddleJumper-Platypus2-13B-QLoRA-0.80-epoch": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "The-Face-Of-Goonery/Huginn-13b-v4.5": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "jondurbin/airoboros-l2-7b-2.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "allenai/unifiedqa-v2-t5-large-1363200": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "TheBloke/OpenAssistant-Llama2-13B-Orca-8K-3319-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "synapsoft/Llama-2-7b-hf-flan2022-1.2M": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "yeontaek/Platypus2-13B-LoRa-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "KES/T5-KES": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "robowaifudev/megatron-gpt2-345m": {"vocab_size": 50257, "n_embd": 1024, "n_layer": 
24, "n_head": 16, "n_inner": 4096, "architectures": ["GPT2LMHeadModel"]}, "Sao10K/Mythical-Destroyer-L2-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "CHIH-HUNG/llama-2-13b-dolphin_20w": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "The-Face-Of-Goonery/Huginn-13b-V4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "haining/scientific_abstract_simplification": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "ChanonUtupon/openthaigpt-merge-lora-llama-2-7B-3470k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 56554}, "chaoyi-wu/PMC_LLAMA_7B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "CHIH-HUNG/llama-2-13b-OpenOrca_5w": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "clibrain/lince-zero": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "TheBloke/Project-Baize-v2-7B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "uukuguy/speechless-codellama-platypus-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "CHIH-HUNG/llama-2-13b-dolphin_5w": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "yeontaek/airoboros-2.1-llama-2-13B-QLoRa": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "uukuguy/speechless-llama2-luban-orca-platypus-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Norquinal/llama-2-7b-claude-chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TFLai/Luban-Platypus2-13B-QLora-0.80-epoch": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "conceptofmind/Open-LLongMA-3b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "Norquinal/llama-2-7b-claude-chat-rp": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "lgaalves/llama-2-7b-hf_open-platypus": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 
11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "ziqingyang/chinese-llama-2-7b-16k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 55296}, "yeontaek/llama-2-13B-ensemble-v6": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "yeontaek/llama-2-70B-ensemble-v7": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "ubikpt/t5-small-finetuned-cnn": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "rajkumarrrk/t5-base-fine-tuned-on-cnn-dm": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "google/t5-efficient-xl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 16384, "d_model": 1024, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "TFLai/Airboros2.1-Platypus2-13B-QLora-0.80-epoch": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "dhmeltzer/llama-7b-SFT_ds_eli5_1024_r_64_alpha_16_merged": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "h2oai/h2ogpt-4096-llama2-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "dhmeltzer/llama-7b-SFT_ds_wiki65k_1024_r_64_alpha_16_merged": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TFLai/Ensemble5-Platypus2-13B-QLora-0.80-epoch": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "TFLai/Athena-Platypus2-13B-QLora-0.80-epoch": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "4bit/Llama-2-7b-chat-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TFLai/MythicalDestroyerV2-Platypus2-13B-QLora-0.80-epoch": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TFLai/OpenOrcaPlatypus2-Platypus2-13B-QLora-0.80-epoch": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "Salesforce/codegen25-7b-mono": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 51200}, "Sao10K/Stheno-L2-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "yeontaek/WizardCoder-Python-13B-LoRa": {"architectures": 
["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "uukuguy/speechless-orca-platypus-coig-lite-2k-0.6e-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "casperhansen/vicuna-7b-v1.5-awq": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "luffycodes/nash-vicuna-33b-v1dot3-ep2-w-rag-w-simple": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "CHIH-HUNG/llama-2-13b-OpenOrca_20w": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "google/t5-efficient-small": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "TheBloke/orca_mini_v2_7B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "tianyil1/denas-llama2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Sao10K/Stheno-Inverted-L2-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "junelee/ko_vicuna_7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Seungyoun/codellama-7b-instruct-pad": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32025}, "TheBloke/Kimiko-v2-13B-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "uukuguy/speechless-orca-platypus-coig-lite-4k-0.5e-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "uukuguy/speechless-orca-platypus-coig-lite-4k-0.6e-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "Undi95/UndiMix-v1-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "h2oai/h2ogpt-gm-oasst1-en-2048-falcon-7b": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "yeontaek/llama-2-70B-ensemble-v6": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "TheBloke/vicuna-13B-v1.5-16K-GGML": {}, "KnutJaegersberg/black_goo_recipe_a": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, 
"PKU-Alignment/beaver-7b-v1.0-reward": {"architectures": ["LlamaModelForScore"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "KnutJaegersberg/black_goo_recipe_b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "lgaalves/gpt2_open-platypus": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "cointegrated/rut5-base-multitask": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 30000}, "h2oai/h2ogpt-gm-oasst1-multilang-2048-falcon-7b": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "Cheng98/llama-160m": {"architectures": ["LlamaForCausalLM"], "hidden_size": 768, "intermediate_size": 3072, "num_attention_heads": 12, "num_hidden_layers": 12, "vocab_size": 32000}, "Andron00e/YetAnother_Open-Llama-3B-LoRA-OpenOrca": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "lgaalves/gpt2_guanaco-dolly-platypus": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "gagan3012/k2t-base": {"architectures": ["T5WithLMHeadModel"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "marcchew/Platypus-2-7B-LaMini-14K": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "lgaalves/gpt2_platypus-dolly-guanaco": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "czearing/article-title-generator": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "luffycodes/mcq-vicuna-13b-v1.5": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Universal-NER/UniNER-7B-definition": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Meli/GPT2-Prompt": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50259}, "s-nlp/ruT5-base-detox": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "cointegrated/rut5-base-paraphraser": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 30000}, "DevaMalla/llama7b_alpaca_bf16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "Universal-NER/UniNER-7B-type": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/starchat-beta-GPTQ": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 40, "vocab_size": 
49156}, "bigscience/sgpt-bloom-7b1-msmarco": {"architectures": ["BloomForCausalLM"], "n_inner": null, "n_layer": 30, "num_attention_heads": 32, "vocab_size": 250682}, "4bit/Llama-2-13b-chat-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "uukuguy/speechless-llama2-hermes-orca-platypus-wizardlm-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "ClueAI/PromptCLUE-base-v1-5": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "budecosystem/genz-13b-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/LlongOrca-13B-16K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32004}, "ozcangundes/mt5-multitask-qa-qg-turkish": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250102}, "EleutherAI/pythia-410m-v0": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 1024, "intermediate_size": 4096, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50304}, "CHIH-HUNG/llama-2-13b-FINETUNE2_3w": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "sonoisa/t5-base-japanese-v1.1": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "bolbolzaban/gpt2-persian": {"n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 25000, "architectures": ["GPT2LMHeadModel"]}, "google/t5-large-ssm": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "DeepPavlov/rudialogpt3_medium_based_on_gpt2_v2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "Mikivis/xuanxuan": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "uukuguy/speechless-llama2-hermes-orca-platypus-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "KnutJaegersberg/black_goo_recipe_c": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "beaugogh/Llama2-7b-sharegpt4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Salesforce/codet5p-770m-py": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32100}, "codefuse-ai/CodeFuse-CodeLlama-34B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "AUTOMATIC/promptgen-majinai-safe": {"architectures": 
["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 6, "vocab_size": 50257}, "reciprocate/shepherd-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Devio/test-22B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 40, "vocab_size": 32000}, "acrastt/Bean-3B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "TheBloke/L2-MythoMax22b-Instruct-Falseblock-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 40, "vocab_size": 32000}, "vihangd/smartplat-3b-v1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "jinaai/jina-embedding-b-en-v1": {"architectures": ["T5EncoderModel"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "yahma/llama-13b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "uukuguy/speechless-codellama-orca-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "CHIH-HUNG/llama-2-13b-FINETUNE1_17w": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "VMware/open-llama-13b-open-instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "ToolBench/ToolLLaMA-2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "luffycodes/mcq-hal-vicuna-13b-v1.5": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "TheBloke/BigTranslate-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 53613}, "PeanutJar/LLaMa-2-PeanutButter_v18_A-7B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "openbmb/UltraLM-65b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "Mikivis/gpt2-large-lora-sft": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_inner": null, "n_layer": 36, "vocab_size": 50257}, "Devio/test-3b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 16, "vocab_size": 32000}, "akhooli/gpt2-small-arabic": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_layer": 12, "vocab_size": 50257}, "Rardilit/Panther_v1": {"architectures": ["LLaMAForCausalLM"], "hidden_size": 4096, 
"intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "ValiantLabs/ShiningValiant": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "Devio/test100": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 40, "vocab_size": 32000}, "Devio/testC": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/Chronoboros-33B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "TheBloke/Pygmalion-13B-SuperHOT-8K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "vihangd/smartplat-3b-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "laituan245/t5-v1_1-small-smiles2caption-ft-from-pretrained-c4": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 32128}, "4bit/Llama-2-7b-Chat-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "CHIH-HUNG/llama-2-13b-FINETUNE2_3w-gate_up_down_proj": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "CHIH-HUNG/llama-2-13b-FINETUNE2_3w-q_k_v_o_proj": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/vicuna-33B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "Devio/test-1400": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/gpt4-alpaca-lora-30B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "notstoic/pygmalion-13b-4bit-128g": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "NousResearch/Yarn-Llama-2-7b-128k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Den4ikAI/FRED-T5-LARGE_text_qa": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 50364}, "valhalla/t5-base-qa-qg-hl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32102}, "Undi95/ReMM-L2-13B-v1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, 
"vocab_size": 32000}, "TheBloke/Zarablend-L2-7B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "KnutJaegersberg/black_goo_recipe_d": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "ckip-joint/bloom-1b1-zh": {"architectures": ["BloomModel"], "hidden_size": 1536, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 250880}, "seonglae/llama-2-13b-chat-hf-gptq": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Trelis/Llama-2-7b-chat-hf-sharded-bf16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "KnutJaegersberg/LLongMA-3b-LIMA": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "h2oai/h2ogpt-gm-oasst1-en-xgen-7b-8k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 51200}, "PeanutJar/LLaMa-2-PeanutButter_v18_B-7B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "ehartford/WizardLM-1.0-Uncensored-CodeLlama-34b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "uukuguy/speechless-codellama-orca-platypus-13b-0.10e": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "DeepESP/gpt2-spanish": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "paust/pko-flan-t5-large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 50358}, "ThomasNLG/t5-qa_squad2neg-en": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "PharMolix/BioMedGPT-LM-7B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "eenzeenee/t5-base-korean-summarization": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 50358}, "porkorbeef/Llama-2-13b-public": {"architectures": ["LlamaModel"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/WizardLM-Uncensored-Falcon-7B-GPTQ": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65025}, "dahara1/weblab-10b-instruction-sft-GPTQ": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4864, "intermediate_size": 19456, "num_attention_heads": 38, "num_hidden_layers": 36, "vocab_size": 50277}, "CHIH-HUNG/llama-2-13b-FINETUNE2_TEST_2.2w": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, 
"num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "mrm8488/t5-small-finetuned-emotion": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "gurgutan/saiga2-13b-4bit": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "IlyaGusev/rut5_base_sum_gazeta": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 30000}, "TheBloke/Llama-2-13B-German-Assistant-v4-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 37632}, "gaodrew/OpenOrca-Platypus2-13B-thera-1250": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "minlik/chinese-llama-7b-merged": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 49953}, "TheBloke/Stable-Platypus2-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/Luna-AI-Llama2-Uncensored-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "allenai/t5-small-squad2-question-generation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "microsoft/bloom-deepspeed-inference-fp16": {"architectures": ["BloomModel"], "n_layer": 70, "num_attention_heads": 112, "vocab_size": 250880}, "csebuetnlp/banglat5": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "potsawee/t5-large-generation-race-QuestionAnswer": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "grammarly/coedit-xl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32100}, "Narrativaai/bloom-560m-finetuned-totto-table-to-text": {"architectures": ["BloomForCausalLM"], "hidden_size": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 250880}, "jjaaaww/posi_13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "IDEA-CCNL/Randeng-T5-784M-MultiTask-Chinese": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32596}, "Undi95/Nous-Hermes-13B-Code": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32032}, "paust/pko-t5-large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 50358}, "learnanything/llama-7b-huggingface": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 
32, "vocab_size": 32000}, "weiren119/Taiwan-LLaMa-v1.0-4bits-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "ml6team/keyphrase-generation-t5-small-inspec": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32100}, "TheBloke/CodeLlama-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "Undi95/MLewd-L2-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "tscholak/cxmefzzi": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 16384, "d_model": 1024, "num_heads": 32, "num_layers": 24, "vocab_size": 32102}, "Gaivoronsky/ruGPT-3.5-13B-8bit": {"architectures": ["GPT2LMHeadModel"], "n_embd": 5120, "n_head": 40, "n_inner": null, "n_layer": 40, "vocab_size": 50272}, "SatoruDano/llama-2-7b-finetuned_v1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "ClueAI/PromptCLUE-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "uukuguy/speechless-codellama-orca-airoboros-13b-0.10e": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "anonymous-german-nlp/german-gpt2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_layer": 12, "vocab_size": 52000}, "fxmarty/gpt2-tiny-onnx": {"intermediate_size": 37, "n_embd": 32, "n_head": 4, "n_inner": null, "n_layer": 5, "vocab_size": 1000}, "prakharz/DIAL-FLANT5-XL": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32100}, "h2oai/h2ogpt-oasst1-falcon-40b": {"architectures": ["RWForCausalLM"], "hidden_size": 8192, "n_head": 128, "n_layer": 60, "vocab_size": 65024}, "synapsoft/Llama-2-7b-chat-hf-flan2022-1.2M": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Undi95/ReMM-L2-13B-PIPPA": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "CHIH-HUNG/llama-2-13b-FINETUNE1_17w-gate_up_down_proj": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Fredithefish/Guanaco-7B-Uncensored": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "conceptofmind/Yarn-Llama-2-13b-128k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Undi95/LewdEngine": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/CodeLlama-7B-Instruct-fp16": {"architectures": 
["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32016}, "CHIH-HUNG/llama-2-13b-Open_Platypus_and_ccp_2.6w": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "CHIH-HUNG/llama-2-13b-FINETUNE1_17w-q_k_v_o_proj": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "jondurbin/airoboros-33b-2.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "Salesforce/codet5p-220m-py": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "Danielbrdz/CodeBarcenas-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "SJ-Ray/Re-Punctuate": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "EasthShin/Youth_Chatbot_Kogpt2-base": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 51200}, "ThomasNLG/t5-qg_squad1-en": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "EleutherAI/pythia-160m-deduped-v0": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 768, "intermediate_size": 3072, "num_attention_heads": 12, "num_hidden_layers": 12, "vocab_size": 50304}, "MBZUAI/LaMini-T5-223M": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "HooshvareLab/gpt2-fa": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 42001}, "TFLai/Nova-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "conceptofmind/LLongMA-2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TDC2023/trojan-base-pythia-1.4b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50304}, "phpaiola/ptt5-base-summ-xlsum": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "TFLai/SpeechlessV1-Nova-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/stablecode-instruct-alpha-3b-GPTQ": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 49152}, "stanford-crfm/music-small-800k": {"vocab_size": 55028, "n_embd": 768, "n_layer": 12, "n_head": 12, "n_inner": null, "architectures": null}, "TFLai/EnsembleV5-Nova-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 
40, "num_hidden_layers": 40, "vocab_size": 32002}, "declare-lab/flan-alpaca-xl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "jpwahle/t5-large-word-sense-disambiguation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "lizhuang144/flan-t5-large-factual-sg": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "DKYoon/mt5-base-lm-adapt": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "TheBloke/guanaco-65B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "Salesforce/codegen25-7b-instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 51200}, "bigscience-data/sgpt-bloom-1b7-nli": {"architectures": ["BloomModel"], "n_inner": null, "n_layer": 24, "num_attention_heads": 16, "vocab_size": 250880}, "TurkuNLP/gpt3-finnish-small": {"architectures": ["BloomModel"], "hidden_size": 768, "n_head": 12, "n_layer": 12, "vocab_size": 131072}, "jordiclive/flan-t5-3b-summarizer": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "marblyso/DialoGPT-small-what-the-fuck": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "retrieva-jp/t5-small-short": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 32128}, "codeparrot/codeparrot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1600, "n_head": 25, "n_inner": null, "n_layer": 48, "vocab_size": 32768}, "openthaigpt/openthaigpt-1.0.0-beta-7b-chat-ckpt-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 56554}, "Rocketknight1/falcon-rw-1b": {"architectures": ["FalconForCausalLM"], "hidden_size": 2048, "num_attention_heads": 32, "num_hidden_layers": 24, "vocab_size": 50304}, "TaylorAI/Flash-Llama-30M-20001": {"architectures": ["LlamaForCausalLM"], "hidden_size": 384, "intermediate_size": 1024, "num_attention_heads": 12, "num_hidden_layers": 4, "vocab_size": 32000}, "castorini/t5-base-canard": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "r3dhummingbird/DialoGPT-medium-joshua": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "IDEA-CCNL/Wenzhong2.0-GPT2-110M-BertTokenizer-chinese": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": 3072, "n_layer": 12, "vocab_size": 21133}, "TigerResearch/tigerbot-13b-chat-8bit": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 60928}, "pranavpsv/gpt2-genre-story-generator": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_layer": 12, "vocab_size": 50266}, "Photolens/llama-2-7b-langchain-chat": 
{"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "ck46/t5-base-hotpot-qa-qg": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32102}, "castorini/monot5-small-msmarco-10k": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "yujiepan/llama-2-tiny-random": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8, "intermediate_size": 32, "num_attention_heads": 2, "num_hidden_layers": 1, "vocab_size": 32000}, "castorini/doc2query-t5-base-msmarco": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "oliverguhr/spelling-correction-multilingual-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "allenai/unifiedqa-t5-11b": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 65536, "d_model": 1024, "num_heads": 128, "num_layers": 24, "vocab_size": 32128}, "TheBloke/CodeLlama-34B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "snorkelai/sdnet": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "SiberiaSoft/SiberianFRED-T5-XL": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1536, "num_heads": 24, "num_layers": 24, "vocab_size": 50365}, "sultan/ArabicT5-Base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 20, "vocab_size": 32000}, "nikaashpuri/gpt-expt-sp-v3-K-600-MA-Mac-actions-kmeans-v16": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 1913}, "TheBloke/Yarn-Llama-2-13B-128K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "allenai/cosmo-xl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "flax-community/gpt2-bengali": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "EleutherAI/pythia-410m-deduped-v0": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 1024, "intermediate_size": 4096, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50304}, "Writer/palmyra-small": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": 3072, "n_layer": 12, "vocab_size": 50257}, "LukasStankevicius/t5-base-lithuanian-news-summaries-175": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "laituan245/molt5-large-caption2smiles": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "google/ul2": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 16384, "d_model": 4096, "num_heads": 16, "num_layers": 32, "vocab_size": 32128}, "Suva/uptag-keyphrase-model": {"architectures": ["T5ForConditionalGeneration"], 
"d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "TheBloke/orca_mini_7B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TusharJoshi89/title-generator": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "juierror/flan-t5-text2sql-with-schema": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "hf-tiny-model-private/tiny-random-T5ForConditionalGeneration": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 37, "d_model": 32, "num_heads": 4, "num_layers": 5, "vocab_size": 32100}, "stacked-summaries/flan-t5-large-stacked-samsum-1024": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "TheBloke/WizardLM-33B-V1-0-Uncensored-SuperHOT-8K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "persiannlp/mt5-base-parsinlu-opus-translation_fa_en": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "gurgutan/ruGPT-13B-4bit": {"architectures": ["GPT2LMHeadModel"], "n_embd": 5120, "n_head": 40, "n_inner": null, "n_layer": 40, "vocab_size": 50272}, "TheBloke/upstage-llama-30b-instruct-2048-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "sdadas/polish-gpt2-medium": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": 4096, "n_layer": 24, "vocab_size": 51200}, "aubmindlab/aragpt2-medium": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 64000}, "SEBIS/code_trans_t5_large_source_code_summarization_python_multitask_finetune": {"architectures": ["T5Model"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "maximxls/text-normalization-ru-terrible": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 256, "num_heads": 4, "num_layers": 3, "vocab_size": 5120}, "TheBloke/llama-2-13B-Guanaco-QLoRA-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "ziqingyang/chinese-alpaca-2-13b-16k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 55296}, "KETI-AIR/ke-t5-large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 64128}, "ibm/qcpg-sentences": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32158}, "tiiuae/falcon-rw-7b": {"architectures": ["FalconForCausalLM"], "hidden_size": 4096, "num_attention_heads": 64, "num_hidden_layers": 36, "vocab_size": 65024}, "timdettmers/guanaco-13b-merged": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 
32000}, "h2oai/h2ogpt-oig-oasst1-falcon-40b": {"architectures": ["RWForCausalLM"], "hidden_size": 8192, "n_head": 128, "n_layer": 60, "vocab_size": 65024}, "coffeeee/nsfw-story-generator": {"architectures": ["GPT2Model"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "zpn/llama-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "crumb/bloom-560m-RLHF-SD2-prompter-aesthetic": {"architectures": ["BloomForCausalLM"], "hidden_size": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 250880}, "kalpeshk2011/dipper-paraphraser-xxl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 10240, "d_model": 4096, "num_heads": 64, "num_layers": 24, "vocab_size": 32128}, "TheBloke/WizardLM-13B-V1.0-Uncensored-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/WizardLM-13B-V1-1-SuperHOT-8K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "allenai/unifiedqa-t5-small": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "oliverguhr/spelling-correction-german-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "ThomasSimonini/t5-end2end-question-generation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32101}, "asi/gpt-fr-cased-base": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1792, "n_head": 14, "n_inner": null, "n_layer": 24, "vocab_size": 50000}, "lora-x/backpack-gpt2": {"architectures": ["BackpackGPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50264}, "TheBloke/Vigogne-2-13B-Instruct-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "ai-forever/ruT5-large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "ml6team/keyphrase-generation-t5-small-openkp": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32100}, "mrm8488/t5-base-finetuned-e2m-intent": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "nikaashpuri/gpt-expt-sp-v3-K-600-MA-Mac-actions-kmeans-v14": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 1902}, "TheBloke/Marx-3b-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "TheBloke/Dolphin-Llama2-7B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "oscorrea/scores-falcon40b-sm-merged": {"architectures": ["RWForCausalLM"], "hidden_size": 8192, "n_head": 128, "n_layer": 60, "vocab_size": 65024}, 
"lmqg/t5-small-squad-qag": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32101}, "ehartford/WizardLM-Uncensored-Falcon-40b": {"architectures": ["RWForCausalLM"], "hidden_size": 8192, "n_head": 128, "n_layer": 60, "vocab_size": 65025}, "persiannlp/mt5-base-parsinlu-sentiment-analysis": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "VietAI/vit5-large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 36100}, "thanathorn/mt5-cpe-kmutt-thai-sentence-sum": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "Blackroot/Hermes-Kimiko-13B-f16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32032}, "CarperAI/stable-vicuna-13b-delta": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "j5ng/kullm-12.8b-GPTQ-8bit": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 30080}, "TheBloke/ReMM-SLERP-L2-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Weni/WeniGPT-L-70": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "valhalla/t5-small-qg-hl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32102}, "retrieva-jp/t5-xl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "TheBloke/Wizard-Vicuna-30B-Superhot-8K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "openllmplayground/openalpaca_3b_600bt_preview": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "ArmelR/starcoder-gradio-v0": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 40, "vocab_size": 49152}, "chanind/frame-semantic-transformer-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "akreal/tiny-random-gpt2": {"intermediate_size": 37, "n_embd": 32, "n_head": 4, "n_inner": null, "n_layer": 5, "vocab_size": 99}, "Neko-Institute-of-Science/LLaMA-7B-HF": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Writer/palmyra-med-20b": {"architectures": ["GPT2LMHeadModel"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 44, "vocab_size": 50259}, "SiberiaSoft/SiberianPersonaFred": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1536, "num_heads": 24, "num_layers": 24, "vocab_size": 
50364}, "mrm8488/spanish-gpt2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "unicamp-dl/translation-en-pt-t5": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "OFA-Sys/gsm8k-rft-llama7b-u13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "liuhaotian/LLaVA-13b-delta-v0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32003}, "huggingface/falcon-40b-gptq": {"architectures": ["RWForCausalLM"], "hidden_size": 8192, "n_head": 128, "n_layer": 60, "vocab_size": 65024}, "Ravi07bec/llama-qlora-65b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "PKU-Alignment/alpaca-7b-reproduced": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "Unbabel/gec-t5_small": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "TheBloke/Speechless-Llama2-Hermes-Orca-Platypus-WizardLM-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "MIIB-NLP/Arabic-question-generation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 110080}, "google/t5-large-ssm-nq": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "stanford-crfm/arwen-gpt2-medium-x21": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "sentence-transformers/gtr-t5-xxl": {"architectures": ["T5EncoderModel"], "d_ff": 65536, "d_model": 1024, "num_heads": 128, "num_layers": 24, "vocab_size": 32128}, "TheBloke/Nous-Hermes-Llama2-70B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32001}, "paust/pko-t5-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 50358}, "allenai/tk-instruct-11b-def": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 65536, "d_model": 1024, "num_heads": 128, "num_layers": 24, "vocab_size": 32128}, "amphora/FinABSA": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32105}, "TurkuNLP/gpt3-finnish-13B": {"architectures": ["BloomModel"], "hidden_size": 5120, "n_head": 40, "n_layer": 40, "vocab_size": 131072}, "PAIXAI/Astrid-LLama-7B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Aalaa/opt-125m-wikitext2": {"architectures": ["OPTForCausalLM"], "hidden_size": 768, "num_attention_heads": 12, "num_hidden_layers": 12, "vocab_size": 50272}, "hf-internal-testing/tiny-random-GPTNeoXForQuestionAnswering": 
{"architectures": ["GPTNeoXForQuestionAnswering"], "hidden_size": 32, "intermediate_size": 37, "num_attention_heads": 4, "num_hidden_layers": 5, "vocab_size": 1024}, "cssupport/t5-small-awesome-text-to-sql": {"vocab_size": 32128, "d_model": 512, "d_ff": 2048, "num_layers": 6, "num_heads": 8, "architectures": ["T5ForConditionalGeneration"]}, "TheBloke/MythoMix-L2-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "conceptofmind/Hermes-LLongMA-2-13b-8k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "lysandre/arxiv-nlp": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_layer": 12, "vocab_size": 50257}, "Pcik/DialoGPT-medium-Kirby": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "PY007/SLM_1-4B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 2048, "intermediate_size": 5632, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50432}, "ceshine/t5-paraphrase-paws-msrp-opinosis": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "microsoft/bloom-deepspeed-inference-int8": {"architectures": ["BloomModel"], "n_layer": 70, "num_attention_heads": 112, "vocab_size": 250880}, "TheBloke/PuddleJumper-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "gorilla-llm/gorilla-falcon-7b-hf-v0": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "TheBloke/starcoder-GPTQ": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 40, "vocab_size": 49152}, "lmsys/longchat-7b-16k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "DAMO-NLP-MT/polylm-1.7b": {"architectures": ["GPT2LMHeadModel"], "n_embd": 2048, "n_head": 16, "n_inner": 8192, "n_layer": 24, "vocab_size": 256000}, "Salesforce/xgen-7b-4k-base": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 51200}, "DAMO-NLP-MT/polylm-13b": {"architectures": ["PolyLMHeadModel"], "n_embd": 5120, "n_head": 40, "n_inner": 20480, "n_layer": 40, "vocab_size": 256000}, "dbddv01/gpt2-french-small": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "EleutherAI/pythia-70m-deduped-v0": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 512, "intermediate_size": 2048, "num_attention_heads": 8, "num_hidden_layers": 6, "vocab_size": 50304}, "algolet/mt5-base-chinese-qg": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250101}, "hf-internal-testing/tiny-random-BloomForQuestionAnswering": {"architectures": ["BloomForQuestionAnswering"], "hidden_size": 32, "n_head": 4, "n_layer": 5, "vocab_size": 1024}, "hf-internal-testing/tiny-random-BloomForTokenClassification": {"architectures": ["BloomForTokenClassification"], "hidden_size": 32, "n_head": 4, 
"n_layer": 5, "vocab_size": 1024}, "flax-community/t5-base-cnn-dm": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "hf-internal-testing/tiny-random-BloomForSequenceClassification": {"architectures": ["BloomForSequenceClassification"], "hidden_size": 32, "n_head": 4, "n_layer": 5, "vocab_size": 1024}, "tau/t5-v1_1-large-rss": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "TheBloke/airoboros-l2-13b-gpt4-m2.0-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "hf-internal-testing/tiny-random-GPTNeoXForSequenceClassification": {"architectures": ["GPTNeoXForSequenceClassification"], "hidden_size": 32, "intermediate_size": 37, "num_attention_heads": 4, "num_hidden_layers": 5, "vocab_size": 1024}, "allegro/plt5-small": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 50048}, "TheBloke/stable-vicuna-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "hf-internal-testing/tiny-random-GPTNeoXForTokenClassification": {"architectures": ["GPTNeoXForTokenClassification"], "hidden_size": 32, "intermediate_size": 37, "num_attention_heads": 4, "num_hidden_layers": 5, "vocab_size": 1024}, "TheBloke/WizardLM-7B-V1-0-Uncensored-SuperHOT-8K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "HuggingFaceH4/tiny-random-LlamaForSequenceClassification": {"architectures": ["LlamaForSequenceClassification"], "hidden_size": 16, "intermediate_size": 64, "num_attention_heads": 4, "num_hidden_layers": 2, "vocab_size": 32000}, "hf-internal-testing/tiny-random-GPTNeoXModel": {"architectures": ["GPTNeoXModel"], "hidden_size": 32, "intermediate_size": 37, "num_attention_heads": 4, "num_hidden_layers": 5, "vocab_size": 1024}, "IlyaGusev/rut5_base_headline_gen_telegram": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 30000}, "lgaalves/gpt2_camel_physics-platypus": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "lightonai/alfred-40b-0723": {"architectures": ["RWForCausalLM"], "hidden_size": 8192, "n_head": 128, "n_layer": 60, "vocab_size": 65024}, "KETI-AIR/ke-t5-small": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 64128}, "ibm/regen-disambiguation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "vihangd/smartplat-3b-v3": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "TheBloke/OpenBuddy-Llama2-13B-v11.1-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 37632}, "BlinksFly/Harry_Potter-Ai": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": 
null, "n_layer": 24, "vocab_size": 50257}, "conceptofmind/Yarn-Llama-2-7b-128k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "liujch1998/vera": {"architectures": ["T5EncoderModel"], "d_ff": 10240, "d_model": 4096, "num_heads": 64, "num_layers": 24, "vocab_size": 32128}, "kaist-ai/CoT-T5-11B": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 10240, "d_model": 4096, "num_heads": 64, "num_layers": 24, "vocab_size": 32128}, "lintang/t5-v1_1-base-flan": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "sentence-transformers/sentence-t5-xxl": {"architectures": ["T5EncoderModel"], "d_ff": 65536, "d_model": 1024, "num_heads": 128, "num_layers": 24, "vocab_size": 32128}, "TheBloke/vicuna-7B-v1.5-16K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "retrieva-jp/t5-large-long": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "retrieva-jp/t5-base-long": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "upstage/SOLAR-0-70b-8bit": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "jerteh/gpt2-vrabac": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 49152}, "Parth/boolean": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "hf-internal-testing/tiny-random-GPTBigCodeForSequenceClassification": {"architectures": ["GPTBigCodeForSequenceClassification"], "n_embd": 32, "n_head": 4, "n_inner": 37, "n_layer": 5, "vocab_size": 1024}, "hf-internal-testing/tiny-random-GPTBigCodeForTokenClassification": {"architectures": ["GPTBigCodeForTokenClassification"], "n_embd": 32, "n_head": 4, "n_inner": 37, "n_layer": 5, "vocab_size": 1024}, "megagonlabs/t5-base-japanese-web": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32000}, "MisguidedKerbal/DialoGPT-kerbalV3": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "praeclarum/cuneiform": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "uw-hai/polyjuice": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "reciprocate/tiny-llama": {"architectures": ["LlamaForCausalLM"], "hidden_size": 64, "intermediate_size": 64, "num_attention_heads": 1, "num_hidden_layers": 1, "vocab_size": 32000}, "luqh/ClinicalT5-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "stanford-crfm/celebrimbor-gpt2-medium-x81": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "TheBloke/CodeLlama-13B-Python-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 
5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "laituan245/molt5-large-smiles2caption": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "TurkuNLP/gpt3-finnish-8B": {"architectures": ["BloomModel"], "hidden_size": 4096, "n_head": 32, "n_layer": 32, "vocab_size": 131072}, "NeuML/t5-small-txtsql": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "malteos/bloom-6b4-clp-german": {"hidden_size": 4096, "n_head": 32, "n_layer": 30, "vocab_size": 50304}, "GT4SD/multitask-text-and-chemistry-t5-base-augm": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "allenai/open-instruct-stanford-alpaca-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "CarperAI/randomwalks": {"architectures": ["GPT2LMHeadModel"], "n_embd": 144, "n_head": 6, "n_inner": null, "n_layer": 6, "vocab_size": 23}, "unicamp-dl/mt5-13b-mmarco-100k": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 10240, "d_model": 4096, "num_heads": 64, "num_layers": 24, "vocab_size": 250112}, "lmqg/t5-small-squad-qg-ae": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32101}, "naltukhov/joke-generator-rus-t5": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "hf-internal-testing/tiny-random-UMT5Model": {"architectures": ["UMT5Model"], "d_ff": 37, "d_model": 32, "num_heads": 4, "num_layers": 5, "vocab_size": 256300}, "rentcarsAI/falcon-7b-codegenerator-qlora-merged": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "panggi/t5-base-indonesian-summarization-cased": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "hf-internal-testing/tiny-random-UMT5ForQuestionAnswering": {"architectures": ["UMT5ForQuestionAnswering"], "d_ff": 37, "d_model": 32, "num_heads": 4, "num_layers": 5, "vocab_size": 256300}, "UBC-NLP/AraT5-base": {"d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 110080}, "kmewhort/stable-diffusion-prompt-bolster": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 52000}, "TheBloke/Llama-2-13B-GGML": {}, "gaussalgo/T5-LM-Large-text2sql-spider": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "DAMO-NLP-MT/polylm-multialpaca-13b": {"architectures": ["GPT2LMHeadModel"], "n_embd": 5120, "n_head": 40, "n_inner": 20480, "n_layer": 40, "vocab_size": 256000}, "hf-internal-testing/tiny-random-UMT5ForSequenceClassification": {"architectures": ["UMT5ForSequenceClassification"], "d_ff": 37, "d_model": 32, "num_heads": 4, "num_layers": 5, "vocab_size": 256300}, "tinkoff-ai/ruDialoGPT-small": {"n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50261}, "indonesian-nlp/gpt2-medium-indonesian": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, 
"vocab_size": 50257}, "Salesforce/mixqg-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "EleutherAI/pythia-1b-v0": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 8, "num_hidden_layers": 16, "vocab_size": 50304}, "NinedayWang/PolyCoder-2.7B": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50304}, "zanchat/falcon-1b": {"architectures": ["RWForCausalLM"], "hidden_size": 2048, "n_head": 32, "n_layer": 24, "vocab_size": 50304}, "Goodnoway/DialoGPT-nerbalV2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "crumb/llama2-7b-shard-bf16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "sagawa/ReactionT5-retrosynthesis": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 268}, "DKYoon/mt5-large-lm-adapt": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 250112}, "lintang/t5-v1_1-xl-flan": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "castorini/monot5-large-msmarco-10k": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "Ichsan2895/Merak-7B-v3": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "stanford-crfm/caprica-gpt2-small-x81": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "vicgalle/gpt2-open-instruct-v1": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50260}, "philschmid/llama-2-7b-instruction-generator": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "aubmindlab/aragpt2-large": {"architectures": ["GPT2LMHeadModel"], "intermediate_size": 5120, "n_embd": 1280, "n_head": 20, "n_inner": null, "n_layer": 36, "vocab_size": 64000}, "NonzeroCornet34/DialoGPT-small-philbot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "Trelis/Llama-2-7b-chat-hf-sharded-bf16-5GB": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "deep-learning-analytics/wikihow-t5-small": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "JDBN/t5-base-fr-qg-fquad": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32102}, "stanford-crfm/durin-gpt2-medium-x343": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "abjbpi/Dwight_Schrute": {"architectures": 
["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "Spico/Humback-Myx": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "T-Systems-onsite/mt5-small-sum-de-en-v2": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250100}, "kaiyuy/leandojo-lean3-tacgen-byt5-small": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3584, "d_model": 1472, "num_heads": 6, "num_layers": 12, "vocab_size": 384}, "pinkmanlove/llama-33b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "lintang/t5-v1_1-large-flan": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "Naseej/noon-7b": {"architectures": ["BloomForCausalLM"], "hidden_size": 4096, "n_head": 32, "n_inner": null, "n_layer": 30, "vocab_size": 250880}, "chizhikchi/sci-five-radsum23": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32101}, "impyadav/GPT2-FineTuned-Hinglish-Song-Generation": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "elinas/llama-13b-hf-transformers-4.29": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/GodziLLa2-70B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "TheBloke/Llama-2-70B-OASST-1-200-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32016}, "jacobmorrison/tk-instruct-base-lora-experiments": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-3b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "ingen51/DialoGPT-medium-GPT4": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "cointegrated/rut5-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 30000}, "PocketDoc/Dans-CreepingSenseOfDoom": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "tsmatz/mt5_summarize_japanese": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250112}, "domenicrosati/QA2D-t5-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "gorkemgoknar/gpt2chatbotenglish": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_layer": 12, "vocab_size": 50262}, "DeliveryBoy/DiabloGPT-medium-Kurisu": {"architectures": 
["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "philschmid/instruct-igel-001": {"architectures": ["BloomForCausalLM"], "hidden_size": 4096, "n_head": 32, "n_layer": 30, "vocab_size": 50304}, "xDAN2099/xDAN_13B_Zh_Base": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 60928}, "codeparrot/codeparrot-small": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 32768}, "paust/pko-t5-small": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 50358}, "flozi00/Llama-2-13b-german-assistant-v4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 37632}, "doc2query/msmarco-t5-base-v1": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "microsoft/DialogRPT-depth": {"architectures": ["GPT2ForSequenceClassification"], "n_embd": 1024, "n_head": 16, "n_layer": 24, "vocab_size": 50257}, "nomic-ai/gpt4all-13b-snoozy": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "NousResearch/Yarn-Llama-2-13b-64k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "mrm8488/t5-base-e2e-question-generation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "postbot/gpt2-medium-emailgen": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "vanilladucky/Friends_chatting_bot_redefined": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "TheBloke/LlongOrca-7B-16K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32003}, "mutamuta/DialoGPT-spongebob-small": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "Ar4ikov/gpt2-medium-650k-stable-diffusion-prompt-generator": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "TheBloke/HermesLimaRP-L2-7B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "clibrain/Llama-2-7b-ft-instruct-es-gptq-4bit": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/Yarn-Llama-2-7B-128K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "lmqg/mt5-small-jaquad-qg-ae": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "allenai/tk-instruct-base-def-pos": {"architectures": 
["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "davidkim205/komt-Llama-2-7b-chat-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "tangy0/llama-2-7b-dtlpy_v0.4chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TigerResearch/tigerbot-70b-base": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 60928}, "hadifar/eventextraction": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "TintinMeimei/NousResearch-Llama-2-7b-chat-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/airoboros-l2-13b-gpt4-2.0-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Nekochu/Llama-2-13B-fp16-french": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "minhtoan/t5-translation-vietnamese-nom": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 300, "num_heads": 8, "num_layers": 6, "vocab_size": 30100}, "BELLE-2/BELLE-Llama2-13B-chat-0.4M": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "bigscience/T0": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 10240, "d_model": 4096, "num_heads": 64, "num_layers": 24, "vocab_size": 32128}, "andreaskoepf/pythia-1.4b-gpt4all-pretrain": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50288}, "Salesforce/codet5-base-codexglue-clone": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "Chae/scottbot_med": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "TheBloke/LLaMA-7b-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "sagard21/python-code-explainer": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32100}, "stanfordnlp/SteamSHP-flan-t5-xl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "MarinHinawa/DialoGPT-medium-Ene": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "aiautomationlab/german-news-title-gen-mt5": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "TheBloke/vicuna-13B-1.1-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 
13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/Chronos-Hermes-13B-SuperHOT-8K-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "microsoft/DialogRPT-human-vs-machine": {"architectures": ["GPT2ForSequenceClassification"], "n_embd": 1024, "n_head": 16, "n_layer": 24, "vocab_size": 50257}, "uer/gpt2-distil-chinese-cluecorpussmall": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 6, "vocab_size": 21128}, "h2oai/h2ogpt-gm-oasst1-en-2048-falcon-40b-v1": {"architectures": ["RWForCausalLM"], "hidden_size": 8192, "n_head": 128, "n_layer": 60, "vocab_size": 65024}, "davidkim205/komt-Llama-2-13b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "ibm/qcpg-questions": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32155}, "gavin124/gpt2-finetuned-cnn-summarization-v2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50260}, "hogru/MolReactGen-GuacaMol-Molecules": {"architectures": ["GPT2LMHeadModel"], "n_embd": 144, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 93}, "stanford-crfm/darkmatter-gpt2-small-x343": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "conceptofmind/Yarn-Llama-2-7b-64k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Radicalkiddo/DialoGPT-small-Radical": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "Ninja5000/DialoGPT-medium-HarryPotter": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "theblackcat102/alpaca-title-generator-mt0-large": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 250112}, "transfaeries/Twilight-Sparkle-GPT": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "TheBloke/Vigogne-2-7B-Instruct-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "markofhope/DialoGPT-medium-HarringtonBot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "seeksery/DialoGPT-calig3": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "beomi/kcgpt2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 55000}, "vilm/vietcuna-3b": {"architectures": ["BloomForCausalLM"], "hidden_size": 2560, "n_head": 32, "n_inner": null, "n_layer": 30, "vocab_size": 250880}, "IDEA-CCNL/Randeng-T5-784M": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32596}, "vwxyzjn/starcoderbase-triviaqa": {"architectures": 
["GPTBigCodeForCausalLM"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 40, "vocab_size": 49152}, "IDEA-CCNL/Wenzhong2.0-GPT2-3.5B-chinese": {"architectures": ["GPT2LMHeadModel"], "n_embd": 3072, "n_head": 32, "n_inner": 12288, "n_layer": 30, "vocab_size": 50304}, "TheBloke/Llama-2-7b-Chat-GGUF": {}, "MingZhong/unieval-dialog": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32100}, "OpenAssistant/falcon-40b-megacode2-oasst": {"architectures": ["FalconForCausalLM"], "hidden_size": 8192, "num_attention_heads": 128, "num_hidden_layers": 60, "vocab_size": 65152}, "axiong/PMC_LLaMA_13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "codeparrot/codeparrot-small-multi": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": 3072, "n_layer": 12, "vocab_size": 32768}, "EleutherAI/pythia-6.9b-deduped-v0": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "Riiid/sheep-duck-llama-2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "alibaba-pai/pai-bloom-1b1-text2prompt-sd": {"architectures": ["BloomForCausalLM"], "hidden_size": 1536, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 250880}, "TheBloke/Chronos-Beluga-v2-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "malmarjeh/t5-arabic-text-summarization": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 110080}, "GarfExit/DialogGPT-medium-707": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "audreycl/DialoGPT-RPF": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "florentiino/DialoGPT-small-harrypotter": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "JazzyLucas/DialoGPT-small-TonyStark": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "marblyso/DialoGPT-medium-marina": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "polandball/GPT-Polen": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "GarrisonBot/DialoGPT-medium-herbertgarrison": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "XuYipei/kw-cutegpt-13b-ift": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 49954}, "TheBloke/Pygmalion-7B-SuperHOT-8K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "timothykim04/DialoGPT-medium-harrypotter": {"architectures": ["GPT2LMHeadModel"], "n_embd": 
1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "allegro/plt5-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 50048}, "lengoctuong/gpt2-finetuned-wikitext2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "PyaeSoneK/Fine_Tuned_Pythia_smallest_140_legal": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 1024, "intermediate_size": 4096, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50304}, "psyche/KoT5-paraphrase-generation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "microsoft/DialogRPT-width": {"architectures": ["GPT2ForSequenceClassification"], "n_embd": 1024, "n_head": 16, "n_layer": 24, "vocab_size": 50257}, "Dahoas/pythia-1B-static-sft": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50304}, "jerteh/gpt2-orao": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_inner": null, "n_layer": 36, "vocab_size": 49152}, "TheBloke/LosslessMegaCoder-Llama2-13B-Mini-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32007}, "Ngao/DialoGPT-small-ngao": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "4i-ai/Llama-2-7b-alpaca-es": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "asifhugs/open_llama_7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "RajuKandasamy/tamillama_tiny_30m": {"architectures": ["LlamaForCausalLM"], "hidden_size": 256, "intermediate_size": 786, "num_attention_heads": 8, "num_hidden_layers": 16, "vocab_size": 32000}, "stabilityai/StableBeluga1-Delta": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "Linly-AI/Chinese-LLaMA-2-7B-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 40076}, "flax-community/gpt2-base-thai": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "shalomma/llama-7b-embeddings": {"architectures": ["LLaMAForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/llama2-7b-chat-codeCherryPop-qLoRA-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "KhanAdeeb/model-tony-stark": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "spy24/autonlp-UK-to-US-600416931": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "DKYoon/mt5-small-lm-adapt": {"architectures": 
["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250112}, "TheBloke/Llama-2-70B-GGML": {}, "TheBloke/model_007-70B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "abhi-8/DialoGPT-medium-Joshua-twevy": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "camenduru/MiniGPT4-7B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "paripi/Malishka": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "SiberiaSoft/SiberianPersonaFred_large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 50364}, "Alred/t5-small-finetuned-summarization-cnn": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "Leomas/DialoGPT-medium-Leomas": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "TehVenom/Pygmalion-7b-Merged-Safetensors": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "marblyso/DialoGPT-medium-pearl": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "lmqg/mt5-small-dequad-qg-ae": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "TheBloke/WizardLM-Uncensored-Falcon-40B-GPTQ": {"architectures": ["RWForCausalLM"], "hidden_size": 8192, "n_head": 128, "n_layer": 60, "vocab_size": 65025}, "NlpHUST/t5-small-vi-summarization": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250112}, "Elucia/Diluc_Bot_1.3": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "h2oai/h2ogpt-16k-codellama-34b-instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "microsoft/CodeGPT-small-java": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_layer": 12, "vocab_size": 52000}, "Starry/COUNTNARC": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "OpenMEDLab/PULSE-7bv5": {"architectures": ["BloomForCausalLM"], "hidden_size": 4096, "n_head": 32, "n_inner": null, "n_layer": 30, "vocab_size": 250880}, "marblyso/DialoGPT-medium-aubrey": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "Ashypaws/DialoGPT-medium-Ashybot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "YTTD/DialoGPT-medium-sou": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "marblyso/DialoGPT-medium-hero": {"architectures": ["GPT2LMHeadModel"], "n_embd": 
1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "Trelis/Llama-2-7b-chat-hf-function-calling-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "NousResearch/CodeLlama-7b-hf-flash": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32016}, "TheBloke/CodeLlama-34B-Python-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "musabgultekin/functionary-7b-v1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "amasand/gpt2-imdb-pos-ppo": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "bigscience/bloomz-7b1-p3": {"architectures": ["BloomForCausalLM"], "n_inner": null, "n_layer": 30, "num_attention_heads": 32, "vocab_size": 250880}, "rirv938/wizard-vicuna-13b-uncensored-awq-4bit-g128": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "marblyso/DialoGPT-medium-marblesbagel": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "vilm/vietcuna-7b-v3": {"architectures": ["BloomForCausalLM"], "hidden_size": 4096, "n_head": 32, "n_inner": null, "n_layer": 30, "vocab_size": 250880}, "stas/t5-very-small-random": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 256, "d_model": 64, "num_heads": 4, "num_layers": 8, "vocab_size": 32128}, "KeLiu/Title-Gen": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "vampiregirl/DialoGPT-medium-lennoxram": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "sharpbai/Llama-2-7b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "sam2ai/openllama_odia_3b_base": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "lmqg/mt5-small-esquad-qg-ae": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "stanfordnlp/SteamSHP-flan-t5-large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "allenai/tulu-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "JNDankwah/DialoGPT-small-ThorCB": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "lmqg/mt5-small-ruquad-qg": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "lmqg/mt5-small-ruquad-qg-ae": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, 
"num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "Dinocroth/DialoGPT-medium-Trevor-PhilipsV2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "Speedemon/jake-peralta-ai": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "chanind/frame-semantic-transformer-small": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "stanford-crfm/music-medium-800k": {"vocab_size": 55028, "n_embd": 1024, "n_layer": 24, "n_head": 16, "n_inner": null, "architectures": null}, "h2oai/h2ogpt-16k-codellama-7b-instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32016}, "TheBloke/Pygmalion-2-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "huggingface-course/codeparrot-ds": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50000}, "KakoSi/AcciGPT-smol": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "lmqg/mt5-small-itquad-qg-ae": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "EggsInAJar/DialoGPT-small-MerrickBot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "razent/SciFive-large-Pubmed": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "armandnlp/gpt2-TOD_finetuned_SGD": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50271}, "RuterNorway/Llama-2-13b-chat-norwegian": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "AIDC-ai-business/Marcoroni-7B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "deep-learning-analytics/GrammarCorrector": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "redrussianarmy/gpt2-turkish-cased": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "lmqg/mt5-small-frquad-qg-ae": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "psyche/KoT5-summarization": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "loitran/DialoGPT-medium-peppapig": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "openchat/openchat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "saikatc/NatGen": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 
3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "Coderhuynin/DialoGPT-large-TonyStark": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "declare-lab/flan-sharegpt-xl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "TheBloke/Chronos-Hermes-13B-SuperHOT-8K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "usvsnsp/pythia-6.9b-rm-full-hh-rlhf": {"architectures": ["GPTNeoXForSequenceClassification"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50277}, "yujiepan/llama-2-tiny-3layers-random": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8, "intermediate_size": 32, "num_attention_heads": 2, "num_hidden_layers": 3, "vocab_size": 32000}, "allenai/unifiedqa-v2-t5-3b-1363200": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 16384, "d_model": 1024, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "gsarti/it5-small": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 32103}, "simple2312/DialoGPT-Ellie": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "kashif/llama-7b_stack-exchange_RM_peft-adapter-merged": {"architectures": ["LlamaForSequenceClassification"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "larryvrh/mt5-translation-ja_zh": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 250112}, "j5ng/et5-typos-corrector": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 45100}, "vilsonrodrigues/falcon-7b-sharded": {"architectures": ["FalconForCausalLM"], "hidden_size": 4544, "num_attention_heads": 71, "num_hidden_layers": 32, "vocab_size": 65024}, "felinecity/ScaraBot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "persiannlp/mt5-base-parsinlu-translation_en_fa": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "Jonesy/HomersNightOut": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "conceptofmind/LLongMA-2-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/LoKuS-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "shibing624/mengzi-t5-base-chinese-correction": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "Lamia/DialoGPT-small-Sundrop": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "Blizzchor/DialoGPT-medium-gamora": {"architectures": ["GPT2LMHeadModel"], 
"n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "jlsalty9999/DialoGPT-medium-Riddle": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "uer/gpt2-chinese-lyric": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 21128}, "LMFlow/Full-Robin-7b-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "llm-book/t5-base-long-livedoor-news-corpus": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "nuggster/DialoGPT-small-ianbot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "Tristan/gpt2_reward_summarization": {"architectures": ["GPT2ForSequenceClassification"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "MysteriousAmazon/DialoGPT-medium-freddy": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "wdidfau/Pygmalion-13b-Landmark-Attention-Merged": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "kaiyuy/leandojo-lean3-retriever-byt5-small": {"architectures": ["T5EncoderModel"], "d_ff": 3584, "d_model": 1472, "num_heads": 6, "num_layers": 12, "vocab_size": 384}, "kz919/ntk_scaled_open_llama_3b_32k": {"architectures": ["NTKScaledLlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "abhi-8/DialoGPT-medium-Rick": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "polymath707/llama-2-13b-miniguanaco": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Langboat/bloom-389m-zh": {"architectures": ["BloomForCausalLM"], "hidden_size": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 42437}, "Techcs002/DialoGPT-medium-AboTalkTest": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "AIDC-ai-business/Marcoroni-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "ybelkada/t5-3b-sharded": {"architectures": ["T5WithLMHeadModel"], "d_ff": 16384, "d_model": 1024, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "benjamin/gerpt2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "abhi-8/DialoGPT-medium-Michael": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "cahya/gpt2-small-indonesian-522M": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "marianna13/flan-t5-base-summarization": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "Lakoc/fisher_dec_6_layers": 
{"architectures": ["GPT2Model"], "n_embd": 512, "n_head": 4, "n_inner": null, "n_layer": 6, "vocab_size": 5000}, "simple2312/DialoGPT-nayeon": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "sjrhuschlee/flan-t5-base-squad2": {"architectures": ["T5ForQuestionAnswering"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "eqhylxx/full-vicuna-160m": {"architectures": ["LlamaForCausalLM"], "hidden_size": 768, "intermediate_size": 3072, "num_attention_heads": 12, "num_hidden_layers": 12, "vocab_size": 32000}, "Ashypaws/DialoGPT-medium-Kitaibot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "TheBloke/Wizard-Vicuna-7B-Uncensored-SuperHOT-8K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "NHStudios/DialoGPT-small-jake": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "ZipperXYZ/DialoGPT-medium-TheWorldMachineExpressive2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "IIC/mt5-spanish-mlsum": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "mattymchen/gense-base-plus": {"architectures": ["T5Model"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32101}, "DAMO-NLP/SeqGPT-560M": {"architectures": ["BloomForCausalLM"], "hidden_size": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 250880}, "AMHR/T5-for-Adversarial-Paraphrasing": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "Blizzchor/DialoGPT-medium-HarryBotter": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "brianveebee/DialoGPT-medium-bender": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "YTTD/DialoGPT-medium-keiji": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "anon8231489123/gpt4-x-alpaca-13b-native-4bit-128g": {"architectures": ["LLaMAForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "Pcik/DialoGPT-medium-Dante": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "mHossain/bangla-para-v3-500000": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "TheBloke/Llama-2-7B-GGUF": {}, "diwas7777/HarryBot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "seduerr/t5-small-pytorch": {"architectures": ["T5WithLMHeadModel"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "felinecity/DioloGPT-small-KaeyaBot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "lmsys/vicuna-7b-delta-v0": {"architectures": ["LlamaForCausalLM"], 
"hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "inu-ai/dolly-japanese-gpt-1b": {"architectures": ["GPT2LMHeadModel"], "n_embd": 2048, "n_head": 16, "n_inner": 8192, "n_layer": 24, "vocab_size": 44928}, "TheBloke/Vicuna-33B-1-3-SuperHOT-8K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "Dahoas/pythia-125M-static-sft": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 768, "intermediate_size": 3072, "num_attention_heads": 12, "num_hidden_layers": 12, "vocab_size": 50304}, "Blizzchor/DialoGPT-medium-QuillLord": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "beomi/KoAlpaca-llama-1-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "patrickNLP/Graphix-3B": {"architectures": ["Model"], "d_ff": 16384, "d_model": 1024, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "Starry/HELLORUKAS": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "keans/DialoGPT-small-highjacker": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "DoesNoPro/DialoGPT-small-RaidenG": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "ThatSkyFox/DialoGPT-medium-whatsapp": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "EnterNameBros/Senko-san-medium-scl": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "TheBloke/CodeLlama-7B-Python-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "mrm8488/t5-small-finetuned-quora-for-paraphrasing": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "NonzeroCornet34/DialoGPT-small-hansolo": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "d0rj/rut5-base-summ": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "CAIRE-CedarsSinai/falcon-7b-qlora-chat-support-bot-faq-alzkb-version-2": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "el-profesor/code_t5": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "Soumyajit1008/DialoGPT-small-harryPotterssen": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "malteos/bloom-1b5-clp-german": {"architectures": ["BloomForCausalLM"], "hidden_size": 2048, "n_head": 16, "n_layer": 24, "vocab_size": 50304}, "yesuns/DialoGPT-small-yesun": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "Stevo/DiagloGPT-medium-spamton": {"architectures": ["GPT2LMHeadModel"], 
"n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "Vision-CAIR/vicuna-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "TheBloke/airoboros-33B-gpt4-1-4-SuperHOT-8K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "tanishqvashisht/DialoGPT-small-Joshua": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "TurkuNLP/gpt3-finnish-3B": {"architectures": ["BloomModel"], "hidden_size": 2560, "n_head": 32, "n_layer": 32, "vocab_size": 131072}, "lizhuang144/flan-t5-base-VG-factual-sg": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "TheBloke/Athena-v1-GGUF": {}, "xxyyy123/test-28b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "pastlecry/DialoGPT-small-harrypotter": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "DiscordRequestsAPI/NurDeeps-Bot-2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "channashi/DialoGPT-small-rocket": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "ritog/bangla-gpt2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "TheBloke/Redmond-Puffin-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32032}, "Shakerlicious/DialoGPT-small-raquelbot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "lmqg/mt5-base-jaquad-qag": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250101}, "anon8231489123/vicuna-13b-GPTQ-4bit-128g": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "jacobmorrison/tk-instruct-small-lora-experiments": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 32100}, "TheBloke/open-llama-13b-open-instruct-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "cedpsam/chatbot_fr": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "Photolens/llama-2-13b-langchain-chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "avinashshrangee/DialoGPT-small-Ricky": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "YeungNLP/firefly-llama2-7b-pretrain": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, 
"intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 55296}, "efederici/it5-efficient-small-fanpage": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 32, "vocab_size": 32100}, "saikiranmaddukuri/chat_to_sql0.17": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "TheBloke/Llama2-28B-Air03-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 7296, "intermediate_size": 22016, "num_attention_heads": 57, "num_hidden_layers": 40, "vocab_size": 32000}, "crodri/falcon_aguila_meteocat": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 50257}, "Narsil/starcoder-gptq": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 40, "vocab_size": 49152}, "CobraMamba/mamba-gpt-3b-v4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "YeungNLP/firefly-llama2-13b-pretrain": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 55296}, "TheBloke/airoboros-l2-7b-gpt4-1.4.1-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "DecafNosebleed/DialoGPT-small-ScaraBot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "yazdipour/text-to-sparql-t5-small-qald9": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "ClassCat/gpt2-base-french": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50000}, "TheBloke/airoboros-33B-GPT4-m2.0-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "quantumaikr/KoreanLM-1.5b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 1024, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "toyfreak/DialoGPT-small-addy": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "spursyy/mT5_multilingual_XLSum_rust": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "lengoctuong/gpt2-finetuned-chatbot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50261}, "KnutJaegersberg/megatron-gpt2-345m-evol_instruct_v2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": 4096, "n_layer": 24, "vocab_size": 50257}, "zkdtckk/falcon40-instruct-qlora-tta-v1": {"architectures": ["RWForCausalLM"], "hidden_size": 8192, "n_head": 128, "n_layer": 60, "vocab_size": 65024}, "TheBloke/Nous-Hermes-13B-SuperHOT-8K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "TheBloke/Nous-Hermes-Llama2-GGML": {}, "IkariDev/Athena-v1": {"architectures": 
["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/WizardLM-7B-V1.0-Uncensored-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/llama-2-13B-German-Assistant-v2-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "cahya/gpt2-large-indonesian-522M": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_inner": null, "n_layer": 36, "vocab_size": 50257}, "VietAI/envit5-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 50048}, "kam1run/DialoGPT-large-kami": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_inner": null, "n_layer": 36, "vocab_size": 50257}, "uukuguy/speechless-codellama-dolphin-orca-platypus-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "aluserhuggingface/DialoGPT-small-harrypotter": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "TheBloke/gpt4-x-vicuna-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "Pcik/DialoGPT-medium-Ruby": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "TheBloke/LLaMA-30b-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "sdadas/polish-gpt2-small": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": 3072, "n_layer": 12, "vocab_size": 51200}, "ahxt/llama2_xs_460M_experimental": {"architectures": ["LlamaForCausalLM"], "hidden_size": 1024, "intermediate_size": 4096, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50304}, "lemon234071/t5-base-Chinese": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 35364}, "4bit/pyg-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "squarelike/Gugugo-koen-1.3B-V1.0": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 30080}, "lvwerra/t5-imdb": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "psymon/KoLlama2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Maxwere/DiabloGPT-medium-maxbot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "nafisehNik/mt5-persian-summary": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250112}, "nams/nams-bot": 
{"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "lmqg/mt5-small-esquad-qag": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "mattbit/gpt2wb": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "ghazikhanihamed/TooT-PLM-P2S": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 48, "vocab_size": 144}, "lonewanderer27/YoshinoriBot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "VinVanGogh/Llama-2-7b-Aixiety-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "GroNLP/gpt2-medium-italian-embeddings": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 30001}, "IDEA-CCNL/Randeng-T5-784M-QA-Chinese": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32601}, "kingbri/airo-llongma-2-13B-16k-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "lvwerra/starcoderbase-gsm8k": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 40, "vocab_size": 49152}, "mofawzy/gpt2-arabic-sentence-generator": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50000}, "lmqg/mt5-small-itquad-qag": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "sharpbai/Llama-2-13b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "lomahony/eleuther-pythia70m-hh-sft": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 512, "intermediate_size": 2048, "num_attention_heads": 8, "num_hidden_layers": 6, "vocab_size": 50304}, "Salesforce/codet5-large-ntp-py": {"architectures": ["T5WithLMHeadModel"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32100}, "TheBloke/Samantha-1.11-CodeLlama-34B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "Lenza/DialoGPT-medium-Kobayashi": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "davidviriato/DialoGPT-small-joshua": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "Shakerlicious/DialoGPT-small-descentbot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "TurkuNLP/gpt3-finnish-xl": {"architectures": ["BloomModel"], "hidden_size": 2064, "n_head": 24, "n_layer": 24, "vocab_size": 131072}, "TheBloke/starcoderplus-GPTQ": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 40, "vocab_size": 49152}, 
"TheBloke/Airoboros-L2-7B-2.1-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Mikivis/gpt2-large-lora-sft1": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_inner": null, "n_layer": 36, "vocab_size": 50257}, "gagan3012/k2t": {"architectures": ["T5WithLMHeadModel"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "MerlynMind/merlyn-education-safety": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 36, "vocab_size": 50688}, "explosion-testing/refined-web-model-test": {"architectures": ["RWForCausalLM"], "hidden_size": 32, "n_head": 4, "n_layer": 5, "vocab_size": 1024}, "explosion-testing/falcon-no-parallel-attn-test": {"architectures": ["RWForCausalLM"], "hidden_size": 32, "n_head": 4, "n_layer": 5, "vocab_size": 1024}, "Marxav/frpron": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 268}, "AmbricJohnson5888/claura": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "TheBloke/CodeLlama-7B-Instruct-GGUF": {}, "felinecity/DioloGPT-small-LisaBot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "lmqg/mt5-small-frquad-qag": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "RobiKenobi/DialoGPT-medium-pete": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "TheBloke/Vicuna-13B-CoT-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/airoboros-33B-gpt4-1.4-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "SEBIS/code_trans_t5_base_code_documentation_generation_java_multitask": {"architectures": ["T5Model"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "retrieva-jp/t5-base-medium": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "elinas/chronos-13b-4bit": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "abhinavkulkarni/meta-llama-Llama-2-7b-chat-hf-w4-g128-awq": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/Luban-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "uer/t5-base-chinese-cluecorpussmall": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 21228}, "ClueAI/ChatYuan-large-v1": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "helenai/gpt2-ov": {"architectures": 
["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "fireballoon/baichuan-vicuna-chinese-7b-gptq": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 64000}, "Trelis/Llama-2-7b-chat-hf-hosted-inference-8bit": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Starry/KARENTRIES": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "umm-maybe/SportsFanGhost": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "TheBloke/airoboros-13B-gpt4-1.4-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TabbyML/StarCoder-1B": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 2048, "n_head": 16, "n_inner": 8192, "n_layer": 24, "vocab_size": 49152}, "TFLai/Nova-13B-50-step": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "Mikivis/gpt2-large-lora-sft2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_inner": null, "n_layer": 36, "vocab_size": 50257}, "CHIH-HUNG/llama-2-13b-Open_Platypus_and_ccp_2.6w-3_epoch": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "BlackSamorez/falcon-40b-tiny-testing": {"architectures": ["RWForCausalLM"], "hidden_size": 256, "n_head": 4, "n_layer": 2, "vocab_size": 65024}, "Rocketknight1/tiny-random-falcon-40b": {"architectures": ["FalconForCausalLM"], "hidden_size": 1024, "num_attention_heads": 128, "num_hidden_layers": 2, "vocab_size": 65024}, "TheBloke/WizardLM-1.0-Uncensored-Llama2-13B-GGML": {}, "TheBloke/Zarafusionex-1.1-L2-7B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "lmqg/t5-large-squad-qg": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32101}, "casperhansen/falcon-7b-awq": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "Azure99/blossom-v2-llama2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "DeepESP/gpt2-spanish-medium": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "StudentLLM/Alpagasus-2-13b-QLoRA-merged": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Weni/WeniGPT": {"architectures": ["RWForCausalLM"], "hidden_size": 8192, "n_head": 128, "n_layer": 60, "vocab_size": 65024}, "niicovila/llama-v2-tst-law": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Undi95/CreativityEngine": {"architectures": 
["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "DB13067/Peterbot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "EleutherAI/pythia-12b-v0": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 36, "vocab_size": 50688}, "allenai/tulu-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "TheBloke/airoboros-l2-13b-gpt4-m2.0-GGML": {}, "TheBloke/Griffin-3B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "imthanhlv/vigpt2medium": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "m3hrdadfi/gpt2-persian-qa": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50000}, "TheBloke/MythoMax-L2-Kimiko-v2-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "CHIH-HUNG/llama-2-13b-FINETUNE1_17w-r16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "ppn/DialoGPT-small-harrypotter": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "lmqg/mt5-base-ruquad-qag": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250101}, "TheBloke/Firefly-Llama2-13B-v1.2-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "persiannlp/mt5-large-parsinlu-opus-translation_fa_en": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 250112}, "simple2312/DialoGPT-Twice": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "declare-lab/flan-alpaca-xxl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 10240, "d_model": 4096, "num_heads": 64, "num_layers": 24, "vocab_size": 32128}, "ChanceFocus/finma-7b-nlp": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "osunlp/attrscore-flan-t5-xl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "likenneth/honest_llama2_chat_7B": {"architectures": ["LLaMAForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Hugherinit/hi": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 32119}, "vaibhav9/GPT2-qa": {"architectures": ["GPT2ModelForQuestionAnswering"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, 
"st3rl4nce/t5-small-finetuned-pubmed": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "uonlp/okapi-ro-llama": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "ThomasNLG/t5-weighter_cnndm-en": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32100}, "google/t5-11b-ssm-tqa": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 65536, "d_model": 1024, "num_heads": 128, "num_layers": 24, "vocab_size": 32128}, "lizhuang144/flan-t5-small-VG-factual-sg": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 32128}, "hyunjae/skt-kogpt2-kullm-v2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 51200}, "Mirage-Studio/llama-gaan-2-7b-chat-hf-dutch": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/LosslessMegaCoder-Llama2-7B-Mini-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32007}, "lmqg/t5-small-squad-qg": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32101}, "castorini/doc2query-t5-large-msmarco": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "TheBloke/manticore-13b-chat-pyg-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "22h/open-cabrita3b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 52000}, "alzoubi36/priva_t5-small": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "TheBloke/vicuna-7B-v0-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "TheBloke/falcon-7b-instruct-GGML": {}, "Rozi05/QuoteVibes_Model_Trained": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "Tidum/DialoGPT-large-Michael": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "valhalla/t5-small-qg-prepend": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32102}, "lmqg/t5-large-squad-qag": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32101}, "abhiramtirumala/DialoGPT-sarcastic": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "mindrage/Manticore-13B-Chat-Pyg-Guanaco-GGML": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, 
"num_hidden_layers": 40, "vocab_size": 32000}, "h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Salesforce/dialogstudio-t5-base-v1.0": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32103}, "allenai/unifiedqa-v2-t5-base-1363200": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "kleinay/qanom-seq2seq-model-joint": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32101}, "puugz/DialoGPT-small-spiderman": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "UrukHan/t5-russian-summarization": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "HuggingFaceH4/tiny-random-LlamaForSeqClass": {"architectures": ["LlamaForSequenceClassification"], "hidden_size": 16, "intermediate_size": 64, "num_attention_heads": 4, "num_hidden_layers": 2, "vocab_size": 32000}, "JosephusCheung/Qwen-LLaMAfied-7B-Chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 151936}, "Abzu/orca-mini-v3-70b-gptq-q4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "wnic00/t5-small-finetune-bilingual-summarization": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32100}, "ChukSamuels/DialoGPT-small-Dr.FauciBot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "macavaney/doc2query-t5-base-msmarco": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "nlp-waseda/comet-t5-base-japanese": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32000}, "stjiris/t5-portuguese-legal-summarization": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "Icaruas/V2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "imxly/t5-pegasus": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 50000}, "stefan-it/german-gpt2-larger": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50265}, "noahkim/KoT5_news_summarization": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 50358}, "hoskinson-center/proofGPT-v0.1": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50304}, "TheBloke/WizardMath-7B-V1.0-GPTQ": {"architectures": ["LlamaForCausalLM"], 
"hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "FieldSu/distil_student_24": {"architectures": ["RWForCausalLM"], "hidden_size": 1136, "n_head": 71, "n_layer": 8, "vocab_size": 65024}, "shyamsn97/Mario-GPT2-700-context-length": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 6, "vocab_size": 50257}, "dgnk007/eagle": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "sharpbai/Llama-2-7b-chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "jackyv/DialoGPT-small-pinocchio": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "felinecity/DioloGPT-small-KaeyaBot2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "toyfreak/DialoGPT-small-shy": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "chavinlo/alpaca-13b": {"architectures": ["LLaMAForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "TehVenom/Pygmalion-7b-4bit-GPTQ-Safetensors": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "georgesung/open_llama_7b_qlora_uncensored": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "ostorc/rick-sanchez-chatbot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "polymath707/llama-2-7b-miniguanaco": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "KBlueLeaf/guanaco-7b-leh-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/Chronos-Hermes-13B-v2-GGML": {}, "approach0/mathy-vicuna-13B-FFT-phase2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "gorilla-llm/gorilla-7b-hf-delta-v1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "j5ng/kullm-5.8b-GPTQ-8bit": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 16, "num_hidden_layers": 28, "vocab_size": 30080}, "bitadin/checkpoint-230167": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "NekoPunchBBB/Llama2-13b-hf-Open-Platypus-QLoRA-att": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "mrm8488/t5-small-finetuned-wikiSQL": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, 
"vocab_size": 32128}, "ozcangundes/T5-base-for-BioQA": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "AriakimTaiyo/gpt2-chat": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_layer": 36, "vocab_size": 50257}, "TheBloke/WizardLM-13B-V1.2-GGML": {}, "TheBloke/Trurl-2-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "ccore/opt-125-smart-test": {"architectures": ["OPTForCausalLM"], "hidden_size": 768, "num_attention_heads": 12, "num_hidden_layers": 12, "vocab_size": 50272}, "James-WYang/BigTranslate": {"architectures": ["LLaMAForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 53613}, "Trelis/Llama-2-7b-chat-hf-function-calling": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Wikidepia/IndoT5-base-paraphrase": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "csebuetnlp/mT5_m2m_crossSum": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "seanmor5/tiny-llama-test": {"architectures": ["LlamaForCausalLM"], "hidden_size": 32, "intermediate_size": 64, "num_attention_heads": 2, "num_hidden_layers": 2, "vocab_size": 32000}, "explosion-testing/refined-web-model-new-decoder-test": {"architectures": ["RWModel"], "hidden_size": 256, "n_head": 4, "n_layer": 5, "vocab_size": 1024}, "jondurbin/airocoder-34b-2.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "lmqg/t5-base-squad-qg": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32101}, "PORTULAN/gervasio-ptpt-base": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 8, "num_hidden_layers": 16, "vocab_size": 50304}, "UWB-AIR/barticzech-1.0": {"architectures": ["MBartForConditionalGeneration"], "d_model": 1024, "num_hidden_layers": 12, "vocab_size": 50265}, "TokenBender/llama2-7b-chat-hf-codeCherryPop-qLoRA-merged": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Voicelab/trurl-2-7b-8bit": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "TheBloke/Llama-2-13B-chat-GGUF": {}, "VietAI/vit5-base-vietnews-summarization": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 36096}, "lmqg/t5-small-squad-ae": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32101}, "retrieva-jp/t5-base-short": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "grammarly/coedit-xxl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 10240, "d_model": 4096, 
"num_heads": 64, "num_layers": 24, "vocab_size": 32100}, "heack/HeackMT5-ZhSum100k": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "TheBloke/LLaMA-13b-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TFMC/ELYZA-japanese-Llama-2-7b-instruct-GPTQ-4bit-64g": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "mxmax/Chinese_Chat_T5_Base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "elinas/chronos-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "kajdun/iubaris-13b-v3_GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "jmeadows17/MathT5-large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32104}, "TheBloke/Kimiko-7B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "nlp-waseda/gpt2-small-japanese": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 32000}, "rshrott/description-together-ai": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "noah-ai/mt5-base-question-generation-vi": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "AI4PD/ZymCTRL": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_inner": null, "n_layer": 36, "vocab_size": 458}, "bitadin/gpt-4-long-titles-v2-flan-t5-base-llm-12": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "shorthillsai/flan-t5-large-absa": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "TheBloke/CodeLlama-13B-oasst-sft-v10-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32032}, "prithivida/active_to_passive_styletransfer": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "lcw99/t5-large-korean-text-summary": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 50358}, "EleutherAI/pythia-1.4b-deduped-v0": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50304}, "sdadas/polish-gpt2-large": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_inner": 5120, "n_layer": 36, "vocab_size": 51200}, "uonlp/okapi-vi-bloom": {"architectures": ["BloomForCausalLM"], 
"hidden_size": 4096, "n_head": 32, "n_inner": null, "n_layer": 30, "vocab_size": 250880}, "cenkersisman/gpt2-turkish-900m": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "IlyaGusev/rugpt_large_turbo_instructed": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1536, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50258}, "Waterhorse/chessgpt-base-v1": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "jondurbin/spicyboros-13b-2.2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "echarlaix/t5-small-openvino": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "mrm8488/santacoder-finetuned-the-stack-bash-shell": {"architectures": ["GPT2LMHeadCustomModel"], "n_embd": 2048, "n_head": 16, "n_inner": 8192, "n_layer": 24, "vocab_size": 49280}, "ckip-joint/bloom-3b-zh": {"architectures": ["BloomForCausalLM"], "hidden_size": 2560, "n_head": 32, "n_inner": null, "n_layer": 30, "vocab_size": 250880}, "Dawnstarhunter/DialoGPT-medium-Eveline": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "lmqg/t5-base-squad-ae": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32101}, "mesolitica/finetune-translation-t5-small-standard-bahasa-cased-v2": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32100}, "liuhaotian/LLaVA-7b-delta-v0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32004}, "yzhuang/autotree_llama_small_snxor_l1_2_vit": {"architectures": ["LlamaForAutoTree"], "hidden_size": 768, "intermediate_size": 3072, "num_attention_heads": 12, "num_hidden_layers": 6, "vocab_size": 32000}, "mrm8488/t5-base-finetuned-wikiSQL-sql-to-en": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "aleksickx/llama-7b-hf": {"architectures": ["LLaMAForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "yongzx/pythia-70m-sft-hh": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 512, "intermediate_size": 2048, "num_attention_heads": 8, "num_hidden_layers": 6, "vocab_size": 50304}, "sonoisa/t5-base-english-japanese": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "BramVanroy/Llama-2-13b-chat-dutch": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Bhuvana/t5-base-spellchecker": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "PlanTL-GOB-ES/gpt2-base-bne": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_layer": 12, "vocab_size": 50261}, "lmqg/mt5-small-jaquad-qg": {"architectures": 
["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "Mirage-Studio/llama-gaan-2-7b-chat-hf-dutch-epoch-5": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "microsoft/DialogRPT-human-vs-rand": {"architectures": ["GPT2ForSequenceClassification"], "n_embd": 1024, "n_head": 16, "n_layer": 24, "vocab_size": 50257}, "aubmindlab/aragpt2-mega": {"architectures": ["GPT2LMHeadModel"], "intermediate_size": 6144, "n_embd": 1536, "n_head": 24, "n_inner": null, "n_layer": 48, "vocab_size": 64000}, "liyuesen/druggpt": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 53083}, "conceptofmind/Hermes-LLongMA-2-7b-8k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/scarlett-33B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "TheBloke/EverythingLM-13b-V2-16K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "sartmis1/starcoder-v2-openapi-special-tokens": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 40, "vocab_size": 49152}, "TheBloke/Phind-CodeLlama-34B-v1-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "TheBloke/Yarn-Llama-2-7B-64K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/Dolphin-Llama-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "kfkas/Legal-Llama-2-ko-7b-Chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 46336}, "Ichsan2895/Merak-7B-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "allenai/unifiedqa-v2-t5-base-1251000": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "sagawa/ReactionT5-product-prediction": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 268}, "lmqg/mt5-small-jaquad-ae": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "Narrativa/mT5-base-finetuned-tydiQA-xqa": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "allenai/macaw-large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "gagan3012/k2t-new": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, 
"num_layers": 6, "vocab_size": 32128}, "google/t5-efficient-tiny-nl2": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 256, "num_heads": 4, "num_layers": 2, "vocab_size": 32128}, "sam2ai/open_llama_3b_odia_gptq_128_4bit": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "lmqg/mt5-small-dequad-qg": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "mrm8488/mT5-small-finetuned-tydiqa-for-xqa": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250112}, "zjunlp/knowlm-13b-zhixi": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "h2oai/h2ogpt-16k-codellama-13b-instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "mymusise/gpt2-medium-chinese": {"architectures": ["TFGPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 8021}, "ai-forever/mGPT-13B": {"architectures": ["GPT2LMHeadModel"], "n_embd": 5120, "n_head": 40, "n_inner": null, "n_layer": 40, "vocab_size": 100000}, "TinaLiHF/fined-tuned-T5small": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "TheBloke/airoboros-l2-7B-gpt4-2.0-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Mihakram/AraT5-base-question-generation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 110080}, "fjungstedt/t5-criteria-text-to-json": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "luqh/ClinicalT5-large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "h2oai/h2ogpt-16k-codellama-13b-python": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "masakhane/afri-mt5-base": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "YeungNLP/bloom-1b4-zh": {"architectures": ["BloomForCausalLM"], "n_inner": null, "n_layer": 24, "num_attention_heads": 16, "vocab_size": 46145}, "shekharchatterjee/temp-model-174": {}, "TheBloke/Kimiko-v2-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "jeffwan/vicuna-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "kz919/ntk_scaled_open_llama_13b_32k": {"architectures": ["NTKScaledLlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "lmqg/t5-base-squad-qg-ae": {"architectures": 
["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32101}, "r3dhummingbird/DialoGPT-small-harrypotter": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "camenduru/MiniGPT4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "TheBloke/open-llama-7b-open-instruct-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "MoinFaisal/Llama-2-7b-chat-finetune": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/CodeLlama-13B-Instruct-GGUF": {}, "fbellame/llama2-pdf-to-quizz-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "fractalego/fact-checking": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "michelecafagna26/gpt2-medium-finetuned-sst2-sentiment": {"architectures": ["GPT2ForSequenceClassification"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "TheBloke/WizardLM-Uncensored-SuperCOT-StoryTelling-30B-SuperHOT-8K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32001}, "TheBloke/Airoboros-7B-GPT4-1-4-SuperHOT-8K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/Airoboros-L2-70B-GPT4-m2.0-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "Arc53/docsgpt-7b-falcon": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "alenusch/mt5large-ruparaphraser": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 250112}, "ApoTro/slovak-t5-small": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 32100}, "microsoft/dolly-v2-7b-olive-optimized": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50280}, "huggingtweets/gordonramsay": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "prithivida/formal_to_informal_styletransfer": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "model-attribution-challenge/gpt2-xl": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1600, "n_head": 25, "n_layer": 48, "vocab_size": 50257}, "saiful9379/Bangla_GPT2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 33391}, "deepse/CodeUp-Llama-2-7b-chat-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 
11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "ziqingyang/chinese-llama-2-13b-16k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 55296}, "ChandlerU11/t5_fine": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "TheBloke/Guanaco-3B-Uncensored-v2-GPTQ": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "mamiksik/T5-commit-message-generation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32104}, "conceptofmind/Yarn-Llama-2-13b-64k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "mesolitica/llama-13b-hf-16384-fpf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Sao10K/Stheno-1.2-L2-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "gsarti/it5-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32103}, "Den4ikAI/FRED-T5-XL-interpreter": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1536, "num_heads": 24, "num_layers": 24, "vocab_size": 50364}, "TheBloke/WizardCoder-Guanaco-15B-V1.1-GPTQ": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 40, "vocab_size": 49153}, "seonglae/llama-2-7b-chat-hf-gptq": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/llama2_7b_chat_uncensored-GGML": {}, "ecosumit/gpt-model": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "allegro/plt5-large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 50048}, "cointegrated/rut5-small": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 20100}, "it5/it5-large-question-answering": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32103}, "tscholak/1zha5ono": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32102}, "optible/unifiedqa-t5-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "CleverShovel/falcon-7b-instruct-sharded-bf16": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "TheBloke/Pygmalion-13B-SuperHOT-8K-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "BlackSamorez/llama-2-tiny-testing": {"architectures": 
["LlamaForCausalLM"], "hidden_size": 128, "intermediate_size": 11008, "num_attention_heads": 8, "num_hidden_layers": 2, "vocab_size": 2000}, "ianagra/Llama-2-7b-ALLM-virtual-sales-assistant": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "quantumaikr/KoreanLM-3B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 2048, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "quantumaikr/llama-2-70B-chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "Deniskin/gpt3_medium": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1536, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50264}, "ozcangundes/mt5-small-turkish-summarization": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250112}, "EleutherAI/pythia-1b-deduped-v0": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 8, "num_hidden_layers": 16, "vocab_size": 50304}, "flozi00/Llama-2-7b-german-assistant-v3": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Mikivis/gpt2-large-lora-stf4": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_inner": null, "n_layer": 36, "vocab_size": 50257}, "AK270802/DialoGPT-small-harrypotter": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "EleutherAI/pythia-12b-deduped-v0": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 36, "vocab_size": 50688}, "EricPeter/Llama-2-multilingual": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/Pygmalion-2-7B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "miguelvictor/python-gpt2-large": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_layer": 36, "vocab_size": 50257}, "h2oai/h2ogpt-16k-codellama-7b-python": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32016}, "ammarinjtkrbh/llama-2-7b-food-search": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "GroNLP/gpt2-small-dutch": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 40000}, "pszemraj/opt-350m-email-generation": {"architectures": ["OPTForCausalLM"], "hidden_size": 1024, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50265}, "caffsean/t5-small-finetuned-keyword-to-text-generation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "lmqg/mt5-small-dequad-ae": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, 
"num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "yuyijiong/T5-large-sentiment-analysis-Chinese-MultiTask": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32596}, "sonoisa/t5-qiita-title-generation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "YeungNLP/firefly-bloom-1b4": {"architectures": ["BloomForCausalLM"], "hidden_size": 2048, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 46145}, "samwit/koala-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/Vicuna-13B-1-3-SuperHOT-8K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Photolens/OpenOrcaxOpenChat-2-13b-langchain-chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "Ichsan2895/Merak-7B-v1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "flozi00/Llama-2-7b-german-assistant-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "ss1612/loki-chat": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "OpenBuddy/openbuddy-falcon-7b-v5-fp16": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 70144}, "wellecks/llmstep-mathlib4-pythia2.8b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50278}, "dariolopez/llama-2-7b-oasst1-es": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/WizardLM-1.0-Uncensored-CodeLlama-34B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "brad1141/gpt2-finetuned-comp2": {"architectures": ["GPT2ForTokenClassification"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "TheBloke/chronos-hermes-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "lizhuang144/flan-t5-large-VG-factual-sg": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "nivos/pythia-410m-deduped-finetuned-final-activity-text-10epoch": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 1024, "intermediate_size": 4096, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50304}, "HamidRezaAttar/gpt2-product-description-generator": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "TheBloke/ORCA_LLaMA_70B_QLoRA-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, 
"intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "lmsys/vicuna-13b-delta-v0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "jacobmorrison/tk-instruct-xl-lora-experiments": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32100}, "GroNLP/gpt2-small-italian": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 30001}, "yihsuan/mt5_chinese_small": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250100}, "YTTD/DialoGPT-medium-souv2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "arubenruben/ptt5-portuguese-cnn-dailymail-azure-pt-pt": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "localmodels/Llama-2-7B-Chat-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "lgaalves/llama-2-13b-chat-platypus": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "it5/it5-large-question-generation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32103}, "psyche/KoT5": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "TheBloke/Llama2-70B-OASST-SFT-v10-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32007}, "deepparag/Aeona": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "lmqg/mt5-small-koquad-qg": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "lmqg/mt5-small-esquad-qg": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "NinedayWang/PolyCoder-0.4B": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 1024, "intermediate_size": 4096, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50304}, "ConvLab/t5-small-nlu-multiwoz21": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32100}, "SIC98/GPT2-python-code-generator": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "lmqg/mt5-small-itquad-qg": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "kaiyuy/leandojo-lean4-tacgen-byt5-small": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3584, "d_model": 1472, "num_heads": 6, "num_layers": 12, "vocab_size": 384}, "usvsnsp/pythia-6.9b-sft": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 
32, "num_hidden_layers": 32, "vocab_size": 50432}, "PlanTL-GOB-ES/gpt2-large-bne": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_layer": 36, "vocab_size": 50261}, "jordiclive/flan-t5-11b-summarizer-filtered": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 10240, "d_model": 4096, "num_heads": 64, "num_layers": 24, "vocab_size": 32128}, "Jordine/scpoo": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "behnamsh/gpt2_camel_physics": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "lmqg/mt5-small-esquad-ae": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "MerlynMind/merlyn-education-teacher-assistant": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 36, "vocab_size": 50688}, "mesolitica/llama-7b-hf-16384-fpf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "MatthisHoules/rat-t5-qdmr-grounded-with-db": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "persiannlp/mt5-small-parsinlu-qqp-query-paraphrasing": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250112}, "lmqg/mt5-small-koquad-ae": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "lmqg/mt5-small-itquad-ae": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "openthaigpt/openthaigpt-gpt2-instructgpt-poc-0.0.4": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50268}, "ChanceFocus/finma-7b-full": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "vivekraina/Llama-2-7b-hf-8bit": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "dpml/vicuna_mt_450s": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "burberg92/resume_summary": {"architectures": ["BloomForCausalLM"], "hidden_size": 2048, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 250880}, "Monero/Pygmalion-Metharme-7b-4bit-TopScore": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Icaruas/7bill8k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32002}, "dahara1/ELYZA-japanese-Llama-2-7b-fast-instruct-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 45043}, "TheBloke/Yarn-Llama-2-13B-64K-GPTQ": {"architectures": ["LlamaForCausalLM"], 
"hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "prithivida/passive_to_active_styletransfer": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "lmqg/mt5-small-frquad-qg": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "JamesStratford/PLord-bot-DialoGPT-medium": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "yizhangliu/prompt-extend": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 52000}, "lmqg/mt5-small-frquad-ae": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "Beltenebros/DialoGPT-small-PerionOfGaul": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "sominw/rel23_conll": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "mncai/SGPT-5.8B-wiki-mirae-bank_securities-epoch5": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 16, "num_hidden_layers": 28, "vocab_size": 30080}, "MickyMike/VulRepair": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32105}, "ybelkada/t5-11b-sharded": {"architectures": ["T5WithLMHeadModel"], "d_ff": 65536, "d_model": 1024, "num_heads": 128, "num_layers": 24, "vocab_size": 32128}, "Einmalumdiewelt/T5-Base_GNAD_MaxSamples": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "digitous/13B-HyperMantis_GPTQ_4bit-128g": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "weqweasdas/hh_rlhf_rm_open_llama_3b": {"architectures": ["LlamaForSequenceClassification"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "TheBloke/WizardMath-13B-V1.0-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "ziqingyang/chinese-alpaca-2-7b-16k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 55296}, "valhalla/t5-base-squad": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "ELiRF/mt5-base-dacsa-es": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "abhitopia/question-answer-generation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32102}, "TurkuNLP/gpt3-finnish-large": {"architectures": ["BloomModel"], "hidden_size": 1536, "n_head": 16, "n_layer": 24, "vocab_size": 131072}, "Abyss-fyf/DialoGPT-small-discord": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, 
"n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "TheBloke/OpenOrca-Platypus2-13B-GGML": {}, "TheBloke/Airoboros-L2-7B-2.1-GGUF": {}, "huggingtweets/googleai": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "it5/it5-base-question-answering": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32103}, "woodmtaylor/DialoGPT-medium-Heej": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "kimdwan/t5-base-korean-summarize-LOGAN": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 50358}, "Narrativa/mT5-base-finetuned-tydiQA-question-generation": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "huggingtweets/normmacdonald": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "r3dhummingbird/DialoGPT-medium-neku": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "yhavinga/t5-v1.1-base-dutch-cnn-test": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32103}, "kennethhendricks/DialoGPT-medium-jared-hendricks-gen1": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "retrieva-jp/t5-small-long": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 32128}, "TheBloke/Vigogne-2-7B-Chat-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TigerResearch/tigerbot-7b-chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 60928}, "Fredithefish/Guanaco-13B-Uncensored": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "consciousAI/question-answering-generative-t5-v1-base-s-q-c": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "TheBloke/open-llama-7B-v2-open-instruct-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "mosama/Llama-2-Medical-Merged-LoRA": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "bullmount/quanIta_t5": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32103}, "YeungNLP/bloomz-396m-zh": {"architectures": ["BloomForCausalLM"], "hidden_size": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 46145}, "GreenBitAI/LLaMA-7B-2bit": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 
32000}, "chgk13/decicoder-1b-openvino-int8": {"architectures": ["DeciCoderForCausalLM"], "hidden_size": 2048, "intermediate_size": 5888, "num_attention_heads": 32, "num_hidden_layers": 20, "vocab_size": 49152}, "bigscience/bloomz-mt": {"architectures": ["BloomForCausalLM"], "n_layer": 70, "num_attention_heads": 112, "vocab_size": 250880}, "LarkAI/codet5p-770m_nl2sql_oig": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32100}, "Linly-AI/Chinese-Falcon-7B": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 90046}, "ckip-joint/bloom-3b-zh-instruct": {"architectures": ["BloomForCausalLM"], "hidden_size": 2560, "n_head": 32, "n_inner": null, "n_layer": 30, "vocab_size": 250880}, "sgr23/llama2-fine-tuned-dolly-15k-dto": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "edbeeching/gpt2-imdb": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "cardiffnlp/flan-t5-small-tweet-emotion": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 32128}, "TheBloke/airoboros-7B-gpt4-1.4-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/CodeLlama-7B-GGUF": {}, "TheBloke/Airoboros-c34B-2.1-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "byeongal/Ko-DialoGPT": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 51200}, "ismaelfaro/gpt2-poems.en": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "tuner007/t5_abs_qa": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "kennethhendricks/DialoGPT-medium-PowPowGaming": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "DunnBC22/flan-t5-base-text_summarization_data": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "zarakiquemparte/hermeslimarp-l2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "MagicLEMP/llamavocat_13B_mixed_16K": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "4bit/ELYZA-japanese-Llama-2-7b-instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "EnglishVoice/t5-base-us-to-uk-english": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "devanshipatel/t5-gec-english-125k": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, 
"helloollel/vicuna-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "nferroukhi/WizardLM-Uncensored-Falcon-7b-sharded-bf16": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65025}, "dacorvo/tiny-random-gpt2-neuronx": {"intermediate_size": 37, "n_embd": 32, "n_head": 4, "n_inner": null, "n_layer": 5, "vocab_size": 1000}, "JamesStratford/Pidrow-bot-DialoGPT-Large-Feb2023": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_inner": null, "n_layer": 36, "vocab_size": 50257}, "tsuyuan/Llama-2-7b-unit": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 41218}, "OFA-Sys/gsm8k-rft-llama7b2-u13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "uer/gpt2-chinese-ancient": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 25370}, "YTTD/DialoGPT-medium-safv3": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "Neko-Institute-of-Science/LLaMA-65B-HF": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "TheBloke/Spicyboros-13B-2.2-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "IDEA-CCNL/Randeng-T5-77M-MultiTask-Chinese": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 32596}, "coreml-projects/Llama-2-7b-chat-coreml": {"architectures": ["LlamaForCausalLM"], "vocab_size": 32000}, "oscorrea/scores-lince-sm": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "morzecrew/FRED-T5-RefinedPersonaChat": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1536, "num_heads": 24, "num_layers": 24, "vocab_size": 50364}, "anjakuzev/harry_7": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/Mythalion-13B-GGUF": {}, "Kryptone/monikAI": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "TheBloke/Luna-AI-Llama2-Uncensored-GGML": {}, "mlabonne/llama-2-7b-miniguanaco": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/Hermes-LLongMA-2-7B-8K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "zlsl/l_erotic_kink_chat": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1536, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50259}, "Sao10K/Stheno-Inverted-1.2-L2-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, 
"castorini/duot5-base-msmarco": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "mrm8488/t5-base-finetuned-qasc": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "entropy/gpt2_zinc_87m": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 2707}, "MarkyMarx/DialoGPT-medium-jimmybot2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "stefan-it/secret-gpt2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "Narrativa/byt5-base-tweet-hate-detection": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3968, "d_model": 1536, "num_heads": 12, "num_layers": 18, "vocab_size": 384}, "nicholasKluge/Aira-2-124M": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50261}, "TheBloke/Samantha-1.11-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "castorini/monot5-large-msmarco": {"architectures": ["T5Model"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "PoloHuggingface/French_grammar_error_corrector": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32102}, "cambridgeltl/magic_mscoco": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50259}, "Gatozu35/tortoise-tts": {"architectures": ["GPT2InferenceModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 30, "vocab_size": 604}, "abacusai/Giraffe-v1-delta-13b-scaled-16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "flozi00/Llama-2-13B-german-assistant-v3-4bit-autogptq": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "HAERAE-HUB/tulu_13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "doc2query/msmarco-14langs-mt5-base-v1": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "Maciel/T5Corrector-base-v2": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "vilm/vietcuna-3b-v2": {"architectures": ["BloomForCausalLM"], "hidden_size": 2560, "n_head": 32, "n_inner": null, "n_layer": 30, "vocab_size": 250880}, "TitanML/ct2-int8-falcon-7b-instruct": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "ybelkada/llama-7b-GPTQ-test": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "h2oai/h2ogpt-16k-codellama-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, 
"num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32016}, "TigerResearch/tigerbot-70b-chat-v1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 60928}, "Supiri/t5-base-conversation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "msterbentz/t5-base-break-high": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "igorktech/rut5-small-chit-chat-intelligent": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 20100}, "kuleshov/llama-7b-4bit": {"architectures": ["LLaMAForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "hipnologo/gpt2-imdb-finetune": {"architectures": ["GPT2ForSequenceClassification"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "qwopqwop/danbooru-llama-gptq": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "t-dai-con/gpt-fine-tuned-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/Platypus2-70B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "KETI-AIR/ke-t5-base-ko": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 64128}, "doc2query/all-t5-base-v1": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "GT4SD/multitask-text-and-chemistry-t5-base-standard": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "uer/gpt2-medium-chinese-cluecorpussmall": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 21128}, "UBC-NLP/AraT5-base-title-generation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 110080}, "dsivakumar/text2sql": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "power-greg/super-fast-llm": {"architectures": ["GPT2LMHeadModel"], "n_embd": 2048, "n_head": 16, "n_inner": 2048, "n_layer": 4, "vocab_size": 2048}, "AlexWortega/instruct_rugptMedium": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50263}, "hiyouga/Llama-2-Chinese-13b-chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "frank098/llama2-13b-8k-vnf-virtualization": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "OFA-Sys/gsm8k-rft-llama7b-sample100": {"architectures": ["LlamaForCausalLM"], 
"hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "EnterNameBros/Senko-ai-medium": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "PeanutJar/LLaMa-2-PeanutButter_v19_R8-7B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Sao10K/Medusa-1.1-L2-7B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "ChrisVCB/DialoGPT-medium-cmjs": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "indonesian-nlp/gpt2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "persiannlp/mt5-small-parsinlu-squad-reading-comprehension": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250112}, "stmnk/codet5-small-code-summarization-python": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32100}, "emozilla/LLongMA-2-13b-16k-flash": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "bigscience/bloom-petals": {"architectures": ["BloomForCausalLM"], "hidden_size": 14336, "n_head": 112, "n_layer": 70, "vocab_size": 250880}, "procesaur/gpt2-srlat": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 52000}, "ashwinR/CodeExplainer": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32100}, "Chirayu/nl2pandas": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "OpenBuddy/openbuddy-falcon-7b-v6-bf16": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 70144}, "swbaek/tulu_65b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32001}, "huggingtweets/wallstreetbets": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "Sultannn/gpt2-ft-id-puisi": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 35000}, "sonoisa/sentence-t5-base-ja-mean-tokens": {"architectures": ["T5Model"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "sdadas/polish-gpt2-xl": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1600, "n_head": 25, "n_inner": 6400, "n_layer": 48, "vocab_size": 51200}, "sjrhuschlee/flan-t5-large-squad2": {"architectures": ["T5ForQuestionAnswering"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "Hnabil/t5-address-standardizer": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "Gryphe/MythoLogic-Mini-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, 
"intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/Athena-v1-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Undi95/MythoMax-L2-Kimiko-v2-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "magnifi/llama-augmented-contextual-2-epoch-6-merged": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "doc2query/msmarco-chinese-mt5-base-v1": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "Sakuna/t5_grammar_checker": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "Dahoas/pythia-1B-response-full-static-sft": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50304}, "localmodels/Vicuna-7B-v1.3-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Sao10K/Stheno-1.1-L2-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "mlabonne/drllama-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "IlyaGusev/rugpt3medium_sum_gazeta": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "describeai/gemini": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "mojians/E2E-QA-Mining": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32102}, "TheBloke/Wizard-Vicuna-13B-Uncensored-SuperHOT-8K-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "dnagpt/human_gpt2-v1": {"architectures": ["GPT2Model"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 25000}, "heegyu/WizardVicuna-Uncensored-pythia-160m-deduped": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 768, "intermediate_size": 3072, "num_attention_heads": 12, "num_hidden_layers": 12, "vocab_size": 50304}, "maximuslee07/llama-2-7b-rockwell": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "DylanJHJ/fidt5-base-nq": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "laituan245/molt5-large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "DancingIguana/music-generation": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 6, 
"vocab_size": 25000}, "Qiliang/flan-t5-large-summarization-finetuned-xsum": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "TheBloke/Vicuna-7B-CoT-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "hpcaitech/openmoe-base": {"architectures": ["OpenMoeForCausalLM"], "hidden_size": 768, "intermediate_size": 2048, "num_attention_heads": 12, "num_hidden_layers": 12, "vocab_size": 256384}, "CalderaAI/13B-Thorns-l2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32032}, "CHIH-HUNG/llama-2-13b-FINETUNE1_17w-r4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "IlyaGusev/rugpt_medium_turbo_instructed": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50258}, "pankajmathur/orca_alpaca_3b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "TheBloke/Wizard-Vicuna-7B-Uncensored-SuperHOT-8K-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "abacusai/Giraffe-v1-delta-13b-scaled-4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/Huginn-v3-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "bloom-testing/test-bloomd-350m-main": {"architectures": ["BloomModel"], "n_inner": null, "n_layer": 24, "num_attention_heads": 16, "vocab_size": 250880}, "AI-Sweden/gpt-sw3-356m": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": 4096, "n_layer": 24, "vocab_size": 64000}, "raymondho/DialoGPT-small-harry": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "TheBloke/airochronos-33B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "TheBloke/OpenChat_v3.2-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "ahnyeonchan/OpenOrca-AYT-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "stanford-crfm/expanse-gpt2-small-x777": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "doc2query/msmarco-german-mt5-base-v1": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "ku-nlp/gpt2-medium-japanese-char": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 6000}, "llm-blender/gen_fuser_3b": 
{"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32100}, "lomahony/eleuther-pythia2.8b-hh-sft": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50304}, "TheBloke/Llama2-22B-GPLATTY-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 40, "vocab_size": 32000}, "grammarly/coedit-xl-composite": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32100}, "imuncomfortable/DiabloGPT-small-CocoAtarashi": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "kaiyuy/leandojo-lean3-retriever-tacgen-byt5-small": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3584, "d_model": 1472, "num_heads": 6, "num_layers": 12, "vocab_size": 384}, "michaelwzhu/Chinese-LlaMA2-13B-chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 55296}, "Xenova/llama2.c-stories110M": {"architectures": ["LlamaForCausalLM"], "hidden_size": 768, "intermediate_size": 2048, "num_attention_heads": 12, "num_hidden_layers": 12, "vocab_size": 32000}, "Youngwoo9/T5_Pyeongsan": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 50358}, "literallywood/DialoGPT-small-ekansh": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "jondurbin/spicyboros-7b-2.2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "indobenchmark/indogpt": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_layer": 12, "vocab_size": 40005}, "it5/it5-efficient-small-el32-news-summarization": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 32, "vocab_size": 32100}, "mesolitica/finetune-translation-t5-base-standard-bahasa-cased-v2": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "Den4ikAI/FRED-T5-XL_instructor": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1536, "num_heads": 24, "num_layers": 24, "vocab_size": 50365}, "mlabonne/gpt2-GPTQ-4bit": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "steerapi/Llama-2-7b-chat-hf-onnx": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Langboat/bloom-1b4-zh": {"architectures": ["BloomForCausalLM"], "hidden_size": 2048, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 46145}, "neulab/docprompting-codet5-python-doc-retriever": {"architectures": ["BERTScorerForCL"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "AI-Sweden/gpt-sw3-20b": {"architectures": ["GPT2LMHeadModel"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 44, "vocab_size": 64000}, "syndi-models/article-title-generator": 
{"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "vgaraujov/Dummy5": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "TFLai/Orca-Nova-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "allenai/tk-instruct-11b-def-pos": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 65536, "d_model": 1024, "num_heads": 128, "num_layers": 24, "vocab_size": 32128}, "aspis/gpt2-genre-story-generation": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50267}, "lcw99/t5-base-korean-paraphrase": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 50358}, "Celestinian/TopicGPT": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_inner": null, "n_layer": 36, "vocab_size": 50257}, "TheBloke/Redmond-Hermes-Coder-GPTQ": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 40, "vocab_size": 49153}, "TheBloke/h2ogpt-gm-oasst1-en-2048-falcon-7b-v3-GPTQ": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "abhinavkulkarni/meta-llama-Llama-2-13b-chat-hf-w4-g128-awq": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "jypppp/llama-2-7b-manual_GPT_ver2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/Llama-2-7B-32K-Instruct-GGML": {}, "TheBloke/Yarn-Llama-2-7B-128K-GGML": {}, "quantumaikr/KoreanLM-llama-2-7B-finetuned": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 46336}, "google/t5-xl-ssm-nq": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "nikokons/gpt2-greek": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_layer": 12, "vocab_size": 22000}, "NYTK/PULI-GPT-3SX": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50048}, "Futyn-Maker/rugpt3small_based_on_gpt2-finetuned_teachers_quotes_small": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50264}, "localmodels/Llama-2-13B-Chat-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "SebastianSchramm/UniNER-7B-all-GPTQ-4bit-128g-actorder_True": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/Airoboros-L2-70B-2.1-Creative-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "casperhansen/vicuna-7b-v1.5-awq-gemv": {"architectures": 
["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "IDEA-CCNL/Wenzhong-GPT2-3.5B": {"architectures": ["GPT2LMHeadModel"], "n_embd": 3072, "n_head": 32, "n_inner": 12288, "n_layer": 30, "vocab_size": 50304}, "antoinelouis/belgpt2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_layer": 12, "vocab_size": 50257}, "atkh6673/DialoGPT-small-trump": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "huggingface-course/mt5-small-finetuned-amazon-en-es": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250112}, "malteos/gpt2-xl-wechsel-german": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1600, "n_head": 25, "n_inner": 6400, "n_layer": 48, "vocab_size": 50304}, "KES/caribe-capitalise": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "pszemraj/flan-t5-large-instruct-dolly_hhrlhf": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "Tanmay09516/StableBeluga-7B-sharded-bf16-5GB": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "abhinavkulkarni/codellama-CodeLlama-7b-Python-hf-w4-g128-awq": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/Spicyboros-7B-2.2-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "huggingtweets/elonmusk": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "BelleGroup/BELLE-7B-2M": {"architectures": ["BloomModel"], "n_inner": null, "n_layer": 30, "num_attention_heads": 32, "vocab_size": 250880}, "snoop2head/Gomoku-GPT2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 256, "n_head": 4, "n_inner": null, "n_layer": 4, "vocab_size": 404}, "AnimusOG/pygmalion-7b-4bit-128g-cuda-2048Token": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/airoboros-l2-7B-gpt4-m2.0-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Juniplayground/Mist_LLaMA-2-7B-1024_V3": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "DataLinguistic/DataLinguistic-34B-V1.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32001}, "erikycd/chatbot_hadita": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50262}, "medicalai/ClinicalGPT-base-zh": {"architectures": ["BloomForCausalLM"], "hidden_size": 4096, "n_head": 32, "n_inner": null, "n_layer": 30, "vocab_size": 250880}, "TheBloke/orca_mini_v2_13b-GPTQ": {"architectures": 
["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "NIRVANA/T5_academic_paraphraser": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "josmunpen/mt5-small-spanish-summarization": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250112}, "shahp7575/gpt2-horoscopes": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50260}, "yihsuan/best_model_0427_small_long": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250100}, "malteos/bloom-6b4-clp-german-oasst-v0.1": {"architectures": ["BloomForCausalLM"], "hidden_size": 4096, "n_head": 32, "n_layer": 30, "vocab_size": 50272}, "openllmplayground/openalpaca_7b_700bt_preview": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Gaivoronsky/ruGPT-3.5-13B-fp16": {"architectures": ["GPT2LMHeadModel"], "n_embd": 5120, "n_head": 40, "n_inner": null, "n_layer": 40, "vocab_size": 50272}, "universeTBD/astrollama": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "gorkemgoknar/gpt2-small-turkish": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "huggingtweets/joejoinerr": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "Prarabdha/T5-Transformer-RickBot": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "beomi/kollama-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 52000}, "mohammadtaghizadeh/flan-t5-base-imdb-text-classification": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "nicholasKluge/Aira-Instruct-774M": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_inner": null, "n_layer": 36, "vocab_size": 50259}, "bhenrym14/airoboros-7b-gpt4-1.4.1-lxctx-PI-16384-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Alireza1044/michael_bert_lm": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 6, "vocab_size": 50257}, "shibing624/gpt2-dialogbot-base-chinese": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 10, "vocab_size": 13317}, "mesolitica/finetune-summarization-ms-t5-base-standard-bahasa-cased": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "lmqg/flan-t5-large-squad-qg": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32101}, "circulus/alpaca-7b": {"architectures": ["LlaMAForCausalLM"], "hidden_size": 4096, 
"intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "reeducator/vicuna-13b-free": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "flozi00/Llama-2-13b-german-assistant-v6-4bit-autogptq": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 37632}, "SasnayaLetovka/tinkoff-zhientaev-model": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50262}, "mesolitica/t5-base-standard-bahasa-cased": {"architectures": ["T5Model"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "EllyPony/flutterbot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "pszemraj/flan-t5-xl-grammar-synthesis": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "jinxuewen/vicuna-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "fireballoon/baichuan-llama-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 64000}, "TheBloke/Vicuna-7B-v1-3-SuperHOT-8K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "scural/arxiv_model": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "Undi95/CodeEngine": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Codexister/DialoGPT-medium-KafkaBotV1": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "google/t5-xxl-ssm-nq": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 10240, "d_model": 4096, "num_heads": 64, "num_layers": 24, "vocab_size": 32128}, "uer/gpt2-chinese-couplet": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 21128}, "nicholasKluge/Aira-Instruct-355M": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50259}, "HIT-SCIR/huozi-7b-sft": {"architectures": ["BloomForCausalLM"], "hidden_size": 4096, "n_head": 32, "n_inner": null, "n_layer": 30, "vocab_size": 250880}, "NousResearch/CodeLlama-13b-Instruct-hf-flash": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "Enno-Ai/vigogne2-enno-13b-sft-lora-4bit": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "sonoisa/t5-base-japanese-article-generation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "Kyrmasch/t5-kazakh-qa": {"architectures": 
["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 18947}, "TheBloke/airoboros-13b-gpt4-1.4-SuperHOT-8K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/Kimiko-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "arya555/vicuna-7b-v1.5-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Geo/gpt2_custom_c_q_and_a": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "laituan245/molt5-small-smiles2caption": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 32128}, "cloudqi/cqi_brain_memory_summarizer_large_pt_v0": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "ybelkada/bloom-1b7-8bit": {"architectures": ["BloomForCausalLM"], "hidden_size": 2048, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 250880}, "snipaid/snip-igel-500-v2-adapter-merged": {"architectures": ["BloomForCausalLM"], "hidden_size": 4096, "n_head": 32, "n_layer": 30, "vocab_size": 50304}, "TabbyML/SantaCoder-1B": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 2048, "n_head": 16, "n_inner": 8192, "n_layer": 24, "vocab_size": 49280}, "TheBloke/Guanaco-33B-SuperHOT-8K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "hanseokhyeon/kullm-polyglot-5.8b-v2-GPTQ": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 16, "num_hidden_layers": 28, "vocab_size": 30080}, "CAIRE-CedarsSinai/falcon-7b-qlora-chat-support-bot-faq-alzkb-version-1": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "pranavpsv/genre-story-generator-v2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_layer": 12, "vocab_size": 50266}, "nandakishormpai/t5-small-machine-articles-tag-generation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "ITG/DialoGPT-medium-spanish-chitchat": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "4bit/falcon-7b-instruct-GPTQ": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "OpenBuddy/openbuddy-openllama-7b-v5-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 38449}, "papahawk/keya-560m": {"architectures": ["BloomForCausalLM"], "n_inner": null, "n_layer": 24, "num_attention_heads": 16, "vocab_size": 250880}, "abhinavkulkarni/tiiuae-falcon-40b-instruct-w4-g128-awq": {"architectures": ["RWForCausalLM"], "hidden_size": 8192, "n_head": 128, "n_layer": 60, "vocab_size": 65024}, "funstoryai/immersiveL-exp": {"architectures": ["BloomForCausalLM"], "hidden_size": 1536, 
"n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 250880}, "Benson/llama-2-7b-miniguanaco-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "clancystudios/DialoGPT-medium-Morty": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "huggingtweets/realdonaldtrump": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "charanhu/text_to_sql_2": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32102}, "beomi/kollama-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 52000}, "IDEA-CCNL/Ziya-LLaMA-13B-v1.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 39424}, "nicholasKluge/Aira-Instruct-PT-1B7": {"architectures": ["BloomForCausalLM"], "hidden_size": 2048, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 250683}, "TheBloke/Llama2-22B-Daydreamer-v3-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 40, "vocab_size": 32000}, "yongzx/pythia-160m-sft-hh": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 768, "intermediate_size": 3072, "num_attention_heads": 12, "num_hidden_layers": 12, "vocab_size": 50304}, "h2oai/h2ogpt-16k-codellama-34b-python": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "nedima68/author_articles_GPT2_textgen_TR": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 52001}, "IronChef/MascotAI_Open_LLaMA_FINAL": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "unionai/pythia-1B-deduped-wikipedia-8bit": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "Chirayu/nl2cql": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "TheBloke/Nous-Puffin-70B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "TheBloke/Llama-2-70B-Orca-200k-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "TheBloke/Llama-2-70B-chat-GGUF": {}, "sartmis1/CodeLlama-34b-instruct-openapi": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "flax-community/bengali-t5-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32103}, "csebuetnlp/mT5_m2o_hindi_crossSum": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 
768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "huggingtweets/fabrizioromano": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "yshen99/ZhiGuoLiZheng-GPT2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 21128}, "malalejandra/putinspeaks": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "Intel/fid_flan_t5_base_nq": {"architectures": ["FusionInDecoderForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "sjrhuschlee/flan-t5-base-mnli": {"architectures": ["T5ForSequenceClassification"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "TheBloke/Codegen25-7B-mono-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 51200}, "frank098/starcoder-vyatta": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 40, "vocab_size": 49152}, "Xenova/llama2.c-stories42M": {"architectures": ["LlamaForCausalLM"], "hidden_size": 512, "intermediate_size": 1376, "num_attention_heads": 8, "num_hidden_layers": 8, "vocab_size": 32000}, "flozi00/Llama-2-13b-german-assistant-v5": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 37632}, "Andrei-Alex/Fine-Tuned-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/vicuna-7B-1.1-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "sharpbai/alpaca-7b-merged": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "Clakmann/t5-base-Clakmann-thesis": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "RuterNorway/Llama-2-13b-chat-norwegian-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "osieosie/bloom-560m-4bit": {"architectures": ["BloomForCausalLM"], "hidden_size": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 250880}, "paulowoicho/t5-podcast-summarisation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "liujch1998/rainier-large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "gsdas/qct5": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "nicholasKluge/Aira-Instruct-1B5": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1600, "n_head": 25, "n_inner": null, "n_layer": 48, "vocab_size": 50259}, "kajdun/iubaris-13b-v3_GGML": {}, "csebuetnlp/mT5_m2o_english_crossSum": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, 
"num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "dehio/german-qg-t5-quad": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32101}, "emil2000/dialogpt-for-french-language": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "khalidsaifullaah/bengali-lyricist-gpt2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "thinhda/chatbot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "Finnish-NLP/llama-7b-finnish": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 64256}, "ehartford/WizardLM-7B-V1.0-Uncensored": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/WizardCoder-Guanaco-15B-V1.0-GPTQ": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 40, "vocab_size": 49153}, "DUOMO-Lab/TransGPT-v0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 49954}, "TheBloke/Platypus2-70B-Instruct-GGUF": {}, "lmqg/t5-large-squad-qg-ae": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32101}, "rubentito/hivt5-base-mpdocvqa": {"architectures": ["HiVT5"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "cosimoiaia/Loquace-70m": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 512, "intermediate_size": 2048, "num_attention_heads": 8, "num_hidden_layers": 6, "vocab_size": 50304}, "metamyth/jennyNew": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "AlexWortega/LLama2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "totally-not-an-llm/AlpacaCielo2-7b-8k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/orca_mini_v3_7B-GGML": {}, "zjunlp/knowlm-13b-base-v1.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "ticoAg/gpt2-tigerbot-pt-zh": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "akshat3492/mT5": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250112}, "TheBloke/Falcon-180B-Chat-GGUF": {}, "unicamp-dl/mt5-base-mmarco-v2": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "malteos/gpt2-wechsel-german-ds-meg": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": 3072, "n_layer": 12, "vocab_size": 50304}, "phpaiola/ptt5-base-summ-temario": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 
3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "mesolitica/finetune-translation-t5-super-tiny-standard-bahasa-cased": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 256, "num_heads": 6, "num_layers": 2, "vocab_size": 32100}, "ademfatnassi/bonjourGPT-small": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "pr1me/llama2_13b_eros_instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32032}, "Xenova/llama2.c-stories15M": {"architectures": ["LlamaForCausalLM"], "hidden_size": 288, "intermediate_size": 768, "num_attention_heads": 6, "num_hidden_layers": 6, "vocab_size": 32000}, "sekarmulyani/gpt2-ulasan-beauty-products-gen": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "akhooli/gpt2-small-arabic-poetry": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_layer": 12, "vocab_size": 50257}, "mrm8488/spanish-t5-small-sqac-for-qa": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32103}, "flozi00/falcon-7b-german-assistant-v2": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65040}, "TheBloke/llama-2-13B-chat-limarp-v2-merged-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "ticoAg/gpt2-tiger-sft-zh": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "thiagomf/Llama-2-7b-hf-sharded-bf16-1GB": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "THUMT/mGPT": {"architectures": ["GPT2LMHeadModel"], "vocab_size": 250100, "n_embd": 1024, "n_layer": 24, "n_head": 16, "n_inner": 4096}, "lmqg/flan-t5-base-squad-qg": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32101}, "h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-7b-preview-700bt": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/Phind-CodeLlama-34B-Python-v1-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "arogov/llama2_13b_chat_uncensored": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "ai-forever/mGPT-1.3B-bulgarian": {"architectures": ["GPT2LMHeadModel"], "n_embd": 2048, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 100000}, "davesoma/SageBeluga13": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "pssubitha/llama-2-7b-sales-force-chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "PyaeSoneK/pythia_70m_legalQA": 
{"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 512, "intermediate_size": 2048, "num_attention_heads": 8, "num_hidden_layers": 6, "vocab_size": 50304}, "hidude562/OpenMusenet-2.1-L": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50259}, "abeiler/huggingface-goatLora-goatV9-testData-morePushes": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "abinayam/gpt-2-tamil": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "persiannlp/mt5-base-parsinlu-squad-reading-comprehension": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "pierreguillou/t5-base-qa-squad-v1.1-portuguese": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "lchaloupsky/czech-gpt2-oscar": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_layer": 12, "vocab_size": 50257}, "OpenHust/viet-gpt2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "tiansz/ChatYuan-7B-merge": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "voidful/llama-v2-unit-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 8195}, "taaredikahan23/Llama-2-7b-chat-finetune": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "deutsche-telekom/mt5-small-sum-de-en-v1": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250100}, "hetpandya/t5-small-tapaco": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "sunhao666/chi-sum2": {"architectures": ["T5Model"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 21228}, "smartik/mt5-small-finetuned-gec-0.2": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250112}, "PORTULAN/gervasio-ptbr-base": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 8, "num_hidden_layers": 16, "vocab_size": 50304}, "clibrain/Llama-2-13b-ft-instruct-es-gptq-4bit": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "soketlabs/bhasha-7b-2k-hi": {"architectures": ["MPTForCausalLM"], "d_model": 4096, "vocab_size": 61772}, "codefuse-ai/CodeFuse-13B": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 100831}, "Sentdex/GPyT": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 52000}, "it5/it5-large-news-summarization": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 
1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32103}, "FredZhang7/distilgpt2-stable-diffusion": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 6, "vocab_size": 50257}, "Rostlab/ProstT5_fp16": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 16384, "d_model": 1024, "num_heads": 32, "num_layers": 24, "vocab_size": 150}, "approach0/mathy-vicuna-13B-FFT": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "lighteternal/gpt2-finetuned-greek": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "stanford-crfm/battlestar-gpt2-small-x49": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "stacked-summaries/flan-t5-small-stacked-samsum-1024": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 32128}, "TigerResearch/tigerbot-7b-base-v1": {"architectures": ["BloomForCausalLM"], "hidden_size": 4096, "n_head": 32, "n_inner": null, "n_layer": 30, "vocab_size": 250680}, "Chang-Su/llama-2-13b-chat-ko": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 39478}, "Clakmann/t5-base-Clakmann-thesis-epoch10": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "yekaraoglann/results": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "bitadin/gpt-4-medium-titles-v2-flan-t5-base-llm-6": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "google/t5_11b_trueteacher_and_anli": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 65536, "d_model": 1024, "num_heads": 128, "num_layers": 24, "vocab_size": 32128}, "TaylorAI/Flash-Llama-30M": {"architectures": ["LlamaForCausalLM"], "hidden_size": 384, "intermediate_size": 1024, "num_attention_heads": 12, "num_hidden_layers": 4, "vocab_size": 32000}, "flax-community/t5-base-wikisplit": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "razent/SciFive-large-Pubmed_PMC": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "inkoziev/rugpt_chitchat": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1536, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50258}, "lomahony/eleuther-pythia410m-hh-sft": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 1024, "intermediate_size": 4096, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50304}, "TheBloke/Vicuna-13B-v1.3-German-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "emozilla/LLongMA-2-13b-storysummarizer": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32032}, "yongzx/pythia-1b-sft-hh": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, 
"intermediate_size": 8192, "num_attention_heads": 8, "num_hidden_layers": 16, "vocab_size": 50304}, "TheBloke/airoboros-13b-gpt4-1.4-SuperHOT-8K-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "heegyu/llama-2-ko-7b-chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 46336}, "flozi00/Llama-2-7b-german-assistant-v3-4bit-autogptq": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "zarakiquemparte/zararp-l2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Sao10K/Stheno-1.3-L2-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TsinghuaAI/CPM-Generate": {"architectures": ["GPT2LMHeadModel"], "n_embd": 2560, "n_head": 32, "n_inner": null, "n_layer": 32, "vocab_size": 30000}, "AlexWortega/instruct_rugptlarge": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1536, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50263}, "tatsu-lab/alpaca-7b-wdiff": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}} \ No newline at end of file diff --git a/asset-manifest.json b/asset-manifest.json index 21303c8..0dcf5e5 100644 --- a/asset-manifest.json +++ b/asset-manifest.json @@ -1,15 +1,15 @@ { "files": { - "main.css": "/gpu_poor/static/css/main.66e7bbdc.css", - "main.js": "/gpu_poor/static/js/main.8b24599d.js", + "main.css": "/gpu_poor/static/css/main.456c3b59.css", + "main.js": "/gpu_poor/static/js/main.48479645.js", "static/js/787.dccdf937.chunk.js": "/gpu_poor/static/js/787.dccdf937.chunk.js", "index.html": "/gpu_poor/index.html", - "main.66e7bbdc.css.map": "/gpu_poor/static/css/main.66e7bbdc.css.map", - "main.8b24599d.js.map": "/gpu_poor/static/js/main.8b24599d.js.map", + "main.456c3b59.css.map": "/gpu_poor/static/css/main.456c3b59.css.map", + "main.48479645.js.map": "/gpu_poor/static/js/main.48479645.js.map", "787.dccdf937.chunk.js.map": "/gpu_poor/static/js/787.dccdf937.chunk.js.map" }, "entrypoints": [ - "static/css/main.66e7bbdc.css", - "static/js/main.8b24599d.js" + "static/css/main.456c3b59.css", + "static/js/main.48479645.js" ] } \ No newline at end of file diff --git a/index.html b/index.html index 3279e2e..443b649 100644 --- a/index.html +++ b/index.html @@ -1 +1 @@ -LLM memory check
\ No newline at end of file +LLM memory check
\ No newline at end of file diff --git a/static/css/main.66e7bbdc.css b/static/css/main.456c3b59.css similarity index 57% rename from static/css/main.66e7bbdc.css rename to static/css/main.456c3b59.css index 5c6a823..cd0f9a5 100644 --- a/static/css/main.66e7bbdc.css +++ b/static/css/main.456c3b59.css @@ -1,4 +1,4 @@ /* ! tailwindcss v3.3.3 | MIT License | https://tailwindcss.com -*/*,:after,:before{border:0 solid #e5e7eb;box-sizing:border-box}:after,:before{--tw-content:""}html{-webkit-text-size-adjust:100%;-webkit-font-feature-settings:normal;font-feature-settings:normal;font-family:ui-sans-serif,system-ui,-apple-system,BlinkMacSystemFont,Segoe UI,Roboto,Helvetica Neue,Arial,Noto Sans,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol,Noto Color Emoji;font-variation-settings:normal;line-height:1.5;tab-size:4}body{line-height:inherit}hr{border-top-width:1px;color:inherit;height:0}abbr:where([title]){-webkit-text-decoration:underline dotted;text-decoration:underline dotted}h1,h2,h3,h4,h5,h6{font-size:inherit;font-weight:inherit}a{color:inherit;text-decoration:inherit}b,strong{font-weight:bolder}code,kbd,pre,samp{font-family:ui-monospace,SFMono-Regular,Menlo,Monaco,Consolas,Liberation Mono,Courier New,monospace;font-size:1em}small{font-size:80%}sub,sup{font-size:75%;line-height:0;position:relative;vertical-align:initial}sub{bottom:-.25em}sup{top:-.5em}table{border-collapse:collapse;border-color:inherit;text-indent:0}button,input,optgroup,select,textarea{-webkit-font-feature-settings:inherit;font-feature-settings:inherit;color:inherit;font-family:inherit;font-size:100%;font-variation-settings:inherit;font-weight:inherit;line-height:inherit;margin:0;padding:0}button,select{text-transform:none}[type=button],[type=reset],[type=submit],button{-webkit-appearance:button;background-color:initial;background-image:none}:-moz-focusring{outline:auto}:-moz-ui-invalid{box-shadow:none}progress{vertical-align:initial}::-webkit-inner-spin-button,::-webkit-outer-spin-button{height:auto}[type=search]{-webkit-appearance:textfield;outline-offset:-2px}::-webkit-search-decoration{-webkit-appearance:none}::-webkit-file-upload-button{-webkit-appearance:button;font:inherit}summary{display:list-item}blockquote,dd,dl,figure,h1,h2,h3,h4,h5,h6,hr,p,pre{margin:0}fieldset{margin:0}fieldset,legend{padding:0}menu,ol,ul{list-style:none;margin:0;padding:0}dialog{padding:0}textarea{resize:vertical}input::-webkit-input-placeholder,textarea::-webkit-input-placeholder{color:#9ca3af;opacity:1}input::placeholder,textarea::placeholder{color:#9ca3af;opacity:1}[role=button],button{cursor:pointer}:disabled{cursor:default}audio,canvas,embed,iframe,img,object,svg,video{display:block;vertical-align:middle}img,video{height:auto;max-width:100%}[hidden]{display:none}*,:after,:before{--tw-border-spacing-x:0;--tw-border-spacing-y:0;--tw-translate-x:0;--tw-translate-y:0;--tw-rotate:0;--tw-skew-x:0;--tw-skew-y:0;--tw-scale-x:1;--tw-scale-y:1;--tw-pan-x: ;--tw-pan-y: ;--tw-pinch-zoom: ;--tw-scroll-snap-strictness:proximity;--tw-gradient-from-position: ;--tw-gradient-via-position: ;--tw-gradient-to-position: ;--tw-ordinal: ;--tw-slashed-zero: ;--tw-numeric-figure: ;--tw-numeric-spacing: ;--tw-numeric-fraction: ;--tw-ring-inset: ;--tw-ring-offset-width:0px;--tw-ring-offset-color:#fff;--tw-ring-color:rgba(59,130,246,.5);--tw-ring-offset-shadow:0 0 #0000;--tw-ring-shadow:0 0 #0000;--tw-shadow:0 0 #0000;--tw-shadow-colored:0 0 #0000;--tw-blur: ;--tw-brightness: ;--tw-contrast: ;--tw-grayscale: ;--tw-hue-rotate: ;--tw-invert: ;--tw-saturate: 
;--tw-sepia: ;--tw-drop-shadow: ;--tw-backdrop-blur: ;--tw-backdrop-brightness: ;--tw-backdrop-contrast: ;--tw-backdrop-grayscale: ;--tw-backdrop-hue-rotate: ;--tw-backdrop-invert: ;--tw-backdrop-opacity: ;--tw-backdrop-saturate: ;--tw-backdrop-sepia: }::-webkit-backdrop{--tw-border-spacing-x:0;--tw-border-spacing-y:0;--tw-translate-x:0;--tw-translate-y:0;--tw-rotate:0;--tw-skew-x:0;--tw-skew-y:0;--tw-scale-x:1;--tw-scale-y:1;--tw-pan-x: ;--tw-pan-y: ;--tw-pinch-zoom: ;--tw-scroll-snap-strictness:proximity;--tw-gradient-from-position: ;--tw-gradient-via-position: ;--tw-gradient-to-position: ;--tw-ordinal: ;--tw-slashed-zero: ;--tw-numeric-figure: ;--tw-numeric-spacing: ;--tw-numeric-fraction: ;--tw-ring-inset: ;--tw-ring-offset-width:0px;--tw-ring-offset-color:#fff;--tw-ring-color:rgba(59,130,246,.5);--tw-ring-offset-shadow:0 0 #0000;--tw-ring-shadow:0 0 #0000;--tw-shadow:0 0 #0000;--tw-shadow-colored:0 0 #0000;--tw-blur: ;--tw-brightness: ;--tw-contrast: ;--tw-grayscale: ;--tw-hue-rotate: ;--tw-invert: ;--tw-saturate: ;--tw-sepia: ;--tw-drop-shadow: ;--tw-backdrop-blur: ;--tw-backdrop-brightness: ;--tw-backdrop-contrast: ;--tw-backdrop-grayscale: ;--tw-backdrop-hue-rotate: ;--tw-backdrop-invert: ;--tw-backdrop-opacity: ;--tw-backdrop-saturate: ;--tw-backdrop-sepia: }::backdrop{--tw-border-spacing-x:0;--tw-border-spacing-y:0;--tw-translate-x:0;--tw-translate-y:0;--tw-rotate:0;--tw-skew-x:0;--tw-skew-y:0;--tw-scale-x:1;--tw-scale-y:1;--tw-pan-x: ;--tw-pan-y: ;--tw-pinch-zoom: ;--tw-scroll-snap-strictness:proximity;--tw-gradient-from-position: ;--tw-gradient-via-position: ;--tw-gradient-to-position: ;--tw-ordinal: ;--tw-slashed-zero: ;--tw-numeric-figure: ;--tw-numeric-spacing: ;--tw-numeric-fraction: ;--tw-ring-inset: ;--tw-ring-offset-width:0px;--tw-ring-offset-color:#fff;--tw-ring-color:rgba(59,130,246,.5);--tw-ring-offset-shadow:0 0 #0000;--tw-ring-shadow:0 0 #0000;--tw-shadow:0 0 #0000;--tw-shadow-colored:0 0 #0000;--tw-blur: ;--tw-brightness: ;--tw-contrast: ;--tw-grayscale: ;--tw-hue-rotate: ;--tw-invert: ;--tw-saturate: ;--tw-sepia: ;--tw-drop-shadow: ;--tw-backdrop-blur: ;--tw-backdrop-brightness: ;--tw-backdrop-contrast: ;--tw-backdrop-grayscale: ;--tw-backdrop-hue-rotate: ;--tw-backdrop-invert: ;--tw-backdrop-opacity: ;--tw-backdrop-saturate: ;--tw-backdrop-sepia: }.fixed{position:fixed}.inset-0{inset:0}.m-auto{margin:auto}.mt-24{margin-top:6rem}.inline-block{display:inline-block}.flex{display:flex}.hidden{display:none}.w-24{width:6rem}.w-3\/4{width:75%}.w-32{width:8rem}.w-48{width:12rem}.w-64{width:16rem}.transform{-webkit-transform:translate(var(--tw-translate-x),var(--tw-translate-y)) rotate(var(--tw-rotate)) skewX(var(--tw-skew-x)) skewY(var(--tw-skew-y)) scaleX(var(--tw-scale-x)) scaleY(var(--tw-scale-y));transform:translate(var(--tw-translate-x),var(--tw-translate-y)) rotate(var(--tw-rotate)) skewX(var(--tw-skew-x)) skewY(var(--tw-skew-y)) scaleX(var(--tw-scale-x)) scaleY(var(--tw-scale-y))}.cursor-pointer{cursor:pointer}.content-center{align-content:center}.justify-center{justify-content:center}.rounded{border-radius:.25rem}.rounded-lg{border-radius:.5rem}.border{border-width:1px}.border-black{--tw-border-opacity:1;border-color:rgb(0 0 0/var(--tw-border-opacity))}.border-blue-500{--tw-border-opacity:1;border-color:rgb(59 130 246/var(--tw-border-opacity))}.border-gray-300{--tw-border-opacity:1;border-color:rgb(209 213 219/var(--tw-border-opacity))}.border-gray-400{--tw-border-opacity:1;border-color:rgb(156 163 
175/var(--tw-border-opacity))}.border-gray-500{--tw-border-opacity:1;border-color:rgb(107 114 128/var(--tw-border-opacity))}.border-gray-600{--tw-border-opacity:1;border-color:rgb(75 85 99/var(--tw-border-opacity))}.border-red-500{--tw-border-opacity:1;border-color:rgb(239 68 68/var(--tw-border-opacity))}.bg-black{--tw-bg-opacity:1;background-color:rgb(0 0 0/var(--tw-bg-opacity))}.bg-blue-100{--tw-bg-opacity:1;background-color:rgb(219 234 254/var(--tw-bg-opacity))}.bg-gray-100{--tw-bg-opacity:1;background-color:rgb(243 244 246/var(--tw-bg-opacity))}.bg-gray-200{--tw-bg-opacity:1;background-color:rgb(229 231 235/var(--tw-bg-opacity))}.bg-green-50{--tw-bg-opacity:1;background-color:rgb(240 253 244/var(--tw-bg-opacity))}.bg-red-100{--tw-bg-opacity:1;background-color:rgb(254 226 226/var(--tw-bg-opacity))}.bg-white{--tw-bg-opacity:1;background-color:rgb(255 255 255/var(--tw-bg-opacity))}.bg-opacity-50{--tw-bg-opacity:0.5}.p-4{padding:1rem}.px-1{padding-left:.25rem;padding-right:.25rem}.px-4{padding-left:1rem;padding-right:1rem}.py-1{padding-bottom:.25rem;padding-top:.25rem}.py-2{padding-bottom:.5rem;padding-top:.5rem}.pb-1{padding-bottom:.25rem}.pb-2{padding-bottom:.5rem}.pl-2{padding-left:.5rem}.pl-4{padding-left:1rem}.pl-6{padding-left:1.5rem}.pl-8{padding-left:2rem}.pr-2{padding-right:.5rem}.pr-4{padding-right:1rem}.pr-6{padding-right:1.5rem}.pt-1{padding-top:.25rem}.pt-3{padding-top:.75rem}.pt-8{padding-top:2rem}.text-center{text-align:center}.font-mono{font-family:ui-monospace,SFMono-Regular,Menlo,Monaco,Consolas,Liberation Mono,Courier New,monospace}.font-serif{font-family:ui-serif,Georgia,Cambria,Times New Roman,Times,serif}.text-2xl{font-size:1.5rem;line-height:2rem}.text-base{font-size:1rem;line-height:1.5rem}.text-sm{font-size:.875rem;line-height:1.25rem}.text-xs{font-size:.75rem;line-height:1rem}.font-bold{font-weight:700}.font-semibold{font-weight:600}.text-black{--tw-text-opacity:1;color:rgb(0 0 0/var(--tw-text-opacity))}.text-blue-600{--tw-text-opacity:1;color:rgb(37 99 235/var(--tw-text-opacity))}.text-blue-700{--tw-text-opacity:1;color:rgb(29 78 216/var(--tw-text-opacity))}.text-gray-600{--tw-text-opacity:1;color:rgb(75 85 99/var(--tw-text-opacity))}.text-red-700{--tw-text-opacity:1;color:rgb(185 28 28/var(--tw-text-opacity))}.underline{text-decoration-line:underline}.shadow-xl{--tw-shadow:0 20px 25px -5px rgba(0,0,0,.1),0 8px 10px -6px rgba(0,0,0,.1);--tw-shadow-colored:0 20px 25px -5px var(--tw-shadow-color),0 8px 10px -6px var(--tw-shadow-color);box-shadow:0 0 #0000,0 0 #0000,var(--tw-shadow);box-shadow:var(--tw-ring-offset-shadow,0 0 #0000),var(--tw-ring-shadow,0 0 #0000),var(--tw-shadow)}.transition-transform{transition-duration:.15s;transition-property:-webkit-transform;transition-property:transform;transition-property:transform,-webkit-transform;transition-timing-function:cubic-bezier(.4,0,.2,1)}.duration-300{transition-duration:.3s}body{-webkit-font-smoothing:antialiased;-moz-osx-font-smoothing:grayscale;font-family:-apple-system,BlinkMacSystemFont,Segoe UI,Roboto,Oxygen,Ubuntu,Cantarell,Fira Sans,Droid Sans,Helvetica Neue,sans-serif;margin:0}code{font-family:source-code-pro,Menlo,Monaco,Consolas,Courier New,monospace}.hover\:scale-110:hover{--tw-scale-x:1.1;--tw-scale-y:1.1;-webkit-transform:translate(var(--tw-translate-x),var(--tw-translate-y)) rotate(var(--tw-rotate)) skewX(var(--tw-skew-x)) skewY(var(--tw-skew-y)) scaleX(var(--tw-scale-x)) scaleY(var(--tw-scale-y));transform:translate(var(--tw-translate-x),var(--tw-translate-y)) rotate(var(--tw-rotate)) 
skewX(var(--tw-skew-x)) skewY(var(--tw-skew-y)) scaleX(var(--tw-scale-x)) scaleY(var(--tw-scale-y))}.hover\:cursor-not-allowed:hover{cursor:not-allowed}.hover\:border-2:hover{border-width:2px}.hover\:border-black:hover{--tw-border-opacity:1;border-color:rgb(0 0 0/var(--tw-border-opacity))}.hover\:bg-blue-200:hover{--tw-bg-opacity:1;background-color:rgb(191 219 254/var(--tw-bg-opacity))}.hover\:bg-gray-300:hover{--tw-bg-opacity:1;background-color:rgb(209 213 219/var(--tw-bg-opacity))}.hover\:bg-red-200:hover{--tw-bg-opacity:1;background-color:rgb(254 202 202/var(--tw-bg-opacity))}.hover\:text-3xl:hover{font-size:1.875rem;line-height:2.25rem}.hover\:font-bold:hover{font-weight:700}@media (min-width:768px){.md\:w-1\/2{width:50%}}@media (min-width:1024px){.lg\:w-1\/3{width:33.333333%}} -/*# sourceMappingURL=main.66e7bbdc.css.map*/ \ No newline at end of file +*/*,:after,:before{border:0 solid #e5e7eb;box-sizing:border-box}:after,:before{--tw-content:""}html{-webkit-text-size-adjust:100%;-webkit-font-feature-settings:normal;font-feature-settings:normal;font-family:ui-sans-serif,system-ui,-apple-system,BlinkMacSystemFont,Segoe UI,Roboto,Helvetica Neue,Arial,Noto Sans,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol,Noto Color Emoji;font-variation-settings:normal;line-height:1.5;tab-size:4}body{line-height:inherit}hr{border-top-width:1px;color:inherit;height:0}abbr:where([title]){-webkit-text-decoration:underline dotted;text-decoration:underline dotted}h1,h2,h3,h4,h5,h6{font-size:inherit;font-weight:inherit}a{color:inherit;text-decoration:inherit}b,strong{font-weight:bolder}code,kbd,pre,samp{font-family:ui-monospace,SFMono-Regular,Menlo,Monaco,Consolas,Liberation Mono,Courier New,monospace;font-size:1em}small{font-size:80%}sub,sup{font-size:75%;line-height:0;position:relative;vertical-align:initial}sub{bottom:-.25em}sup{top:-.5em}table{border-collapse:collapse;border-color:inherit;text-indent:0}button,input,optgroup,select,textarea{-webkit-font-feature-settings:inherit;font-feature-settings:inherit;color:inherit;font-family:inherit;font-size:100%;font-variation-settings:inherit;font-weight:inherit;line-height:inherit;margin:0;padding:0}button,select{text-transform:none}[type=button],[type=reset],[type=submit],button{-webkit-appearance:button;background-color:initial;background-image:none}:-moz-focusring{outline:auto}:-moz-ui-invalid{box-shadow:none}progress{vertical-align:initial}::-webkit-inner-spin-button,::-webkit-outer-spin-button{height:auto}[type=search]{-webkit-appearance:textfield;outline-offset:-2px}::-webkit-search-decoration{-webkit-appearance:none}::-webkit-file-upload-button{-webkit-appearance:button;font:inherit}summary{display:list-item}blockquote,dd,dl,figure,h1,h2,h3,h4,h5,h6,hr,p,pre{margin:0}fieldset{margin:0}fieldset,legend{padding:0}menu,ol,ul{list-style:none;margin:0;padding:0}dialog{padding:0}textarea{resize:vertical}input::-webkit-input-placeholder,textarea::-webkit-input-placeholder{color:#9ca3af;opacity:1}input::placeholder,textarea::placeholder{color:#9ca3af;opacity:1}[role=button],button{cursor:pointer}:disabled{cursor:default}audio,canvas,embed,iframe,img,object,svg,video{display:block;vertical-align:middle}img,video{height:auto;max-width:100%}[hidden]{display:none}*,:after,:before{--tw-border-spacing-x:0;--tw-border-spacing-y:0;--tw-translate-x:0;--tw-translate-y:0;--tw-rotate:0;--tw-skew-x:0;--tw-skew-y:0;--tw-scale-x:1;--tw-scale-y:1;--tw-pan-x: ;--tw-pan-y: ;--tw-pinch-zoom: ;--tw-scroll-snap-strictness:proximity;--tw-gradient-from-position: 
;--tw-gradient-via-position: ;--tw-gradient-to-position: ;--tw-ordinal: ;--tw-slashed-zero: ;--tw-numeric-figure: ;--tw-numeric-spacing: ;--tw-numeric-fraction: ;--tw-ring-inset: ;--tw-ring-offset-width:0px;--tw-ring-offset-color:#fff;--tw-ring-color:rgba(59,130,246,.5);--tw-ring-offset-shadow:0 0 #0000;--tw-ring-shadow:0 0 #0000;--tw-shadow:0 0 #0000;--tw-shadow-colored:0 0 #0000;--tw-blur: ;--tw-brightness: ;--tw-contrast: ;--tw-grayscale: ;--tw-hue-rotate: ;--tw-invert: ;--tw-saturate: ;--tw-sepia: ;--tw-drop-shadow: ;--tw-backdrop-blur: ;--tw-backdrop-brightness: ;--tw-backdrop-contrast: ;--tw-backdrop-grayscale: ;--tw-backdrop-hue-rotate: ;--tw-backdrop-invert: ;--tw-backdrop-opacity: ;--tw-backdrop-saturate: ;--tw-backdrop-sepia: }::-webkit-backdrop{--tw-border-spacing-x:0;--tw-border-spacing-y:0;--tw-translate-x:0;--tw-translate-y:0;--tw-rotate:0;--tw-skew-x:0;--tw-skew-y:0;--tw-scale-x:1;--tw-scale-y:1;--tw-pan-x: ;--tw-pan-y: ;--tw-pinch-zoom: ;--tw-scroll-snap-strictness:proximity;--tw-gradient-from-position: ;--tw-gradient-via-position: ;--tw-gradient-to-position: ;--tw-ordinal: ;--tw-slashed-zero: ;--tw-numeric-figure: ;--tw-numeric-spacing: ;--tw-numeric-fraction: ;--tw-ring-inset: ;--tw-ring-offset-width:0px;--tw-ring-offset-color:#fff;--tw-ring-color:rgba(59,130,246,.5);--tw-ring-offset-shadow:0 0 #0000;--tw-ring-shadow:0 0 #0000;--tw-shadow:0 0 #0000;--tw-shadow-colored:0 0 #0000;--tw-blur: ;--tw-brightness: ;--tw-contrast: ;--tw-grayscale: ;--tw-hue-rotate: ;--tw-invert: ;--tw-saturate: ;--tw-sepia: ;--tw-drop-shadow: ;--tw-backdrop-blur: ;--tw-backdrop-brightness: ;--tw-backdrop-contrast: ;--tw-backdrop-grayscale: ;--tw-backdrop-hue-rotate: ;--tw-backdrop-invert: ;--tw-backdrop-opacity: ;--tw-backdrop-saturate: ;--tw-backdrop-sepia: }::backdrop{--tw-border-spacing-x:0;--tw-border-spacing-y:0;--tw-translate-x:0;--tw-translate-y:0;--tw-rotate:0;--tw-skew-x:0;--tw-skew-y:0;--tw-scale-x:1;--tw-scale-y:1;--tw-pan-x: ;--tw-pan-y: ;--tw-pinch-zoom: ;--tw-scroll-snap-strictness:proximity;--tw-gradient-from-position: ;--tw-gradient-via-position: ;--tw-gradient-to-position: ;--tw-ordinal: ;--tw-slashed-zero: ;--tw-numeric-figure: ;--tw-numeric-spacing: ;--tw-numeric-fraction: ;--tw-ring-inset: ;--tw-ring-offset-width:0px;--tw-ring-offset-color:#fff;--tw-ring-color:rgba(59,130,246,.5);--tw-ring-offset-shadow:0 0 #0000;--tw-ring-shadow:0 0 #0000;--tw-shadow:0 0 #0000;--tw-shadow-colored:0 0 #0000;--tw-blur: ;--tw-brightness: ;--tw-contrast: ;--tw-grayscale: ;--tw-hue-rotate: ;--tw-invert: ;--tw-saturate: ;--tw-sepia: ;--tw-drop-shadow: ;--tw-backdrop-blur: ;--tw-backdrop-brightness: ;--tw-backdrop-contrast: ;--tw-backdrop-grayscale: ;--tw-backdrop-hue-rotate: ;--tw-backdrop-invert: ;--tw-backdrop-opacity: ;--tw-backdrop-saturate: ;--tw-backdrop-sepia: }.fixed{position:fixed}.inset-0{inset:0}.m-auto{margin:auto}.mt-2{margin-top:.5rem}.mt-24{margin-top:6rem}.inline-block{display:inline-block}.flex{display:flex}.hidden{display:none}.w-24{width:6rem}.w-3\/4{width:75%}.w-32{width:8rem}.w-48{width:12rem}.w-64{width:16rem}.transform{-webkit-transform:translate(var(--tw-translate-x),var(--tw-translate-y)) rotate(var(--tw-rotate)) skewX(var(--tw-skew-x)) skewY(var(--tw-skew-y)) scaleX(var(--tw-scale-x)) scaleY(var(--tw-scale-y));transform:translate(var(--tw-translate-x),var(--tw-translate-y)) rotate(var(--tw-rotate)) skewX(var(--tw-skew-x)) skewY(var(--tw-skew-y)) scaleX(var(--tw-scale-x)) 
scaleY(var(--tw-scale-y))}.cursor-pointer{cursor:pointer}.content-center{align-content:center}.justify-center{justify-content:center}.divide-y>:not([hidden])~:not([hidden]){--tw-divide-y-reverse:0;border-bottom-width:calc(1px*var(--tw-divide-y-reverse));border-top-width:calc(1px*(1 - var(--tw-divide-y-reverse)))}.rounded{border-radius:.25rem}.rounded-lg{border-radius:.5rem}.border{border-width:1px}.border-black{--tw-border-opacity:1;border-color:rgb(0 0 0/var(--tw-border-opacity))}.border-blue-500{--tw-border-opacity:1;border-color:rgb(59 130 246/var(--tw-border-opacity))}.border-gray-300{--tw-border-opacity:1;border-color:rgb(209 213 219/var(--tw-border-opacity))}.border-gray-400{--tw-border-opacity:1;border-color:rgb(156 163 175/var(--tw-border-opacity))}.border-gray-500{--tw-border-opacity:1;border-color:rgb(107 114 128/var(--tw-border-opacity))}.border-gray-600{--tw-border-opacity:1;border-color:rgb(75 85 99/var(--tw-border-opacity))}.border-red-500{--tw-border-opacity:1;border-color:rgb(239 68 68/var(--tw-border-opacity))}.bg-black{--tw-bg-opacity:1;background-color:rgb(0 0 0/var(--tw-bg-opacity))}.bg-blue-100{--tw-bg-opacity:1;background-color:rgb(219 234 254/var(--tw-bg-opacity))}.bg-gray-100{--tw-bg-opacity:1;background-color:rgb(243 244 246/var(--tw-bg-opacity))}.bg-gray-200{--tw-bg-opacity:1;background-color:rgb(229 231 235/var(--tw-bg-opacity))}.bg-green-50{--tw-bg-opacity:1;background-color:rgb(240 253 244/var(--tw-bg-opacity))}.bg-red-100{--tw-bg-opacity:1;background-color:rgb(254 226 226/var(--tw-bg-opacity))}.bg-white{--tw-bg-opacity:1;background-color:rgb(255 255 255/var(--tw-bg-opacity))}.bg-opacity-50{--tw-bg-opacity:0.5}.p-2{padding:.5rem}.p-4{padding:1rem}.px-1{padding-left:.25rem;padding-right:.25rem}.px-4{padding-left:1rem;padding-right:1rem}.py-1{padding-bottom:.25rem;padding-top:.25rem}.py-2{padding-bottom:.5rem;padding-top:.5rem}.pb-1{padding-bottom:.25rem}.pb-2{padding-bottom:.5rem}.pl-2{padding-left:.5rem}.pl-4{padding-left:1rem}.pl-6{padding-left:1.5rem}.pl-8{padding-left:2rem}.pr-2{padding-right:.5rem}.pr-4{padding-right:1rem}.pr-6{padding-right:1.5rem}.pt-1{padding-top:.25rem}.pt-3{padding-top:.75rem}.pt-8{padding-top:2rem}.text-center{text-align:center}.font-mono{font-family:ui-monospace,SFMono-Regular,Menlo,Monaco,Consolas,Liberation Mono,Courier New,monospace}.font-serif{font-family:ui-serif,Georgia,Cambria,Times New Roman,Times,serif}.text-2xl{font-size:1.5rem;line-height:2rem}.text-base{font-size:1rem;line-height:1.5rem}.text-sm{font-size:.875rem;line-height:1.25rem}.text-xl{font-size:1.25rem;line-height:1.75rem}.text-xs{font-size:.75rem;line-height:1rem}.font-bold{font-weight:700}.font-semibold{font-weight:600}.text-black{--tw-text-opacity:1;color:rgb(0 0 0/var(--tw-text-opacity))}.text-blue-600{--tw-text-opacity:1;color:rgb(37 99 235/var(--tw-text-opacity))}.text-blue-700{--tw-text-opacity:1;color:rgb(29 78 216/var(--tw-text-opacity))}.text-gray-600{--tw-text-opacity:1;color:rgb(75 85 99/var(--tw-text-opacity))}.text-red-700{--tw-text-opacity:1;color:rgb(185 28 28/var(--tw-text-opacity))}.underline{text-decoration-line:underline}.shadow-xl{--tw-shadow:0 20px 25px -5px rgba(0,0,0,.1),0 8px 10px -6px rgba(0,0,0,.1);--tw-shadow-colored:0 20px 25px -5px var(--tw-shadow-color),0 8px 10px -6px var(--tw-shadow-color);box-shadow:0 0 #0000,0 0 #0000,var(--tw-shadow);box-shadow:var(--tw-ring-offset-shadow,0 0 #0000),var(--tw-ring-shadow,0 0 #0000),var(--tw-shadow)}.filter{-webkit-filter:var(--tw-blur) var(--tw-brightness) var(--tw-contrast) var(--tw-grayscale) 
var(--tw-hue-rotate) var(--tw-invert) var(--tw-saturate) var(--tw-sepia) var(--tw-drop-shadow);filter:var(--tw-blur) var(--tw-brightness) var(--tw-contrast) var(--tw-grayscale) var(--tw-hue-rotate) var(--tw-invert) var(--tw-saturate) var(--tw-sepia) var(--tw-drop-shadow)}.transition-transform{transition-duration:.15s;transition-property:-webkit-transform;transition-property:transform;transition-property:transform,-webkit-transform;transition-timing-function:cubic-bezier(.4,0,.2,1)}.duration-300{transition-duration:.3s}body{-webkit-font-smoothing:antialiased;-moz-osx-font-smoothing:grayscale;font-family:-apple-system,BlinkMacSystemFont,Segoe UI,Roboto,Oxygen,Ubuntu,Cantarell,Fira Sans,Droid Sans,Helvetica Neue,sans-serif;margin:0}code{font-family:source-code-pro,Menlo,Monaco,Consolas,Courier New,monospace}.hover\:scale-110:hover{--tw-scale-x:1.1;--tw-scale-y:1.1;-webkit-transform:translate(var(--tw-translate-x),var(--tw-translate-y)) rotate(var(--tw-rotate)) skewX(var(--tw-skew-x)) skewY(var(--tw-skew-y)) scaleX(var(--tw-scale-x)) scaleY(var(--tw-scale-y));transform:translate(var(--tw-translate-x),var(--tw-translate-y)) rotate(var(--tw-rotate)) skewX(var(--tw-skew-x)) skewY(var(--tw-skew-y)) scaleX(var(--tw-scale-x)) scaleY(var(--tw-scale-y))}.hover\:cursor-not-allowed:hover{cursor:not-allowed}.hover\:border-2:hover{border-width:2px}.hover\:border-black:hover{--tw-border-opacity:1;border-color:rgb(0 0 0/var(--tw-border-opacity))}.hover\:bg-blue-200:hover{--tw-bg-opacity:1;background-color:rgb(191 219 254/var(--tw-bg-opacity))}.hover\:bg-gray-200:hover{--tw-bg-opacity:1;background-color:rgb(229 231 235/var(--tw-bg-opacity))}.hover\:bg-gray-300:hover{--tw-bg-opacity:1;background-color:rgb(209 213 219/var(--tw-bg-opacity))}.hover\:bg-red-200:hover{--tw-bg-opacity:1;background-color:rgb(254 202 202/var(--tw-bg-opacity))}.hover\:text-3xl:hover{font-size:1.875rem;line-height:2.25rem}.hover\:font-bold:hover{font-weight:700}@media (min-width:768px){.md\:w-1\/2{width:50%}}@media (min-width:1024px){.lg\:w-1\/3{width:33.333333%}} +/*# sourceMappingURL=main.456c3b59.css.map*/ \ No newline at end of file diff --git a/static/css/main.66e7bbdc.css.map b/static/css/main.456c3b59.css.map similarity index 50% rename from static/css/main.66e7bbdc.css.map rename to static/css/main.456c3b59.css.map index 4ca80c9..3ceef94 100644 --- a/static/css/main.66e7bbdc.css.map +++ b/static/css/main.456c3b59.css.map @@ -1 +1 @@ 
-{"version":3,"file":"static/css/main.66e7bbdc.css","mappings":"AAAA;;CAAc,CAAd,uCAAc,CAAd,qBAAc,CAAd,8BAAc,CAAd,kCAAc,CAAd,oCAAc,CAAd,4BAAc,CAAd,gMAAc,CAAd,8BAAc,CAAd,eAAc,CAAd,UAAc,CAAd,wBAAc,CAAd,uBAAc,CAAd,aAAc,CAAd,QAAc,CAAd,4DAAc,CAAd,gCAAc,CAAd,mCAAc,CAAd,mBAAc,CAAd,eAAc,CAAd,uBAAc,CAAd,2BAAc,CAAd,qHAAc,CAAd,aAAc,CAAd,mBAAc,CAAd,qBAAc,CAAd,aAAc,CAAd,iBAAc,CAAd,sBAAc,CAAd,iBAAc,CAAd,aAAc,CAAd,8BAAc,CAAd,oBAAc,CAAd,aAAc,CAAd,2EAAc,CAAd,6BAAc,CAAd,aAAc,CAAd,mBAAc,CAAd,cAAc,CAAd,+BAAc,CAAd,mBAAc,CAAd,mBAAc,CAAd,QAAc,CAAd,SAAc,CAAd,iCAAc,CAAd,yEAAc,CAAd,wBAAc,CAAd,qBAAc,CAAd,4BAAc,CAAd,gCAAc,CAAd,+BAAc,CAAd,mEAAc,CAAd,0CAAc,CAAd,mBAAc,CAAd,mDAAc,CAAd,sDAAc,CAAd,YAAc,CAAd,yBAAc,CAAd,2DAAc,CAAd,iBAAc,CAAd,yBAAc,CAAd,0BAAc,CAAd,QAAc,CAAd,SAAc,CAAd,gBAAc,CAAd,wBAAc,CAAd,kFAAc,CAAd,SAAc,CAAd,sDAAc,CAAd,SAAc,CAAd,mCAAc,CAAd,wBAAc,CAAd,4DAAc,CAAd,qBAAc,CAAd,qBAAc,CAAd,cAAc,CAAd,qBAAc,CAAd,wCAAc,CAAd,uBAAc,CAAd,kBAAc,CAAd,kBAAc,CAAd,aAAc,CAAd,aAAc,CAAd,aAAc,CAAd,cAAc,CAAd,cAAc,CAAd,YAAc,CAAd,YAAc,CAAd,iBAAc,CAAd,qCAAc,CAAd,6BAAc,CAAd,4BAAc,CAAd,2BAAc,CAAd,cAAc,CAAd,mBAAc,CAAd,qBAAc,CAAd,sBAAc,CAAd,uBAAc,CAAd,iBAAc,CAAd,0BAAc,CAAd,2BAAc,CAAd,mCAAc,CAAd,iCAAc,CAAd,0BAAc,CAAd,qBAAc,CAAd,6BAAc,CAAd,WAAc,CAAd,iBAAc,CAAd,eAAc,CAAd,gBAAc,CAAd,iBAAc,CAAd,aAAc,CAAd,eAAc,CAAd,YAAc,CAAd,kBAAc,CAAd,oBAAc,CAAd,0BAAc,CAAd,wBAAc,CAAd,yBAAc,CAAd,0BAAc,CAAd,sBAAc,CAAd,uBAAc,CAAd,wBAAc,CAAd,qBAAc,CAAd,0CAAc,CAAd,uBAAc,CAAd,kBAAc,CAAd,kBAAc,CAAd,aAAc,CAAd,aAAc,CAAd,aAAc,CAAd,cAAc,CAAd,cAAc,CAAd,YAAc,CAAd,YAAc,CAAd,iBAAc,CAAd,qCAAc,CAAd,6BAAc,CAAd,4BAAc,CAAd,2BAAc,CAAd,cAAc,CAAd,mBAAc,CAAd,qBAAc,CAAd,sBAAc,CAAd,uBAAc,CAAd,iBAAc,CAAd,0BAAc,CAAd,2BAAc,CAAd,mCAAc,CAAd,iCAAc,CAAd,0BAAc,CAAd,qBAAc,CAAd,6BAAc,CAAd,WAAc,CAAd,iBAAc,CAAd,eAAc,CAAd,gBAAc,CAAd,iBAAc,CAAd,aAAc,CAAd,eAAc,CAAd,YAAc,CAAd,kBAAc,CAAd,oBAAc,CAAd,0BAAc,CAAd,wBAAc,CAAd,yBAAc,CAAd,0BAAc,CAAd,sBAAc,CAAd,uBAAc,CAAd,wBAAc,CAAd,qBAAc,CAAd,kCAAc,CAAd,uBAAc,CAAd,kBAAc,CAAd,kBAAc,CAAd,aAAc,CAAd,aAAc,CAAd,aAAc,CAAd,cAAc,CAAd,cAAc,CAAd,YAAc,CAAd,YAAc,CAAd,iBAAc,CAAd,qCAAc,CAAd,6BAAc,CAAd,4BAAc,CAAd,2BAAc,CAAd,cAAc,CAAd,mBAAc,CAAd,qBAAc,CAAd,sBAAc,CAAd,uBAAc,CAAd,iBAAc,CAAd,0BAAc,CAAd,2BAAc,CAAd,mCAAc,CAAd,iCAAc,CAAd,0BAAc,CAAd,qBAAc,CAAd,6BAAc,CAAd,WAAc,CAAd,iBAAc,CAAd,eAAc,CAAd,gBAAc,CAAd,iBAAc,CAAd,aAAc,CAAd,eAAc,CAAd,YAAc,CAAd,kBAAc,CAAd,oBAAc,CAAd,0BAAc,CAAd,wBAAc,CAAd,yBAAc,CAAd,0BAAc,CAAd,sBAAc,CAAd,uBAAc,CAAd,wBAAc,CAAd,qBAAc,CAEd,qBAAmB,CAAnB,gBAAmB,CAAnB,mBAAmB,CAAnB,sBAAmB,CAAnB,kCAAmB,CAAnB,kBAAmB,CAAnB,oBAAmB,CAAnB,gBAAmB,CAAnB,iBAAmB,CAAnB,gBAAmB,CAAnB,iBAAmB,CAAnB,iBAAmB,CAAnB,gNAAmB,CAAnB,6LAAmB,CAAnB,8BAAmB,CAAnB,oCAAmB,CAAnB,sCAAmB,CAAnB,6BAAmB,CAAnB,+BAAmB,CAAnB,wBAAmB,CAAnB,mCAAmB,CAAnB,gDAAmB,CAAnB,sCAAmB,CAAnB,qDAAmB,CAAnB,sCAAmB,CAAnB,sDAAmB,CAAnB,sCAAmB,CAAnB,sDAAmB,CAAnB,sCAAmB,CAAnB,sDAAmB,CAAnB,sCAAmB,CAAnB,mDAAmB,CAAnB,qCAAmB,CAAnB,oDAAmB,CAAnB,2BAAmB,CAAnB,gDAAmB,CAAnB,8BAAmB,CAAnB,sDAAmB,CAAnB,8BAAmB,CAAnB,sDAAmB,CAAnB,8BAAmB,CAAnB,sDAAmB,CAAnB,8BAAmB,CAAnB,sDAAmB,CAAnB,6BAAmB,CAAnB,sDAAmB,CAAnB,2BAAmB,CAAnB,sDAAmB,CAAnB,kCAAmB,CAAnB,iBAAmB,CAAnB,yBAAmB,CAAnB,oBAAmB,CAAnB,uBAAmB,CAAnB,kBAAmB,CAAnB,8CAAmB,CAAnB,4CAAmB,CAAnB,2BAAmB,CAAnB,0BAAmB,CAAnB,wBAAmB,CAAnB,uBAAmB,CAAnB,yBAAmB,CAAnB,uBAAmB,CAAnB,yBAAmB,CAAnB,wBAAmB,CAAnB,0BAAmB,CAAnB,wBAAmB,CAAnB,wBAAmB,CAAnB,sBAAmB,CAAnB,8BAAmB,CAAnB,8GAAmB,CAAnB,4EAAmB,CAAnB,0BAAmB,CAAnB,gBAAmB,CAAnB,yBAAmB,CAAnB,kBAAmB,CAAnB,0BAAmB,CAAnB,mBAAmB,CAAnB,yBAAmB,CAAnB,gBAAmB,CAAnB,0BAAmB,CAAnB,8BAAmB,CAAnB,+BAAmB,CAAnB,uCAAmB,CAAnB,kCAAmB,CAAnB,2CAAmB,CAAnB,kCAAmB,CAAnB,2CAAmB,CAAnB,kCAAmB,CAAnB,0C
AAmB,CAAnB,iCAAmB,CAAnB,2CAAmB,CAAnB,yCAAmB,CAAnB,qFAAmB,CAAnB,kGAAmB,CAAnB,+CAAmB,CAAnB,kGAAmB,CAAnB,oFAAmB,CAAnB,6BAAmB,CAAnB,+CAAmB,CAAnB,kDAAmB,CAAnB,qCAAmB,CACnB,KAKE,kCAAmC,CACnC,iCAAkC,CAJlC,mIAEY,CAHZ,QAMF,CAEA,KACE,uEAEF,CAfA,wCAgBA,CAhBA,gBAgBA,CAhBA,qMAgBA,CAhBA,6LAgBA,CAhBA,mDAgBA,CAhBA,uCAgBA,CAhBA,gDAgBA,CAhBA,gDAgBA,CAhBA,2CAgBA,CAhBA,sDAgBA,CAhBA,2CAgBA,CAhBA,sDAgBA,CAhBA,0CAgBA,CAhBA,sDAgBA,CAhBA,yCAgBA,CAhBA,mBAgBA,CAhBA,uCAgBA,CAhBA,8CAgBA,EAhBA,sDAgBA","sources":["index.css"],"sourcesContent":["@tailwind base;\n@tailwind components;\n@tailwind utilities;\nbody {\n margin: 0;\n font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'Roboto', 'Oxygen',\n 'Ubuntu', 'Cantarell', 'Fira Sans', 'Droid Sans', 'Helvetica Neue',\n sans-serif;\n -webkit-font-smoothing: antialiased;\n -moz-osx-font-smoothing: grayscale;\n}\n\ncode {\n font-family: source-code-pro, Menlo, Monaco, Consolas, 'Courier New',\n monospace;\n}\n"],"names":[],"sourceRoot":""} \ No newline at end of file +{"version":3,"file":"static/css/main.456c3b59.css","mappings":"AAAA;;CAAc,CAAd,uCAAc,CAAd,qBAAc,CAAd,8BAAc,CAAd,kCAAc,CAAd,oCAAc,CAAd,4BAAc,CAAd,gMAAc,CAAd,8BAAc,CAAd,eAAc,CAAd,UAAc,CAAd,wBAAc,CAAd,uBAAc,CAAd,aAAc,CAAd,QAAc,CAAd,4DAAc,CAAd,gCAAc,CAAd,mCAAc,CAAd,mBAAc,CAAd,eAAc,CAAd,uBAAc,CAAd,2BAAc,CAAd,qHAAc,CAAd,aAAc,CAAd,mBAAc,CAAd,qBAAc,CAAd,aAAc,CAAd,iBAAc,CAAd,sBAAc,CAAd,iBAAc,CAAd,aAAc,CAAd,8BAAc,CAAd,oBAAc,CAAd,aAAc,CAAd,2EAAc,CAAd,6BAAc,CAAd,aAAc,CAAd,mBAAc,CAAd,cAAc,CAAd,+BAAc,CAAd,mBAAc,CAAd,mBAAc,CAAd,QAAc,CAAd,SAAc,CAAd,iCAAc,CAAd,yEAAc,CAAd,wBAAc,CAAd,qBAAc,CAAd,4BAAc,CAAd,gCAAc,CAAd,+BAAc,CAAd,mEAAc,CAAd,0CAAc,CAAd,mBAAc,CAAd,mDAAc,CAAd,sDAAc,CAAd,YAAc,CAAd,yBAAc,CAAd,2DAAc,CAAd,iBAAc,CAAd,yBAAc,CAAd,0BAAc,CAAd,QAAc,CAAd,SAAc,CAAd,gBAAc,CAAd,wBAAc,CAAd,kFAAc,CAAd,SAAc,CAAd,sDAAc,CAAd,SAAc,CAAd,mCAAc,CAAd,wBAAc,CAAd,4DAAc,CAAd,qBAAc,CAAd,qBAAc,CAAd,cAAc,CAAd,qBAAc,CAAd,wCAAc,CAAd,uBAAc,CAAd,kBAAc,CAAd,kBAAc,CAAd,aAAc,CAAd,aAAc,CAAd,aAAc,CAAd,cAAc,CAAd,cAAc,CAAd,YAAc,CAAd,YAAc,CAAd,iBAAc,CAAd,qCAAc,CAAd,6BAAc,CAAd,4BAAc,CAAd,2BAAc,CAAd,cAAc,CAAd,mBAAc,CAAd,qBAAc,CAAd,sBAAc,CAAd,uBAAc,CAAd,iBAAc,CAAd,0BAAc,CAAd,2BAAc,CAAd,mCAAc,CAAd,iCAAc,CAAd,0BAAc,CAAd,qBAAc,CAAd,6BAAc,CAAd,WAAc,CAAd,iBAAc,CAAd,eAAc,CAAd,gBAAc,CAAd,iBAAc,CAAd,aAAc,CAAd,eAAc,CAAd,YAAc,CAAd,kBAAc,CAAd,oBAAc,CAAd,0BAAc,CAAd,wBAAc,CAAd,yBAAc,CAAd,0BAAc,CAAd,sBAAc,CAAd,uBAAc,CAAd,wBAAc,CAAd,qBAAc,CAAd,0CAAc,CAAd,uBAAc,CAAd,kBAAc,CAAd,kBAAc,CAAd,aAAc,CAAd,aAAc,CAAd,aAAc,CAAd,cAAc,CAAd,cAAc,CAAd,YAAc,CAAd,YAAc,CAAd,iBAAc,CAAd,qCAAc,CAAd,6BAAc,CAAd,4BAAc,CAAd,2BAAc,CAAd,cAAc,CAAd,mBAAc,CAAd,qBAAc,CAAd,sBAAc,CAAd,uBAAc,CAAd,iBAAc,CAAd,0BAAc,CAAd,2BAAc,CAAd,mCAAc,CAAd,iCAAc,CAAd,0BAAc,CAAd,qBAAc,CAAd,6BAAc,CAAd,WAAc,CAAd,iBAAc,CAAd,eAAc,CAAd,gBAAc,CAAd,iBAAc,CAAd,aAAc,CAAd,eAAc,CAAd,YAAc,CAAd,kBAAc,CAAd,oBAAc,CAAd,0BAAc,CAAd,wBAAc,CAAd,yBAAc,CAAd,0BAAc,CAAd,sBAAc,CAAd,uBAAc,CAAd,wBAAc,CAAd,qBAAc,CAAd,kCAAc,CAAd,uBAAc,CAAd,kBAAc,CAAd,kBAAc,CAAd,aAAc,CAAd,aAAc,CAAd,aAAc,CAAd,cAAc,CAAd,cAAc,CAAd,YAAc,CAAd,YAAc,CAAd,iBAAc,CAAd,qCAAc,CAAd,6BAAc,CAAd,4BAAc,CAAd,2BAAc,CAAd,cAAc,CAAd,mBAAc,CAAd,qBAAc,CAAd,sBAAc,CAAd,uBAAc,CAAd,iBAAc,CAAd,0BAAc,CAAd,2BAAc,CAAd,mCAAc,CAAd,iCAAc,CAAd,0BAAc,CAAd,qBAAc,CAAd,6BAAc,CAAd,WAAc,CAAd,iBAAc,CAAd,eAAc,CAAd,gBAAc,CAAd,iBAAc,CAAd,aAAc,CAAd,eAAc,CAAd,YAAc,CAAd,kBAAc,CAAd,oBAAc,CAAd,0BAAc,CAAd,wBAAc,CAAd,yBAAc,CAAd,0BAAc,CAAd,sBAAc,CAAd,uBAAc,CAAd,wBAAc,CAAd,qBAAc,CAEd,qBAAmB,CAAnB,gBAAmB,CAAnB,mBAAmB,CAAnB,sBAAmB,CAAnB,sBAAmB,CAAnB,kCAAmB,CAAnB,kBAAmB,CAAnB,oBAAmB,CAAnB,gBAAmB,CAAnB,iBAAmB,CAAnB,gBAAmB,CAA
nB,iBAAmB,CAAnB,iBAAmB,CAAnB,gNAAmB,CAAnB,6LAAmB,CAAnB,8BAAmB,CAAnB,oCAAmB,CAAnB,sCAAmB,CAAnB,+DAAmB,CAAnB,oHAAmB,CAAnB,6BAAmB,CAAnB,+BAAmB,CAAnB,wBAAmB,CAAnB,mCAAmB,CAAnB,gDAAmB,CAAnB,sCAAmB,CAAnB,qDAAmB,CAAnB,sCAAmB,CAAnB,sDAAmB,CAAnB,sCAAmB,CAAnB,sDAAmB,CAAnB,sCAAmB,CAAnB,sDAAmB,CAAnB,sCAAmB,CAAnB,mDAAmB,CAAnB,qCAAmB,CAAnB,oDAAmB,CAAnB,2BAAmB,CAAnB,gDAAmB,CAAnB,8BAAmB,CAAnB,sDAAmB,CAAnB,8BAAmB,CAAnB,sDAAmB,CAAnB,8BAAmB,CAAnB,sDAAmB,CAAnB,8BAAmB,CAAnB,sDAAmB,CAAnB,6BAAmB,CAAnB,sDAAmB,CAAnB,2BAAmB,CAAnB,sDAAmB,CAAnB,kCAAmB,CAAnB,kBAAmB,CAAnB,iBAAmB,CAAnB,yBAAmB,CAAnB,oBAAmB,CAAnB,uBAAmB,CAAnB,kBAAmB,CAAnB,8CAAmB,CAAnB,4CAAmB,CAAnB,2BAAmB,CAAnB,0BAAmB,CAAnB,wBAAmB,CAAnB,uBAAmB,CAAnB,yBAAmB,CAAnB,uBAAmB,CAAnB,yBAAmB,CAAnB,wBAAmB,CAAnB,0BAAmB,CAAnB,wBAAmB,CAAnB,wBAAmB,CAAnB,sBAAmB,CAAnB,8BAAmB,CAAnB,8GAAmB,CAAnB,4EAAmB,CAAnB,0BAAmB,CAAnB,gBAAmB,CAAnB,yBAAmB,CAAnB,kBAAmB,CAAnB,0BAAmB,CAAnB,mBAAmB,CAAnB,0BAAmB,CAAnB,mBAAmB,CAAnB,yBAAmB,CAAnB,gBAAmB,CAAnB,0BAAmB,CAAnB,8BAAmB,CAAnB,+BAAmB,CAAnB,uCAAmB,CAAnB,kCAAmB,CAAnB,2CAAmB,CAAnB,kCAAmB,CAAnB,2CAAmB,CAAnB,kCAAmB,CAAnB,0CAAmB,CAAnB,iCAAmB,CAAnB,2CAAmB,CAAnB,yCAAmB,CAAnB,qFAAmB,CAAnB,kGAAmB,CAAnB,+CAAmB,CAAnB,kGAAmB,CAAnB,gMAAmB,CAAnB,gLAAmB,CAAnB,oFAAmB,CAAnB,6BAAmB,CAAnB,+CAAmB,CAAnB,kDAAmB,CAAnB,qCAAmB,CACnB,KAKE,kCAAmC,CACnC,iCAAkC,CAJlC,mIAEY,CAHZ,QAMF,CAEA,KACE,uEAEF,CAfA,wCAgBA,CAhBA,gBAgBA,CAhBA,qMAgBA,CAhBA,6LAgBA,CAhBA,mDAgBA,CAhBA,uCAgBA,CAhBA,gDAgBA,CAhBA,gDAgBA,CAhBA,2CAgBA,CAhBA,sDAgBA,CAhBA,2CAgBA,CAhBA,sDAgBA,CAhBA,2CAgBA,CAhBA,sDAgBA,CAhBA,0CAgBA,CAhBA,sDAgBA,CAhBA,yCAgBA,CAhBA,mBAgBA,CAhBA,uCAgBA,CAhBA,8CAgBA,EAhBA,sDAgBA","sources":["index.css"],"sourcesContent":["@tailwind base;\n@tailwind components;\n@tailwind utilities;\nbody {\n margin: 0;\n font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'Roboto', 'Oxygen',\n 'Ubuntu', 'Cantarell', 'Fira Sans', 'Droid Sans', 'Helvetica Neue',\n sans-serif;\n -webkit-font-smoothing: antialiased;\n -moz-osx-font-smoothing: grayscale;\n}\n\ncode {\n font-family: source-code-pro, Menlo, Monaco, Consolas, 'Courier New',\n monospace;\n}\n"],"names":[],"sourceRoot":""} \ No newline at end of file diff --git a/static/js/main.8b24599d.js b/static/js/main.48479645.js similarity index 70% rename from static/js/main.8b24599d.js rename to static/js/main.48479645.js index b14840d..9dbfd02 100644 --- a/static/js/main.8b24599d.js +++ b/static/js/main.48479645.js @@ -1,3 +1,3 @@ -/*! For license information please see main.8b24599d.js.LICENSE.txt */ -!function(){var e={618:function(e,t,n){var r;!function(){"use strict";var l=!("undefined"===typeof window||!window.document||!window.document.createElement),o={canUseDOM:l,canUseWorkers:"undefined"!==typeof Worker,canUseEventListeners:l&&!(!window.addEventListener&&!window.attachEvent),canUseViewport:l&&!!window.screen};void 0===(r=function(){return o}.call(t,n,t,e))||(e.exports=r)}()},888:function(e,t,n){"use strict";var r=n(47);function l(){}function o(){}o.resetWarningCache=l,e.exports=function(){function e(e,t,n,l,o,a){if(a!==r){var i=new Error("Calling PropTypes validators directly is not supported by the `prop-types` package. Use PropTypes.checkPropTypes() to call them. 
Read more at http://fb.me/use-check-prop-types");throw i.name="Invariant Violation",i}}function t(){return e}e.isRequired=e;var n={array:e,bigint:e,bool:e,func:e,number:e,object:e,string:e,symbol:e,any:e,arrayOf:t,element:e,elementType:e,instanceOf:t,node:e,objectOf:t,oneOf:t,oneOfType:t,shape:t,exact:t,checkPropTypes:o,resetWarningCache:l};return n.PropTypes=n,n}},7:function(e,t,n){e.exports=n(888)()},47:function(e){"use strict";e.exports="SECRET_DO_NOT_PASS_THIS_OR_YOU_WILL_BE_FIRED"},463:function(e,t,n){"use strict";var r=n(791),l=n(296);function o(e){for(var t="https://reactjs.org/docs/error-decoder.html?invariant="+e,n=1;n