diff --git a/all_configs.json b/all_configs.json
index 4393ecd..d0dc5cb 100644
--- a/all_configs.json
+++ b/all_configs.json
@@ -1 +1 @@
-{"NousResearch/Llama-2-13b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "google/flan-t5-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "google/flan-t5-large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "t5-small": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "t5-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "PascalNotin/Tranception_Small": {"architectures": ["TranceptionLMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": 3072, "n_layer": 12, "vocab_size": 25}, "bigscience/bloom-560m": {"architectures": ["BloomForCausalLM"], "n_inner": null, "n_layer": 24, "num_attention_heads": 16, "vocab_size": 250880}, "distilgpt2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_layer": 6, "vocab_size": 50257}, "hf-internal-testing/tiny-random-gpt2": {"intermediate_size": 37, "n_embd": 32, "n_head": 4, "n_inner": null, "n_layer": 5, "vocab_size": 1000}, "tiiuae/falcon-7b": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "t5-large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "bigscience/bloomz-1b1": {"architectures": ["BloomForCausalLM"], "n_inner": null, "n_layer": 24, "num_attention_heads": 16, "vocab_size": 250880}, "gpt2-medium": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_layer": 24, "vocab_size": 50257}, "mrm8488/t5-base-finetuned-common_gen": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "lmsys/fastchat-t5-3b-v1.0": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32110}, "gpt2-xl": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1600, "n_head": 25, "n_layer": 48, "vocab_size": 50257}, "meta-llama/Llama-2-7b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "hf-internal-testing/tiny-random-t5": {"d_ff": 37, "d_model": 32, "num_heads": 4, "num_layers": 5, "vocab_size": 1103}, "EleutherAI/pythia-6.9b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "databricks/dolly-v2-3b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50280}, "hf-internal-testing/tiny-random-GPTNeoXForCausalLM": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 32, "intermediate_size": 37, "num_attention_heads": 4, "num_hidden_layers": 5, "vocab_size": 1024}, "meta-llama/Llama-2-7b-chat-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, 
"num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "microsoft/DialoGPT-medium": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_layer": 24, "vocab_size": 50257}, "google/mt5-base": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "TheBloke/Wizard-Vicuna-7B-Uncensored-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "hf-internal-testing/tiny-random-BloomModel": {"architectures": ["BloomModel"], "hidden_size": 32, "n_head": 4, "n_layer": 5, "vocab_size": 1024}, "google/flan-t5-xxl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 10240, "d_model": 4096, "num_heads": 64, "num_layers": 24, "vocab_size": 32128}, "lmsys/vicuna-7b-v1.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "huggyllama/llama-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "mrm8488/t5-base-finetuned-summarize-news": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "google/flan-t5-small": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 32128}, "tiiuae/falcon-40b-instruct": {"architectures": ["RWForCausalLM"], "hidden_size": 8192, "n_head": 128, "n_layer": 60, "vocab_size": 65024}, "ramsrigouthamg/t5_sentence_paraphraser": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "flexudy/t5-base-multi-sentence-doctor": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "lewtun/tiny-random-mt5": {"architectures": ["MT5Model"], "d_ff": 1024, "d_model": 16, "num_heads": 4, "num_layers": 2, "vocab_size": 250112}, "gpt2-large": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_layer": 36, "vocab_size": 50257}, "valhalla/t5-base-e2e-qg": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32102}, "sshleifer/tiny-gpt2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 2, "n_head": 2, "n_layer": 2, "vocab_size": 50257}, "fxmarty/tiny-llama-fast-tokenizer": {"architectures": ["LlamaForCausalLM"], "hidden_size": 16, "intermediate_size": 64, "num_attention_heads": 4, "num_hidden_layers": 2, "vocab_size": 32000}, "decapoda-research/llama-7b-hf": {"architectures": ["LLaMAForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "stabilityai/StableBeluga2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "syzymon/long_llama_3b": {"architectures": ["LongLlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "NousResearch/Llama-2-7b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, 
"num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/Llama-2-70B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "tiiuae/falcon-7b-instruct": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "google/flan-t5-xl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "meta-llama/Llama-2-13b-chat-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "petals-team/StableBeluga2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "meta-llama/Llama-2-70b-chat-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "EleutherAI/gpt-neox-20b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 6144, "intermediate_size": 24576, "num_attention_heads": 64, "num_hidden_layers": 44, "vocab_size": 50432}, "hf-internal-testing/tiny-random-GPTBigCodeForCausalLM": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 32, "n_head": 4, "n_inner": 37, "n_layer": 5, "vocab_size": 1024}, "nferruz/ProtGPT2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_inner": null, "n_layer": 36, "vocab_size": 50257}, "philschmid/flan-t5-xxl-sharded-fp16": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 10240, "d_model": 4096, "num_heads": 64, "num_layers": 24, "vocab_size": 32128}, "HuggingFaceM4/tiny-random-LlamaForCausalLM": {"architectures": ["LlamaForCausalLM"], "hidden_size": 16, "intermediate_size": 64, "num_attention_heads": 4, "num_hidden_layers": 2, "vocab_size": 32000}, "hf-internal-testing/tiny-random-BloomForCausalLM": {"architectures": ["BloomForCausalLM"], "hidden_size": 32, "n_head": 4, "n_layer": 5, "vocab_size": 1024}, "hf-internal-testing/tiny-random-GPT2LMHeadModel": {"architectures": ["GPT2LMHeadModel"], "n_embd": 32, "n_head": 4, "n_inner": 37, "n_layer": 5, "vocab_size": 1024}, "Vamsi/T5_Paraphrase_Paws": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "lmsys/vicuna-7b-v1.3": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "meta-llama/Llama-2-13b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "ybelkada/tiny-random-T5ForConditionalGeneration-calibrated": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 37, "d_model": 32, "num_heads": 4, "num_layers": 5, "vocab_size": 32100}, "prithivida/parrot_paraphraser_on_T5": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "hf-internal-testing/tiny-random-GPTBigCodeModel": {"architectures": ["GPTBigCodeModel"], "n_embd": 32, "n_head": 4, "n_inner": 37, "n_layer": 5, "vocab_size": 1024}, "hkunlp/instructor-large": {"architectures": ["T5EncoderModel"], "d_ff": 4096, "d_model": 1024, 
"num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "lmsys/vicuna-7b-v1.5": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "fabiochiu/t5-small-medium-title-generation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "TheBloke/Llama-2-7b-Chat-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "skt/kogpt2-base-v2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 51200}, "google/t5-v1_1-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "Maykeye/TinyLLama-v0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 64, "intermediate_size": 256, "num_attention_heads": 16, "num_hidden_layers": 8, "vocab_size": 32000}, "TheBloke/Llama-2-13B-chat-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "tiiuae/falcon-40b": {"architectures": ["RWForCausalLM"], "hidden_size": 8192, "n_head": 128, "n_layer": 60, "vocab_size": 65024}, "sonoisa/t5-base-japanese-title-generation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "Rostlab/prot_t5_xl_uniref50": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 16384, "d_model": 1024, "num_heads": 32, "num_layers": 24, "vocab_size": 128}, "TheBloke/vicuna-7B-v1.3-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "daryl149/llama-2-7b-chat-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "stabilityai/StableBeluga-7B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "meta-llama/Llama-2-70b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "TheBloke/MythoMax-L2-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "czurita/nsql-llama-2-7B-sharded-bf16-2GB": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "vennify/t5-base-grammar-correction": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "czearing/story-to-title": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "google/byt5-large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3840, "d_model": 1536, "num_heads": 16, "num_layers": 36, "vocab_size": 384}, "HuggingFaceH4/starchat-beta": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 6144, 
"n_head": 48, "n_inner": 24576, "n_layer": 40, "vocab_size": 49156}, "codellama/CodeLlama-34b-Instruct-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "openlm-research/open_llama_13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "optimum/t5-small": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "t5-3b": {"architectures": ["T5WithLMHeadModel"], "d_ff": 16384, "d_model": 1024, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "humarin/chatgpt_paraphraser_on_T5_base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "Gustavosta/MagicPrompt-Stable-Diffusion": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "bigscience/bloomz-7b1": {"architectures": ["BloomForCausalLM"], "n_inner": null, "n_layer": 30, "num_attention_heads": 32, "vocab_size": 250880}, "trl-internal-testing/tiny-random-GPTNeoXForCausalLM": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 32, "intermediate_size": 37, "num_attention_heads": 4, "num_hidden_layers": 5, "vocab_size": 1024}, "NousResearch/Yarn-Llama-2-7b-64k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "khhuang/zerofec-qa2claim-t5-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "h2oai/h2ogpt-gm-oasst1-en-2048-falcon-7b-v3": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "khhuang/zerofec-daqa-t5-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "declare-lab/flan-alpaca-gpt4-xl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "codellama/CodeLlama-7b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32016}, "lmsys/vicuna-13b-v1.3": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Rostlab/prot_t5_xl_half_uniref50-enc": {"architectures": ["T5EncoderModel"], "d_ff": 16384, "d_model": 1024, "num_heads": 32, "num_layers": 24, "vocab_size": 128}, "google/mt5-small": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250112}, "Salesforce/safety-flan-t5-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "patrickvonplaten/t5-tiny-random": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 256, "d_model": 64, "num_heads": 2, "num_layers": 2, "vocab_size": 32128}, "google/flan-ul2": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 16384, "d_model": 4096, "num_heads": 16, "num_layers": 32, "vocab_size": 32128}, 
"EleutherAI/pythia-70m": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 512, "intermediate_size": 2048, "num_attention_heads": 8, "num_hidden_layers": 6, "vocab_size": 50304}, "bigscience/mt0-large": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 250112}, "stevhliu/my_awesome_billsum_model": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "EleutherAI/pythia-70m-deduped": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 512, "intermediate_size": 2048, "num_attention_heads": 8, "num_hidden_layers": 6, "vocab_size": 50304}, "lmsys/vicuna-13b-v1.5": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "PAIXAI/Astrid-1B": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 8, "num_hidden_layers": 16, "vocab_size": 50304}, "Phind/Phind-CodeLlama-34B-Python-v1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "pszemraj/flan-t5-large-grammar-synthesis": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "Voicelab/vlt5-base-keywords": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 50048}, "togethercomputer/Llama-2-7B-32K-Instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "huggyllama/llama-65b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "ai-forever/ruGPT-3.5-13B": {"architectures": ["GPT2LMHeadModel"], "n_embd": 5120, "n_head": 40, "n_inner": null, "n_layer": 40, "vocab_size": 50272}, "Einmalumdiewelt/T5-Base_GNAD": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "google/t5-v1_1-xl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "Austism/chronos-hermes-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "upstage/SOLAR-0-70b-16bit": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "bigscience/bloom-7b1": {"architectures": ["BloomForCausalLM"], "hidden_size": 4096, "n_head": 32, "n_inner": null, "n_layer": 30, "vocab_size": 250880}, "nlpai-lab/kullm-polyglot-12.8b-v2": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 30080}, "codellama/CodeLlama-13b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "hf-internal-testing/tiny-random-GPT2Model": {"architectures": ["GPT2Model"], "n_embd": 32, "n_head": 4, 
"n_inner": 37, "n_layer": 5, "vocab_size": 1024}, "Gryphe/MythoMax-L2-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "openlm-research/open_llama_3b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "TheBloke/Llama-2-70B-chat-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "trl-internal-testing/dummy-GPT2-correct-vocab": {"architectures": ["GPT2LMHeadModel"], "n_embd": 32, "n_head": 4, "n_inner": 37, "n_layer": 5, "vocab_size": 50257}, "charsiu/g2p_multilingual_byT5_small_100": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3584, "d_model": 1472, "num_heads": 6, "num_layers": 12, "vocab_size": 384}, "EleutherAI/pythia-160m": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 768, "intermediate_size": 3072, "num_attention_heads": 12, "num_hidden_layers": 12, "vocab_size": 50304}, "ElnaggarLab/ankh-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 48, "vocab_size": 144}, "trl-internal-testing/tiny-random-GPT2LMHeadModel": {"architectures": ["GPT2LMHeadModel"], "n_embd": 32, "n_head": 4, "n_inner": 37, "n_layer": 5, "vocab_size": 1024}, "openlm-research/open_llama_7b_v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/OpenAssistant-Llama2-13B-Orca-v2-8K-3166-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "codellama/CodeLlama-7b-Instruct-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32016}, "WizardLM/WizardCoder-Python-34B-V1.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32001}, "pszemraj/grammar-synthesis-small": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 32128}, "TheBloke/llama-2-70b-Guanaco-QLoRA-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "openlm-research/open_llama_3b_v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "IDEA-CCNL/Wenzhong-GPT2-110M": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": 3072, "n_layer": 12, "vocab_size": 50304}, "microsoft/DialoGPT-small": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_layer": 12, "vocab_size": 50257}, "trl-internal-testing/tiny-random-BloomForCausalLM": {"architectures": ["BloomForCausalLM"], "hidden_size": 32, "n_head": 4, "n_layer": 5, "vocab_size": 1024}, "trl-internal-testing/tiny-random-T5ForConditionalGeneration": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 37, "d_model": 32, "num_heads": 4, "num_layers": 5, "vocab_size": 1302}, 
"hf-internal-testing/tiny-random-onnx-mt5": {"architectures": ["MT5Model"], "d_ff": 1024, "d_model": 16, "num_heads": 4, "num_layers": 2, "vocab_size": 250112}, "NousResearch/Llama-2-7b-chat-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "trl-internal-testing/tiny-random-MT5ForConditionalGeneration": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 256, "d_model": 64, "num_heads": 4, "num_layers": 8, "vocab_size": 5100}, "fxmarty/tiny-testing-gpt2-remote-code": {"architectures": ["GPT2CustomLMHeadModel"], "intermediate_size": 37, "n_embd": 32, "n_head": 4, "n_inner": null, "n_layer": 5, "vocab_size": 1000}, "castorini/monot5-base-msmarco-10k": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "microsoft/DialoGPT-large": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_layer": 36, "vocab_size": 50257}, "bigscience/bloomz-560m": {"architectures": ["BloomForCausalLM"], "n_inner": null, "n_layer": 24, "num_attention_heads": 16, "vocab_size": 250880}, "Open-Orca/OpenOrca-Platypus2-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "google/byt5-small": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3584, "d_model": 1472, "num_heads": 6, "num_layers": 12, "vocab_size": 384}, "bigscience/bloom-1b7": {"architectures": ["BloomForCausalLM"], "n_inner": null, "n_layer": 24, "num_attention_heads": 16, "vocab_size": 250880}, "elinas/chronos-13b-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "google/t5-efficient-tiny": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 256, "num_heads": 4, "num_layers": 4, "vocab_size": 32128}, "bigscience/bloom-1b1": {"architectures": ["BloomForCausalLM"], "n_inner": null, "n_layer": 24, "num_attention_heads": 16, "vocab_size": 250880}, "EleutherAI/polyglot-ko-1.3b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 30080}, "bigscience/bloom-3b": {"architectures": ["BloomForCausalLM"], "n_inner": null, "n_layer": 30, "num_attention_heads": 32, "vocab_size": 250880}, "TinyPixel/Llama-2-7B-bf16-sharded": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "edumunozsala/llama-2-7b-int4-python-code-20k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "yahma/llama-7b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "beomi/KoAlpaca-Polyglot-12.8B": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 30003}, "stanfordnlp/backpack-gpt2": {"architectures": ["BackpackGPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50264}, "prithivida/grammar_error_correcter_v1": {"architectures": 
["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "lvkaokao/llama2-7b-hf-chat-lora-v3": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "google/t5-v1_1-small": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 32128}, "TheBloke/gpt4-alpaca-lora_mlp-65B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "google/mt5-large": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 250112}, "EleutherAI/pythia-2.8b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50304}, "cyberagent/open-calm-7b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 52224}, "lvwerra/gpt2-imdb": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_layer": 12, "vocab_size": 50257}, "WizardLM/WizardLM-13B-V1.2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "KoboldAI/GPT-NeoX-20B-Erebus": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 6144, "intermediate_size": 24576, "num_attention_heads": 64, "num_hidden_layers": 44, "vocab_size": 50432}, "togethercomputer/RedPajama-INCITE-Instruct-3B-v1": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "aditi2222/automatic_title_generation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "shibing624/chinese-alpaca-plus-13b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 49954}, "optimum/gpt2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "togethercomputer/LLaMA-2-7B-32K": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "amazon/FalconLite": {"architectures": ["RWForCausalLM"], "hidden_size": 8192, "n_head": 128, "n_layer": 60, "vocab_size": 65040}, "EleutherAI/polyglot-ko-5.8b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 16, "num_hidden_layers": 28, "vocab_size": 30080}, "databricks/dolly-v2-7b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50280}, "snrspeaks/t5-one-line-summary": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "lmsys/vicuna-33b-v1.3": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, 
"TheBloke/OpenOrca-Platypus2-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "TheBloke/Llama-2-13B-Chat-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "sdadas/mt5-base-translator-pl-en": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250100}, "TheBloke/Llama-2-7b-chat-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "bigcode/gpt_bigcode-santacoder": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 2048, "n_head": 16, "n_inner": 8192, "n_layer": 24, "vocab_size": 49280}, "h2oai/h2ogpt-gm-oasst1-en-2048-falcon-7b-v2": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "TheBloke/airoboros-l2-70B-GPT4-2.0-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "lmsys/vicuna-13b-v1.5-16k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "bigcode/santacoder": {"architectures": ["GPT2LMHeadCustomModel"], "n_embd": 2048, "n_head": 16, "n_inner": 8192, "n_layer": 24, "vocab_size": 49280}, "togethercomputer/RedPajama-INCITE-Chat-3B-v1": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "ai-forever/mGPT": {"architectures": ["GPT2LMHeadModel"], "n_embd": 2048, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 100000}, "openlm-research/open_llama_7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "decapoda-research/llama-13b-hf": {"architectures": ["LLaMAForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "OpenAssistant/codellama-13b-oasst-sft-v10": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32032}, "rinna/bilingual-gpt-neox-4b-instruction-sft": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2816, "intermediate_size": 11264, "num_attention_heads": 22, "num_hidden_layers": 36, "vocab_size": 65536}, "KoboldAI/LLaMA2-13B-Holomax-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "MBZUAI/LaMini-T5-61M": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "google/t5-v1_1-xxl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 10240, "d_model": 4096, "num_heads": 64, "num_layers": 24, "vocab_size": 32128}, "EleutherAI/pythia-1.4b-deduped": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50304}, 
"stabilityai/StableBeluga-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "tiiuae/falcon-rw-1b": {"architectures": ["FalconForCausalLM"], "hidden_size": 2048, "num_attention_heads": 32, "num_hidden_layers": 24, "vocab_size": 50304}, "ClueAI/ChatYuan-large-v2": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "af1tang/personaGPT": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50263}, "google/t5-large-lm-adapt": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "vilsonrodrigues/falcon-7b-instruct-sharded": {"architectures": ["FalconForCausalLM"], "hidden_size": 4544, "num_attention_heads": 71, "num_hidden_layers": 32, "vocab_size": 65024}, "petals-team/falcon-rw-1b": {"architectures": ["FalconForCausalLM"], "hidden_size": 2048, "num_attention_heads": 32, "num_hidden_layers": 24, "vocab_size": 50304}, "bigscience/T0_3B": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "TheTravellingEngineer/llama2-7b-hf-guanaco": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Salesforce/codet5-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "EleutherAI/pythia-2.8b-deduped": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50304}, "The-Face-Of-Goonery/Huginn-13b-v1.2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "FredZhang7/distilgpt2-stable-diffusion-v2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 6, "vocab_size": 50257}, "WizardLM/WizardCoder-15B-V1.0": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 40, "vocab_size": 49153}, "EleutherAI/pythia-410m-deduped": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 1024, "intermediate_size": 4096, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50304}, "huggyllama/llama-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "ybelkada/falcon-7b-sharded-bf16": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "MingZhong/unieval-sum": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32100}, "NousResearch/Nous-Hermes-Llama2-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32032}, "csebuetnlp/mT5_multilingual_XLSum": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "hkunlp/instructor-xl": {"architectures": 
["T5EncoderModel"], "d_ff": 16384, "d_model": 1024, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "h2oai/h2ogpt-4096-llama2-13b-chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "HuggingFaceH4/tiny-random-LlamaForCausalLM": {"architectures": ["LlamaForCausalLM"], "hidden_size": 16, "intermediate_size": 64, "num_attention_heads": 4, "num_hidden_layers": 2, "vocab_size": 32000}, "EleutherAI/polyglot-ko-12.8b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 30080}, "databricks/dolly-v2-12b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 36, "vocab_size": 50280}, "mrm8488/t5-base-finetuned-span-sentiment-extraction": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "WizardLM/WizardLM-70B-V1.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32001}, "codellama/CodeLlama-34b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "Salesforce/codet5-base-multi-sum": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "MBZUAI/LaMini-T5-738M": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "codellama/CodeLlama-13b-Instruct-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "h2oai/h2ogpt-4096-llama2-7b-chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 36, "vocab_size": 50288}, "bigscience/bloom": {"architectures": ["BloomForCausalLM"], "n_layer": 70, "num_attention_heads": 112, "vocab_size": 250880}, "TigerResearch/tigerbot-13b-chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 60928}, "TheBloke/airoboros-l2-70B-gpt4-1.4.1-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "Austism/chronos-hermes-13b-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32032}, "snrspeaks/KeyPhraseTransformer": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "Rocketknight1/tiny-random-falcon-7b": {"architectures": ["FalconForCausalLM"], "hidden_size": 1136, "num_attention_heads": 71, "num_hidden_layers": 2, "vocab_size": 65024}, "hf-internal-testing/tiny-random-T5Model": 
{"architectures": ["T5Model"], "d_ff": 37, "d_model": 32, "num_heads": 4, "num_layers": 5, "vocab_size": 32100}, "sambanovasystems/BLOOMChat-176B-v1": {"architectures": ["BloomForCausalLM"], "hidden_size": 14336, "n_head": 112, "n_layer": 70, "vocab_size": 250880}, "huggyllama/llama-30b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "elyza/ELYZA-japanese-Llama-2-7b-instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "lcw99/t5-base-korean-text-summary": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 50358}, "it5/it5-base-news-summarization": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32103}, "uer/gpt2-chinese-cluecorpussmall": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 21128}, "t5-11b": {"architectures": ["T5WithLMHeadModel"], "d_ff": 65536, "d_model": 1024, "num_heads": 128, "num_layers": 24, "vocab_size": 32128}, "KoboldAI/LLaMA2-13B-Holomax": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/Llama-2-7B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "bigscience/bloomz-3b": {"architectures": ["BloomForCausalLM"], "n_inner": null, "n_layer": 30, "num_attention_heads": 32, "vocab_size": 250880}, "lmsys/vicuna-7b-v1.5-16k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "sonoisa/t5-base-japanese": {"architectures": ["T5Model"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "line-corporation/japanese-large-lm-3.6b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 3072, "intermediate_size": 12288, "num_attention_heads": 32, "num_hidden_layers": 30, "vocab_size": 51200}, "TheBloke/Llama-2-7B-32K-Instruct-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "EleutherAI/pythia-410m": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 1024, "intermediate_size": 4096, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50304}, "NousResearch/Llama-2-70b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "TheBloke/falcon-7b-instruct-GPTQ": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "eachadea/vicuna-13b-1.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "beomi/llama-2-ko-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 46336}, "TheBloke/falcon-40b-instruct-GPTQ": {"architectures": 
["RWForCausalLM"], "hidden_size": 8192, "n_head": 128, "n_layer": 60, "vocab_size": 65024}, "TheBloke/Llama-2-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "garage-bAInd/Platypus2-70B-instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "rajkumarrrk/gpt2-fine-tuned-on-imdb-positive-reviews": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "cerebras/Cerebras-GPT-13B": {"architectures": ["GPT2Model"], "n_embd": 5120, "n_head": 40, "n_inner": 20480, "n_layer": 40, "vocab_size": 50257}, "rinna/japanese-gpt2-medium": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": 4096, "n_layer": 24, "vocab_size": 32000}, "bigscience/T0pp": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 10240, "d_model": 4096, "num_heads": 64, "num_layers": 24, "vocab_size": 32128}, "Phind/Phind-CodeLlama-34B-v1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "beomi/kykim-gpt3-kor-small_based_on_gpt2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 42000}, "Pi3141/DialoGPT-medium-elon-3": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "EleutherAI/pythia-1b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 8, "num_hidden_layers": 16, "vocab_size": 50304}, "ai-forever/rugpt3large_based_on_gpt2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1536, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "jondurbin/airoboros-l2-13b-gpt4-m2.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "codellama/CodeLlama-13b-Python-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "AUTOMATIC/promptgen-lexart": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 6, "vocab_size": 50257}, "Salesforce/codet5-small": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32100}, "h2oai/h2ogpt-oig-oasst1-512-6_9b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "rinna/japanese-gpt-neox-3.6b-instruction-ppo": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2816, "intermediate_size": 11264, "num_attention_heads": 22, "num_hidden_layers": 36, "vocab_size": 32000}, "prithivida/informal_to_formal_styletransfer": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "TheBloke/Wizard-Vicuna-13B-Uncensored-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "matsuo-lab/weblab-10b-instruction-sft": 
{"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4864, "intermediate_size": 19456, "num_attention_heads": 38, "num_hidden_layers": 36, "vocab_size": 50277}, "succinctly/text2image-prompt-generator": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "TheBloke/Llama-2-7B-Chat-GGML": {}, "TheBloke/Llama-2-70B-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "sentence-transformers/gtr-t5-large": {"architectures": ["T5EncoderModel"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "h2oai/h2ogpt-gm-oasst1-en-2048-falcon-40b-v2": {"architectures": ["RWForCausalLM"], "hidden_size": 8192, "n_head": 128, "n_layer": 60, "vocab_size": 65024}, "togethercomputer/RedPajama-INCITE-Base-3B-v1": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "rinna/bilingual-gpt-neox-4b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2816, "intermediate_size": 11264, "num_attention_heads": 22, "num_hidden_layers": 36, "vocab_size": 65536}, "TheBloke/Llama-2-13B-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "pankajmathur/orca_mini_v3_70b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "OpenAssistant/llama2-13b-orca-8k-3319": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "TheBloke/StableBeluga-7B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "defog/sqlcoder": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 40, "vocab_size": 49152}, "WizardLM/WizardCoder-Python-13B-V1.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "stabilityai/stablelm-tuned-alpha-3b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 32, "num_hidden_layers": 16, "vocab_size": 50688}, "cyberagent/open-calm-small": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 768, "intermediate_size": 3072, "num_attention_heads": 12, "num_hidden_layers": 12, "vocab_size": 52096}, "TheBloke/Wizard-Vicuna-30B-Uncensored-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "TheBloke/WizardLM-70B-V1.0-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32001}, "bigscience/bigscience-small-testing": {"architectures": ["BloomModel"], "hidden_size": 64, "n_head": 8, "n_inner": null, "n_layer": 2, "vocab_size": 250880}, "cyberagent/open-calm-1b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 16, 
"num_hidden_layers": 24, "vocab_size": 52096}, "lamini/lamini_docs_finetuned": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 512, "intermediate_size": 2048, "num_attention_heads": 8, "num_hidden_layers": 6, "vocab_size": 50304}, "EnglishVoice/t5-base-uk-to-us-english": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "codellama/CodeLlama-7b-Python-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/WizardLM-13B-V1.2-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "EleutherAI/pythia-160m-deduped": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 768, "intermediate_size": 3072, "num_attention_heads": 12, "num_hidden_layers": 12, "vocab_size": 50304}, "jphme/Llama-2-13b-chat-german": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "EleutherAI/pythia-12b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 36, "vocab_size": 50688}, "TheBloke/WizardLM-1.0-Uncensored-Llama2-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Salesforce/codet5p-220m": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "google/mt5-xl": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 250112}, "cerebras/Cerebras-GPT-111M": {"n_inner": 3072, "n_embd": 768, "n_head": 12, "n_layer": 10, "vocab_size": 50257}, "google/t5-v1_1-large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "TheBloke/vicuna-7B-v1.5-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "chavinlo/alpaca-native": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "kimnt93/kmv-7b-03": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "NumbersStation/nsql-llama-2-7B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "cerebras/Cerebras-GPT-1.3B": {"n_inner": 8192, "n_embd": 2048, "n_head": 16, "n_layer": 24, "vocab_size": 50257}, "trl-internal-testing/tiny-T5ForConditionalGeneration-correct-vocab": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 37, "d_model": 32, "num_heads": 4, "num_layers": 5, "vocab_size": 32128}, "akreal/tiny-random-LlamaForCausalLM": {"architectures": ["LlamaForCausalLM"], "hidden_size": 16, "intermediate_size": 64, "num_attention_heads": 4, "num_hidden_layers": 2, "vocab_size": 32000}, "akreal/tiny-random-BloomForCausalLM": {"architectures": 
["BloomForCausalLM"], "hidden_size": 32, "n_head": 4, "n_layer": 5, "vocab_size": 1024}, "NousResearch/Nous-Hermes-llama-2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "ai-forever/rugpt3small_based_on_gpt2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50264}, "VMware/open-llama-7b-v2-open-instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "robertmyers/targon-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "TheBloke/Nous-Hermes-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "TheBloke/WizardLM-33B-V1.0-Uncensored-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "TheBloke/WizardLM-7B-uncensored-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "ramsrigouthamg/t5_paraphraser": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "tinkoff-ai/ruDialoGPT-medium": {"n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50261}, "OpenAssistant/falcon-7b-sft-mix-2000": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65040}, "bigcode/tiny_starcoder_py": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 768, "n_head": 12, "n_inner": 3072, "n_layer": 20, "vocab_size": 49152}, "rinna/japanese-gpt-1b": {"architectures": ["GPT2LMHeadModel"], "n_embd": 2048, "n_head": 16, "n_inner": 8192, "n_layer": 24, "vocab_size": 44928}, "TheBloke/orca_mini_v3_70B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "UBC-NLP/turjuman": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 110080}, "h2oai/h2ogpt-4096-llama2-70b-chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "Phind/Phind-CodeLlama-34B-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "elyza/ELYZA-japanese-Llama-2-7b-fast-instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 45043}, "iarfmoose/t5-base-question-generator": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32102}, "TheBloke/Llama-2-7B-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, 
"mrm8488/t5-base-finetuned-emotion": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "hkunlp/instructor-base": {"architectures": ["T5EncoderModel"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "fxmarty/onnx-tiny-random-gpt2-without-merge": {"architectures": ["GPT2Model"], "n_embd": 32, "n_head": 4, "n_inner": 37, "n_layer": 5, "vocab_size": 1024}, "fxmarty/onnx-tiny-random-gpt2-with-merge": {"architectures": ["GPT2Model"], "n_embd": 32, "n_head": 4, "n_inner": 37, "n_layer": 5, "vocab_size": 1024}, "microsoft/GODEL-v1_1-large-seq2seq": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32102}, "rinna/japanese-gpt-neox-3.6b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2816, "intermediate_size": 11264, "num_attention_heads": 22, "num_hidden_layers": 36, "vocab_size": 32000}, "cyberagent/open-calm-3b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 52224}, "eachadea/vicuna-7b-1.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "beomi/KoAlpaca-Polyglot-5.8B": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 16, "num_hidden_layers": 28, "vocab_size": 30080}, "grammarly/coedit-large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32100}, "TheBloke/Platypus2-70B-Instruct-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "codellama/CodeLlama-34b-Python-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "noamwies/llama-test-gqa-with-better-transformer": {"architectures": ["LlamaForCausalLM"], "hidden_size": 128, "intermediate_size": 344, "num_attention_heads": 8, "num_hidden_layers": 2, "vocab_size": 2000}, "bigscience/bloomz-7b1-mt": {"architectures": ["BloomForCausalLM"], "n_inner": null, "n_layer": 30, "num_attention_heads": 32, "vocab_size": 250880}, "Salesforce/codet5p-770m": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32100}, "OpenAssistant/pythia-12b-sft-v8-7k-steps": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 36, "vocab_size": 50288}, "augtoma/qCammel-70-x": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "NousResearch/Llama-2-13b-chat-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "plguillou/t5-base-fr-sum-cnndm": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32102}, "WeOpenML/PandaLM-7B-v1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 
11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "VMware/open-llama-7b-open-instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "pankajmathur/orca_mini_v3_7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "google/t5-xl-lm-adapt": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "LinkSoul/Chinese-Llama-2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "line-corporation/japanese-large-lm-3.6b-instruction-sft": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 3072, "intermediate_size": 12288, "num_attention_heads": 32, "num_hidden_layers": 30, "vocab_size": 51200}, "OpenAssistant/oasst-sft-1-pythia-12b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 36, "vocab_size": 50288}, "ehartford/WizardLM-7B-Uncensored": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "upstage/llama-30b-instruct-2048": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "cyberagent/open-calm-large": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 1536, "intermediate_size": 6144, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 52096}, "Gryphe/MythoLogic-L2-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "eenzeenee/t5-small-korean-summarization": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 50358}, "google/t5-xxl-lm-adapt": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 10240, "d_model": 4096, "num_heads": 64, "num_layers": 24, "vocab_size": 32128}, "mywateriswet/ShuanBot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "hf-internal-testing/tiny-random-bloom": {"architectures": ["BloomModel"], "hidden_size": 64, "n_head": 8, "n_inner": null, "n_layer": 2, "vocab_size": 250880}, "TheBloke/Llama-2-13B-chat-GGML": {}, "decapoda-research/llama-30b-hf": {"architectures": ["LLaMAForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "lmsys/longchat-7b-v1.5-32k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "ziqingyang/chinese-alpaca-2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 55296}, "nlpai-lab/kullm-polyglot-5.8b-v2": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 16, "num_hidden_layers": 28, "vocab_size": 30080}, "google/byt5-base": 
{"architectures": ["T5ForConditionalGeneration"], "d_ff": 3968, "d_model": 1536, "num_heads": 12, "num_layers": 18, "vocab_size": 384}, "stabilityai/stablelm-tuned-alpha-7b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 6144, "intermediate_size": 24576, "num_attention_heads": 48, "num_hidden_layers": 16, "vocab_size": 50432}, "PygmalionAI/pygmalion-1.3b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50304}, "stanford-crfm/BioMedLM": {"architectures": ["GPT2LMHeadModel"], "n_embd": 2560, "n_head": 20, "n_inner": null, "n_layer": 32, "vocab_size": 28896}, "PY007/TinyLlama-1.1B-step-50K-105b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 2048, "intermediate_size": 5632, "num_attention_heads": 32, "num_hidden_layers": 22, "vocab_size": 32000}, "georgesung/llama2_7b_chat_uncensored": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "bigscience/mt0-small": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250112}, "TheBloke/WizardCoder-15B-1.0-GPTQ": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 40, "vocab_size": 49153}, "google/t5-base-lm-adapt": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "OpenAssistant/falcon-40b-sft-top1-560": {"architectures": ["RWForCausalLM"], "hidden_size": 8192, "n_head": 128, "n_layer": 60, "vocab_size": 65040}, "TheBloke/WizardLM-30B-Uncensored-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32001}, "TheBloke/WizardCoder-Python-34B-V1.0-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32001}, "garage-bAInd/Camel-Platypus2-70B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "DeepFloyd/t5-v1_1-xxl": {"architectures": ["T5EncoderModel"], "d_ff": 10240, "d_model": 4096, "num_heads": 64, "num_layers": 24, "vocab_size": 32128}, "EleutherAI/pythia-1b-deduped": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 8, "num_hidden_layers": 16, "vocab_size": 50304}, "TheBloke/CodeLlama-7B-Instruct-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32016}, "kfkas/Llama-2-ko-7b-Chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 46336}, "valhalla/t5-small-qa-qg-hl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32102}, "FlagAlpha/Llama2-Chinese-13b-Chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Open-Orca/OpenOrcaxOpenChat-Preview2-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, 
"intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "trl-internal-testing/tiny-random-LlamaForCausalLM": {"architectures": ["LlamaForCausalLM"], "hidden_size": 16, "intermediate_size": 64, "num_attention_heads": 4, "num_hidden_layers": 2, "vocab_size": 32000}, "abhishek/llama-2-7b-hf-small-shards": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "togethercomputer/RedPajama-INCITE-7B-Base": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "Salesforce/codegen25-7b-multi": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 51200}, "fabiochiu/t5-base-tag-generation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "MBZUAI/LaMini-Flan-T5-248M": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "bigscience/bloomz-1b7": {"architectures": ["BloomForCausalLM"], "n_inner": null, "n_layer": 24, "num_attention_heads": 16, "vocab_size": 250880}, "valhalla/t5-base-qg-hl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32101}, "Wi/gptp": {"architectures": ["GPTPModel"], "n_embd": 128, "n_head": 4, "n_inner": null, "n_layer": 4, "vocab_size": 1000}, "medalpaca/medalpaca-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "yentinglin/Taiwan-LLaMa-v1.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "rinna/japanese-gpt-neox-small": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 768, "intermediate_size": 3072, "num_attention_heads": 12, "num_hidden_layers": 12, "vocab_size": 44416}, "TheBloke/llama2_7b_chat_uncensored-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "EleutherAI/pythia-1.4b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50304}, "daryl149/llama-2-13b-chat-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "flax-community/gpt-2-spanish": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "KoboldAI/LLAMA2-13B-Holodeck-1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "jondurbin/airoboros-13b-gpt4-1.4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "mrm8488/t5-base-finetuned-question-generation-ap": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, 
"num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "OpenBuddy/openbuddy-llama2-13b-v8.1-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 37632}, "EleutherAI/pythia-6.9b-deduped": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "tscholak/3vnuv1vf": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32102}, "OpenAssistant/llama2-70b-oasst-sft-v10": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32007}, "TheBloke/vicuna-13B-v1.5-16K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "OpenAssistant/falcon-7b-sft-top1-696": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65040}, "sentence-transformers/sentence-t5-large": {"architectures": ["T5EncoderModel"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "TheBloke/Nous-Hermes-Llama2-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32032}, "mesolitica/finetune-translation-t5-super-super-tiny-standard-bahasa-cased": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 512, "d_model": 128, "num_heads": 6, "num_layers": 2, "vocab_size": 32100}, "Henk717/spring-dragon": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "openchat/openchat_v3.2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "WizardLM/WizardMath-70B-V1.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32002}, "potsawee/t5-large-generation-squad-QuestionAnswer": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "TheBloke/Phind-CodeLlama-34B-v2-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "pankajmathur/orca_mini_3b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "fffrrt/ruGPT-3.5-13B-GPTQ": {"architectures": ["GPT2LMHeadModel"], "n_embd": 5120, "n_head": 40, "n_inner": null, "n_layer": 40, "vocab_size": 50272}, "kykim/gpt3-kor-small_based_on_gpt2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 42000}, "PAIXAI/Astrid-1B-CPU": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 8, "num_hidden_layers": 16, "vocab_size": 50304}, "ElnaggarLab/ankh-large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3840, "d_model": 1536, "num_heads": 16, "num_layers": 48, 
"vocab_size": 144}, "togethercomputer/RedPajama-INCITE-7B-Chat": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "ramsrigouthamg/t5_squad_v1": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "KETI-AIR/ke-t5-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 64128}, "sentence-transformers/gtr-t5-base": {"architectures": ["T5EncoderModel"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "ramsrigouthamg/t5-large-paraphraser-diverse-high-quality": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "rinna/japanese-gpt2-small": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": 3072, "n_layer": 12, "vocab_size": 32000}, "rinna/bilingual-gpt-neox-4b-instruction-ppo": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2816, "intermediate_size": 11264, "num_attention_heads": 22, "num_hidden_layers": 36, "vocab_size": 65536}, "ramsrigouthamg/t5_boolean_questions": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "philschmid/flan-t5-base-samsum": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "google/t5-small-lm-adapt": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 32128}, "matsuo-lab/weblab-10b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4864, "intermediate_size": 19456, "num_attention_heads": 38, "num_hidden_layers": 36, "vocab_size": 50304}, "stabilityai/stablecode-completion-alpha-3b-4k": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 49152}, "IDEA-CCNL/Ziya-LLaMA-7B-Reward": {"architectures": ["LlamaRewardModel"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "ichitaka/falcon-40b-instruct-8bit": {"architectures": ["RWForCausalLM"], "hidden_size": 8192, "n_head": 128, "n_layer": 60, "vocab_size": 65024}, "TheBloke/WizardCoder-Python-13B-V1.0-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "togethercomputer/Pythia-Chat-Base-7B": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "TheBloke/wizardLM-7B-HF": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "AUTOMATIC/promptgen-majinai-unsafe": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 6, "vocab_size": 50257}, "pinkmanlove/llama-7b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "lmsys/longchat-13b-16k": {"architectures": 
["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "togethercomputer/RedPajama-INCITE-7B-Instruct": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "lmsys/vicuna-13b-v1.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Salesforce/codet5-large": {"architectures": ["T5WithLMHeadModel"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32100}, "FredZhang7/anime-anything-promptgen-v2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 6, "vocab_size": 50257}, "Salesforce/xgen-7b-8k-inst": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 51200}, "jojo0217/step3_mk7": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 30008}, "EleutherAI/pythia-14m": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 128, "intermediate_size": 512, "num_attention_heads": 4, "num_hidden_layers": 6, "vocab_size": 50304}, "cerebras/Cerebras-GPT-590M": {"n_inner": 6144, "n_embd": 1536, "n_head": 12, "n_layer": 18, "vocab_size": 50257}, "dbmdz/german-gpt2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50265}, "KoboldAI/GPT-NeoX-20B-Skein": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 6144, "intermediate_size": 24576, "num_attention_heads": 64, "num_hidden_layers": 44, "vocab_size": 50432}, "beomi/polyglot-ko-12.8b-safetensors": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 30080}, "sentence-transformers/sentence-t5-base": {"architectures": ["T5EncoderModel"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "decapoda-research/llama-65b-hf": {"architectures": ["LLaMAForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "mesolitica/finetune-translation-t5-small-standard-bahasa-cased": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32100}, "marcsun13/bloom-1b7_with_lm_head": {"architectures": ["BloomForCausalLM"], "hidden_size": 2048, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 250880}, "MBZUAI/LaMini-Flan-T5-783M": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "medalpaca/medalpaca-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "JulesBelveze/t5-small-headline-generator": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32100}, "Michau/t5-base-en-generate-headline": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, 
"TheBloke/Falcon-180B-Chat-GPTQ": {"architectures": ["FalconForCausalLM"], "hidden_size": 14848, "num_attention_heads": 232, "num_hidden_layers": 80, "vocab_size": 65024}, "Salesforce/xgen-7b-8k-base": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 51200}, "ai-forever/ruT5-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "KRAFTON/KORani-v3-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "bigscience/mt0-xxl-mt": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 10240, "d_model": 4096, "num_heads": 64, "num_layers": 24, "vocab_size": 250112}, "garage-bAInd/Stable-Platypus2-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/Wizard-Vicuna-13B-Uncensored-HF": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "h2oai/h2ogpt-oasst1-512-12b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 36, "vocab_size": 50688}, "Parth/result": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "declare-lab/flan-alpaca-large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "sdadas/mt5-base-translator-en-pl": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250100}, "ziqingyang/chinese-llama-2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 55296}, "NousResearch/Nous-Hermes-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "pragmatic-programs/listener-suffix-idx-300k": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3584, "d_model": 1472, "num_heads": 6, "num_layers": 12, "vocab_size": 384}, "jinaai/jina-embedding-l-en-v1": {"architectures": ["T5EncoderModel"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "stabilityai/stablelm-base-alpha-3b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 32, "num_hidden_layers": 16, "vocab_size": 50688}, "razent/SciFive-base-Pubmed_PMC": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "uer/gpt2-chinese-poem": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 22557}, "openchat/openchat_v3.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "IDEA-CCNL/Ziya-LLaMA-13B-v1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, 
"num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 39424}, "Sao10K/Mythical-Destroyer-V2-L2-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "juierror/text-to-sql-with-table-schema": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "MingZhong/unieval-fact": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32100}, "TheBloke/vicuna-13B-v1.5-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "cerebras/Cerebras-GPT-256M": {"n_inner": 4352, "n_embd": 1088, "n_head": 17, "n_layer": 14, "vocab_size": 50257}, "declare-lab/flan-alpaca-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "ehartford/WizardLM-1.0-Uncensored-Llama2-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "aubmindlab/aragpt2-base": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 64000}, "valhalla/t5-small-e2e-qg": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32102}, "elinas/llama-7b-hf-transformers-4.29": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "lmsys/vicuna-13b-delta-v1.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "garage-bAInd/Platypus2-7B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "PKU-Alignment/beaver-7b-v1.0-cost": {"architectures": ["LlamaModelForScore"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "allenai/unifiedqa-t5-large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "JackFram/llama-160m": {"architectures": ["LlamaForCausalLM"], "hidden_size": 768, "intermediate_size": 3072, "num_attention_heads": 12, "num_hidden_layers": 12, "vocab_size": 32000}, "daryl149/llama-2-7b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "akreal/tiny-random-t5": {"d_ff": 37, "d_model": 32, "num_heads": 4, "num_layers": 5, "vocab_size": 99}, "cyberagent/open-calm-medium": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 1024, "intermediate_size": 4096, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 52096}, "The-Face-Of-Goonery/Huginn-13b-FP16": {"architectures": ["LlamaForCausalLM"], 
"hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "facebook/tart-full-flan-t5-xl": {"architectures": ["EncT5ForSequenceClassification"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "csebuetnlp/banglat5_banglaparaphrase": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "FlagAlpha/Llama2-Chinese-7b-Chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "jerryjalapeno/Llama-2-1b-0-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 32000}, "NousResearch/Redmond-Puffin-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32032}, "bigscience/bloomz": {"architectures": ["BloomForCausalLM"], "n_layer": 70, "num_attention_heads": 112, "vocab_size": 250880}, "allenai/unifiedqa-t5-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "WizardLM/WizardMath-7B-V1.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "pragmatic-programs/speaker-prefix-idx-300k": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3584, "d_model": 1472, "num_heads": 6, "num_layers": 12, "vocab_size": 384}, "TheBloke/CodeLlama-13B-Instruct-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "TheBloke/Upstage-Llama-2-70B-instruct-v2-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "pinkmanlove/llama-13b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "VietAI/envit5-translation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 50048}, "cerebras/Cerebras-GPT-2.7B": {"n_inner": 10240, "n_embd": 2560, "n_head": 32, "n_layer": 32, "vocab_size": 50257}, "Open-Orca/LlongOrca-7B-16k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32003}, "hf-internal-testing/tiny-random-T5ForConditionalGeneration": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 37, "d_model": 32, "num_heads": 4, "num_layers": 5, "vocab_size": 32100}, "juierror/flan-t5-text2sql-with-schema-v2": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32101}, "BeIR/query-gen-msmarco-t5-base-v1": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "conceptofmind/LLongMA-2-13b-16k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 
32000}, "NousResearch/Yarn-Llama-2-13b-128k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "wangrongsheng/MiniGPT-4-LLaMA": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "hf-internal-testing/tiny-random-GPT2ForSequenceClassification": {"architectures": ["GPT2ForSequenceClassification"], "n_embd": 32, "n_head": 4, "n_inner": 37, "n_layer": 5, "vocab_size": 1024}, "zenham/wail_m_e4_16h_2k": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "h2oai/h2ogpt-4096-llama2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "ai-forever/FRED-T5-large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 50364}, "FreedomIntelligence/phoenix-inst-chat-7b": {"architectures": ["BloomForCausalLM"], "hidden_size": 4096, "n_head": 32, "n_inner": null, "n_layer": 30, "vocab_size": 250880}, "castorini/monot5-base-msmarco": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "minlik/chinese-alpaca-plus-7b-merged": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 49954}, "joaogante/tiny-random-gpt2-with-generation-config": {"intermediate_size": 37, "n_embd": 32, "n_head": 4, "n_inner": null, "n_layer": 5, "vocab_size": 1000}, "neulab/gpt2-finetuned-wikitext103": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "jarradh/llama2_70b_chat_uncensored": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "TigerResearch/tigerbot-13b-base": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 60928}, "rinna/japanese-gpt-neox-3.6b-instruction-sft-v2": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2816, "intermediate_size": 11264, "num_attention_heads": 22, "num_hidden_layers": 36, "vocab_size": 32000}, "bofenghuang/vigogne-2-7b-chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/stable-vicuna-13B-HF": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "aiplanet/effi-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "jondurbin/airoboros-33b-gpt4-m2.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "TheBloke/orca_mini_v3_13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 
32000}, "HuggingFaceH4/starchat-alpha": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 40, "vocab_size": 49156}, "WizardLM/WizardMath-13B-V1.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "upstage/Llama-2-70b-instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "anushehchaudry/llama-2-tiny-random": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8, "intermediate_size": 32, "num_attention_heads": 2, "num_hidden_layers": 1, "vocab_size": 32000}, "fangloveskari/ORCA_LLaMA_70B_QLoRA": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "HyperbeeAI/Tulpar-7b-v0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/Llama-2-70B-Chat-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "csebuetnlp/mT5_m2m_crossSum_enhanced": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "TheBloke/Genz-70b-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "cerebras/Cerebras-GPT-6.7B": {"n_embd": 4096, "vocab_size": 50257, "n_layer": 32, "n_head": 32, "n_inner": 16384}, "ziqingyang/chinese-alpaca-2-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 55296}, "google/t5-small-ssm-nq": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 32128}, "EleutherAI/polyglot-ko-3.8b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 3072, "intermediate_size": 12288, "num_attention_heads": 24, "num_hidden_layers": 32, "vocab_size": 30080}, "kashif/stack-llama-2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "line-corporation/japanese-large-lm-1.7b": {"vocab_size": 51200, "n_embd": 2304, "n_layer": 24, "n_head": 24, "n_inner": 9216, "architectures": ["GPT2LMHeadModel"]}, "microsoft/codereviewer": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32216}, "TheBloke/guanaco-7B-HF": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "circulus/Llama-2-7b-orca-v1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "FlagAlpha/Atom-7B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 65000}, "Tap-M/Luna-AI-Llama2-Uncensored": {"architectures": ["LlamaForCausalLM"], "hidden_size": 
4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "K024/mt5-zh-ja-en-trimmed": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 85292}, "deep-learning-analytics/automatic-title-generation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "luodian/llama-7b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "stabilityai/stablelm-base-alpha-7b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 6144, "intermediate_size": 24576, "num_attention_heads": 48, "num_hidden_layers": 16, "vocab_size": 50432}, "OpenLemur/lemur-70b-chat-v1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32005}, "rahular/varta-t5": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 128128}, "rinna/japanese-gpt-neox-3.6b-instruction-sft": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2816, "intermediate_size": 11264, "num_attention_heads": 22, "num_hidden_layers": 36, "vocab_size": 32000}, "garage-bAInd/Platypus-30B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "WizardLM/WizardCoder-Python-7B-V1.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "chavinlo/gpt4-x-alpaca": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "sentence-transformers/gtr-t5-xl": {"architectures": ["T5EncoderModel"], "d_ff": 16384, "d_model": 1024, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "wangrongsheng/MiniGPT-4-LLaMA-7B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "EleutherAI/pythia-12b-deduped": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 36, "vocab_size": 50688}, "unicamp-dl/translation-pt-en-t5": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "bigscience/mt0-base": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "Pirr/pythia-13b-deduped-green_devil": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 36, "vocab_size": 50277}, "trl-internal-testing/tiny-random-GPT2Model": {"architectures": ["GPT2Model"], "n_embd": 32, "n_head": 4, "n_inner": 37, "n_layer": 5, "vocab_size": 1024}, "MBZUAI/LaMini-GPT-1.5B": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1600, "n_head": 25, "n_inner": null, "n_layer": 48, "vocab_size": 50258}, "Universal-NER/UniNER-7B-all": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, 
"num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/koala-13B-HF": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Rostlab/prot_t5_xl_bfd": {"architectures": ["T5WithLMHeadModel"], "d_ff": 16384, "d_model": 1024, "num_heads": 32, "num_layers": 24, "vocab_size": 128}, "Voicelab/trurl-2-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "explosion-testing/llama2-kv-sharing": {"architectures": ["LlamaForCausalLM"], "hidden_size": 256, "intermediate_size": 512, "num_attention_heads": 4, "num_hidden_layers": 5, "vocab_size": 1024}, "inpars/monot5-3b-inpars-v2-nq-promptagator": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 16384, "d_model": 1024, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "upstage/llama-65b-instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "microsoft/CodeGPT-small-py": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_layer": 12, "vocab_size": 50001}, "VietAI/vit5-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 36096}, "TheBloke/CodeUp-Llama-2-13B-Chat-HF-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/CodeLlama-34B-Instruct-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "elyza/ELYZA-japanese-Llama-2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "jondurbin/airoboros-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/CodeLlama-7B-Python-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "FlagAlpha/Llama2-Chinese-13b-Chat-4bit": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/CodeLlama-13B-Python-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Enoch/llama-65b-hf": {"architectures": ["LLaMAForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "chargoddard/platypus-2-22b-relora": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 40, "vocab_size": 32000}, "togethercomputer/GPT-NeoXT-Chat-Base-20B": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 6144, "intermediate_size": 24576, "num_attention_heads": 64, "num_hidden_layers": 44, "vocab_size": 50432}, "porkorbeef/Llama-2-13b-sf": {"architectures": ["LlamaModel"], "hidden_size": 5120, "intermediate_size": 13824, 
"num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "ehartford/Wizard-Vicuna-13B-Uncensored": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "doas/test5": {"architectures": ["LlamaModel"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "klosax/open_llama_3b_350bt_preview": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "Writer/camel-5b-hf": {"architectures": ["GPT2LMHeadModel"], "n_embd": 4096, "n_head": 32, "n_inner": 16384, "n_layer": 24, "vocab_size": 50258}, "Filosofas/DialoGPT-medium-PALPATINE2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "nomic-ai/gpt4all-falcon": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "reciprocate/llama2-7b-gsm8k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/CodeLlama-13B-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "pankajmathur/orca_mini_v3_13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "projecte-aina/aguila-7b": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 50257}, "TheBloke/WizardLM-13B-V1.1-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "MBZUAI/LaMini-GPT-124M": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50258}, "google/mt5-xxl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 10240, "d_model": 4096, "num_heads": 64, "num_layers": 24, "vocab_size": 250112}, "MaRiOrOsSi/t5-base-finetuned-question-answering": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "satvikag/chatbot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "LMFlow/Robin-7b-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "daryl149/llama-2-70b-chat-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "acrastt/Puma-3B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "TheBloke/orca_mini_v3_7B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "taeminlee/kogpt2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_layer": 12, "vocab_size": 50000}, 
"NousResearch/Llama-2-70b-chat-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "rinna/japanese-gpt2-xsmall": {"architectures": ["GPT2LMHeadModel"], "n_embd": 512, "n_head": 8, "n_inner": 2304, "n_layer": 6, "vocab_size": 32000}, "ziqingyang/chinese-llama-2-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 55296}, "hf-internal-testing/tiny-random-t5-v1.1": {"d_ff": 37, "d_model": 32, "num_heads": 4, "num_layers": 5, "vocab_size": 1103}, "pankajmathur/Lima_Unchained_70b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "chargoddard/llama2-22b-blocktriangular": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/CodeLlama-7B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32016}, "BeIR/query-gen-msmarco-t5-large-v1": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "TheBloke/CodeLlama-13B-Instruct-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "acrastt/Marx-3B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "PygmalionAI/pygmalion-2-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "shibing624/chinese-alpaca-plus-7b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 49954}, "TheBloke/OpenOrcaxOpenChat-Preview2-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "syzymon/long_llama_3b_instruct": {"architectures": ["LongLlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "bofenghuang/vigogne-2-7b-instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Gustavosta/MagicPrompt-Dalle": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "muchad/idt5-qa-qg": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 30002}, "TheBloke/vicuna-13b-v1.3.0-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TigerResearch/tigerbot-13b-base-v1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 60928}, 
"ehartford/WizardLM-13B-Uncensored": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "clibrain/Llama-2-7b-ft-instruct-es": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "google/t5_xxl_true_nli_mixture": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 65536, "d_model": 1024, "num_heads": 128, "num_layers": 24, "vocab_size": 32128}, "unikei/t5-base-split-and-rephrase": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "microsoft/Promptist": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_layer": 12, "vocab_size": 50257}, "stas/mt5-tiny-random": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 256, "d_model": 64, "num_heads": 4, "num_layers": 8, "vocab_size": 5100}, "AIDC-ai-business/Luban-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "microsoft/GODEL-v1_1-base-seq2seq": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32102}, "CalderaAI/30B-Lazarus": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "acrastt/Marx-3B-V2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "h2oai/h2ogpt-4096-llama2-70b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "ajibawa-2023/scarlett-33b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "jondurbin/airoboros-l2-70b-2.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "rubentito/vt5-base-spdocvqa": {"architectures": ["HF_VT5"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "aisquared/dlite-v2-774m": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_inner": null, "n_layer": 36, "vocab_size": 50260}, "elyza/ELYZA-japanese-Llama-2-7b-fast": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 45043}, "quantumaikr/llama-2-70b-fb16-korean": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "TheBloke/CodeLlama-34B-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "microsoft/DialogRPT-updown": {"architectures": ["GPT2ForSequenceClassification"], "n_embd": 1024, "n_head": 16, "n_layer": 24, "vocab_size": 50257}, "TheBloke/CodeLlama-34B-Instruct-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, 
"num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "garage-bAInd/Platypus2-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "trl-internal-testing/tiny-BloomForCausalLM-correct-vocab": {"architectures": ["BloomForCausalLM"], "hidden_size": 32, "n_head": 4, "n_layer": 5, "vocab_size": 250880}, "TheBloke/Llama-2-7B-GGML": {}, "TheBloke/Wizard-Vicuna-7B-Uncensored-HF": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "wenge-research/yayi-7b-llama2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32005}, "coffeeee/nsfw-story-generator2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "jondurbin/airoboros-33b-gpt4-2.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "totally-not-an-llm/EverythingLM-13b-16k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "datificate/gpt2-small-spanish": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "mrm8488/t5-base-finetuned-wikiSQL": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "bofenghuang/vigogne-2-13b-instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "OpenAssistant/stablelm-7b-sft-v7-epoch-3": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 6144, "intermediate_size": 24576, "num_attention_heads": 48, "num_hidden_layers": 16, "vocab_size": 50288}, "bhenrym14/airoboros-33b-gpt4-1.4.1-lxctx-PI-16384-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "flozi00/codellama-34b-german-assistant-v1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "WizardLM/WizardCoder-1B-V1.0": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 2048, "n_head": 16, "n_inner": 8192, "n_layer": 24, "vocab_size": 49153}, "upstage/llama-30b-instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "ehartford/dolphin-llama2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Open-Orca/LlongOrca-13B-16k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32004}, "NousResearch/Nous-Hermes-Llama2-70b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32001}, 
"ml6team/mt5-small-german-query-generation": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250112}, "bigscience/mt0-xxl": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 10240, "d_model": 4096, "num_heads": 64, "num_layers": 24, "vocab_size": 250112}, "EleutherAI/pythia-2.8b-deduped-v0": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50304}, "TheBloke/wizardLM-7B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "conceptofmind/LLongMA-2-7b-16k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "lmsys/vicuna-7b-delta-v1.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "bofenghuang/vigogne-7b-instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "csebuetnlp/banglat5_nmt_en_bn": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "trl-internal-testing/tiny-random-T5Model": {"architectures": ["T5Model"], "d_ff": 37, "d_model": 32, "num_heads": 4, "num_layers": 5, "vocab_size": 1302}, "OpenBuddy/openbuddy-llama2-70b-v10.1-bf16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 37632}, "TheBloke/wizard-vicuna-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "JosephusCheung/Guanaco": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "openchat/opencoderplus": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 40, "vocab_size": 49153}, "jacobmorrison/tk-instruct-large-lora-experiments": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32100}, "PygmalionAI/metharme-1.3b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50304}, "TheBloke/orca_mini_13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "EleutherAI/pythia-70m-v0": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 512, "intermediate_size": 2048, "num_attention_heads": 8, "num_hidden_layers": 6, "vocab_size": 50304}, "project-baize/baize-v2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "line-corporation/japanese-large-lm-1.7b-instruction-sft": {"architectures": ["GPT2LMHeadModel"], "n_embd": 2304, "n_head": 24, "n_inner": 9216, "n_layer": 24, "vocab_size": 51200}, 
"TheBloke/WizardLM-13B-V1-0-Uncensored-SuperHOT-8K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/llama-2-70b-Guanaco-QLoRA-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "MBZUAI/LaMini-Flan-T5-77M": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 32128}, "csebuetnlp/banglat5_nmt_bn_en": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "Trelis/Llama-2-7b-chat-hf-function-calling-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "ehartford/Wizard-Vicuna-7B-Uncensored": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "llSourcell/medllama2_7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "WizardLM/WizardLM-13B-V1.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "Gryphe/MythoMix-L2-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/StableBeluga2-70B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "VietAI/vit5-large-vietnews-summarization": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 36096}, "adasnew/t5-small-xsum": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "Intel/t5-small-xsum-int8-dynamic": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32100}, "daspartho/prompt-extend": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 52000}, "EleutherAI/pythia-160m-v0": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 768, "intermediate_size": 3072, "num_attention_heads": 12, "num_hidden_layers": 12, "vocab_size": 50304}, "Ar4ikov/gpt2-650k-stable-diffusion-prompt-generator": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "ehartford/WizardLM-Uncensored-Falcon-7b": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65025}, "CobraMamba/mamba-gpt-3b-v3": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "TheBloke/llama2_70b_chat_uncensored-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, 
"ai-forever/FRED-T5-1.7B": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1536, "num_heads": 24, "num_layers": 24, "vocab_size": 50364}, "MBZUAI/LaMini-Cerebras-590M": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1536, "n_head": 12, "n_inner": 6144, "n_layer": 18, "vocab_size": 50258}, "mrm8488/llama-2-coder-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "guardrail/llama-2-7b-guanaco-instruct-sharded": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "rinna/bilingual-gpt-neox-4b-8k": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2816, "intermediate_size": 11264, "num_attention_heads": 22, "num_hidden_layers": 36, "vocab_size": 65536}, "mrm8488/falcoder-7b": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "circulus/Llama-2-13b-orca-v1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "allenai/tk-instruct-3b-def": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32100}, "pierreguillou/gpt2-small-portuguese": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_layer": 12, "vocab_size": 50257}, "junelee/wizard-vicuna-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "castorini/monot5-3b-msmarco-10k": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 16384, "d_model": 1024, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "TheBloke/Llama-2-70B-Chat-GGML": {}, "TheBloke/CodeLlama-7B-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32016}, "yeontaek/llama-2-13B-ensemble-v5": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "ybelkada/flan-t5-xl-sharded-bf16": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "WizardLM/WizardCoder-3B-V1.0": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 2816, "n_head": 22, "n_inner": 11264, "n_layer": 36, "vocab_size": 49153}, "Langboat/mengzi-t5-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "MBZUAI/LaMini-GPT-774M": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_inner": null, "n_layer": 36, "vocab_size": 50258}, "ToddGoldfarb/Cadet-Tiny": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "TigerResearch/tigerbot-7b-base": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 60928}, "UrukHan/t5-russian-spell": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, 
"LinkSoul/Chinese-Llama-2-7b-4bit": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/Vicuna-13B-CoT-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "EleutherAI/pythia-1.4b-v0": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50304}, "MayaPH/GodziLLa2-70B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "TheBloke/wizardLM-13B-1.0-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "Gryphe/MythoBoros-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "abacusai/Giraffe-v2-13b-32k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "jondurbin/airoboros-l2-13b-gpt4-1.4.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "razent/SciFive-base-Pubmed": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "TehVenom/Pygmalion-13b-Merged": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "garage-bAInd/SuperPlatty-30B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "jondurbin/airoboros-l2-7b-gpt4-2.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Rostlab/ProstT5": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 16384, "d_model": 1024, "num_heads": 32, "num_layers": 24, "vocab_size": 150}, "TheBloke/guanaco-7B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "JackFram/llama-68m": {"architectures": ["LlamaForCausalLM"], "hidden_size": 768, "intermediate_size": 3072, "num_attention_heads": 12, "num_hidden_layers": 2, "vocab_size": 32000}, "MBZUAI/LaMini-Cerebras-111M": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": 3072, "n_layer": 10, "vocab_size": 50258}, "ehartford/Wizard-Vicuna-30B-Uncensored": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "stockmark/gpt-neox-japanese-1.4b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50000}, "TheBloke/MythoMax-L2-13B-GGML": {}, "MBZUAI/LaMini-Cerebras-256M": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1088, "n_head": 17, "n_inner": 
4352, "n_layer": 14, "vocab_size": 50258}, "jondurbin/airoboros-l2-13b-gpt4-2.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "The-Face-Of-Goonery/Chronos-Beluga-v2-13bfp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "lmqg/t5-base-squad-qag": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32101}, "Voicelab/trurl-2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "ehartford/Samantha-1.11-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "clibrain/Llama-2-13b-ft-instruct-es": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "deepse/CodeUp-Llama-2-13b-chat-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "mrm8488/t5-base-finetuned-sarcasm-twitter": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "ToolBench/ToolLLaMA-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "marella/gpt-2-ggml": {}, "Henk717/airochronos-33B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "stanford-crfm/alias-gpt2-small-x21": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "WizardLM/WizardLM-30B-V1.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32001}, "timdettmers/guanaco-33b-merged": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "xkianteb/alg_ppo_separate_lr_1e-6_n_epochs_10_v_epochs_10_kl_target_1.0_clip_range_0.2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "TheBloke/wizard-mega-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "bigscience/mt0-xl": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 250112}, "luffycodes/nash-vicuna-13b-v1dot5-ep2-w-rag-w-simple": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "h2oai/h2ogpt-oig-oasst1-256-6_9b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, 
"fabiochiu/t5-base-medium-title-generation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "OpenAssistant/falcon-40b-sft-mix-1226": {"architectures": ["RWForCausalLM"], "hidden_size": 8192, "n_head": 128, "n_layer": 60, "vocab_size": 65040}, "Writer/palmyra-base": {"architectures": ["GPT2LMHeadModel"], "n_embd": 4096, "n_head": 32, "n_inner": 16384, "n_layer": 24, "vocab_size": 50257}, "TheBloke/llama-2-70b-Guanaco-QLoRA-GGML": {}, "Rostlab/prot_t5_base_mt_uniref50": {"architectures": ["T5WithLMHeadModel"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 256}, "Lajonbot/Llama-2-13b-hf-instruct-pl-lora_unload": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "WizardLM/WizardLM-7B-V1.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "pankajmathur/orca_mini_7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "yhyhy3/open_llama_7b_v2_med_instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "NousResearch/CodeLlama-7b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32016}, "OpenBuddy/openbuddy-llama2-13b-v11.1-bf16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 37632}, "hf-internal-testing/tiny-random-GPT2ForQuestionAnswering": {"architectures": ["GPT2ForQuestionAnswering"], "n_embd": 32, "n_head": 4, "n_inner": 37, "n_layer": 5, "vocab_size": 1024}, "explosion-testing/llama2-fewer-kv-heads": {"architectures": ["LlamaForCausalLM"], "hidden_size": 256, "intermediate_size": 512, "num_attention_heads": 4, "num_hidden_layers": 5, "vocab_size": 1024}, "hetpandya/t5-base-tapaco": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "PygmalionAI/pygmalion-2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "mrm8488/t5-base-finetuned-imdb-sentiment": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "explosion-testing/falcon-test": {"architectures": ["FalconForCausalLM"], "hidden_size": 32, "num_attention_heads": 4, "num_hidden_layers": 5, "vocab_size": 1024}, "ehartford/WizardLM-33B-V1.0-Uncensored": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "TheBloke/StableBeluga-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/WizardLM-Uncensored-SuperCOT-StoryTelling-30B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, 
"num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32001}, "jinaai/jina-embedding-s-en-v1": {"architectures": ["T5EncoderModel"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "FelixChao/vicuna-33b-coder": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "TheBloke/llama-30b-supercot-SuperHOT-8K-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "quantumaikr/llama-2-70b-fb16-orca-chat-10k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "TheBloke/airoboros-l2-13B-gpt4-1.4.1-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "EleutherAI/pythia-31m": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 256, "intermediate_size": 1024, "num_attention_heads": 8, "num_hidden_layers": 6, "vocab_size": 50304}, "hf-internal-testing/tiny-random-GPT2ForTokenClassification": {"architectures": ["GPT2ForTokenClassification"], "n_embd": 32, "n_head": 4, "n_inner": 37, "n_layer": 5, "vocab_size": 1024}, "jondurbin/airoboros-l2-70b-gpt4-1.4.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "kimsan0622/gpt2-medium": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 64007}, "TheBloke/EverythingLM-13B-16K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Linly-AI/Chinese-LLaMA-2-13B-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 40076}, "BlackSamorez/rudialogpt3_medium_based_on_gpt2_2ch": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "EleutherAI/pythia-2.8b-v0": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50304}, "TheBloke/llama-2-7B-Guanaco-QLoRA-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "google/byt5-xl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 6720, "d_model": 2560, "num_heads": 32, "num_layers": 36, "vocab_size": 384}, "TheBloke/wizard-vicuna-13B-HF": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TehVenom/Pygmalion-Vicuna-1.1-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "openaccess-ai-collective/wizard-mega-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "jondurbin/airoboros-l2-7b-gpt4-m2.0": 
{"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "openchat/openchat_v3.2_super": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "openaccess-ai-collective/manticore-13b-chat-pyg": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Neko-Institute-of-Science/pygmalion-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "unicamp-dl/ptt5-small-portuguese-vocab": {"architectures": ["T5WithLMHeadModel"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "hf-internal-testing/tiny-random-T5ForQuestionAnswering": {"architectures": ["T5ForQuestionAnswering"], "d_ff": 37, "d_model": 32, "num_heads": 4, "num_layers": 5, "vocab_size": 32100}, "microsoft/CodeGPT-small-java-adaptedGPT2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50260}, "unicamp-dl/ptt5-base-portuguese-vocab": {"architectures": ["T5WithLMHeadModel"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "Fredithefish/ScarletPajama-3B-HF": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "hf-internal-testing/tiny-random-T5ForSequenceClassification": {"architectures": ["T5ForSequenceClassification"], "d_ff": 37, "d_model": 32, "num_heads": 4, "num_layers": 5, "vocab_size": 32100}, "TheBloke/Nous-Hermes-Llama-2-7B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "calvindoingstuff/DialoGPT-medium-luffy": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "lvkaokao/llama2-7b-hf-chat-lora-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "skt/ko-gpt-trinity-1.2B-v0.5": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1920, "n_head": 16, "n_inner": 7680, "n_layer": 24, "vocab_size": 51200}, "saibo/llama-1B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 2, "vocab_size": 32000}, "vonjack/Qwen-LLaMAfied-HFTok-7B-Chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 151936}, "TheBloke/CodeLlama-34B-Python-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "GAIR/rst-all-11b": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 65536, "d_model": 1024, "num_heads": 128, "num_layers": 24, "vocab_size": 32128}, "GeorgiaTechResearchInstitute/starcoder-gpteacher-code-instruct": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 40, "vocab_size": 49153}, "jondurbin/airoboros-13b": 
{"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "aisquared/dlite-v2-1_5b": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1600, "n_head": 25, "n_inner": null, "n_layer": 48, "vocab_size": 50260}, "aiassociates/t5-small-grammar-correction-german": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "asi/gpt-fr-cased-small": {"n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50000}, "csebuetnlp/mT5_m2o_chinese_simplified_crossSum": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "openthaigpt/openthaigpt-1.0.0-alpha-7b-chat-ckpt-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "jondurbin/airoboros-l2-13b-2.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "sentence-transformers/sentence-t5-xl": {"architectures": ["T5EncoderModel"], "d_ff": 16384, "d_model": 1024, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "OpenBuddy/openbuddy-openllama-3b-v10-bf16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 37120}, "TheBloke/guanaco-33B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "h2oai/h2ogpt-oasst1-512-20b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 6144, "intermediate_size": 24576, "num_attention_heads": 64, "num_hidden_layers": 44, "vocab_size": 50432}, "Open-Orca/OpenOrca-Preview1-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "WizardLM/WizardLM-13B-V1.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "garage-bAInd/Camel-Platypus2-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "wxjiao/alpaca-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "TheBloke/Wizard-Vicuna-13B-Uncensored-SuperHOT-8K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "FelixChao/vicuna-7B-chemical": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Arc53/docsgpt-14b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "OpenAssistant/llama2-13b-megacode2-oasst": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32007}, 
"TheBloke/Lemur-70B-Chat-v1-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32005}, "EleutherAI/pythia-6.9b-v0": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "grimpep/L2-MythoMax22b-instruct-Falseblock": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 40, "vocab_size": 32000}, "Austism/chronos-hermes-13b-v2-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32032}, "UBC-NLP/AraT5v2-base-1024": {"d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 110208}, "fireballoon/baichuan-vicuna-chinese-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 64000}, "abeja/gpt2-large-japanese": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_inner": null, "n_layer": 36, "vocab_size": 32000}, "TheBloke/Airoboros-L2-70B-2.1-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "vicgalle/gpt2-alpaca-gpt4": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50260}, "flax-community/gpt2-small-indonesian": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "TheBloke/Nous-Hermes-13B-SuperHOT-8K-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "imone/LLaMA2_13B_with_EOT_token": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "Corianas/111m": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": 3072, "n_layer": 10, "vocab_size": 50257}, "The-Face-Of-Goonery/Huginn-v3-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "ehartford/Samantha-1.11-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "heegyu/WizardVicuna-3B-0719": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "acrastt/Griffin-3B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "totally-not-an-llm/EverythingLM-13b-V2-16k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "ikala/bloom-zh-3b-chat": {"architectures": ["BloomForCausalLM"], "hidden_size": 2560, "n_head": 32, "n_inner": null, "n_layer": 30, "vocab_size": 250688}, "Gryphe/MythoLogic-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 
5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "AlekseyKorshuk/vicuna-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "flax-community/gpt2-medium-persian": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50000}, "ehartford/samantha-1.1-llama-33b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "garage-bAInd/Platypus2-70B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "OpenLemur/lemur-70b-v1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32024}, "ausboss/llama-30b-supercot": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "jondurbin/airoboros-l2-70b-gpt4-m2.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "lmqg/mt5-small-koquad-qg-ae": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "TheBloke/OpenAssistant-SFT-7-Llama-30B-HF": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32016}, "TheBloke/h2ogpt-gm-oasst1-en-2048-falcon-40b-v2-GPTQ": {"architectures": ["RWForCausalLM"], "hidden_size": 8192, "n_head": 128, "n_layer": 60, "vocab_size": 65024}, "GOAT-AI/GOAT-7B-Community": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "h2oai/h2ogpt-gm-oasst1-en-1024-20b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 6144, "intermediate_size": 24576, "num_attention_heads": 64, "num_hidden_layers": 44, "vocab_size": 50432}, "beaugogh/pythia-1.4b-deduped-sharegpt": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50280}, "amurshak/llama-2-7b-miniguanaco": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "psyche/kollama2-7b-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "IlyaGusev/fred_t5_ru_turbo_alpaca": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1536, "num_heads": 24, "num_layers": 24, "vocab_size": 50365}, "potsawee/t5-large-generation-race-Distractor": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "heegyu/WizardVicuna-Uncensored-3B-0719": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, 
"TheBloke/openchat_v2_openorca_preview-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "CalderaAI/13B-Legerdemain-L2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "SebastianSchramm/Cerebras-GPT-111M-instruction": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": 3072, "n_layer": 10, "vocab_size": 50258}, "Mikael110/llama-2-7b-guanaco-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Locutusque/gpt2-large-conversational": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_inner": null, "n_layer": 36, "vocab_size": 50260}, "CalderaAI/13B-Ouroboros": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "chaoyi-wu/MedLLaMA_13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "YeungNLP/firefly-llama2-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "garage-bAInd/GPlatty-30B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "pankajmathur/orca_mini_v2_13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "pankajmathur/model_007_13b_v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "chargoddard/Chronorctypus-Limarobormes-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "timdettmers/guanaco-65b-merged": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "digitous/13B-HyperMantis": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "ckiplab/gpt2-base-chinese": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 21128}, "ehartford/dolphin-llama-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "jphme/orca_mini_v2_ger_7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "malhajar/Platypus2-70B-instruct-4bit-gptq": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "heegyu/WizardVicuna-open-llama-3b-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, 
"intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "pankajmathur/model_007": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "vicgalle/gpt2-alpaca": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50260}, "h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-7b-preview-300bt-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "stabilityai/stablecode-completion-alpha-3b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 49152}, "aisquared/dlite-v2-355m": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50260}, "google/byt5-xxl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 12352, "d_model": 4672, "num_heads": 64, "num_layers": 36, "vocab_size": 384}, "ehartford/Samantha-1.11-CodeLlama-34b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "h2oai/h2ogpt-gm-oasst1-multilang-1024-20b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 6144, "intermediate_size": 24576, "num_attention_heads": 64, "num_hidden_layers": 44, "vocab_size": 50432}, "TheBloke/koala-7B-HF": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "ehartford/WizardLM-30B-Uncensored": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32001}, "clibrain/Llama-2-ft-instruct-es": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "quantumaikr/llama-2-70b-fb16-guanaco-1k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "psyche/kogpt": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1536, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 32002}, "wenge-research/yayi-7b": {"architectures": ["BloomForCausalLM"], "hidden_size": 4096, "n_head": 32, "n_inner": null, "n_layer": 30, "vocab_size": 250684}, "Aspik101/WizardVicuna-Uncensored-3B-instruct-PL-lora_unload": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "rombodawg/LosslessMegaCoder-llama2-7b-mini": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32007}, "TurkuNLP/gpt3-finnish-medium": {"architectures": ["BloomModel"], "hidden_size": 1024, "n_head": 16, "n_layer": 24, "vocab_size": 131072}, "pankajmathur/orca_mini_13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Mikael110/llama-2-13b-guanaco-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, 
"intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "totally-not-an-llm/PuddleJumper-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "jondurbin/airoboros-13b-gpt4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "CobraMamba/mamba-gpt-3b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "zarakiquemparte/zarablend-l2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Locutusque/gpt2-conversational-or-qa": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50262}, "frank098/Wizard-Vicuna-13B-juniper": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "jondurbin/airoboros-gpt-3.5-turbo-100k-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "jondurbin/airoboros-33b-gpt4-1.4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "jondurbin/airoboros-l2-70b-gpt4-2.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "MBZUAI/LaMini-Cerebras-1.3B": {"architectures": ["GPT2LMHeadModel"], "n_embd": 2048, "n_head": 16, "n_inner": 8192, "n_layer": 24, "vocab_size": 50258}, "h2oai/h2ogpt-research-oasst1-llama-65b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "nkpz/llama2-22b-daydreamer-v3": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 40, "vocab_size": 32000}, "Aspik101/trurl-2-13b-pl-instruct_unload": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "OpenAssistant/pythia-12b-pre-v8-12.5k-steps": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 36, "vocab_size": 50288}, "breadlicker45/dough-instruct-base-001": {"architectures": ["LlamaForCausalLM"], "hidden_size": 768, "intermediate_size": 3072, "num_attention_heads": 12, "num_hidden_layers": 12, "vocab_size": 50402}, "OpenBuddy/openbuddy-llama-30b-v7.1-bf16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 37632}, "andreaskoepf/llama2-13b-megacode2_min100": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32007}, "ehartford/Samantha-1.11-70b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, 
"intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "flax-community/t5-recipe-generation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "BreadAi/PM_modelV2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1088, "n_head": 17, "n_inner": 4352, "n_layer": 14, "vocab_size": 50257}, "minlik/chinese-alpaca-33b-merged": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 49954}, "jordiclive/Llama-2-70b-oasst-1-200": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32016}, "Lajonbot/tableBeluga-7B-instruct-pl-lora_unload": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "sia-ai/llama-2-7b-1-percent-open-orca-1000-steps-v0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "h2oai/h2ogpt-gm-oasst1-en-1024-12b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 36, "vocab_size": 50688}, "jondurbin/airoboros-33b-gpt4-1.2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "openchat/openchat_8192": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "TaylorAI/Flash-Llama-3B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "yeontaek/llama-2-13B-ensemble-v1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Kirili4ik/ruDialoGpt3-medium-finetuned-telegram": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "WangZeJun/bloom-820m-chat": {"architectures": ["BloomForCausalLM"], "hidden_size": 1536, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 46145}, "4bit/Llama-2-70b-chat-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "chargoddard/llama2-22b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 40, "vocab_size": 32000}, "augtoma/qCammel-13": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "NlpHUST/gpt2-vietnamese": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "Monero/Manticore-13b-Chat-Pyg-Guanaco": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "NousResearch/CodeLlama-34b-hf": {"architectures": 
["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "aisquared/dlite-v2-124m": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50260}, "pankajmathur/orca_mini_v2_7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "The-Face-Of-Goonery/Huginn-22b-Prototype": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 40, "vocab_size": 32000}, "DevaMalla/llama7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "openaccess-ai-collective/manticore-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "nkpz/llama2-22b-chat-wizard-uncensored": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 40, "vocab_size": 32000}, "davzoku/cria-llama2-7b-v1.3": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TaylorAI/Flash-Llama-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Fredithefish/ReasonixPajama-3B-HF": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "TheBloke/Platypus-30B-SuperHOT-8K-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "LoupGarou/WizardCoder-Guanaco-15B-V1.1": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 40, "vocab_size": 49153}, "TheBloke/guanaco-65B-HF": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "hakurei/lotus-12B": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 36, "vocab_size": 50688}, "bofenghuang/vigogne-33b-instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "grimpep/llama2-22B-GPLATTY": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 40, "vocab_size": 32000}, "concedo/Pythia-70M-ChatSalad": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 512, "intermediate_size": 2048, "num_attention_heads": 8, "num_hidden_layers": 6, "vocab_size": 50278}, "rombodawg/LosslessMegaCoder-llama2-13b-mini": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32007}, "TaylorAI/Flash-Llama-7B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, 
"num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/chronos-wizardlm-uc-scot-st-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "OpenBuddy/openbuddy-llama-65b-v8-bf16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 37632}, "ajibawa-2023/scarlett-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/medalpaca-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "elinas/chronos-33b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "OpenBuddy/openbuddy-atom-13b-v9-bf16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 49953}, "OpenAssistant/pythia-12b-sft-v8-rlhf-2k-steps": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 36, "vocab_size": 50282}, "TheTravellingEngineer/llama2-7b-chat-hf-v4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Ejafa/vicuna_7B_vanilla_1.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "yulan-team/YuLan-Chat-2-13b-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 51200}, "huashiyiqike/testmodel": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": 3072, "n_layer": 10, "vocab_size": 50257}, "TheBloke/WizardLM-30B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32001}, "notstoic/PygmalionCoT-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/Vicuna-33B-1-3-SuperHOT-8K-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "FelixChao/vicuna-7B-physics": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/tulu-30B-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32001}, "jondurbin/airoboros-65b-gpt4-1.4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "uukuguy/speechless-llama2-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, 
"num_hidden_layers": 40, "vocab_size": 32000}, "digitous/13B-Chimera": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "jondurbin/airoboros-7b-gpt4-1.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "grimpep/llama2-28B-Airo03": {"architectures": ["LlamaForCausalLM"], "hidden_size": 7296, "intermediate_size": 22016, "num_attention_heads": 57, "num_hidden_layers": 40, "vocab_size": 32000}, "ehartford/CodeLlama-34b-Instruct-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "YeungNLP/firefly-ziya-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 39424}, "TheTravellingEngineer/bloom-560m-RLHF-v2": {"architectures": ["BloomForCausalLM"], "hidden_size": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 250880}, "TheTravellingEngineer/llama2-7b-chat-hf-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "uukuguy/speechless-hermes-coig-lite-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "BreadAi/gpt-Youtube": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 768, "intermediate_size": 3072, "num_attention_heads": 12, "num_hidden_layers": 12, "vocab_size": 50304}, "Aspik101/llama-30b-instruct-2048-PL-lora": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "beaugogh/Llama2-13b-sharegpt4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "gaodrew/gaodrew-gorgonzola-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "OpenBuddy/openbuddy-llama2-13b-v11-bf16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 37632}, "TheBloke/guanaco-13B-HF": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/WizardLM-13B-V1-1-SuperHOT-8K-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "NousResearch/CodeLlama-13b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "BreadAi/MusePy-1-2": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 512, "intermediate_size": 2048, "num_attention_heads": 8, "num_hidden_layers": 6, "vocab_size": 50304}, "jondurbin/airoboros-33b-gpt4-1.3": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, 
"vocab_size": 32000}, "YeungNLP/firefly-bloom-7b1": {"architectures": ["BloomForCausalLM"], "hidden_size": 4096, "n_head": 32, "n_inner": null, "n_layer": 30, "vocab_size": 250880}, "grimpep/llama2-22b-wizard_vicuna": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 40, "vocab_size": 32000}, "Fredithefish/Guanaco-3B-Uncensored": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "digitous/Alpacino13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "mncai/SGPT-1.3B-insurance-epoch10": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 30080}, "TheTravellingEngineer/llama2-7b-chat-hf-dpo": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "yeontaek/Platypus2xOpenOrca-13B-LoRa": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "TheBloke/gpt4-alpaca-lora-30b-HF": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "bhenrym14/airophin-13b-pntk-16k-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/Kimiko-13B-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "porkorbeef/Llama-2-13b-12_153950": {"architectures": ["LlamaModel"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "PSanni/Deer-3b": {"architectures": ["BloomForCausalLM"], "hidden_size": 2560, "n_head": 32, "n_inner": null, "n_layer": 30, "vocab_size": 250683}, "IGeniusDev/llama13B-quant8-testv1-openorca-customdataset": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Neko-Institute-of-Science/metharme-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "alibidaran/medical_transcription_generator": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "Panchovix/airoboros-33b-gpt4-1.2-SuperHOT-8k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "digitous/Alpacino30b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "lgaalves/gpt2-dolly": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "TaylorAI/FLAN-Llama-7B-2_Llama2-7B-Flash_868_full_model": {"architectures": 
["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "zarakiquemparte/zarafusionex-1.1-l2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "OpenAssistant/pythia-12b-sft-v8-2.5k-steps": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 36, "vocab_size": 50288}, "TheBloke/airoboros-13B-HF": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/robin-33B-v2-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "Aspik101/trurl-2-7b-pl-instruct_unload": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "llama-anon/petra-13b-instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "TFLai/gpt2-turkish-uncased": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "health360/Healix-3B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "TheBloke/Mythalion-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-7b-preview-300bt": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "pe-nlp/llama-2-13b-vicuna-wizard": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "yeontaek/Platypus2-13B-QLoRa": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "acrastt/OmegLLaMA-3B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "jslin09/bloom-560m-finetuned-fraud": {"architectures": ["BloomForCausalLM"], "hidden_size": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 250880}, "YeungNLP/firefly-bloom-2b6-v2": {"architectures": ["BloomForCausalLM"], "hidden_size": 2560, "n_head": 32, "n_inner": null, "n_layer": 30, "vocab_size": 46145}, "xzuyn/LLaMa-1-MedicWizard-7B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "Azure99/blossom-v2-3b": {"architectures": ["BloomForCausalLM"], "hidden_size": 2560, "n_head": 32, "n_inner": null, "n_layer": 30, "vocab_size": 250880}, "TheBloke/Airoboros-L2-13B-2.1-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, 
"vocab_size": 32000}, "MetaIX/GPT4-X-Alpasta-30b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32016}, "PocketDoc/Dans-PersonalityEngine-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "vicgalle/alpaca-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Corianas/590m": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1536, "n_head": 12, "n_inner": 6144, "n_layer": 18, "vocab_size": 50257}, "OpenBuddy/openbuddy-openllama-13b-v7-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 38656}, "gywy/llama2-13b-chinese-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 49376}, "Corianas/Quokka_590m": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1536, "n_head": 12, "n_inner": 6144, "n_layer": 18, "vocab_size": 50260}, "aisquared/dlite-v1-355m": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50260}, "aisquared/dlite-v1-774m": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_inner": null, "n_layer": 36, "vocab_size": 50260}, "Fredithefish/RedPajama-INCITE-Chat-3B-Instruction-Tuning-with-GPT-4": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "project-baize/baize-v2-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/Project-Baize-v2-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "FabbriSimo01/GPT_Large_Quantized": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_inner": null, "n_layer": 36, "vocab_size": 50257}, "ajibawa-2023/carl-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Azure99/blossom-v1-3b": {"architectures": ["BloomForCausalLM"], "hidden_size": 2560, "n_head": 32, "n_inner": null, "n_layer": 30, "vocab_size": 250880}, "Aspik101/30B-Lazarus-instruct-PL-lora_unload": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "yeontaek/Platypus2xOpenOrca-13B-IA3": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "Enno-Ai/ennodata-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "FabbriSimo01/Cerebras_1.3b_Quantized": {"architectures": ["GPT2LMHeadModel"], "n_embd": 2048, "n_head": 16, "n_inner": 8192, "n_layer": 24, "vocab_size": 50257}, "migtissera/Synthia-7B": {"architectures": ["LlamaForCausalLM"], 
"hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "pe-nlp/llama-2-13b-platypus-vicuna-wizard": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "yeontaek/Platypus2xOpenOrca-13B-IA3-ensemble": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "Corianas/1.3b": {"architectures": ["GPT2LMHeadModel"], "n_embd": 2048, "n_head": 16, "n_inner": 8192, "n_layer": 24, "vocab_size": 50257}, "Rachneet/gpt2-xl-alpaca": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1600, "n_head": 25, "n_inner": null, "n_layer": 48, "vocab_size": 50257}, "Aeala/VicUnlocked-alpaca-30b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "TheBloke/VicUnlocked-30B-LoRA-HF": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "Monero/WizardLM-Uncensored-SuperCOT-StoryTelling-30b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32001}, "bavest/fin-llama-33b-merged": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "openchat/openchat_v2_w": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "FabbriSimo01/Bloom_1b_Quantized": {"architectures": ["BloomForCausalLM"], "hidden_size": 1536, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 250880}, "Aspik101/tulu-7b-instruct-pl-lora_unload": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "TheTravellingEngineer/llama2-7b-chat-hf-v3": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "yeontaek/llama-2-70b-IA3-guanaco": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "Kunhao/pile-7b-250b-tokens": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 49152}, "yeontaek/llama-2-13b-QLoRA": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "yeontaek/llama-2-13b-Beluga-QLoRA": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "yeontaek/Platypus2xOpenOrca-13B-IA3-v3": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "ausboss/llama7b-wizardlm-unfiltered": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, 
"num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/h2ogpt-oasst1-512-30B-HF": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "bofenghuang/vigogne-7b-chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "NYTK/PULI-GPTrio": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 150016}, "LLMs/WizardLM-30B-V1.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32001}, "openaccess-ai-collective/minotaur-13b-fixed": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheTravellingEngineer/bloom-1b1-RLHF-v2": {"architectures": ["BloomForCausalLM"], "hidden_size": 1536, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 250880}, "yeontaek/Platypus2xOpenOrca-13B-IA3-v4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "BreadAi/DiscordPy": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1088, "n_head": 17, "n_inner": 4352, "n_layer": 14, "vocab_size": 50257}, "TehVenom/oasst-sft-6-llama-33b-xor-MERGED-16bit": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32016}, "gaodrew/gaodrew-llama-30b-instruct-2048-Open-Platypus-100steps": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "yeontaek/Platypus2xOpenOrca-13B-IA3-v2.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "yeontaek/Platypus2xOpenOrca-13B-LoRa-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "dvruette/oasst-pythia-12b-6000-steps": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 36, "vocab_size": 50288}, "LoupGarou/WizardCoder-Guanaco-15B-V1.0": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 40, "vocab_size": 49153}, "KnutJaegersberg/gpt-2-xl-EvolInstruct": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1600, "n_head": 25, "n_inner": null, "n_layer": 48, "vocab_size": 50257}, "Lajonbot/WizardLM-13B-V1.2-PL-lora_unload": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "yeontaek/Platypus2-13B-IA3": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "zarakiquemparte/zaraxe-l2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, 
"BreadAi/gpt-YA-1-1_70M": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 512, "intermediate_size": 2048, "num_attention_heads": 8, "num_hidden_layers": 6, "vocab_size": 50304}, "dvruette/oasst-pythia-12b-reference": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 36, "vocab_size": 50288}, "Panchovix/WizardLM-33B-V1.0-Uncensored-SuperHOT-8k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "titan087/OpenLlama13B-Guanaco": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "w601sxs/b1ade-1b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 8, "num_hidden_layers": 16, "vocab_size": 50304}, "Andron00e/YetAnother_Open-Llama-3B-LoRA": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "quantumaikr/QuantumLM": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "jondurbin/airoboros-65b-gpt4-2.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "Aspik101/llama-30b-2048-instruct-PL-lora_unload": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "yeontaek/Platypus2-13B-LoRa": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "zarakiquemparte/zarafusionix-l2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "huggingtweets/gladosystem": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "eachadea/legacy-vicuna-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "Aeala/GPT4-x-AlpacaDente2-30b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32016}, "shibing624/chinese-llama-plus-13b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 49953}, "euclaise/gpt-neox-122m-minipile-digits": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 768, "intermediate_size": 3072, "num_attention_heads": 12, "num_hidden_layers": 12, "vocab_size": 48262}, "TheBloke/UltraLM-13B-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "lvkaokao/llama2-7b-hf-instruction-lora": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "BreadAi/StoryPy": 
{"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 512, "intermediate_size": 2048, "num_attention_heads": 8, "num_hidden_layers": 6, "vocab_size": 50304}, "dvruette/oasst-pythia-12b-flash-attn-5000-steps": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 36, "vocab_size": 50288}, "aisquared/dlite-v1-124m": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "ewof/koishi-instruct-3b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "h2oai/h2ogpt-gm-oasst1-en-1024-open-llama-7b-preview-400bt": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/airoboros-7b-gpt4-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "jondurbin/airoboros-13b-gpt4-1.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/tulu-13B-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "yhyhy3/med-orca-instruct-33b": {"architectures": ["LlamaModel"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "heegyu/LIMA-13b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "abhishek/llama2guanacotest": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "heegyu/LIMA2-13b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Corianas/Quokka_256m": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1088, "n_head": 17, "n_inner": 4352, "n_layer": 14, "vocab_size": 50260}, "golaxy/gogpt-560m": {"architectures": ["BloomForCausalLM"], "hidden_size": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 250880}, "OptimalScale/robin-7b-v2-delta": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "bofenghuang/vigogne-13b-chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "klosax/pythia-160m-deduped-step92k-193bt": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 768, "intermediate_size": 3072, "num_attention_heads": 12, "num_hidden_layers": 12, "vocab_size": 50304}, "golaxy/gogpt2-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 68420}, "YeungNLP/firefly-llama2-13b-v1.2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 
32000}, "WhoTookMyAmogusNickname/NewHope_HF_not_official": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "ehartford/CodeLlama-34b-Python-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "KnutJaegersberg/megatron-GPT-2-345m-EvolInstruct": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": 4096, "n_layer": 24, "vocab_size": 50257}, "Aeala/Alpaca-elina-65b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "Monero/WizardLM-30B-Uncensored-Guanaco-SuperCOT-30b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32001}, "csitfun/llama-7b-logicot": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "OptimalScale/robin-65b-v2-delta": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "LLMs/WizardLM-13B-V1.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "CobraMamba/mamba-gpt-3b-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "aisquared/dlite-v1-1_5b": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1600, "n_head": 25, "n_inner": null, "n_layer": 48, "vocab_size": 50257}, "nthngdy/pythia-owt2-70m-100k": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 512, "intermediate_size": 2048, "num_attention_heads": 8, "num_hidden_layers": 6, "vocab_size": 50304}, "LLMs/AlpacaGPT4-7B-elina": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Yhyu13/oasst-rlhf-2-llama-30b-7k-steps-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32006}, "jondurbin/airoboros-7b-gpt4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "grantprice/Cerebras-GPT-590M-finetuned-DND": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1536, "n_head": 12, "n_inner": 6144, "n_layer": 18, "vocab_size": 50257}, "TheBloke/robin-13B-v2-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/robin-65b-v2-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "FPHam/Free_Sydney_13b_HF": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32032}, "acrastt/RedPajama-INCITE-Chat-Instruct-3B-V1": {"architectures": ["GPTNeoXForCausalLM"], 
"hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "jondurbin/airoboros-65b-gpt4-m2.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "heegyu/LIMA2-7b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "BreadAi/MuseCan": {"architectures": ["GPT2LMHeadModel"], "n_embd": 960, "n_head": 15, "n_inner": 9, "n_layer": 5, "vocab_size": 50304}, "ausboss/llama-13b-supercot": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "openaccess-ai-collective/manticore-30b-chat-pyg-alpha": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "OptimalScale/robin-13b-v2-delta": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "quantumaikr/llama-2-7b-hf-guanaco-1k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Fredithefish/RedPajama-INCITE-Chat-3B-ShareGPT-11K": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "CalderaAI/13B-BlueMethod": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "SaylorTwift/gpt2_test": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_layer": 12, "vocab_size": 50257}, "WeOpenML/PandaLM-Alpaca-7B-v1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "WeOpenML/Alpaca-7B-v1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "sumo43/lora_moe_7b_baseline": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "wenge-research/yayi-13b-llama2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32005}, "golaxy/gowizardlm": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 68420}, "abhiramtirumala/DialoGPT-sarcastic-medium": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "Corianas/Quokka_2.7b": {"architectures": ["GPT2LMHeadModel"], "n_embd": 2560, "n_head": 32, "n_inner": 10240, "n_layer": 32, "vocab_size": 50260}, "Corianas/256_5epoch": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1088, "n_head": 17, "n_inner": 4352, "n_layer": 14, "vocab_size": 50257}, "dvruette/llama-13b-pretrained": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, 
"num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "TheBloke/alpaca-lora-65B-HF": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "ashercn97/giraffe-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Aspik101/Vicuzard-30B-Uncensored-instruct-PL-lora_unload": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32001}, "TheBloke/dromedary-65b-lora-HF": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "Yhyu13/chimera-inst-chat-13b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "ehartford/based-30b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "concedo/Vicuzard-30B-Uncensored": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32001}, "64bits/LexPodLM-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "MayaPH/GodziLLa-30B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "Aspik101/vicuna-7b-v1.3-instruct-pl-lora_unload": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "julianweng/Llama-2-7b-chat-orcah": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "heegyu/RedTulu-Uncensored-3B-0719": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "Aspik101/Llama-2-7b-hf-instruct-pl-lora_unload": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "quantumaikr/QuantumLM-70B-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "BreadAi/gpt-YA-1-1_160M": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 768, "intermediate_size": 3072, "num_attention_heads": 12, "num_hidden_layers": 12, "vocab_size": 50304}, "dvruette/oasst-pythia-12b-pretrained-sft": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 36, "vocab_size": 50288}, "Aeala/GPT4-x-AlpacaDente-30b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32016}, "TehVenom/Pygmalion_AlpacaLora-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, 
"intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "LLMs/Stable-Vicuna-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "quantumaikr/open_llama_7b_hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Aeala/GPT4-x-Alpasta-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "Fredithefish/CrimsonPajama": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "openaccess-ai-collective/hippogriff-30b-chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "camel-ai/CAMEL-13B-Role-Playing-Data": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/landmark-attention-llama7b-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32002}, "TheBloke/robin-33B-v2-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "TheBloke/GPlatty-30B-SuperHOT-8K-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "TheBloke/Chinese-Alpaca-33B-SuperHOT-8K-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 49954}, "TheBloke/CAMEL-33B-Combined-Data-SuperHOT-8K-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "klosax/open_llama_13b_600bt_preview": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Aspik101/Nous-Hermes-13b-pl-lora_unload": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "jondurbin/airoboros-l2-7b-gpt4-1.4.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "YeungNLP/firefly-llama-30b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "ashercn97/manatee-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "lizhuang144/starcoder_mirror": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 40, "vocab_size": 49152}, "Aspik101/vicuna-13b-v1.5-PL-lora_unload": {"architectures": ["LlamaForCausalLM"], 
"hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Aspik101/Redmond-Puffin-13B-instruct-PL-lora_unload": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32032}, "Aspik101/StableBeluga-13B-instruct-PL-lora_unload": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "RoversX/llama-2-7b-hf-small-shards-Samantha-V1-SFT": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Corianas/Quokka_1.3b": {"architectures": ["GPT2LMHeadModel"], "n_embd": 2048, "n_head": 16, "n_inner": 8192, "n_layer": 24, "vocab_size": 50260}, "nthngdy/pythia-owt2-70m-50k": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 512, "intermediate_size": 2048, "num_attention_heads": 8, "num_hidden_layers": 6, "vocab_size": 50304}, "danielhanchen/open_llama_3b_600bt_preview": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "TheBloke/VicUnlocked-alpaca-65B-QLoRA-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "IDEA-CCNL/Ziya-LLaMA-13B-Pretrain-v1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 39424}, "kevinpro/Vicuna-13B-CoT": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "wahaha1987/llama_7b_sharegpt94k_fastchat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "openaccess-ai-collective/minotaur-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/tulu-7B-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "golaxy/gogpt-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 68420}, "Aeala/Enterredaas-33b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "kingbri/chronolima-airo-grad-l2-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheTravellingEngineer/bloom-560m-RLHF": {"architectures": ["BloomForCausalLM"], "hidden_size": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 250880}, "HWERI/Llama2-7b-sharegpt4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "l3utterfly/llama2-7b-layla": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, 
"intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "yeontaek/llama-2-13b-Guanaco-QLoRA": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "duliadotio/dulia-13b-8k-alpha": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "yeontaek/llama-2-13B-ensemble-v3": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "dvruette/oasst-gpt-neox-20b-3000-steps": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 6144, "intermediate_size": 24576, "num_attention_heads": 64, "num_hidden_layers": 44, "vocab_size": 50288}, "dvruette/oasst-gpt-neox-20b-1000-steps": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 6144, "intermediate_size": 24576, "num_attention_heads": 64, "num_hidden_layers": 44, "vocab_size": 50288}, "huggingtweets/jerma985": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "Dampish/Dante-2.8B": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "TheBloke/Planner-7B-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "klosax/pythia-70m-deduped-step44k-92bt": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 512, "intermediate_size": 2048, "num_attention_heads": 8, "num_hidden_layers": 6, "vocab_size": 50304}, "klosax/open_llama_7b_400bt_preview": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "golaxy/gogpt2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 68420}, "Lajonbot/Llama-2-7b-chat-hf-instruct-pl-lora_unload": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheTravellingEngineer/llama2-7b-chat-hf-guanaco": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Lajonbot/vicuna-7b-v1.5-PL-lora_unload": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "kingbri/airolima-chronos-grad-l2-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "yeontaek/llama-2-70B-ensemble-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "dvruette/oasst-llama-13b-2-epochs": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "dvruette/llama-13b-pretrained-sft-epoch-1": {"architectures": ["LlamaForCausalLM"], 
"hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "dvruette/llama-13b-pretrained-dropout": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "hakurei/instruct-12b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 36, "vocab_size": 50277}, "dvruette/gpt-neox-20b-full-precision": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 6144, "intermediate_size": 24576, "num_attention_heads": 64, "num_hidden_layers": 44, "vocab_size": 50288}, "Monero/WizardLM-13b-OpenAssistant-Uncensored": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "Yhyu13/llama-30B-hf-openassitant": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "jondurbin/airoboros-65b-gpt4-1.2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "camel-ai/CAMEL-33B-Combined-Data": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "MBZUAI/bactrian-x-llama-13b-merged": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "dsvv-cair/alpaca-cleaned-llama-30b-bf16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "YeungNLP/firefly-llama-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "YeungNLP/firefly-llama-13b-v1.2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "heegyu/WizardVicuna2-13b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "dvruette/oasst-llama-13b-1000-steps": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "dvruette/llama-13b-pretrained-sft-do2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "pillowtalks-ai/delta13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "illuin/test-custom-llama": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "MrNJK/gpt2-xl-sft": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1600, "n_head": 25, "n_inner": null, "n_layer": 48, "vocab_size": 50257}, "PocketDoc/Dans-PileOfSets-Mk1-llama-13b-merged": {"architectures": ["LlamaForCausalLM"], 
"hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "jondurbin/airoboros-65b-gpt4-1.3": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "bhenrym14/airoboros-33b-gpt4-1.4.1-PI-8192-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "frank098/WizardLM_13B_juniper": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "golaxy/goims": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 68420}, "dvruette/oasst-pythia-6.9b-4000-steps": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50288}, "mncai/chatdoctor": {"architectures": ["LLaMAForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "wannaphong/openthaigpt-0.1.0-beta-full-model_for_open_llm_leaderboard": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "golaxy/gogpt-3b-bloom": {"architectures": ["BloomForCausalLM"], "hidden_size": 2560, "n_head": 32, "n_inner": null, "n_layer": 30, "vocab_size": 250880}, "golaxy/gogpt-7b-bloom": {"architectures": ["BloomForCausalLM"], "hidden_size": 4096, "n_head": 32, "n_inner": null, "n_layer": 30, "vocab_size": 250880}, "jondurbin/airoboros-33b-gpt4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "jondurbin/airoboros-13b-gpt4-1.2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "jondurbin/airoboros-7b-gpt4-1.2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "jondurbin/airoboros-13b-gpt4-1.3": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "jondurbin/airoboros-7b-gpt4-1.4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "jondurbin/airoboros-13b-gpt4-1.4-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "jondurbin/airoboros-7b-gpt4-1.4.1-qlora": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "frank098/orca_mini_3b_juniper": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "Lajonbot/vicuna-13b-v1.3-PL-lora_unload": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, 
"intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "jxhong/CAlign-alpaca-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "quantumaikr/KoreanLM-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "keyfan/vicuna-chinese-replication-v1.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 49953}, "jondurbin/airoboros-7b-gpt4-1.3": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "jerryjalapeno/nart-100k-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "xzuyn/Alpacino-SuperCOT-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "wahaha1987/llama_13b_sharegpt94k_fastchat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "quantumaikr/QuantumLM-7B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Undi95/ReMM-SLERP-L2-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "huggingtweets/bladeecity-jerma985": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "pszemraj/pythia-6.9b-HC3": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "CalderaAI/30B-Epsilon": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "TFLai/OpenOrca-Platypus2-13B-QLoRA-0.80-epoch": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "alpindale/pygmalion-instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "jondurbin/airoboros-c34b-2.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "abacaj/starcoderbase-1b-sft": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 2048, "n_head": 16, "n_inner": 8192, "n_layer": 24, "vocab_size": 49153}, "bongchoi/test-llama2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TinyPixel/lima-test": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, 
"quantumaikr/llama-2-70B-instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "yeontaek/llama-2-13B-ensemble-v4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "cointegrated/rut5-base-absum": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 30000}, "pankajmathur/model_420_preview": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "Fredithefish/Guanaco-3B-Uncensored-v2": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "yeontaek/llama-2-70B-ensemble-v4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "Writer/palmyra-large": {"architectures": ["GPT2LMHeadModel"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 44, "vocab_size": 50257}, "RobbeD/OpenLlama-Platypus-3B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "TFLai/OrcaMini-Platypus2-13B-QLoRA-0.80-epoch": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "NobodyExistsOnTheInternet/PuffedConvo13bLoraE4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Sao10K/Medusa-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/Manticore-13B-Chat-Pyg-Guanaco-SuperHOT-8K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TFLai/Nous-Hermes-Platypus2-13B-QLoRA-0.80-epoch": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32032}, "dhmeltzer/llama-7b-SFT_eli5_wiki65k_1024_r_64_alpha_16_merged": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TFLai/MythoMix-Platypus2-13B-QLoRA-0.80-epoch": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "chargoddard/llama-2-34b-uncode": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "zarakiquemparte/zaraxls-l2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TFLai/Stable-Platypus2-13B-QLoRA-0.80-epoch": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, 
"num_hidden_layers": 40, "vocab_size": 32000}, "Danielbrdz/Barcenas-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "alan-turing-institute/mt5-large-finetuned-mnli-xtreme-xnli": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 250112}, "TFLai/Limarp-Platypus2-13B-QLoRA-0.80-epoch": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TFLai/PuddleJumper-Platypus2-13B-QLoRA-0.80-epoch": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "The-Face-Of-Goonery/Huginn-13b-v4.5": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "jondurbin/airoboros-l2-7b-2.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "allenai/unifiedqa-v2-t5-large-1363200": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "TheBloke/OpenAssistant-Llama2-13B-Orca-8K-3319-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "synapsoft/Llama-2-7b-hf-flan2022-1.2M": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "yeontaek/Platypus2-13B-LoRa-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "KES/T5-KES": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "robowaifudev/megatron-gpt2-345m": {"vocab_size": 50257, "n_embd": 1024, "n_layer": 24, "n_head": 16, "n_inner": 4096, "architectures": ["GPT2LMHeadModel"]}, "Sao10K/Mythical-Destroyer-L2-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "CHIH-HUNG/llama-2-13b-dolphin_20w": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "The-Face-Of-Goonery/Huginn-13b-V4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "haining/scientific_abstract_simplification": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "ChanonUtupon/openthaigpt-merge-lora-llama-2-7B-3470k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 56554}, "chaoyi-wu/PMC_LLAMA_7B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 
32000}, "CHIH-HUNG/llama-2-13b-OpenOrca_5w": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "clibrain/lince-zero": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "TheBloke/Project-Baize-v2-7B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "uukuguy/speechless-codellama-platypus-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "CHIH-HUNG/llama-2-13b-dolphin_5w": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "yeontaek/airoboros-2.1-llama-2-13B-QLoRa": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "uukuguy/speechless-llama2-luban-orca-platypus-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Norquinal/llama-2-7b-claude-chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TFLai/Luban-Platypus2-13B-QLora-0.80-epoch": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "conceptofmind/Open-LLongMA-3b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "Norquinal/llama-2-7b-claude-chat-rp": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "lgaalves/llama-2-7b-hf_open-platypus": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "ziqingyang/chinese-llama-2-7b-16k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 55296}, "yeontaek/llama-2-13B-ensemble-v6": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "yeontaek/llama-2-70B-ensemble-v7": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "ubikpt/t5-small-finetuned-cnn": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "rajkumarrrk/t5-base-fine-tuned-on-cnn-dm": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "google/t5-efficient-xl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 16384, "d_model": 1024, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "TFLai/Airboros2.1-Platypus2-13B-QLora-0.80-epoch": 
{"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "dhmeltzer/llama-7b-SFT_ds_eli5_1024_r_64_alpha_16_merged": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "h2oai/h2ogpt-4096-llama2-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "dhmeltzer/llama-7b-SFT_ds_wiki65k_1024_r_64_alpha_16_merged": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TFLai/Ensemble5-Platypus2-13B-QLora-0.80-epoch": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "TFLai/Athena-Platypus2-13B-QLora-0.80-epoch": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "4bit/Llama-2-7b-chat-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TFLai/MythicalDestroyerV2-Platypus2-13B-QLora-0.80-epoch": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TFLai/OpenOrcaPlatypus2-Platypus2-13B-QLora-0.80-epoch": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "Salesforce/codegen25-7b-mono": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 51200}, "Sao10K/Stheno-L2-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "yeontaek/WizardCoder-Python-13B-LoRa": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "uukuguy/speechless-orca-platypus-coig-lite-2k-0.6e-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "casperhansen/vicuna-7b-v1.5-awq": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "luffycodes/nash-vicuna-33b-v1dot3-ep2-w-rag-w-simple": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "CHIH-HUNG/llama-2-13b-OpenOrca_20w": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "google/t5-efficient-small": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "TheBloke/orca_mini_v2_7B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, 
"intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "tianyil1/denas-llama2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Sao10K/Stheno-Inverted-L2-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "junelee/ko_vicuna_7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Seungyoun/codellama-7b-instruct-pad": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32025}, "TheBloke/Kimiko-v2-13B-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "uukuguy/speechless-orca-platypus-coig-lite-4k-0.5e-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "uukuguy/speechless-orca-platypus-coig-lite-4k-0.6e-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "Undi95/UndiMix-v1-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "h2oai/h2ogpt-gm-oasst1-en-2048-falcon-7b": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "yeontaek/llama-2-70B-ensemble-v6": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "TheBloke/vicuna-13B-v1.5-16K-GGML": {}, "KnutJaegersberg/black_goo_recipe_a": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "PKU-Alignment/beaver-7b-v1.0-reward": {"architectures": ["LlamaModelForScore"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "KnutJaegersberg/black_goo_recipe_b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "lgaalves/gpt2_open-platypus": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "cointegrated/rut5-base-multitask": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 30000}, "h2oai/h2ogpt-gm-oasst1-multilang-2048-falcon-7b": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "Cheng98/llama-160m": {"architectures": ["LlamaForCausalLM"], "hidden_size": 768, "intermediate_size": 3072, "num_attention_heads": 12, "num_hidden_layers": 12, "vocab_size": 32000}, "Andron00e/YetAnother_Open-Llama-3B-LoRA-OpenOrca": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 
26, "vocab_size": 32000}, "lgaalves/gpt2_guanaco-dolly-platypus": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "gagan3012/k2t-base": {"architectures": ["T5WithLMHeadModel"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "marcchew/Platypus-2-7B-LaMini-14K": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "lgaalves/gpt2_platypus-dolly-guanaco": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "czearing/article-title-generator": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "luffycodes/mcq-vicuna-13b-v1.5": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Universal-NER/UniNER-7B-definition": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Meli/GPT2-Prompt": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50259}, "s-nlp/ruT5-base-detox": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "cointegrated/rut5-base-paraphraser": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 30000}, "DevaMalla/llama7b_alpaca_bf16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "Universal-NER/UniNER-7B-type": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/starchat-beta-GPTQ": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 40, "vocab_size": 49156}, "bigscience/sgpt-bloom-7b1-msmarco": {"architectures": ["BloomForCausalLM"], "n_inner": null, "n_layer": 30, "num_attention_heads": 32, "vocab_size": 250682}, "4bit/Llama-2-13b-chat-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "uukuguy/speechless-llama2-hermes-orca-platypus-wizardlm-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "ClueAI/PromptCLUE-base-v1-5": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "budecosystem/genz-13b-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/LlongOrca-13B-16K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32004}, "ozcangundes/mt5-multitask-qa-qg-turkish": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 
512, "num_heads": 6, "num_layers": 8, "vocab_size": 250102}, "EleutherAI/pythia-410m-v0": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 1024, "intermediate_size": 4096, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50304}, "CHIH-HUNG/llama-2-13b-FINETUNE2_3w": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "sonoisa/t5-base-japanese-v1.1": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "bolbolzaban/gpt2-persian": {"n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 25000, "architectures": ["GPT2LMHeadModel"]}, "google/t5-large-ssm": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "DeepPavlov/rudialogpt3_medium_based_on_gpt2_v2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "Mikivis/xuanxuan": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "uukuguy/speechless-llama2-hermes-orca-platypus-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "KnutJaegersberg/black_goo_recipe_c": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "beaugogh/Llama2-7b-sharegpt4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Salesforce/codet5p-770m-py": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32100}, "codefuse-ai/CodeFuse-CodeLlama-34B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "AUTOMATIC/promptgen-majinai-safe": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 6, "vocab_size": 50257}, "reciprocate/shepherd-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Devio/test-22B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 40, "vocab_size": 32000}, "acrastt/Bean-3B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "TheBloke/L2-MythoMax22b-Instruct-Falseblock-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 40, "vocab_size": 32000}, "vihangd/smartplat-3b-v1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "jinaai/jina-embedding-b-en-v1": {"architectures": ["T5EncoderModel"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "yahma/llama-13b-hf": {"architectures": 
["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "uukuguy/speechless-codellama-orca-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "CHIH-HUNG/llama-2-13b-FINETUNE1_17w": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "VMware/open-llama-13b-open-instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "ToolBench/ToolLLaMA-2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "luffycodes/mcq-hal-vicuna-13b-v1.5": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "TheBloke/BigTranslate-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 53613}, "PeanutJar/LLaMa-2-PeanutButter_v18_A-7B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "openbmb/UltraLM-65b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "Mikivis/gpt2-large-lora-sft": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_inner": null, "n_layer": 36, "vocab_size": 50257}, "Devio/test-3b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 16, "vocab_size": 32000}, "akhooli/gpt2-small-arabic": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_layer": 12, "vocab_size": 50257}, "Rardilit/Panther_v1": {"architectures": ["LLaMAForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "ValiantLabs/ShiningValiant": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "Devio/test100": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 40, "vocab_size": 32000}, "Devio/testC": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/Chronoboros-33B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "TheBloke/Pygmalion-13B-SuperHOT-8K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "vihangd/smartplat-3b-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, 
"laituan245/t5-v1_1-small-smiles2caption-ft-from-pretrained-c4": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 32128}, "4bit/Llama-2-7b-Chat-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "CHIH-HUNG/llama-2-13b-FINETUNE2_3w-gate_up_down_proj": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "CHIH-HUNG/llama-2-13b-FINETUNE2_3w-q_k_v_o_proj": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/vicuna-33B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "Devio/test-1400": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/gpt4-alpaca-lora-30B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "notstoic/pygmalion-13b-4bit-128g": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "NousResearch/Yarn-Llama-2-7b-128k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Den4ikAI/FRED-T5-LARGE_text_qa": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 50364}, "valhalla/t5-base-qa-qg-hl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32102}, "Undi95/ReMM-L2-13B-v1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/Zarablend-L2-7B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "KnutJaegersberg/black_goo_recipe_d": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "ckip-joint/bloom-1b1-zh": {"architectures": ["BloomModel"], "hidden_size": 1536, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 250880}, "seonglae/llama-2-13b-chat-hf-gptq": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Trelis/Llama-2-7b-chat-hf-sharded-bf16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "KnutJaegersberg/LLongMA-3b-LIMA": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "h2oai/h2ogpt-gm-oasst1-en-xgen-7b-8k": {"architectures": ["LlamaForCausalLM"], 
"hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 51200}, "PeanutJar/LLaMa-2-PeanutButter_v18_B-7B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "ehartford/WizardLM-1.0-Uncensored-CodeLlama-34b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "uukuguy/speechless-codellama-orca-platypus-13b-0.10e": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "DeepESP/gpt2-spanish": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "paust/pko-flan-t5-large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 50358}, "ThomasNLG/t5-qa_squad2neg-en": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "PharMolix/BioMedGPT-LM-7B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "eenzeenee/t5-base-korean-summarization": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 50358}, "porkorbeef/Llama-2-13b-public": {"architectures": ["LlamaModel"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/WizardLM-Uncensored-Falcon-7B-GPTQ": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65025}, "dahara1/weblab-10b-instruction-sft-GPTQ": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4864, "intermediate_size": 19456, "num_attention_heads": 38, "num_hidden_layers": 36, "vocab_size": 50277}, "CHIH-HUNG/llama-2-13b-FINETUNE2_TEST_2.2w": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "mrm8488/t5-small-finetuned-emotion": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "gurgutan/saiga2-13b-4bit": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "IlyaGusev/rut5_base_sum_gazeta": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 30000}, "TheBloke/Llama-2-13B-German-Assistant-v4-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 37632}, "gaodrew/OpenOrca-Platypus2-13B-thera-1250": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "minlik/chinese-llama-7b-merged": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 49953}, 
"TheBloke/Stable-Platypus2-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/Luna-AI-Llama2-Uncensored-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "allenai/t5-small-squad2-question-generation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "microsoft/bloom-deepspeed-inference-fp16": {"architectures": ["BloomModel"], "n_layer": 70, "num_attention_heads": 112, "vocab_size": 250880}, "csebuetnlp/banglat5": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "potsawee/t5-large-generation-race-QuestionAnswer": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "grammarly/coedit-xl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32100}, "Narrativaai/bloom-560m-finetuned-totto-table-to-text": {"architectures": ["BloomForCausalLM"], "hidden_size": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 250880}, "jjaaaww/posi_13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "IDEA-CCNL/Randeng-T5-784M-MultiTask-Chinese": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32596}, "Undi95/Nous-Hermes-13B-Code": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32032}, "paust/pko-t5-large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 50358}, "learnanything/llama-7b-huggingface": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "weiren119/Taiwan-LLaMa-v1.0-4bits-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "ml6team/keyphrase-generation-t5-small-inspec": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32100}, "TheBloke/CodeLlama-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "Undi95/MLewd-L2-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "tscholak/cxmefzzi": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 16384, "d_model": 1024, "num_heads": 32, "num_layers": 24, "vocab_size": 32102}, "Gaivoronsky/ruGPT-3.5-13B-8bit": {"architectures": ["GPT2LMHeadModel"], "n_embd": 5120, "n_head": 40, "n_inner": null, "n_layer": 40, "vocab_size": 50272}, "SatoruDano/llama-2-7b-finetuned_v1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, 
"intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "ClueAI/PromptCLUE-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "uukuguy/speechless-codellama-orca-airoboros-13b-0.10e": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "anonymous-german-nlp/german-gpt2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_layer": 12, "vocab_size": 52000}, "fxmarty/gpt2-tiny-onnx": {"intermediate_size": 37, "n_embd": 32, "n_head": 4, "n_inner": null, "n_layer": 5, "vocab_size": 1000}, "prakharz/DIAL-FLANT5-XL": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32100}, "h2oai/h2ogpt-oasst1-falcon-40b": {"architectures": ["RWForCausalLM"], "hidden_size": 8192, "n_head": 128, "n_layer": 60, "vocab_size": 65024}, "synapsoft/Llama-2-7b-chat-hf-flan2022-1.2M": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Undi95/ReMM-L2-13B-PIPPA": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "CHIH-HUNG/llama-2-13b-FINETUNE1_17w-gate_up_down_proj": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Fredithefish/Guanaco-7B-Uncensored": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "conceptofmind/Yarn-Llama-2-13b-128k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Undi95/LewdEngine": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/CodeLlama-7B-Instruct-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32016}, "CHIH-HUNG/llama-2-13b-Open_Platypus_and_ccp_2.6w": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "CHIH-HUNG/llama-2-13b-FINETUNE1_17w-q_k_v_o_proj": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "jondurbin/airoboros-33b-2.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "Salesforce/codet5p-220m-py": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "Danielbrdz/CodeBarcenas-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "SJ-Ray/Re-Punctuate": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, 
"d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "EasthShin/Youth_Chatbot_Kogpt2-base": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 51200}, "ThomasNLG/t5-qg_squad1-en": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "EleutherAI/pythia-160m-deduped-v0": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 768, "intermediate_size": 3072, "num_attention_heads": 12, "num_hidden_layers": 12, "vocab_size": 50304}, "MBZUAI/LaMini-T5-223M": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "HooshvareLab/gpt2-fa": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 42001}, "TFLai/Nova-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "conceptofmind/LLongMA-2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TDC2023/trojan-base-pythia-1.4b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50304}, "phpaiola/ptt5-base-summ-xlsum": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "TFLai/SpeechlessV1-Nova-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/stablecode-instruct-alpha-3b-GPTQ": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 49152}, "stanford-crfm/music-small-800k": {"vocab_size": 55028, "n_embd": 768, "n_layer": 12, "n_head": 12, "n_inner": null, "architectures": null}, "TFLai/EnsembleV5-Nova-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "declare-lab/flan-alpaca-xl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "jpwahle/t5-large-word-sense-disambiguation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "lizhuang144/flan-t5-large-factual-sg": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "DKYoon/mt5-base-lm-adapt": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "TheBloke/guanaco-65B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "Salesforce/codegen25-7b-instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 51200}, "bigscience-data/sgpt-bloom-1b7-nli": {"architectures": ["BloomModel"], 
"n_inner": null, "n_layer": 24, "num_attention_heads": 16, "vocab_size": 250880}, "TurkuNLP/gpt3-finnish-small": {"architectures": ["BloomModel"], "hidden_size": 768, "n_head": 12, "n_layer": 12, "vocab_size": 131072}, "jordiclive/flan-t5-3b-summarizer": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "marblyso/DialoGPT-small-what-the-fuck": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "retrieva-jp/t5-small-short": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 32128}, "codeparrot/codeparrot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1600, "n_head": 25, "n_inner": null, "n_layer": 48, "vocab_size": 32768}, "openthaigpt/openthaigpt-1.0.0-beta-7b-chat-ckpt-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 56554}, "Rocketknight1/falcon-rw-1b": {"architectures": ["FalconForCausalLM"], "hidden_size": 2048, "num_attention_heads": 32, "num_hidden_layers": 24, "vocab_size": 50304}, "TaylorAI/Flash-Llama-30M-20001": {"architectures": ["LlamaForCausalLM"], "hidden_size": 384, "intermediate_size": 1024, "num_attention_heads": 12, "num_hidden_layers": 4, "vocab_size": 32000}, "castorini/t5-base-canard": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "r3dhummingbird/DialoGPT-medium-joshua": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "IDEA-CCNL/Wenzhong2.0-GPT2-110M-BertTokenizer-chinese": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": 3072, "n_layer": 12, "vocab_size": 21133}, "TigerResearch/tigerbot-13b-chat-8bit": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 60928}, "pranavpsv/gpt2-genre-story-generator": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_layer": 12, "vocab_size": 50266}, "Photolens/llama-2-7b-langchain-chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "ck46/t5-base-hotpot-qa-qg": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32102}, "castorini/monot5-small-msmarco-10k": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "yujiepan/llama-2-tiny-random": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8, "intermediate_size": 32, "num_attention_heads": 2, "num_hidden_layers": 1, "vocab_size": 32000}, "castorini/doc2query-t5-base-msmarco": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "oliverguhr/spelling-correction-multilingual-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "allenai/unifiedqa-t5-11b": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 65536, "d_model": 1024, "num_heads": 128, "num_layers": 24, 
"vocab_size": 32128}, "TheBloke/CodeLlama-34B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "snorkelai/sdnet": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "SiberiaSoft/SiberianFRED-T5-XL": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1536, "num_heads": 24, "num_layers": 24, "vocab_size": 50365}, "sultan/ArabicT5-Base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 20, "vocab_size": 32000}, "nikaashpuri/gpt-expt-sp-v3-K-600-MA-Mac-actions-kmeans-v16": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 1913}, "TheBloke/Yarn-Llama-2-13B-128K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "allenai/cosmo-xl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "flax-community/gpt2-bengali": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "EleutherAI/pythia-410m-deduped-v0": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 1024, "intermediate_size": 4096, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50304}, "Writer/palmyra-small": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": 3072, "n_layer": 12, "vocab_size": 50257}, "LukasStankevicius/t5-base-lithuanian-news-summaries-175": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "laituan245/molt5-large-caption2smiles": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "google/ul2": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 16384, "d_model": 4096, "num_heads": 16, "num_layers": 32, "vocab_size": 32128}, "Suva/uptag-keyphrase-model": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "TheBloke/orca_mini_7B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TusharJoshi89/title-generator": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "juierror/flan-t5-text2sql-with-schema": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "hf-tiny-model-private/tiny-random-T5ForConditionalGeneration": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 37, "d_model": 32, "num_heads": 4, "num_layers": 5, "vocab_size": 32100}, "stacked-summaries/flan-t5-large-stacked-samsum-1024": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "TheBloke/WizardLM-33B-V1-0-Uncensored-SuperHOT-8K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, 
"num_hidden_layers": 60, "vocab_size": 32000}, "persiannlp/mt5-base-parsinlu-opus-translation_fa_en": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "gurgutan/ruGPT-13B-4bit": {"architectures": ["GPT2LMHeadModel"], "n_embd": 5120, "n_head": 40, "n_inner": null, "n_layer": 40, "vocab_size": 50272}, "TheBloke/upstage-llama-30b-instruct-2048-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "sdadas/polish-gpt2-medium": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": 4096, "n_layer": 24, "vocab_size": 51200}, "aubmindlab/aragpt2-medium": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 64000}, "SEBIS/code_trans_t5_large_source_code_summarization_python_multitask_finetune": {"architectures": ["T5Model"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "maximxls/text-normalization-ru-terrible": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 256, "num_heads": 4, "num_layers": 3, "vocab_size": 5120}, "TheBloke/llama-2-13B-Guanaco-QLoRA-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "ziqingyang/chinese-alpaca-2-13b-16k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 55296}, "KETI-AIR/ke-t5-large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 64128}, "ibm/qcpg-sentences": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32158}, "tiiuae/falcon-rw-7b": {"architectures": ["FalconForCausalLM"], "hidden_size": 4096, "num_attention_heads": 64, "num_hidden_layers": 36, "vocab_size": 65024}, "timdettmers/guanaco-13b-merged": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "h2oai/h2ogpt-oig-oasst1-falcon-40b": {"architectures": ["RWForCausalLM"], "hidden_size": 8192, "n_head": 128, "n_layer": 60, "vocab_size": 65024}, "coffeeee/nsfw-story-generator": {"architectures": ["GPT2Model"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "zpn/llama-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "crumb/bloom-560m-RLHF-SD2-prompter-aesthetic": {"architectures": ["BloomForCausalLM"], "hidden_size": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 250880}, "kalpeshk2011/dipper-paraphraser-xxl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 10240, "d_model": 4096, "num_heads": 64, "num_layers": 24, "vocab_size": 32128}, "TheBloke/WizardLM-13B-V1.0-Uncensored-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/WizardLM-13B-V1-1-SuperHOT-8K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, 
"num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "allenai/unifiedqa-t5-small": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "oliverguhr/spelling-correction-german-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "ThomasSimonini/t5-end2end-question-generation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32101}, "asi/gpt-fr-cased-base": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1792, "n_head": 14, "n_inner": null, "n_layer": 24, "vocab_size": 50000}, "lora-x/backpack-gpt2": {"architectures": ["BackpackGPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50264}, "TheBloke/Vigogne-2-13B-Instruct-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "ai-forever/ruT5-large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "ml6team/keyphrase-generation-t5-small-openkp": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32100}, "mrm8488/t5-base-finetuned-e2m-intent": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "nikaashpuri/gpt-expt-sp-v3-K-600-MA-Mac-actions-kmeans-v14": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 1902}, "TheBloke/Marx-3b-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "TheBloke/Dolphin-Llama2-7B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "oscorrea/scores-falcon40b-sm-merged": {"architectures": ["RWForCausalLM"], "hidden_size": 8192, "n_head": 128, "n_layer": 60, "vocab_size": 65024}, "lmqg/t5-small-squad-qag": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32101}, "ehartford/WizardLM-Uncensored-Falcon-40b": {"architectures": ["RWForCausalLM"], "hidden_size": 8192, "n_head": 128, "n_layer": 60, "vocab_size": 65025}, "persiannlp/mt5-base-parsinlu-sentiment-analysis": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "VietAI/vit5-large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 36100}, "thanathorn/mt5-cpe-kmutt-thai-sentence-sum": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "Blackroot/Hermes-Kimiko-13B-f16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32032}, "CarperAI/stable-vicuna-13b-delta": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, 
"num_hidden_layers": 40, "vocab_size": 32001}, "j5ng/kullm-12.8b-GPTQ-8bit": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 30080}, "TheBloke/ReMM-SLERP-L2-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Weni/WeniGPT-L-70": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "valhalla/t5-small-qg-hl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32102}, "retrieva-jp/t5-xl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "TheBloke/Wizard-Vicuna-30B-Superhot-8K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "openllmplayground/openalpaca_3b_600bt_preview": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "ArmelR/starcoder-gradio-v0": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 40, "vocab_size": 49152}, "chanind/frame-semantic-transformer-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "akreal/tiny-random-gpt2": {"intermediate_size": 37, "n_embd": 32, "n_head": 4, "n_inner": null, "n_layer": 5, "vocab_size": 99}, "Neko-Institute-of-Science/LLaMA-7B-HF": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Writer/palmyra-med-20b": {"architectures": ["GPT2LMHeadModel"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 44, "vocab_size": 50259}, "SiberiaSoft/SiberianPersonaFred": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1536, "num_heads": 24, "num_layers": 24, "vocab_size": 50364}, "mrm8488/spanish-gpt2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "unicamp-dl/translation-en-pt-t5": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "OFA-Sys/gsm8k-rft-llama7b-u13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "liuhaotian/LLaVA-13b-delta-v0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32003}, "huggingface/falcon-40b-gptq": {"architectures": ["RWForCausalLM"], "hidden_size": 8192, "n_head": 128, "n_layer": 60, "vocab_size": 65024}, "Ravi07bec/llama-qlora-65b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "PKU-Alignment/alpaca-7b-reproduced": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, 
"num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "Unbabel/gec-t5_small": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "TheBloke/Speechless-Llama2-Hermes-Orca-Platypus-WizardLM-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "MIIB-NLP/Arabic-question-generation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 110080}, "google/t5-large-ssm-nq": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "stanford-crfm/arwen-gpt2-medium-x21": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "sentence-transformers/gtr-t5-xxl": {"architectures": ["T5EncoderModel"], "d_ff": 65536, "d_model": 1024, "num_heads": 128, "num_layers": 24, "vocab_size": 32128}, "TheBloke/Nous-Hermes-Llama2-70B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32001}, "paust/pko-t5-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 50358}, "allenai/tk-instruct-11b-def": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 65536, "d_model": 1024, "num_heads": 128, "num_layers": 24, "vocab_size": 32128}, "amphora/FinABSA": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32105}, "TurkuNLP/gpt3-finnish-13B": {"architectures": ["BloomModel"], "hidden_size": 5120, "n_head": 40, "n_layer": 40, "vocab_size": 131072}, "PAIXAI/Astrid-LLama-7B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Aalaa/opt-125m-wikitext2": {"architectures": ["OPTForCausalLM"], "hidden_size": 768, "num_attention_heads": 12, "num_hidden_layers": 12, "vocab_size": 50272}, "hf-internal-testing/tiny-random-GPTNeoXForQuestionAnswering": {"architectures": ["GPTNeoXForQuestionAnswering"], "hidden_size": 32, "intermediate_size": 37, "num_attention_heads": 4, "num_hidden_layers": 5, "vocab_size": 1024}, "cssupport/t5-small-awesome-text-to-sql": {"vocab_size": 32128, "d_model": 512, "d_ff": 2048, "num_layers": 6, "num_heads": 8, "architectures": ["T5ForConditionalGeneration"]}, "TheBloke/MythoMix-L2-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "conceptofmind/Hermes-LLongMA-2-13b-8k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "lysandre/arxiv-nlp": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_layer": 12, "vocab_size": 50257}, "Pcik/DialoGPT-medium-Kirby": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "PY007/SLM_1-4B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 2048, "intermediate_size": 5632, "num_attention_heads": 16, "num_hidden_layers": 24, 
"vocab_size": 50432}, "ceshine/t5-paraphrase-paws-msrp-opinosis": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "microsoft/bloom-deepspeed-inference-int8": {"architectures": ["BloomModel"], "n_layer": 70, "num_attention_heads": 112, "vocab_size": 250880}, "TheBloke/PuddleJumper-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "gorilla-llm/gorilla-falcon-7b-hf-v0": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "TheBloke/starcoder-GPTQ": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 40, "vocab_size": 49152}, "lmsys/longchat-7b-16k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "DAMO-NLP-MT/polylm-1.7b": {"architectures": ["GPT2LMHeadModel"], "n_embd": 2048, "n_head": 16, "n_inner": 8192, "n_layer": 24, "vocab_size": 256000}, "Salesforce/xgen-7b-4k-base": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 51200}, "DAMO-NLP-MT/polylm-13b": {"architectures": ["PolyLMHeadModel"], "n_embd": 5120, "n_head": 40, "n_inner": 20480, "n_layer": 40, "vocab_size": 256000}, "dbddv01/gpt2-french-small": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "EleutherAI/pythia-70m-deduped-v0": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 512, "intermediate_size": 2048, "num_attention_heads": 8, "num_hidden_layers": 6, "vocab_size": 50304}, "algolet/mt5-base-chinese-qg": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250101}, "hf-internal-testing/tiny-random-BloomForQuestionAnswering": {"architectures": ["BloomForQuestionAnswering"], "hidden_size": 32, "n_head": 4, "n_layer": 5, "vocab_size": 1024}, "hf-internal-testing/tiny-random-BloomForTokenClassification": {"architectures": ["BloomForTokenClassification"], "hidden_size": 32, "n_head": 4, "n_layer": 5, "vocab_size": 1024}, "flax-community/t5-base-cnn-dm": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "hf-internal-testing/tiny-random-BloomForSequenceClassification": {"architectures": ["BloomForSequenceClassification"], "hidden_size": 32, "n_head": 4, "n_layer": 5, "vocab_size": 1024}, "tau/t5-v1_1-large-rss": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "TheBloke/airoboros-l2-13b-gpt4-m2.0-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "hf-internal-testing/tiny-random-GPTNeoXForSequenceClassification": {"architectures": ["GPTNeoXForSequenceClassification"], "hidden_size": 32, "intermediate_size": 37, "num_attention_heads": 4, "num_hidden_layers": 5, "vocab_size": 1024}, "allegro/plt5-small": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 50048}, 
"TheBloke/stable-vicuna-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "hf-internal-testing/tiny-random-GPTNeoXForTokenClassification": {"architectures": ["GPTNeoXForTokenClassification"], "hidden_size": 32, "intermediate_size": 37, "num_attention_heads": 4, "num_hidden_layers": 5, "vocab_size": 1024}, "TheBloke/WizardLM-7B-V1-0-Uncensored-SuperHOT-8K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "HuggingFaceH4/tiny-random-LlamaForSequenceClassification": {"architectures": ["LlamaForSequenceClassification"], "hidden_size": 16, "intermediate_size": 64, "num_attention_heads": 4, "num_hidden_layers": 2, "vocab_size": 32000}, "hf-internal-testing/tiny-random-GPTNeoXModel": {"architectures": ["GPTNeoXModel"], "hidden_size": 32, "intermediate_size": 37, "num_attention_heads": 4, "num_hidden_layers": 5, "vocab_size": 1024}, "IlyaGusev/rut5_base_headline_gen_telegram": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 30000}, "lgaalves/gpt2_camel_physics-platypus": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "lightonai/alfred-40b-0723": {"architectures": ["RWForCausalLM"], "hidden_size": 8192, "n_head": 128, "n_layer": 60, "vocab_size": 65024}, "KETI-AIR/ke-t5-small": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 64128}, "ibm/regen-disambiguation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "vihangd/smartplat-3b-v3": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "TheBloke/OpenBuddy-Llama2-13B-v11.1-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 37632}, "BlinksFly/Harry_Potter-Ai": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "conceptofmind/Yarn-Llama-2-7b-128k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "liujch1998/vera": {"architectures": ["T5EncoderModel"], "d_ff": 10240, "d_model": 4096, "num_heads": 64, "num_layers": 24, "vocab_size": 32128}, "kaist-ai/CoT-T5-11B": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 10240, "d_model": 4096, "num_heads": 64, "num_layers": 24, "vocab_size": 32128}, "lintang/t5-v1_1-base-flan": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "sentence-transformers/sentence-t5-xxl": {"architectures": ["T5EncoderModel"], "d_ff": 65536, "d_model": 1024, "num_heads": 128, "num_layers": 24, "vocab_size": 32128}, "TheBloke/vicuna-7B-v1.5-16K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "retrieva-jp/t5-large-long": {"architectures": 
["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "retrieva-jp/t5-base-long": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "upstage/SOLAR-0-70b-8bit": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "jerteh/gpt2-vrabac": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 49152}, "Parth/boolean": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "hf-internal-testing/tiny-random-GPTBigCodeForSequenceClassification": {"architectures": ["GPTBigCodeForSequenceClassification"], "n_embd": 32, "n_head": 4, "n_inner": 37, "n_layer": 5, "vocab_size": 1024}, "hf-internal-testing/tiny-random-GPTBigCodeForTokenClassification": {"architectures": ["GPTBigCodeForTokenClassification"], "n_embd": 32, "n_head": 4, "n_inner": 37, "n_layer": 5, "vocab_size": 1024}, "megagonlabs/t5-base-japanese-web": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32000}, "MisguidedKerbal/DialoGPT-kerbalV3": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "praeclarum/cuneiform": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "uw-hai/polyjuice": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "reciprocate/tiny-llama": {"architectures": ["LlamaForCausalLM"], "hidden_size": 64, "intermediate_size": 64, "num_attention_heads": 1, "num_hidden_layers": 1, "vocab_size": 32000}, "luqh/ClinicalT5-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "stanford-crfm/celebrimbor-gpt2-medium-x81": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "TheBloke/CodeLlama-13B-Python-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "laituan245/molt5-large-smiles2caption": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "TurkuNLP/gpt3-finnish-8B": {"architectures": ["BloomModel"], "hidden_size": 4096, "n_head": 32, "n_layer": 32, "vocab_size": 131072}, "NeuML/t5-small-txtsql": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "malteos/bloom-6b4-clp-german": {"hidden_size": 4096, "n_head": 32, "n_layer": 30, "vocab_size": 50304}, "GT4SD/multitask-text-and-chemistry-t5-base-augm": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "allenai/open-instruct-stanford-alpaca-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "CarperAI/randomwalks": {"architectures": 
["GPT2LMHeadModel"], "n_embd": 144, "n_head": 6, "n_inner": null, "n_layer": 6, "vocab_size": 23}, "unicamp-dl/mt5-13b-mmarco-100k": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 10240, "d_model": 4096, "num_heads": 64, "num_layers": 24, "vocab_size": 250112}, "lmqg/t5-small-squad-qg-ae": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32101}, "naltukhov/joke-generator-rus-t5": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "hf-internal-testing/tiny-random-UMT5Model": {"architectures": ["UMT5Model"], "d_ff": 37, "d_model": 32, "num_heads": 4, "num_layers": 5, "vocab_size": 256300}, "rentcarsAI/falcon-7b-codegenerator-qlora-merged": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "panggi/t5-base-indonesian-summarization-cased": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "hf-internal-testing/tiny-random-UMT5ForQuestionAnswering": {"architectures": ["UMT5ForQuestionAnswering"], "d_ff": 37, "d_model": 32, "num_heads": 4, "num_layers": 5, "vocab_size": 256300}, "UBC-NLP/AraT5-base": {"d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 110080}, "kmewhort/stable-diffusion-prompt-bolster": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 52000}, "TheBloke/Llama-2-13B-GGML": {}, "gaussalgo/T5-LM-Large-text2sql-spider": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "DAMO-NLP-MT/polylm-multialpaca-13b": {"architectures": ["GPT2LMHeadModel"], "n_embd": 5120, "n_head": 40, "n_inner": 20480, "n_layer": 40, "vocab_size": 256000}, "hf-internal-testing/tiny-random-UMT5ForSequenceClassification": {"architectures": ["UMT5ForSequenceClassification"], "d_ff": 37, "d_model": 32, "num_heads": 4, "num_layers": 5, "vocab_size": 256300}, "tinkoff-ai/ruDialoGPT-small": {"n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50261}, "indonesian-nlp/gpt2-medium-indonesian": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "Salesforce/mixqg-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "EleutherAI/pythia-1b-v0": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 8, "num_hidden_layers": 16, "vocab_size": 50304}, "NinedayWang/PolyCoder-2.7B": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50304}, "zanchat/falcon-1b": {"architectures": ["RWForCausalLM"], "hidden_size": 2048, "n_head": 32, "n_layer": 24, "vocab_size": 50304}, "Goodnoway/DialoGPT-nerbalV2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "crumb/llama2-7b-shard-bf16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "sagawa/ReactionT5-retrosynthesis": {"architectures": ["T5ForConditionalGeneration"], 
"d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 268}, "DKYoon/mt5-large-lm-adapt": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 250112}, "lintang/t5-v1_1-xl-flan": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "castorini/monot5-large-msmarco-10k": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "Ichsan2895/Merak-7B-v3": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "stanford-crfm/caprica-gpt2-small-x81": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "vicgalle/gpt2-open-instruct-v1": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50260}, "philschmid/llama-2-7b-instruction-generator": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "aubmindlab/aragpt2-large": {"architectures": ["GPT2LMHeadModel"], "intermediate_size": 5120, "n_embd": 1280, "n_head": 20, "n_inner": null, "n_layer": 36, "vocab_size": 64000}, "NonzeroCornet34/DialoGPT-small-philbot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "Trelis/Llama-2-7b-chat-hf-sharded-bf16-5GB": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "deep-learning-analytics/wikihow-t5-small": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "JDBN/t5-base-fr-qg-fquad": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32102}, "stanford-crfm/durin-gpt2-medium-x343": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "abjbpi/Dwight_Schrute": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "Spico/Humback-Myx": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "T-Systems-onsite/mt5-small-sum-de-en-v2": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250100}, "kaiyuy/leandojo-lean3-tacgen-byt5-small": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3584, "d_model": 1472, "num_heads": 6, "num_layers": 12, "vocab_size": 384}, "pinkmanlove/llama-33b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "lintang/t5-v1_1-large-flan": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "Naseej/noon-7b": {"architectures": ["BloomForCausalLM"], "hidden_size": 4096, "n_head": 32, "n_inner": null, "n_layer": 30, 
"vocab_size": 250880}, "chizhikchi/sci-five-radsum23": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32101}, "impyadav/GPT2-FineTuned-Hinglish-Song-Generation": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "elinas/llama-13b-hf-transformers-4.29": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/GodziLLa2-70B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "TheBloke/Llama-2-70B-OASST-1-200-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32016}, "jacobmorrison/tk-instruct-base-lora-experiments": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-3b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "ingen51/DialoGPT-medium-GPT4": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "cointegrated/rut5-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 30000}, "PocketDoc/Dans-CreepingSenseOfDoom": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "tsmatz/mt5_summarize_japanese": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250112}, "domenicrosati/QA2D-t5-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "gorkemgoknar/gpt2chatbotenglish": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_layer": 12, "vocab_size": 50262}, "DeliveryBoy/DiabloGPT-medium-Kurisu": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "philschmid/instruct-igel-001": {"architectures": ["BloomForCausalLM"], "hidden_size": 4096, "n_head": 32, "n_layer": 30, "vocab_size": 50304}, "xDAN2099/xDAN_13B_Zh_Base": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 60928}, "codeparrot/codeparrot-small": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 32768}, "paust/pko-t5-small": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 50358}, "flozi00/Llama-2-13b-german-assistant-v4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 37632}, "doc2query/msmarco-t5-base-v1": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, 
"microsoft/DialogRPT-depth": {"architectures": ["GPT2ForSequenceClassification"], "n_embd": 1024, "n_head": 16, "n_layer": 24, "vocab_size": 50257}, "nomic-ai/gpt4all-13b-snoozy": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "NousResearch/Yarn-Llama-2-13b-64k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "mrm8488/t5-base-e2e-question-generation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "postbot/gpt2-medium-emailgen": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "vanilladucky/Friends_chatting_bot_redefined": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "TheBloke/LlongOrca-7B-16K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32003}, "mutamuta/DialoGPT-spongebob-small": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "Ar4ikov/gpt2-medium-650k-stable-diffusion-prompt-generator": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "TheBloke/HermesLimaRP-L2-7B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "clibrain/Llama-2-7b-ft-instruct-es-gptq-4bit": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/Yarn-Llama-2-7B-128K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "lmqg/mt5-small-jaquad-qg-ae": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "allenai/tk-instruct-base-def-pos": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "davidkim205/komt-Llama-2-7b-chat-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "tangy0/llama-2-7b-dtlpy_v0.4chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TigerResearch/tigerbot-70b-base": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 60928}, "hadifar/eventextraction": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "TintinMeimei/NousResearch-Llama-2-7b-chat-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, 
"TheBloke/airoboros-l2-13b-gpt4-2.0-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Nekochu/Llama-2-13B-fp16-french": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "minhtoan/t5-translation-vietnamese-nom": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 300, "num_heads": 8, "num_layers": 6, "vocab_size": 30100}, "BELLE-2/BELLE-Llama2-13B-chat-0.4M": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "bigscience/T0": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 10240, "d_model": 4096, "num_heads": 64, "num_layers": 24, "vocab_size": 32128}, "andreaskoepf/pythia-1.4b-gpt4all-pretrain": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50288}, "Salesforce/codet5-base-codexglue-clone": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "Chae/scottbot_med": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "TheBloke/LLaMA-7b-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "sagard21/python-code-explainer": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32100}, "stanfordnlp/SteamSHP-flan-t5-xl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "MarinHinawa/DialoGPT-medium-Ene": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "aiautomationlab/german-news-title-gen-mt5": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "TheBloke/vicuna-13B-1.1-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/Chronos-Hermes-13B-SuperHOT-8K-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "microsoft/DialogRPT-human-vs-machine": {"architectures": ["GPT2ForSequenceClassification"], "n_embd": 1024, "n_head": 16, "n_layer": 24, "vocab_size": 50257}, "uer/gpt2-distil-chinese-cluecorpussmall": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 6, "vocab_size": 21128}, "h2oai/h2ogpt-gm-oasst1-en-2048-falcon-40b-v1": {"architectures": ["RWForCausalLM"], "hidden_size": 8192, "n_head": 128, "n_layer": 60, "vocab_size": 65024}, "davidkim205/komt-Llama-2-13b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "ibm/qcpg-questions": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, 
"num_layers": 12, "vocab_size": 32155}, "gavin124/gpt2-finetuned-cnn-summarization-v2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50260}, "hogru/MolReactGen-GuacaMol-Molecules": {"architectures": ["GPT2LMHeadModel"], "n_embd": 144, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 93}, "stanford-crfm/darkmatter-gpt2-small-x343": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "conceptofmind/Yarn-Llama-2-7b-64k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Radicalkiddo/DialoGPT-small-Radical": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "Ninja5000/DialoGPT-medium-HarryPotter": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "theblackcat102/alpaca-title-generator-mt0-large": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 250112}, "transfaeries/Twilight-Sparkle-GPT": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "TheBloke/Vigogne-2-7B-Instruct-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "markofhope/DialoGPT-medium-HarringtonBot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "seeksery/DialoGPT-calig3": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "beomi/kcgpt2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 55000}, "vilm/vietcuna-3b": {"architectures": ["BloomForCausalLM"], "hidden_size": 2560, "n_head": 32, "n_inner": null, "n_layer": 30, "vocab_size": 250880}, "IDEA-CCNL/Randeng-T5-784M": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32596}, "vwxyzjn/starcoderbase-triviaqa": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 40, "vocab_size": 49152}, "IDEA-CCNL/Wenzhong2.0-GPT2-3.5B-chinese": {"architectures": ["GPT2LMHeadModel"], "n_embd": 3072, "n_head": 32, "n_inner": 12288, "n_layer": 30, "vocab_size": 50304}, "TheBloke/Llama-2-7b-Chat-GGUF": {}, "MingZhong/unieval-dialog": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32100}, "OpenAssistant/falcon-40b-megacode2-oasst": {"architectures": ["FalconForCausalLM"], "hidden_size": 8192, "num_attention_heads": 128, "num_hidden_layers": 60, "vocab_size": 65152}, "axiong/PMC_LLaMA_13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "codeparrot/codeparrot-small-multi": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": 3072, "n_layer": 12, "vocab_size": 32768}, "EleutherAI/pythia-6.9b-deduped-v0": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, 
"intermediate_size": 16384, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "Riiid/sheep-duck-llama-2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "alibaba-pai/pai-bloom-1b1-text2prompt-sd": {"architectures": ["BloomForCausalLM"], "hidden_size": 1536, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 250880}, "TheBloke/Chronos-Beluga-v2-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "malmarjeh/t5-arabic-text-summarization": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 110080}, "GarfExit/DialogGPT-medium-707": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "audreycl/DialoGPT-RPF": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "florentiino/DialoGPT-small-harrypotter": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "JazzyLucas/DialoGPT-small-TonyStark": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "marblyso/DialoGPT-medium-marina": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "polandball/GPT-Polen": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "GarrisonBot/DialoGPT-medium-herbertgarrison": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "XuYipei/kw-cutegpt-13b-ift": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 49954}, "TheBloke/Pygmalion-7B-SuperHOT-8K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "timothykim04/DialoGPT-medium-harrypotter": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "allegro/plt5-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 50048}, "lengoctuong/gpt2-finetuned-wikitext2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "PyaeSoneK/Fine_Tuned_Pythia_smallest_140_legal": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 1024, "intermediate_size": 4096, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50304}, "psyche/KoT5-paraphrase-generation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "microsoft/DialogRPT-width": {"architectures": ["GPT2ForSequenceClassification"], "n_embd": 1024, "n_head": 16, "n_layer": 24, "vocab_size": 50257}, "Dahoas/pythia-1B-static-sft": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 16, "num_hidden_layers": 24, 
"vocab_size": 50304}, "jerteh/gpt2-orao": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_inner": null, "n_layer": 36, "vocab_size": 49152}, "TheBloke/LosslessMegaCoder-Llama2-13B-Mini-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32007}, "Ngao/DialoGPT-small-ngao": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "4i-ai/Llama-2-7b-alpaca-es": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "asifhugs/open_llama_7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "RajuKandasamy/tamillama_tiny_30m": {"architectures": ["LlamaForCausalLM"], "hidden_size": 256, "intermediate_size": 786, "num_attention_heads": 8, "num_hidden_layers": 16, "vocab_size": 32000}, "stabilityai/StableBeluga1-Delta": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "Linly-AI/Chinese-LLaMA-2-7B-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 40076}, "flax-community/gpt2-base-thai": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "shalomma/llama-7b-embeddings": {"architectures": ["LLaMAForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/llama2-7b-chat-codeCherryPop-qLoRA-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "KhanAdeeb/model-tony-stark": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "spy24/autonlp-UK-to-US-600416931": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "DKYoon/mt5-small-lm-adapt": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250112}, "TheBloke/Llama-2-70B-GGML": {}, "TheBloke/model_007-70B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "abhi-8/DialoGPT-medium-Joshua-twevy": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "camenduru/MiniGPT4-7B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "paripi/Malishka": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "SiberiaSoft/SiberianPersonaFred_large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 50364}, "Alred/t5-small-finetuned-summarization-cnn": {"architectures": ["T5ForConditionalGeneration"], 
"d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "Leomas/DialoGPT-medium-Leomas": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "TehVenom/Pygmalion-7b-Merged-Safetensors": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "marblyso/DialoGPT-medium-pearl": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "lmqg/mt5-small-dequad-qg-ae": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "TheBloke/WizardLM-Uncensored-Falcon-40B-GPTQ": {"architectures": ["RWForCausalLM"], "hidden_size": 8192, "n_head": 128, "n_layer": 60, "vocab_size": 65025}, "NlpHUST/t5-small-vi-summarization": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250112}, "Elucia/Diluc_Bot_1.3": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "h2oai/h2ogpt-16k-codellama-34b-instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "microsoft/CodeGPT-small-java": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_layer": 12, "vocab_size": 52000}, "Starry/COUNTNARC": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "OpenMEDLab/PULSE-7bv5": {"architectures": ["BloomForCausalLM"], "hidden_size": 4096, "n_head": 32, "n_inner": null, "n_layer": 30, "vocab_size": 250880}, "marblyso/DialoGPT-medium-aubrey": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "Ashypaws/DialoGPT-medium-Ashybot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "YTTD/DialoGPT-medium-sou": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "marblyso/DialoGPT-medium-hero": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "Trelis/Llama-2-7b-chat-hf-function-calling-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "NousResearch/CodeLlama-7b-hf-flash": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32016}, "TheBloke/CodeLlama-34B-Python-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "musabgultekin/functionary-7b-v1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "amasand/gpt2-imdb-pos-ppo": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "bigscience/bloomz-7b1-p3": {"architectures": ["BloomForCausalLM"], 
"n_inner": null, "n_layer": 30, "num_attention_heads": 32, "vocab_size": 250880}, "rirv938/wizard-vicuna-13b-uncensored-awq-4bit-g128": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "marblyso/DialoGPT-medium-marblesbagel": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "vilm/vietcuna-7b-v3": {"architectures": ["BloomForCausalLM"], "hidden_size": 4096, "n_head": 32, "n_inner": null, "n_layer": 30, "vocab_size": 250880}, "stas/t5-very-small-random": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 256, "d_model": 64, "num_heads": 4, "num_layers": 8, "vocab_size": 32128}, "KeLiu/Title-Gen": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "vampiregirl/DialoGPT-medium-lennoxram": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "sharpbai/Llama-2-7b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "sam2ai/openllama_odia_3b_base": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "lmqg/mt5-small-esquad-qg-ae": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "stanfordnlp/SteamSHP-flan-t5-large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "allenai/tulu-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "JNDankwah/DialoGPT-small-ThorCB": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "lmqg/mt5-small-ruquad-qg": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "lmqg/mt5-small-ruquad-qg-ae": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "Dinocroth/DialoGPT-medium-Trevor-PhilipsV2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "Speedemon/jake-peralta-ai": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "chanind/frame-semantic-transformer-small": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "stanford-crfm/music-medium-800k": {"vocab_size": 55028, "n_embd": 1024, "n_layer": 24, "n_head": 16, "n_inner": null, "architectures": null}, "h2oai/h2ogpt-16k-codellama-7b-instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32016}, "TheBloke/Pygmalion-2-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, 
"huggingface-course/codeparrot-ds": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50000}, "KakoSi/AcciGPT-smol": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "lmqg/mt5-small-itquad-qg-ae": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "EggsInAJar/DialoGPT-small-MerrickBot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "razent/SciFive-large-Pubmed": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "armandnlp/gpt2-TOD_finetuned_SGD": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50271}, "RuterNorway/Llama-2-13b-chat-norwegian": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "AIDC-ai-business/Marcoroni-7B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "deep-learning-analytics/GrammarCorrector": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "redrussianarmy/gpt2-turkish-cased": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "lmqg/mt5-small-frquad-qg-ae": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "psyche/KoT5-summarization": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "loitran/DialoGPT-medium-peppapig": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "openchat/openchat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "saikatc/NatGen": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "Coderhuynin/DialoGPT-large-TonyStark": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "declare-lab/flan-sharegpt-xl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "TheBloke/Chronos-Hermes-13B-SuperHOT-8K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "usvsnsp/pythia-6.9b-rm-full-hh-rlhf": {"architectures": ["GPTNeoXForSequenceClassification"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50277}, "yujiepan/llama-2-tiny-3layers-random": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8, "intermediate_size": 32, "num_attention_heads": 2, "num_hidden_layers": 3, "vocab_size": 32000}, "allenai/unifiedqa-v2-t5-3b-1363200": {"architectures": 
["T5ForConditionalGeneration"], "d_ff": 16384, "d_model": 1024, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "gsarti/it5-small": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 32103}, "simple2312/DialoGPT-Ellie": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "kashif/llama-7b_stack-exchange_RM_peft-adapter-merged": {"architectures": ["LlamaForSequenceClassification"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "larryvrh/mt5-translation-ja_zh": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 250112}, "j5ng/et5-typos-corrector": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 45100}, "vilsonrodrigues/falcon-7b-sharded": {"architectures": ["FalconForCausalLM"], "hidden_size": 4544, "num_attention_heads": 71, "num_hidden_layers": 32, "vocab_size": 65024}, "felinecity/ScaraBot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "persiannlp/mt5-base-parsinlu-translation_en_fa": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "Jonesy/HomersNightOut": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "conceptofmind/LLongMA-2-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/LoKuS-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "shibing624/mengzi-t5-base-chinese-correction": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "Lamia/DialoGPT-small-Sundrop": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "Blizzchor/DialoGPT-medium-gamora": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "jlsalty9999/DialoGPT-medium-Riddle": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "uer/gpt2-chinese-lyric": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 21128}, "LMFlow/Full-Robin-7b-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "llm-book/t5-base-long-livedoor-news-corpus": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "nuggster/DialoGPT-small-ianbot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "Tristan/gpt2_reward_summarization": {"architectures": ["GPT2ForSequenceClassification"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, 
"MysteriousAmazon/DialoGPT-medium-freddy": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "wdidfau/Pygmalion-13b-Landmark-Attention-Merged": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "kaiyuy/leandojo-lean3-retriever-byt5-small": {"architectures": ["T5EncoderModel"], "d_ff": 3584, "d_model": 1472, "num_heads": 6, "num_layers": 12, "vocab_size": 384}, "kz919/ntk_scaled_open_llama_3b_32k": {"architectures": ["NTKScaledLlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "abhi-8/DialoGPT-medium-Rick": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "polymath707/llama-2-13b-miniguanaco": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Langboat/bloom-389m-zh": {"architectures": ["BloomForCausalLM"], "hidden_size": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 42437}, "Techcs002/DialoGPT-medium-AboTalkTest": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "AIDC-ai-business/Marcoroni-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "ybelkada/t5-3b-sharded": {"architectures": ["T5WithLMHeadModel"], "d_ff": 16384, "d_model": 1024, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "benjamin/gerpt2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "abhi-8/DialoGPT-medium-Michael": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "cahya/gpt2-small-indonesian-522M": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "marianna13/flan-t5-base-summarization": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "Lakoc/fisher_dec_6_layers": {"architectures": ["GPT2Model"], "n_embd": 512, "n_head": 4, "n_inner": null, "n_layer": 6, "vocab_size": 5000}, "simple2312/DialoGPT-nayeon": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "sjrhuschlee/flan-t5-base-squad2": {"architectures": ["T5ForQuestionAnswering"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "eqhylxx/full-vicuna-160m": {"architectures": ["LlamaForCausalLM"], "hidden_size": 768, "intermediate_size": 3072, "num_attention_heads": 12, "num_hidden_layers": 12, "vocab_size": 32000}, "Ashypaws/DialoGPT-medium-Kitaibot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "TheBloke/Wizard-Vicuna-7B-Uncensored-SuperHOT-8K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "NHStudios/DialoGPT-small-jake": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, 
"n_inner": null, "n_layer": 12, "vocab_size": 50257}, "ZipperXYZ/DialoGPT-medium-TheWorldMachineExpressive2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "IIC/mt5-spanish-mlsum": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "mattymchen/gense-base-plus": {"architectures": ["T5Model"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32101}, "DAMO-NLP/SeqGPT-560M": {"architectures": ["BloomForCausalLM"], "hidden_size": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 250880}, "AMHR/T5-for-Adversarial-Paraphrasing": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "Blizzchor/DialoGPT-medium-HarryBotter": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "brianveebee/DialoGPT-medium-bender": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "YTTD/DialoGPT-medium-keiji": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "anon8231489123/gpt4-x-alpaca-13b-native-4bit-128g": {"architectures": ["LLaMAForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "Pcik/DialoGPT-medium-Dante": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "mHossain/bangla-para-v3-500000": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "TheBloke/Llama-2-7B-GGUF": {}, "diwas7777/HarryBot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "seduerr/t5-small-pytorch": {"architectures": ["T5WithLMHeadModel"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "felinecity/DioloGPT-small-KaeyaBot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "lmsys/vicuna-7b-delta-v0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "inu-ai/dolly-japanese-gpt-1b": {"architectures": ["GPT2LMHeadModel"], "n_embd": 2048, "n_head": 16, "n_inner": 8192, "n_layer": 24, "vocab_size": 44928}, "TheBloke/Vicuna-33B-1-3-SuperHOT-8K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "Dahoas/pythia-125M-static-sft": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 768, "intermediate_size": 3072, "num_attention_heads": 12, "num_hidden_layers": 12, "vocab_size": 50304}, "Blizzchor/DialoGPT-medium-QuillLord": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "beomi/KoAlpaca-llama-1-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "patrickNLP/Graphix-3B": {"architectures": ["Model"], "d_ff": 
16384, "d_model": 1024, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "Starry/HELLORUKAS": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "keans/DialoGPT-small-highjacker": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "DoesNoPro/DialoGPT-small-RaidenG": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "ThatSkyFox/DialoGPT-medium-whatsapp": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "EnterNameBros/Senko-san-medium-scl": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "TheBloke/CodeLlama-7B-Python-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "mrm8488/t5-small-finetuned-quora-for-paraphrasing": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "NonzeroCornet34/DialoGPT-small-hansolo": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "d0rj/rut5-base-summ": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "CAIRE-CedarsSinai/falcon-7b-qlora-chat-support-bot-faq-alzkb-version-2": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "el-profesor/code_t5": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "Soumyajit1008/DialoGPT-small-harryPotterssen": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "malteos/bloom-1b5-clp-german": {"architectures": ["BloomForCausalLM"], "hidden_size": 2048, "n_head": 16, "n_layer": 24, "vocab_size": 50304}, "yesuns/DialoGPT-small-yesun": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "Stevo/DiagloGPT-medium-spamton": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "Vision-CAIR/vicuna-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "TheBloke/airoboros-33B-gpt4-1-4-SuperHOT-8K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "tanishqvashisht/DialoGPT-small-Joshua": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "TurkuNLP/gpt3-finnish-3B": {"architectures": ["BloomModel"], "hidden_size": 2560, "n_head": 32, "n_layer": 32, "vocab_size": 131072}, "lizhuang144/flan-t5-base-VG-factual-sg": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "TheBloke/Athena-v1-GGUF": {}, "xxyyy123/test-28b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 
13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "pastlecry/DialoGPT-small-harrypotter": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "DiscordRequestsAPI/NurDeeps-Bot-2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "channashi/DialoGPT-small-rocket": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "ritog/bangla-gpt2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "TheBloke/Redmond-Puffin-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32032}, "Shakerlicious/DialoGPT-small-raquelbot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "lmqg/mt5-base-jaquad-qag": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250101}, "anon8231489123/vicuna-13b-GPTQ-4bit-128g": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "jacobmorrison/tk-instruct-small-lora-experiments": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 32100}, "TheBloke/open-llama-13b-open-instruct-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "cedpsam/chatbot_fr": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "Photolens/llama-2-13b-langchain-chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "avinashshrangee/DialoGPT-small-Ricky": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "YeungNLP/firefly-llama2-7b-pretrain": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 55296}, "efederici/it5-efficient-small-fanpage": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 32, "vocab_size": 32100}, "saikiranmaddukuri/chat_to_sql0.17": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "TheBloke/Llama2-28B-Air03-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 7296, "intermediate_size": 22016, "num_attention_heads": 57, "num_hidden_layers": 40, "vocab_size": 32000}, "crodri/falcon_aguila_meteocat": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 50257}, "Narsil/starcoder-gptq": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 40, "vocab_size": 49152}, "CobraMamba/mamba-gpt-3b-v4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, 
"num_hidden_layers": 26, "vocab_size": 32000}, "YeungNLP/firefly-llama2-13b-pretrain": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 55296}, "TheBloke/airoboros-l2-7b-gpt4-1.4.1-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "DecafNosebleed/DialoGPT-small-ScaraBot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "yazdipour/text-to-sparql-t5-small-qald9": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "ClassCat/gpt2-base-french": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50000}, "TheBloke/airoboros-33B-GPT4-m2.0-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "quantumaikr/KoreanLM-1.5b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 1024, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "toyfreak/DialoGPT-small-addy": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "spursyy/mT5_multilingual_XLSum_rust": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "lengoctuong/gpt2-finetuned-chatbot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50261}, "KnutJaegersberg/megatron-gpt2-345m-evol_instruct_v2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": 4096, "n_layer": 24, "vocab_size": 50257}, "zkdtckk/falcon40-instruct-qlora-tta-v1": {"architectures": ["RWForCausalLM"], "hidden_size": 8192, "n_head": 128, "n_layer": 60, "vocab_size": 65024}, "TheBloke/Nous-Hermes-13B-SuperHOT-8K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "TheBloke/Nous-Hermes-Llama2-GGML": {}, "IkariDev/Athena-v1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/WizardLM-7B-V1.0-Uncensored-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/llama-2-13B-German-Assistant-v2-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "cahya/gpt2-large-indonesian-522M": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_inner": null, "n_layer": 36, "vocab_size": 50257}, "VietAI/envit5-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 50048}, "kam1run/DialoGPT-large-kami": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_inner": null, "n_layer": 36, "vocab_size": 50257}, "uukuguy/speechless-codellama-dolphin-orca-platypus-13b": 
{"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "aluserhuggingface/DialoGPT-small-harrypotter": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "TheBloke/gpt4-x-vicuna-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "Pcik/DialoGPT-medium-Ruby": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "TheBloke/LLaMA-30b-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "sdadas/polish-gpt2-small": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": 3072, "n_layer": 12, "vocab_size": 51200}, "ahxt/llama2_xs_460M_experimental": {"architectures": ["LlamaForCausalLM"], "hidden_size": 1024, "intermediate_size": 4096, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50304}, "lemon234071/t5-base-Chinese": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 35364}, "4bit/pyg-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "squarelike/Gugugo-koen-1.3B-V1.0": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 30080}, "lvwerra/t5-imdb": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "psymon/KoLlama2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Maxwere/DiabloGPT-medium-maxbot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "nafisehNik/mt5-persian-summary": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250112}, "nams/nams-bot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "lmqg/mt5-small-esquad-qag": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "mattbit/gpt2wb": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "ghazikhanihamed/TooT-PLM-P2S": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 48, "vocab_size": 144}, "lonewanderer27/YoshinoriBot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "VinVanGogh/Llama-2-7b-Aixiety-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "GroNLP/gpt2-medium-italian-embeddings": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, 
"vocab_size": 30001}, "IDEA-CCNL/Randeng-T5-784M-QA-Chinese": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32601}, "kingbri/airo-llongma-2-13B-16k-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "lvwerra/starcoderbase-gsm8k": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 40, "vocab_size": 49152}, "mofawzy/gpt2-arabic-sentence-generator": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50000}, "lmqg/mt5-small-itquad-qag": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "sharpbai/Llama-2-13b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "lomahony/eleuther-pythia70m-hh-sft": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 512, "intermediate_size": 2048, "num_attention_heads": 8, "num_hidden_layers": 6, "vocab_size": 50304}, "Salesforce/codet5-large-ntp-py": {"architectures": ["T5WithLMHeadModel"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32100}, "TheBloke/Samantha-1.11-CodeLlama-34B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "Lenza/DialoGPT-medium-Kobayashi": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "davidviriato/DialoGPT-small-joshua": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "Shakerlicious/DialoGPT-small-descentbot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "TurkuNLP/gpt3-finnish-xl": {"architectures": ["BloomModel"], "hidden_size": 2064, "n_head": 24, "n_layer": 24, "vocab_size": 131072}, "TheBloke/starcoderplus-GPTQ": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 40, "vocab_size": 49152}, "TheBloke/Airoboros-L2-7B-2.1-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Mikivis/gpt2-large-lora-sft1": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_inner": null, "n_layer": 36, "vocab_size": 50257}, "gagan3012/k2t": {"architectures": ["T5WithLMHeadModel"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "MerlynMind/merlyn-education-safety": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 36, "vocab_size": 50688}, "explosion-testing/refined-web-model-test": {"architectures": ["RWForCausalLM"], "hidden_size": 32, "n_head": 4, "n_layer": 5, "vocab_size": 1024}, "explosion-testing/falcon-no-parallel-attn-test": {"architectures": ["RWForCausalLM"], "hidden_size": 32, "n_head": 4, "n_layer": 5, "vocab_size": 1024}, "Marxav/frpron": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, 
"vocab_size": 268}, "AmbricJohnson5888/claura": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "TheBloke/CodeLlama-7B-Instruct-GGUF": {}, "felinecity/DioloGPT-small-LisaBot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "lmqg/mt5-small-frquad-qag": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "RobiKenobi/DialoGPT-medium-pete": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "TheBloke/Vicuna-13B-CoT-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/airoboros-33B-gpt4-1.4-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "SEBIS/code_trans_t5_base_code_documentation_generation_java_multitask": {"architectures": ["T5Model"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "retrieva-jp/t5-base-medium": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "elinas/chronos-13b-4bit": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "abhinavkulkarni/meta-llama-Llama-2-7b-chat-hf-w4-g128-awq": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/Luban-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "uer/t5-base-chinese-cluecorpussmall": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 21228}, "ClueAI/ChatYuan-large-v1": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "helenai/gpt2-ov": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "fireballoon/baichuan-vicuna-chinese-7b-gptq": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 64000}, "Trelis/Llama-2-7b-chat-hf-hosted-inference-8bit": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Starry/KARENTRIES": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "umm-maybe/SportsFanGhost": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "TheBloke/airoboros-13B-gpt4-1.4-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TabbyML/StarCoder-1B": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 2048, 
"n_head": 16, "n_inner": 8192, "n_layer": 24, "vocab_size": 49152}, "TFLai/Nova-13B-50-step": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "Mikivis/gpt2-large-lora-sft2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_inner": null, "n_layer": 36, "vocab_size": 50257}, "CHIH-HUNG/llama-2-13b-Open_Platypus_and_ccp_2.6w-3_epoch": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "BlackSamorez/falcon-40b-tiny-testing": {"architectures": ["RWForCausalLM"], "hidden_size": 256, "n_head": 4, "n_layer": 2, "vocab_size": 65024}, "Rocketknight1/tiny-random-falcon-40b": {"architectures": ["FalconForCausalLM"], "hidden_size": 1024, "num_attention_heads": 128, "num_hidden_layers": 2, "vocab_size": 65024}, "TheBloke/WizardLM-1.0-Uncensored-Llama2-13B-GGML": {}, "TheBloke/Zarafusionex-1.1-L2-7B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "lmqg/t5-large-squad-qg": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32101}, "casperhansen/falcon-7b-awq": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "Azure99/blossom-v2-llama2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "DeepESP/gpt2-spanish-medium": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "StudentLLM/Alpagasus-2-13b-QLoRA-merged": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Weni/WeniGPT": {"architectures": ["RWForCausalLM"], "hidden_size": 8192, "n_head": 128, "n_layer": 60, "vocab_size": 65024}, "niicovila/llama-v2-tst-law": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Undi95/CreativityEngine": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "DB13067/Peterbot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "EleutherAI/pythia-12b-v0": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 36, "vocab_size": 50688}, "allenai/tulu-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "TheBloke/airoboros-l2-13b-gpt4-m2.0-GGML": {}, "TheBloke/Griffin-3B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "imthanhlv/vigpt2medium": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "m3hrdadfi/gpt2-persian-qa": {"architectures": 
["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50000}, "TheBloke/MythoMax-L2-Kimiko-v2-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "CHIH-HUNG/llama-2-13b-FINETUNE1_17w-r16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "ppn/DialoGPT-small-harrypotter": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "lmqg/mt5-base-ruquad-qag": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250101}, "TheBloke/Firefly-Llama2-13B-v1.2-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "persiannlp/mt5-large-parsinlu-opus-translation_fa_en": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 250112}, "simple2312/DialoGPT-Twice": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "declare-lab/flan-alpaca-xxl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 10240, "d_model": 4096, "num_heads": 64, "num_layers": 24, "vocab_size": 32128}, "ChanceFocus/finma-7b-nlp": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "osunlp/attrscore-flan-t5-xl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "likenneth/honest_llama2_chat_7B": {"architectures": ["LLaMAForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Hugherinit/hi": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 32119}, "vaibhav9/GPT2-qa": {"architectures": ["GPT2ModelForQuestionAnswering"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "st3rl4nce/t5-small-finetuned-pubmed": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "uonlp/okapi-ro-llama": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "ThomasNLG/t5-weighter_cnndm-en": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32100}, "google/t5-11b-ssm-tqa": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 65536, "d_model": 1024, "num_heads": 128, "num_layers": 24, "vocab_size": 32128}, "lizhuang144/flan-t5-small-VG-factual-sg": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 32128}, "hyunjae/skt-kogpt2-kullm-v2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 51200}, "Mirage-Studio/llama-gaan-2-7b-chat-hf-dutch": {"architectures": ["LlamaForCausalLM"], "hidden_size": 
4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/LosslessMegaCoder-Llama2-7B-Mini-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32007}, "lmqg/t5-small-squad-qg": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32101}, "castorini/doc2query-t5-large-msmarco": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "TheBloke/manticore-13b-chat-pyg-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "22h/open-cabrita3b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 52000}, "alzoubi36/priva_t5-small": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "TheBloke/vicuna-7B-v0-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "TheBloke/falcon-7b-instruct-GGML": {}, "Rozi05/QuoteVibes_Model_Trained": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "Tidum/DialoGPT-large-Michael": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "valhalla/t5-small-qg-prepend": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32102}, "lmqg/t5-large-squad-qag": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32101}, "abhiramtirumala/DialoGPT-sarcastic": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "mindrage/Manticore-13B-Chat-Pyg-Guanaco-GGML": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Salesforce/dialogstudio-t5-base-v1.0": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32103}, "allenai/unifiedqa-v2-t5-base-1363200": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "kleinay/qanom-seq2seq-model-joint": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32101}, "puugz/DialoGPT-small-spiderman": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "UrukHan/t5-russian-summarization": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, 
"HuggingFaceH4/tiny-random-LlamaForSeqClass": {"architectures": ["LlamaForSequenceClassification"], "hidden_size": 16, "intermediate_size": 64, "num_attention_heads": 4, "num_hidden_layers": 2, "vocab_size": 32000}, "JosephusCheung/Qwen-LLaMAfied-7B-Chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 151936}, "Abzu/orca-mini-v3-70b-gptq-q4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "wnic00/t5-small-finetune-bilingual-summarization": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32100}, "ChukSamuels/DialoGPT-small-Dr.FauciBot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "macavaney/doc2query-t5-base-msmarco": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "nlp-waseda/comet-t5-base-japanese": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32000}, "stjiris/t5-portuguese-legal-summarization": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "Icaruas/V2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "imxly/t5-pegasus": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 50000}, "stefan-it/german-gpt2-larger": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50265}, "noahkim/KoT5_news_summarization": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 50358}, "hoskinson-center/proofGPT-v0.1": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50304}, "TheBloke/WizardMath-7B-V1.0-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "FieldSu/distil_student_24": {"architectures": ["RWForCausalLM"], "hidden_size": 1136, "n_head": 71, "n_layer": 8, "vocab_size": 65024}, "shyamsn97/Mario-GPT2-700-context-length": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 6, "vocab_size": 50257}, "dgnk007/eagle": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "sharpbai/Llama-2-7b-chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "jackyv/DialoGPT-small-pinocchio": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "felinecity/DioloGPT-small-KaeyaBot2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "toyfreak/DialoGPT-small-shy": 
{"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "chavinlo/alpaca-13b": {"architectures": ["LLaMAForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "TehVenom/Pygmalion-7b-4bit-GPTQ-Safetensors": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "georgesung/open_llama_7b_qlora_uncensored": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "ostorc/rick-sanchez-chatbot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "polymath707/llama-2-7b-miniguanaco": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "KBlueLeaf/guanaco-7b-leh-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/Chronos-Hermes-13B-v2-GGML": {}, "approach0/mathy-vicuna-13B-FFT-phase2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "gorilla-llm/gorilla-7b-hf-delta-v1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "j5ng/kullm-5.8b-GPTQ-8bit": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 16, "num_hidden_layers": 28, "vocab_size": 30080}, "bitadin/checkpoint-230167": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "NekoPunchBBB/Llama2-13b-hf-Open-Platypus-QLoRA-att": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "mrm8488/t5-small-finetuned-wikiSQL": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "ozcangundes/T5-base-for-BioQA": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "AriakimTaiyo/gpt2-chat": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_layer": 36, "vocab_size": 50257}, "TheBloke/WizardLM-13B-V1.2-GGML": {}, "TheBloke/Trurl-2-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "ccore/opt-125-smart-test": {"architectures": ["OPTForCausalLM"], "hidden_size": 768, "num_attention_heads": 12, "num_hidden_layers": 12, "vocab_size": 50272}, "James-WYang/BigTranslate": {"architectures": ["LLaMAForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 53613}, "Trelis/Llama-2-7b-chat-hf-function-calling": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, 
"vocab_size": 32000}, "Wikidepia/IndoT5-base-paraphrase": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "csebuetnlp/mT5_m2m_crossSum": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "seanmor5/tiny-llama-test": {"architectures": ["LlamaForCausalLM"], "hidden_size": 32, "intermediate_size": 64, "num_attention_heads": 2, "num_hidden_layers": 2, "vocab_size": 32000}, "explosion-testing/refined-web-model-new-decoder-test": {"architectures": ["RWModel"], "hidden_size": 256, "n_head": 4, "n_layer": 5, "vocab_size": 1024}, "jondurbin/airocoder-34b-2.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "lmqg/t5-base-squad-qg": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32101}, "PORTULAN/gervasio-ptpt-base": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 8, "num_hidden_layers": 16, "vocab_size": 50304}, "UWB-AIR/barticzech-1.0": {"architectures": ["MBartForConditionalGeneration"], "d_model": 1024, "num_hidden_layers": 12, "vocab_size": 50265}, "TokenBender/llama2-7b-chat-hf-codeCherryPop-qLoRA-merged": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Voicelab/trurl-2-7b-8bit": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "TheBloke/Llama-2-13B-chat-GGUF": {}, "VietAI/vit5-base-vietnews-summarization": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 36096}, "lmqg/t5-small-squad-ae": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32101}, "retrieva-jp/t5-base-short": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "grammarly/coedit-xxl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 10240, "d_model": 4096, "num_heads": 64, "num_layers": 24, "vocab_size": 32100}, "heack/HeackMT5-ZhSum100k": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "TheBloke/LLaMA-13b-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TFMC/ELYZA-japanese-Llama-2-7b-instruct-GPTQ-4bit-64g": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "mxmax/Chinese_Chat_T5_Base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "elinas/chronos-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "kajdun/iubaris-13b-v3_GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, 
"intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "jmeadows17/MathT5-large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32104}, "TheBloke/Kimiko-7B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "nlp-waseda/gpt2-small-japanese": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 32000}, "rshrott/description-together-ai": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "noah-ai/mt5-base-question-generation-vi": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "AI4PD/ZymCTRL": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_inner": null, "n_layer": 36, "vocab_size": 458}, "bitadin/gpt-4-long-titles-v2-flan-t5-base-llm-12": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "shorthillsai/flan-t5-large-absa": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "TheBloke/CodeLlama-13B-oasst-sft-v10-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32032}, "prithivida/active_to_passive_styletransfer": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "lcw99/t5-large-korean-text-summary": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 50358}, "EleutherAI/pythia-1.4b-deduped-v0": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50304}, "sdadas/polish-gpt2-large": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_inner": 5120, "n_layer": 36, "vocab_size": 51200}, "uonlp/okapi-vi-bloom": {"architectures": ["BloomForCausalLM"], "hidden_size": 4096, "n_head": 32, "n_inner": null, "n_layer": 30, "vocab_size": 250880}, "cenkersisman/gpt2-turkish-900m": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "IlyaGusev/rugpt_large_turbo_instructed": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1536, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50258}, "Waterhorse/chessgpt-base-v1": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "jondurbin/spicyboros-13b-2.2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "echarlaix/t5-small-openvino": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "mrm8488/santacoder-finetuned-the-stack-bash-shell": {"architectures": ["GPT2LMHeadCustomModel"], "n_embd": 
2048, "n_head": 16, "n_inner": 8192, "n_layer": 24, "vocab_size": 49280}, "ckip-joint/bloom-3b-zh": {"architectures": ["BloomForCausalLM"], "hidden_size": 2560, "n_head": 32, "n_inner": null, "n_layer": 30, "vocab_size": 250880}, "Dawnstarhunter/DialoGPT-medium-Eveline": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "lmqg/t5-base-squad-ae": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32101}, "mesolitica/finetune-translation-t5-small-standard-bahasa-cased-v2": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32100}, "liuhaotian/LLaVA-7b-delta-v0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32004}, "yzhuang/autotree_llama_small_snxor_l1_2_vit": {"architectures": ["LlamaForAutoTree"], "hidden_size": 768, "intermediate_size": 3072, "num_attention_heads": 12, "num_hidden_layers": 6, "vocab_size": 32000}, "mrm8488/t5-base-finetuned-wikiSQL-sql-to-en": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "aleksickx/llama-7b-hf": {"architectures": ["LLaMAForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "yongzx/pythia-70m-sft-hh": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 512, "intermediate_size": 2048, "num_attention_heads": 8, "num_hidden_layers": 6, "vocab_size": 50304}, "sonoisa/t5-base-english-japanese": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "BramVanroy/Llama-2-13b-chat-dutch": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Bhuvana/t5-base-spellchecker": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "PlanTL-GOB-ES/gpt2-base-bne": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_layer": 12, "vocab_size": 50261}, "lmqg/mt5-small-jaquad-qg": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "Mirage-Studio/llama-gaan-2-7b-chat-hf-dutch-epoch-5": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "microsoft/DialogRPT-human-vs-rand": {"architectures": ["GPT2ForSequenceClassification"], "n_embd": 1024, "n_head": 16, "n_layer": 24, "vocab_size": 50257}, "aubmindlab/aragpt2-mega": {"architectures": ["GPT2LMHeadModel"], "intermediate_size": 6144, "n_embd": 1536, "n_head": 24, "n_inner": null, "n_layer": 48, "vocab_size": 64000}, "liyuesen/druggpt": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 53083}, "conceptofmind/Hermes-LLongMA-2-7b-8k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/scarlett-33B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, 
"intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "TheBloke/EverythingLM-13b-V2-16K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "sartmis1/starcoder-v2-openapi-special-tokens": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 40, "vocab_size": 49152}, "TheBloke/Phind-CodeLlama-34B-v1-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "TheBloke/Yarn-Llama-2-7B-64K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/Dolphin-Llama-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "kfkas/Legal-Llama-2-ko-7b-Chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 46336}, "Ichsan2895/Merak-7B-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "allenai/unifiedqa-v2-t5-base-1251000": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "sagawa/ReactionT5-product-prediction": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 268}, "lmqg/mt5-small-jaquad-ae": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "Narrativa/mT5-base-finetuned-tydiQA-xqa": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "allenai/macaw-large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "gagan3012/k2t-new": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "google/t5-efficient-tiny-nl2": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 256, "num_heads": 4, "num_layers": 2, "vocab_size": 32128}, "sam2ai/open_llama_3b_odia_gptq_128_4bit": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "lmqg/mt5-small-dequad-qg": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "mrm8488/mT5-small-finetuned-tydiqa-for-xqa": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250112}, "zjunlp/knowlm-13b-zhixi": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "h2oai/h2ogpt-16k-codellama-13b-instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, 
"num_hidden_layers": 40, "vocab_size": 32016}, "mymusise/gpt2-medium-chinese": {"architectures": ["TFGPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 8021}, "ai-forever/mGPT-13B": {"architectures": ["GPT2LMHeadModel"], "n_embd": 5120, "n_head": 40, "n_inner": null, "n_layer": 40, "vocab_size": 100000}, "TinaLiHF/fined-tuned-T5small": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "TheBloke/airoboros-l2-7B-gpt4-2.0-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Mihakram/AraT5-base-question-generation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 110080}, "fjungstedt/t5-criteria-text-to-json": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "luqh/ClinicalT5-large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "h2oai/h2ogpt-16k-codellama-13b-python": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "masakhane/afri-mt5-base": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "YeungNLP/bloom-1b4-zh": {"architectures": ["BloomForCausalLM"], "n_inner": null, "n_layer": 24, "num_attention_heads": 16, "vocab_size": 46145}, "shekharchatterjee/temp-model-174": {}, "TheBloke/Kimiko-v2-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "jeffwan/vicuna-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "kz919/ntk_scaled_open_llama_13b_32k": {"architectures": ["NTKScaledLlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "lmqg/t5-base-squad-qg-ae": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32101}, "r3dhummingbird/DialoGPT-small-harrypotter": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "camenduru/MiniGPT4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "TheBloke/open-llama-7b-open-instruct-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "MoinFaisal/Llama-2-7b-chat-finetune": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/CodeLlama-13B-Instruct-GGUF": {}, "fbellame/llama2-pdf-to-quizz-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 
32000}, "fractalego/fact-checking": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "michelecafagna26/gpt2-medium-finetuned-sst2-sentiment": {"architectures": ["GPT2ForSequenceClassification"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "TheBloke/WizardLM-Uncensored-SuperCOT-StoryTelling-30B-SuperHOT-8K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32001}, "TheBloke/Airoboros-7B-GPT4-1-4-SuperHOT-8K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/Airoboros-L2-70B-GPT4-m2.0-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "Arc53/docsgpt-7b-falcon": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "alenusch/mt5large-ruparaphraser": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 250112}, "ApoTro/slovak-t5-small": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 32100}, "microsoft/dolly-v2-7b-olive-optimized": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50280}, "huggingtweets/gordonramsay": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "prithivida/formal_to_informal_styletransfer": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "model-attribution-challenge/gpt2-xl": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1600, "n_head": 25, "n_layer": 48, "vocab_size": 50257}, "saiful9379/Bangla_GPT2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 33391}, "deepse/CodeUp-Llama-2-7b-chat-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "ziqingyang/chinese-llama-2-13b-16k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 55296}, "ChandlerU11/t5_fine": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "TheBloke/Guanaco-3B-Uncensored-v2-GPTQ": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "mamiksik/T5-commit-message-generation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32104}, "conceptofmind/Yarn-Llama-2-13b-64k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "mesolitica/llama-13b-hf-16384-fpf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 
5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Sao10K/Stheno-1.2-L2-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "gsarti/it5-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32103}, "Den4ikAI/FRED-T5-XL-interpreter": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1536, "num_heads": 24, "num_layers": 24, "vocab_size": 50364}, "TheBloke/WizardCoder-Guanaco-15B-V1.1-GPTQ": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 40, "vocab_size": 49153}, "seonglae/llama-2-7b-chat-hf-gptq": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/llama2_7b_chat_uncensored-GGML": {}, "ecosumit/gpt-model": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "allegro/plt5-large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 50048}, "cointegrated/rut5-small": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 20100}, "it5/it5-large-question-answering": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32103}, "tscholak/1zha5ono": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32102}, "optible/unifiedqa-t5-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "CleverShovel/falcon-7b-instruct-sharded-bf16": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "TheBloke/Pygmalion-13B-SuperHOT-8K-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "BlackSamorez/llama-2-tiny-testing": {"architectures": ["LlamaForCausalLM"], "hidden_size": 128, "intermediate_size": 11008, "num_attention_heads": 8, "num_hidden_layers": 2, "vocab_size": 2000}, "ianagra/Llama-2-7b-ALLM-virtual-sales-assistant": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "quantumaikr/KoreanLM-3B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 2048, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "quantumaikr/llama-2-70B-chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "Deniskin/gpt3_medium": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1536, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50264}, "ozcangundes/mt5-small-turkish-summarization": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250112}, "EleutherAI/pythia-1b-deduped-v0": 
{"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 8, "num_hidden_layers": 16, "vocab_size": 50304}, "flozi00/Llama-2-7b-german-assistant-v3": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Mikivis/gpt2-large-lora-stf4": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_inner": null, "n_layer": 36, "vocab_size": 50257}, "AK270802/DialoGPT-small-harrypotter": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "EleutherAI/pythia-12b-deduped-v0": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 36, "vocab_size": 50688}, "EricPeter/Llama-2-multilingual": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/Pygmalion-2-7B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "miguelvictor/python-gpt2-large": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_layer": 36, "vocab_size": 50257}, "h2oai/h2ogpt-16k-codellama-7b-python": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32016}, "ammarinjtkrbh/llama-2-7b-food-search": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "GroNLP/gpt2-small-dutch": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 40000}, "pszemraj/opt-350m-email-generation": {"architectures": ["OPTForCausalLM"], "hidden_size": 1024, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50265}, "caffsean/t5-small-finetuned-keyword-to-text-generation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "lmqg/mt5-small-dequad-ae": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "yuyijiong/T5-large-sentiment-analysis-Chinese-MultiTask": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32596}, "sonoisa/t5-qiita-title-generation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "YeungNLP/firefly-bloom-1b4": {"architectures": ["BloomForCausalLM"], "hidden_size": 2048, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 46145}, "samwit/koala-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/Vicuna-13B-1-3-SuperHOT-8K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Photolens/OpenOrcaxOpenChat-2-13b-langchain-chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 
13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "Ichsan2895/Merak-7B-v1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "flozi00/Llama-2-7b-german-assistant-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "ss1612/loki-chat": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "OpenBuddy/openbuddy-falcon-7b-v5-fp16": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 70144}, "wellecks/llmstep-mathlib4-pythia2.8b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50278}, "dariolopez/llama-2-7b-oasst1-es": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/WizardLM-1.0-Uncensored-CodeLlama-34B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "brad1141/gpt2-finetuned-comp2": {"architectures": ["GPT2ForTokenClassification"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "TheBloke/chronos-hermes-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "lizhuang144/flan-t5-large-VG-factual-sg": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "nivos/pythia-410m-deduped-finetuned-final-activity-text-10epoch": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 1024, "intermediate_size": 4096, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50304}, "HamidRezaAttar/gpt2-product-description-generator": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "TheBloke/ORCA_LLaMA_70B_QLoRA-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "lmsys/vicuna-13b-delta-v0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "jacobmorrison/tk-instruct-xl-lora-experiments": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32100}, "GroNLP/gpt2-small-italian": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 30001}, "yihsuan/mt5_chinese_small": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250100}, "YTTD/DialoGPT-medium-souv2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "arubenruben/ptt5-portuguese-cnn-dailymail-azure-pt-pt": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, 
"num_layers": 6, "vocab_size": 32128}, "localmodels/Llama-2-7B-Chat-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "lgaalves/llama-2-13b-chat-platypus": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "it5/it5-large-question-generation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32103}, "psyche/KoT5": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "TheBloke/Llama2-70B-OASST-SFT-v10-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32007}, "deepparag/Aeona": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "lmqg/mt5-small-koquad-qg": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "lmqg/mt5-small-esquad-qg": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "NinedayWang/PolyCoder-0.4B": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 1024, "intermediate_size": 4096, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50304}, "ConvLab/t5-small-nlu-multiwoz21": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32100}, "SIC98/GPT2-python-code-generator": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "lmqg/mt5-small-itquad-qg": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "kaiyuy/leandojo-lean4-tacgen-byt5-small": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3584, "d_model": 1472, "num_heads": 6, "num_layers": 12, "vocab_size": 384}, "usvsnsp/pythia-6.9b-sft": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "PlanTL-GOB-ES/gpt2-large-bne": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_layer": 36, "vocab_size": 50261}, "jordiclive/flan-t5-11b-summarizer-filtered": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 10240, "d_model": 4096, "num_heads": 64, "num_layers": 24, "vocab_size": 32128}, "Jordine/scpoo": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "behnamsh/gpt2_camel_physics": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "lmqg/mt5-small-esquad-ae": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "MerlynMind/merlyn-education-teacher-assistant": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 36, "vocab_size": 50688}, "mesolitica/llama-7b-hf-16384-fpf": {"architectures": 
["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "MatthisHoules/rat-t5-qdmr-grounded-with-db": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "persiannlp/mt5-small-parsinlu-qqp-query-paraphrasing": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250112}, "lmqg/mt5-small-koquad-ae": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "lmqg/mt5-small-itquad-ae": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "openthaigpt/openthaigpt-gpt2-instructgpt-poc-0.0.4": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50268}, "ChanceFocus/finma-7b-full": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "vivekraina/Llama-2-7b-hf-8bit": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "dpml/vicuna_mt_450s": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "burberg92/resume_summary": {"architectures": ["BloomForCausalLM"], "hidden_size": 2048, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 250880}, "Monero/Pygmalion-Metharme-7b-4bit-TopScore": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Icaruas/7bill8k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32002}, "dahara1/ELYZA-japanese-Llama-2-7b-fast-instruct-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 45043}, "TheBloke/Yarn-Llama-2-13B-64K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "prithivida/passive_to_active_styletransfer": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "lmqg/mt5-small-frquad-qg": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "JamesStratford/PLord-bot-DialoGPT-medium": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "yizhangliu/prompt-extend": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 52000}, "lmqg/mt5-small-frquad-ae": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "Beltenebros/DialoGPT-small-PerionOfGaul": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 
50257}, "sominw/rel23_conll": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "mncai/SGPT-5.8B-wiki-mirae-bank_securities-epoch5": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 16, "num_hidden_layers": 28, "vocab_size": 30080}, "MickyMike/VulRepair": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32105}, "ybelkada/t5-11b-sharded": {"architectures": ["T5WithLMHeadModel"], "d_ff": 65536, "d_model": 1024, "num_heads": 128, "num_layers": 24, "vocab_size": 32128}, "Einmalumdiewelt/T5-Base_GNAD_MaxSamples": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "digitous/13B-HyperMantis_GPTQ_4bit-128g": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "weqweasdas/hh_rlhf_rm_open_llama_3b": {"architectures": ["LlamaForSequenceClassification"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "TheBloke/WizardMath-13B-V1.0-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "ziqingyang/chinese-alpaca-2-7b-16k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 55296}, "valhalla/t5-base-squad": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "ELiRF/mt5-base-dacsa-es": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "abhitopia/question-answer-generation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32102}, "TurkuNLP/gpt3-finnish-large": {"architectures": ["BloomModel"], "hidden_size": 1536, "n_head": 16, "n_layer": 24, "vocab_size": 131072}, "Abyss-fyf/DialoGPT-small-discord": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "TheBloke/OpenOrca-Platypus2-13B-GGML": {}, "TheBloke/Airoboros-L2-7B-2.1-GGUF": {}, "huggingtweets/googleai": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "it5/it5-base-question-answering": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32103}, "woodmtaylor/DialoGPT-medium-Heej": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "kimdwan/t5-base-korean-summarize-LOGAN": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 50358}, "Narrativa/mT5-base-finetuned-tydiQA-question-generation": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "huggingtweets/normmacdonald": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, 
"n_inner": null, "n_layer": 12, "vocab_size": 50257}, "r3dhummingbird/DialoGPT-medium-neku": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "yhavinga/t5-v1.1-base-dutch-cnn-test": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32103}, "kennethhendricks/DialoGPT-medium-jared-hendricks-gen1": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "retrieva-jp/t5-small-long": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 32128}, "TheBloke/Vigogne-2-7B-Chat-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TigerResearch/tigerbot-7b-chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 60928}, "Fredithefish/Guanaco-13B-Uncensored": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "consciousAI/question-answering-generative-t5-v1-base-s-q-c": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "TheBloke/open-llama-7B-v2-open-instruct-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "mosama/Llama-2-Medical-Merged-LoRA": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "bullmount/quanIta_t5": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32103}, "YeungNLP/bloomz-396m-zh": {"architectures": ["BloomForCausalLM"], "hidden_size": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 46145}, "GreenBitAI/LLaMA-7B-2bit": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "chgk13/decicoder-1b-openvino-int8": {"architectures": ["DeciCoderForCausalLM"], "hidden_size": 2048, "intermediate_size": 5888, "num_attention_heads": 32, "num_hidden_layers": 20, "vocab_size": 49152}, "bigscience/bloomz-mt": {"architectures": ["BloomForCausalLM"], "n_layer": 70, "num_attention_heads": 112, "vocab_size": 250880}, "LarkAI/codet5p-770m_nl2sql_oig": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32100}, "Linly-AI/Chinese-Falcon-7B": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 90046}, "ckip-joint/bloom-3b-zh-instruct": {"architectures": ["BloomForCausalLM"], "hidden_size": 2560, "n_head": 32, "n_inner": null, "n_layer": 30, "vocab_size": 250880}, "sgr23/llama2-fine-tuned-dolly-15k-dto": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "edbeeching/gpt2-imdb": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 
12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "cardiffnlp/flan-t5-small-tweet-emotion": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 32128}, "TheBloke/airoboros-7B-gpt4-1.4-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/CodeLlama-7B-GGUF": {}, "TheBloke/Airoboros-c34B-2.1-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "byeongal/Ko-DialoGPT": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 51200}, "ismaelfaro/gpt2-poems.en": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "tuner007/t5_abs_qa": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "kennethhendricks/DialoGPT-medium-PowPowGaming": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "DunnBC22/flan-t5-base-text_summarization_data": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "zarakiquemparte/hermeslimarp-l2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "MagicLEMP/llamavocat_13B_mixed_16K": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "4bit/ELYZA-japanese-Llama-2-7b-instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "EnglishVoice/t5-base-us-to-uk-english": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "devanshipatel/t5-gec-english-125k": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "helloollel/vicuna-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "nferroukhi/WizardLM-Uncensored-Falcon-7b-sharded-bf16": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65025}, "dacorvo/tiny-random-gpt2-neuronx": {"intermediate_size": 37, "n_embd": 32, "n_head": 4, "n_inner": null, "n_layer": 5, "vocab_size": 1000}, "JamesStratford/Pidrow-bot-DialoGPT-Large-Feb2023": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_inner": null, "n_layer": 36, "vocab_size": 50257}, "tsuyuan/Llama-2-7b-unit": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 41218}, "OFA-Sys/gsm8k-rft-llama7b2-u13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "uer/gpt2-chinese-ancient": {"architectures": 
["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 25370}, "YTTD/DialoGPT-medium-safv3": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "Neko-Institute-of-Science/LLaMA-65B-HF": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "TheBloke/Spicyboros-13B-2.2-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "IDEA-CCNL/Randeng-T5-77M-MultiTask-Chinese": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 32596}, "coreml-projects/Llama-2-7b-chat-coreml": {"architectures": ["LlamaForCausalLM"], "vocab_size": 32000}, "oscorrea/scores-lince-sm": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "morzecrew/FRED-T5-RefinedPersonaChat": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1536, "num_heads": 24, "num_layers": 24, "vocab_size": 50364}, "anjakuzev/harry_7": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/Mythalion-13B-GGUF": {}, "Kryptone/monikAI": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "TheBloke/Luna-AI-Llama2-Uncensored-GGML": {}, "mlabonne/llama-2-7b-miniguanaco": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/Hermes-LLongMA-2-7B-8K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "zlsl/l_erotic_kink_chat": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1536, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50259}, "Sao10K/Stheno-Inverted-1.2-L2-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "castorini/duot5-base-msmarco": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "mrm8488/t5-base-finetuned-qasc": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "entropy/gpt2_zinc_87m": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 2707}, "MarkyMarx/DialoGPT-medium-jimmybot2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "stefan-it/secret-gpt2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "Narrativa/byt5-base-tweet-hate-detection": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3968, "d_model": 1536, "num_heads": 12, "num_layers": 18, "vocab_size": 384}, "nicholasKluge/Aira-2-124M": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50261}, 
"TheBloke/Samantha-1.11-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "castorini/monot5-large-msmarco": {"architectures": ["T5Model"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "PoloHuggingface/French_grammar_error_corrector": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32102}, "cambridgeltl/magic_mscoco": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50259}, "Gatozu35/tortoise-tts": {"architectures": ["GPT2InferenceModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 30, "vocab_size": 604}, "abacusai/Giraffe-v1-delta-13b-scaled-16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "flozi00/Llama-2-13B-german-assistant-v3-4bit-autogptq": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "HAERAE-HUB/tulu_13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "doc2query/msmarco-14langs-mt5-base-v1": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "Maciel/T5Corrector-base-v2": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "vilm/vietcuna-3b-v2": {"architectures": ["BloomForCausalLM"], "hidden_size": 2560, "n_head": 32, "n_inner": null, "n_layer": 30, "vocab_size": 250880}, "TitanML/ct2-int8-falcon-7b-instruct": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "ybelkada/llama-7b-GPTQ-test": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "h2oai/h2ogpt-16k-codellama-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32016}, "TigerResearch/tigerbot-70b-chat-v1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 60928}, "Supiri/t5-base-conversation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "msterbentz/t5-base-break-high": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "igorktech/rut5-small-chit-chat-intelligent": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 20100}, "kuleshov/llama-7b-4bit": {"architectures": ["LLaMAForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "hipnologo/gpt2-imdb-finetune": {"architectures": ["GPT2ForSequenceClassification"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, 
"vocab_size": 50257}, "qwopqwop/danbooru-llama-gptq": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "t-dai-con/gpt-fine-tuned-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/Platypus2-70B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "KETI-AIR/ke-t5-base-ko": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 64128}, "doc2query/all-t5-base-v1": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "GT4SD/multitask-text-and-chemistry-t5-base-standard": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "uer/gpt2-medium-chinese-cluecorpussmall": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 21128}, "UBC-NLP/AraT5-base-title-generation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 110080}, "dsivakumar/text2sql": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "power-greg/super-fast-llm": {"architectures": ["GPT2LMHeadModel"], "n_embd": 2048, "n_head": 16, "n_inner": 2048, "n_layer": 4, "vocab_size": 2048}, "AlexWortega/instruct_rugptMedium": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50263}, "hiyouga/Llama-2-Chinese-13b-chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "frank098/llama2-13b-8k-vnf-virtualization": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "OFA-Sys/gsm8k-rft-llama7b-sample100": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "EnterNameBros/Senko-ai-medium": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "PeanutJar/LLaMa-2-PeanutButter_v19_R8-7B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Sao10K/Medusa-1.1-L2-7B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "ChrisVCB/DialoGPT-medium-cmjs": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "indonesian-nlp/gpt2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "persiannlp/mt5-small-parsinlu-squad-reading-comprehension": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 
6, "num_layers": 8, "vocab_size": 250112}, "stmnk/codet5-small-code-summarization-python": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32100}, "emozilla/LLongMA-2-13b-16k-flash": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "bigscience/bloom-petals": {"architectures": ["BloomForCausalLM"], "hidden_size": 14336, "n_head": 112, "n_layer": 70, "vocab_size": 250880}, "procesaur/gpt2-srlat": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 52000}, "ashwinR/CodeExplainer": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32100}, "Chirayu/nl2pandas": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "OpenBuddy/openbuddy-falcon-7b-v6-bf16": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 70144}, "swbaek/tulu_65b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32001}, "huggingtweets/wallstreetbets": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "Sultannn/gpt2-ft-id-puisi": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 35000}, "sonoisa/sentence-t5-base-ja-mean-tokens": {"architectures": ["T5Model"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "sdadas/polish-gpt2-xl": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1600, "n_head": 25, "n_inner": 6400, "n_layer": 48, "vocab_size": 51200}, "sjrhuschlee/flan-t5-large-squad2": {"architectures": ["T5ForQuestionAnswering"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "Hnabil/t5-address-standardizer": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "Gryphe/MythoLogic-Mini-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/Athena-v1-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Undi95/MythoMax-L2-Kimiko-v2-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "magnifi/llama-augmented-contextual-2-epoch-6-merged": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "doc2query/msmarco-chinese-mt5-base-v1": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "Sakuna/t5_grammar_checker": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "Dahoas/pythia-1B-response-full-static-sft": {"architectures": ["GPTNeoXForCausalLM"], 
"hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50304}, "localmodels/Vicuna-7B-v1.3-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Sao10K/Stheno-1.1-L2-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "mlabonne/drllama-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "IlyaGusev/rugpt3medium_sum_gazeta": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "describeai/gemini": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "mojians/E2E-QA-Mining": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32102}, "TheBloke/Wizard-Vicuna-13B-Uncensored-SuperHOT-8K-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "dnagpt/human_gpt2-v1": {"architectures": ["GPT2Model"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 25000}, "heegyu/WizardVicuna-Uncensored-pythia-160m-deduped": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 768, "intermediate_size": 3072, "num_attention_heads": 12, "num_hidden_layers": 12, "vocab_size": 50304}, "maximuslee07/llama-2-7b-rockwell": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "DylanJHJ/fidt5-base-nq": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "laituan245/molt5-large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "DancingIguana/music-generation": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 6, "vocab_size": 25000}, "Qiliang/flan-t5-large-summarization-finetuned-xsum": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "TheBloke/Vicuna-7B-CoT-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "hpcaitech/openmoe-base": {"architectures": ["OpenMoeForCausalLM"], "hidden_size": 768, "intermediate_size": 2048, "num_attention_heads": 12, "num_hidden_layers": 12, "vocab_size": 256384}, "CalderaAI/13B-Thorns-l2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32032}, "CHIH-HUNG/llama-2-13b-FINETUNE1_17w-r4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "IlyaGusev/rugpt_medium_turbo_instructed": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, 
"vocab_size": 50258}, "pankajmathur/orca_alpaca_3b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "TheBloke/Wizard-Vicuna-7B-Uncensored-SuperHOT-8K-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "abacusai/Giraffe-v1-delta-13b-scaled-4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/Huginn-v3-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "bloom-testing/test-bloomd-350m-main": {"architectures": ["BloomModel"], "n_inner": null, "n_layer": 24, "num_attention_heads": 16, "vocab_size": 250880}, "AI-Sweden/gpt-sw3-356m": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": 4096, "n_layer": 24, "vocab_size": 64000}, "raymondho/DialoGPT-small-harry": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "TheBloke/airochronos-33B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "TheBloke/OpenChat_v3.2-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "ahnyeonchan/OpenOrca-AYT-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "stanford-crfm/expanse-gpt2-small-x777": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "doc2query/msmarco-german-mt5-base-v1": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "ku-nlp/gpt2-medium-japanese-char": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 6000}, "llm-blender/gen_fuser_3b": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32100}, "lomahony/eleuther-pythia2.8b-hh-sft": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50304}, "TheBloke/Llama2-22B-GPLATTY-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 40, "vocab_size": 32000}, "grammarly/coedit-xl-composite": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32100}, "imuncomfortable/DiabloGPT-small-CocoAtarashi": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "kaiyuy/leandojo-lean3-retriever-tacgen-byt5-small": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3584, "d_model": 1472, "num_heads": 6, "num_layers": 12, "vocab_size": 384}, "michaelwzhu/Chinese-LlaMA2-13B-chat": {"architectures": 
["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 55296}, "Xenova/llama2.c-stories110M": {"architectures": ["LlamaForCausalLM"], "hidden_size": 768, "intermediate_size": 2048, "num_attention_heads": 12, "num_hidden_layers": 12, "vocab_size": 32000}, "Youngwoo9/T5_Pyeongsan": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 50358}, "literallywood/DialoGPT-small-ekansh": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "jondurbin/spicyboros-7b-2.2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "indobenchmark/indogpt": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_layer": 12, "vocab_size": 40005}, "it5/it5-efficient-small-el32-news-summarization": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 32, "vocab_size": 32100}, "mesolitica/finetune-translation-t5-base-standard-bahasa-cased-v2": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "Den4ikAI/FRED-T5-XL_instructor": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1536, "num_heads": 24, "num_layers": 24, "vocab_size": 50365}, "mlabonne/gpt2-GPTQ-4bit": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "steerapi/Llama-2-7b-chat-hf-onnx": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Langboat/bloom-1b4-zh": {"architectures": ["BloomForCausalLM"], "hidden_size": 2048, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 46145}, "neulab/docprompting-codet5-python-doc-retriever": {"architectures": ["BERTScorerForCL"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "AI-Sweden/gpt-sw3-20b": {"architectures": ["GPT2LMHeadModel"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 44, "vocab_size": 64000}, "syndi-models/article-title-generator": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "vgaraujov/Dummy5": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "TFLai/Orca-Nova-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "allenai/tk-instruct-11b-def-pos": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 65536, "d_model": 1024, "num_heads": 128, "num_layers": 24, "vocab_size": 32128}, "aspis/gpt2-genre-story-generation": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50267}, "lcw99/t5-base-korean-paraphrase": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 50358}, "Celestinian/TopicGPT": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_inner": null, "n_layer": 36, "vocab_size": 50257}, 
"TheBloke/Redmond-Hermes-Coder-GPTQ": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 40, "vocab_size": 49153}, "TheBloke/h2ogpt-gm-oasst1-en-2048-falcon-7b-v3-GPTQ": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "abhinavkulkarni/meta-llama-Llama-2-13b-chat-hf-w4-g128-awq": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "jypppp/llama-2-7b-manual_GPT_ver2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/Llama-2-7B-32K-Instruct-GGML": {}, "TheBloke/Yarn-Llama-2-7B-128K-GGML": {}, "quantumaikr/KoreanLM-llama-2-7B-finetuned": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 46336}, "google/t5-xl-ssm-nq": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "nikokons/gpt2-greek": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_layer": 12, "vocab_size": 22000}, "NYTK/PULI-GPT-3SX": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50048}, "Futyn-Maker/rugpt3small_based_on_gpt2-finetuned_teachers_quotes_small": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50264}, "localmodels/Llama-2-13B-Chat-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "SebastianSchramm/UniNER-7B-all-GPTQ-4bit-128g-actorder_True": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/Airoboros-L2-70B-2.1-Creative-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "casperhansen/vicuna-7b-v1.5-awq-gemv": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "IDEA-CCNL/Wenzhong-GPT2-3.5B": {"architectures": ["GPT2LMHeadModel"], "n_embd": 3072, "n_head": 32, "n_inner": 12288, "n_layer": 30, "vocab_size": 50304}, "antoinelouis/belgpt2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_layer": 12, "vocab_size": 50257}, "atkh6673/DialoGPT-small-trump": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "huggingface-course/mt5-small-finetuned-amazon-en-es": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250112}, "malteos/gpt2-xl-wechsel-german": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1600, "n_head": 25, "n_inner": 6400, "n_layer": 48, "vocab_size": 50304}, "KES/caribe-capitalise": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, 
"pszemraj/flan-t5-large-instruct-dolly_hhrlhf": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "Tanmay09516/StableBeluga-7B-sharded-bf16-5GB": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "abhinavkulkarni/codellama-CodeLlama-7b-Python-hf-w4-g128-awq": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/Spicyboros-7B-2.2-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "huggingtweets/elonmusk": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "BelleGroup/BELLE-7B-2M": {"architectures": ["BloomModel"], "n_inner": null, "n_layer": 30, "num_attention_heads": 32, "vocab_size": 250880}, "snoop2head/Gomoku-GPT2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 256, "n_head": 4, "n_inner": null, "n_layer": 4, "vocab_size": 404}, "AnimusOG/pygmalion-7b-4bit-128g-cuda-2048Token": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/airoboros-l2-7B-gpt4-m2.0-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Juniplayground/Mist_LLaMA-2-7B-1024_V3": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "DataLinguistic/DataLinguistic-34B-V1.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32001}, "erikycd/chatbot_hadita": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50262}, "medicalai/ClinicalGPT-base-zh": {"architectures": ["BloomForCausalLM"], "hidden_size": 4096, "n_head": 32, "n_inner": null, "n_layer": 30, "vocab_size": 250880}, "TheBloke/orca_mini_v2_13b-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "NIRVANA/T5_academic_paraphraser": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "josmunpen/mt5-small-spanish-summarization": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250112}, "shahp7575/gpt2-horoscopes": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50260}, "yihsuan/best_model_0427_small_long": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250100}, "malteos/bloom-6b4-clp-german-oasst-v0.1": {"architectures": ["BloomForCausalLM"], "hidden_size": 4096, "n_head": 32, "n_layer": 30, "vocab_size": 50272}, "openllmplayground/openalpaca_7b_700bt_preview": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, 
"intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Gaivoronsky/ruGPT-3.5-13B-fp16": {"architectures": ["GPT2LMHeadModel"], "n_embd": 5120, "n_head": 40, "n_inner": null, "n_layer": 40, "vocab_size": 50272}, "universeTBD/astrollama": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "gorkemgoknar/gpt2-small-turkish": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "huggingtweets/joejoinerr": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "Prarabdha/T5-Transformer-RickBot": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "beomi/kollama-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 52000}, "mohammadtaghizadeh/flan-t5-base-imdb-text-classification": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "nicholasKluge/Aira-Instruct-774M": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_inner": null, "n_layer": 36, "vocab_size": 50259}, "bhenrym14/airoboros-7b-gpt4-1.4.1-lxctx-PI-16384-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Alireza1044/michael_bert_lm": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 6, "vocab_size": 50257}, "shibing624/gpt2-dialogbot-base-chinese": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 10, "vocab_size": 13317}, "mesolitica/finetune-summarization-ms-t5-base-standard-bahasa-cased": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "lmqg/flan-t5-large-squad-qg": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32101}, "circulus/alpaca-7b": {"architectures": ["LlaMAForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "reeducator/vicuna-13b-free": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "flozi00/Llama-2-13b-german-assistant-v6-4bit-autogptq": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 37632}, "SasnayaLetovka/tinkoff-zhientaev-model": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50262}, "mesolitica/t5-base-standard-bahasa-cased": {"architectures": ["T5Model"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "EllyPony/flutterbot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "pszemraj/flan-t5-xl-grammar-synthesis": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, 
"d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "jinxuewen/vicuna-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "fireballoon/baichuan-llama-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 64000}, "TheBloke/Vicuna-7B-v1-3-SuperHOT-8K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "scural/arxiv_model": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "Undi95/CodeEngine": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Codexister/DialoGPT-medium-KafkaBotV1": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "google/t5-xxl-ssm-nq": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 10240, "d_model": 4096, "num_heads": 64, "num_layers": 24, "vocab_size": 32128}, "uer/gpt2-chinese-couplet": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 21128}, "nicholasKluge/Aira-Instruct-355M": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50259}, "HIT-SCIR/huozi-7b-sft": {"architectures": ["BloomForCausalLM"], "hidden_size": 4096, "n_head": 32, "n_inner": null, "n_layer": 30, "vocab_size": 250880}, "NousResearch/CodeLlama-13b-Instruct-hf-flash": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "Enno-Ai/vigogne2-enno-13b-sft-lora-4bit": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "sonoisa/t5-base-japanese-article-generation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "Kyrmasch/t5-kazakh-qa": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 18947}, "TheBloke/airoboros-13b-gpt4-1.4-SuperHOT-8K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/Kimiko-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "arya555/vicuna-7b-v1.5-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Geo/gpt2_custom_c_q_and_a": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "laituan245/molt5-small-smiles2caption": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 32128}, "cloudqi/cqi_brain_memory_summarizer_large_pt_v0": {"architectures": 
["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "ybelkada/bloom-1b7-8bit": {"architectures": ["BloomForCausalLM"], "hidden_size": 2048, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 250880}, "snipaid/snip-igel-500-v2-adapter-merged": {"architectures": ["BloomForCausalLM"], "hidden_size": 4096, "n_head": 32, "n_layer": 30, "vocab_size": 50304}, "TabbyML/SantaCoder-1B": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 2048, "n_head": 16, "n_inner": 8192, "n_layer": 24, "vocab_size": 49280}, "TheBloke/Guanaco-33B-SuperHOT-8K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "hanseokhyeon/kullm-polyglot-5.8b-v2-GPTQ": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 16, "num_hidden_layers": 28, "vocab_size": 30080}, "CAIRE-CedarsSinai/falcon-7b-qlora-chat-support-bot-faq-alzkb-version-1": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "pranavpsv/genre-story-generator-v2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_layer": 12, "vocab_size": 50266}, "nandakishormpai/t5-small-machine-articles-tag-generation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "ITG/DialoGPT-medium-spanish-chitchat": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "4bit/falcon-7b-instruct-GPTQ": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "OpenBuddy/openbuddy-openllama-7b-v5-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 38449}, "papahawk/keya-560m": {"architectures": ["BloomForCausalLM"], "n_inner": null, "n_layer": 24, "num_attention_heads": 16, "vocab_size": 250880}, "abhinavkulkarni/tiiuae-falcon-40b-instruct-w4-g128-awq": {"architectures": ["RWForCausalLM"], "hidden_size": 8192, "n_head": 128, "n_layer": 60, "vocab_size": 65024}, "funstoryai/immersiveL-exp": {"architectures": ["BloomForCausalLM"], "hidden_size": 1536, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 250880}, "Benson/llama-2-7b-miniguanaco-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "clancystudios/DialoGPT-medium-Morty": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "huggingtweets/realdonaldtrump": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "charanhu/text_to_sql_2": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32102}, "beomi/kollama-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 52000}, "IDEA-CCNL/Ziya-LLaMA-13B-v1.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 
39424}, "nicholasKluge/Aira-Instruct-PT-1B7": {"architectures": ["BloomForCausalLM"], "hidden_size": 2048, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 250683}, "TheBloke/Llama2-22B-Daydreamer-v3-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 40, "vocab_size": 32000}, "yongzx/pythia-160m-sft-hh": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 768, "intermediate_size": 3072, "num_attention_heads": 12, "num_hidden_layers": 12, "vocab_size": 50304}, "h2oai/h2ogpt-16k-codellama-34b-python": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "nedima68/author_articles_GPT2_textgen_TR": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 52001}, "IronChef/MascotAI_Open_LLaMA_FINAL": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "unionai/pythia-1B-deduped-wikipedia-8bit": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "Chirayu/nl2cql": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "TheBloke/Nous-Puffin-70B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "TheBloke/Llama-2-70B-Orca-200k-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "TheBloke/Llama-2-70B-chat-GGUF": {}, "sartmis1/CodeLlama-34b-instruct-openapi": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "flax-community/bengali-t5-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32103}, "csebuetnlp/mT5_m2o_hindi_crossSum": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "huggingtweets/fabrizioromano": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "yshen99/ZhiGuoLiZheng-GPT2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 21128}, "malalejandra/putinspeaks": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "Intel/fid_flan_t5_base_nq": {"architectures": ["FusionInDecoderForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "sjrhuschlee/flan-t5-base-mnli": {"architectures": ["T5ForSequenceClassification"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "TheBloke/Codegen25-7B-mono-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 51200}, "frank098/starcoder-vyatta": {"architectures": 
["GPTBigCodeForCausalLM"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 40, "vocab_size": 49152}, "Xenova/llama2.c-stories42M": {"architectures": ["LlamaForCausalLM"], "hidden_size": 512, "intermediate_size": 1376, "num_attention_heads": 8, "num_hidden_layers": 8, "vocab_size": 32000}, "flozi00/Llama-2-13b-german-assistant-v5": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 37632}, "Andrei-Alex/Fine-Tuned-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/vicuna-7B-1.1-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "sharpbai/alpaca-7b-merged": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "Clakmann/t5-base-Clakmann-thesis": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "RuterNorway/Llama-2-13b-chat-norwegian-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "osieosie/bloom-560m-4bit": {"architectures": ["BloomForCausalLM"], "hidden_size": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 250880}, "paulowoicho/t5-podcast-summarisation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "liujch1998/rainier-large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "gsdas/qct5": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "nicholasKluge/Aira-Instruct-1B5": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1600, "n_head": 25, "n_inner": null, "n_layer": 48, "vocab_size": 50259}, "kajdun/iubaris-13b-v3_GGML": {}, "csebuetnlp/mT5_m2o_english_crossSum": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "dehio/german-qg-t5-quad": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32101}, "emil2000/dialogpt-for-french-language": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "khalidsaifullaah/bengali-lyricist-gpt2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "thinhda/chatbot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "Finnish-NLP/llama-7b-finnish": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 64256}, "ehartford/WizardLM-7B-V1.0-Uncensored": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, 
"TheBloke/WizardCoder-Guanaco-15B-V1.0-GPTQ": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 40, "vocab_size": 49153}, "DUOMO-Lab/TransGPT-v0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 49954}, "TheBloke/Platypus2-70B-Instruct-GGUF": {}, "lmqg/t5-large-squad-qg-ae": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32101}, "rubentito/hivt5-base-mpdocvqa": {"architectures": ["HiVT5"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "cosimoiaia/Loquace-70m": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 512, "intermediate_size": 2048, "num_attention_heads": 8, "num_hidden_layers": 6, "vocab_size": 50304}, "metamyth/jennyNew": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "AlexWortega/LLama2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "totally-not-an-llm/AlpacaCielo2-7b-8k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/orca_mini_v3_7B-GGML": {}, "zjunlp/knowlm-13b-base-v1.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "ticoAg/gpt2-tigerbot-pt-zh": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "akshat3492/mT5": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250112}, "TheBloke/Falcon-180B-Chat-GGUF": {}, "unicamp-dl/mt5-base-mmarco-v2": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "malteos/gpt2-wechsel-german-ds-meg": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": 3072, "n_layer": 12, "vocab_size": 50304}, "phpaiola/ptt5-base-summ-temario": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "mesolitica/finetune-translation-t5-super-tiny-standard-bahasa-cased": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 256, "num_heads": 6, "num_layers": 2, "vocab_size": 32100}, "ademfatnassi/bonjourGPT-small": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "pr1me/llama2_13b_eros_instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32032}, "Xenova/llama2.c-stories15M": {"architectures": ["LlamaForCausalLM"], "hidden_size": 288, "intermediate_size": 768, "num_attention_heads": 6, "num_hidden_layers": 6, "vocab_size": 32000}, "sekarmulyani/gpt2-ulasan-beauty-products-gen": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "akhooli/gpt2-small-arabic-poetry": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, 
"n_head": 12, "n_layer": 12, "vocab_size": 50257}, "mrm8488/spanish-t5-small-sqac-for-qa": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32103}, "flozi00/falcon-7b-german-assistant-v2": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65040}, "TheBloke/llama-2-13B-chat-limarp-v2-merged-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "ticoAg/gpt2-tiger-sft-zh": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "thiagomf/Llama-2-7b-hf-sharded-bf16-1GB": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "THUMT/mGPT": {"architectures": ["GPT2LMHeadModel"], "vocab_size": 250100, "n_embd": 1024, "n_layer": 24, "n_head": 16, "n_inner": 4096}, "lmqg/flan-t5-base-squad-qg": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32101}, "h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-7b-preview-700bt": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/Phind-CodeLlama-34B-Python-v1-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "arogov/llama2_13b_chat_uncensored": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "ai-forever/mGPT-1.3B-bulgarian": {"architectures": ["GPT2LMHeadModel"], "n_embd": 2048, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 100000}, "davesoma/SageBeluga13": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "pssubitha/llama-2-7b-sales-force-chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "PyaeSoneK/pythia_70m_legalQA": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 512, "intermediate_size": 2048, "num_attention_heads": 8, "num_hidden_layers": 6, "vocab_size": 50304}, "hidude562/OpenMusenet-2.1-L": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50259}, "abeiler/huggingface-goatLora-goatV9-testData-morePushes": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "abinayam/gpt-2-tamil": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "persiannlp/mt5-base-parsinlu-squad-reading-comprehension": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "pierreguillou/t5-base-qa-squad-v1.1-portuguese": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, 
"lchaloupsky/czech-gpt2-oscar": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_layer": 12, "vocab_size": 50257}, "OpenHust/viet-gpt2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "tiansz/ChatYuan-7B-merge": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "voidful/llama-v2-unit-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 8195}, "taaredikahan23/Llama-2-7b-chat-finetune": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "deutsche-telekom/mt5-small-sum-de-en-v1": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250100}, "hetpandya/t5-small-tapaco": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "sunhao666/chi-sum2": {"architectures": ["T5Model"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 21228}, "smartik/mt5-small-finetuned-gec-0.2": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250112}, "PORTULAN/gervasio-ptbr-base": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 8, "num_hidden_layers": 16, "vocab_size": 50304}, "clibrain/Llama-2-13b-ft-instruct-es-gptq-4bit": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "soketlabs/bhasha-7b-2k-hi": {"architectures": ["MPTForCausalLM"], "d_model": 4096, "vocab_size": 61772}, "codefuse-ai/CodeFuse-13B": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 100831}, "Sentdex/GPyT": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 52000}, "it5/it5-large-news-summarization": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32103}, "FredZhang7/distilgpt2-stable-diffusion": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 6, "vocab_size": 50257}, "Rostlab/ProstT5_fp16": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 16384, "d_model": 1024, "num_heads": 32, "num_layers": 24, "vocab_size": 150}, "approach0/mathy-vicuna-13B-FFT": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "lighteternal/gpt2-finetuned-greek": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "stanford-crfm/battlestar-gpt2-small-x49": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "stacked-summaries/flan-t5-small-stacked-samsum-1024": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, 
"num_layers": 8, "vocab_size": 32128}, "TigerResearch/tigerbot-7b-base-v1": {"architectures": ["BloomForCausalLM"], "hidden_size": 4096, "n_head": 32, "n_inner": null, "n_layer": 30, "vocab_size": 250680}, "Chang-Su/llama-2-13b-chat-ko": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 39478}, "Clakmann/t5-base-Clakmann-thesis-epoch10": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "yekaraoglann/results": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "bitadin/gpt-4-medium-titles-v2-flan-t5-base-llm-6": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "google/t5_11b_trueteacher_and_anli": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 65536, "d_model": 1024, "num_heads": 128, "num_layers": 24, "vocab_size": 32128}, "TaylorAI/Flash-Llama-30M": {"architectures": ["LlamaForCausalLM"], "hidden_size": 384, "intermediate_size": 1024, "num_attention_heads": 12, "num_hidden_layers": 4, "vocab_size": 32000}, "flax-community/t5-base-wikisplit": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "razent/SciFive-large-Pubmed_PMC": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "inkoziev/rugpt_chitchat": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1536, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50258}, "lomahony/eleuther-pythia410m-hh-sft": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 1024, "intermediate_size": 4096, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50304}, "TheBloke/Vicuna-13B-v1.3-German-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "emozilla/LLongMA-2-13b-storysummarizer": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32032}, "yongzx/pythia-1b-sft-hh": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 8, "num_hidden_layers": 16, "vocab_size": 50304}, "TheBloke/airoboros-13b-gpt4-1.4-SuperHOT-8K-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "heegyu/llama-2-ko-7b-chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 46336}, "flozi00/Llama-2-7b-german-assistant-v3-4bit-autogptq": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "zarakiquemparte/zararp-l2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Sao10K/Stheno-1.3-L2-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 
40, "num_hidden_layers": 40, "vocab_size": 32000}, "TsinghuaAI/CPM-Generate": {"architectures": ["GPT2LMHeadModel"], "n_embd": 2560, "n_head": 32, "n_inner": null, "n_layer": 32, "vocab_size": 30000}, "AlexWortega/instruct_rugptlarge": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1536, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50263}, "tatsu-lab/alpaca-7b-wdiff": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "microsoft/phi-1_5": {"architectures": ["MixFormerSequentialForCausalLM"], "n_embd": 2048, "n_head": 32, "n_inner": null, "n_layer": 24, "vocab_size": 51200}, "Deci/DeciLM-6b": {"architectures": ["DeciLMForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Deci/DeciLM-6b-instruct": {"architectures": ["DeciLMForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "baichuan-inc/Baichuan2-13B-Chat": {"architectures": ["BaichuanForCausalLM"], "hidden_size": 5120, "intermediate_size": 13696, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 125696}, "microsoft/phi-1": {"architectures": ["MixFormerSequentialForCausalLM"], "n_embd": 2048, "n_head": 32, "n_inner": null, "n_layer": 24, "vocab_size": 51200}, "Deci/DeciCoder-1b": {"architectures": ["DeciCoderForCausalLM"], "hidden_size": 2048, "intermediate_size": 5888, "num_attention_heads": 32, "num_hidden_layers": 20, "vocab_size": 49152}, "baichuan-inc/Baichuan2-7B-Chat": {"architectures": ["BaichuanForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 125696}, "CofeAI/FLM-101B": {"n_embd": 10240, "n_head": 80, "n_inner": 40960, "n_layer": 80, "vocab_size": 100352}, "jphme/phi-1_5_Wizard_Vicuna_uncensored": {"architectures": ["MixFormerSequentialForCausalLM"], "n_embd": 2048, "n_head": 32, "n_inner": null, "n_layer": 24, "vocab_size": 50304}, "ehartford/samantha-phi": {"architectures": ["MixFormerSequentialForCausalLM"], "n_embd": 2048, "n_head": 32, "n_inner": null, "n_layer": 24, "vocab_size": 50304}, "teknium/Phi-Hermes-1.3B": {"architectures": ["MixFormerSequentialForCausalLM"], "n_embd": 2048, "n_head": 32, "n_inner": null, "n_layer": 24, "vocab_size": 50304}, "baichuan-inc/Baichuan2-13B-Chat-4bits": {"architectures": ["BaichuanForCausalLM"], "hidden_size": 5120, "intermediate_size": 13696, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 125696}, "teknium/Puffin-Phi-v2": {"architectures": ["MixFormerSequentialForCausalLM"], "n_embd": 2048, "n_head": 32, "n_inner": null, "n_layer": 24, "vocab_size": 50304}, "AIDC-ai-business/Marcoroni-70B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "PY007/TinyLlama-1.1B-Chat-v0.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 2048, "intermediate_size": 5632, "num_attention_heads": 32, "num_hidden_layers": 22, "vocab_size": 32001}, "SkunkworksAI/PlatyPhi-1.5B": {"architectures": ["MixFormerSequentialForCausalLM"], "n_embd": 2048, "n_head": 32, "n_inner": null, "n_layer": 24, "vocab_size": 50304}, "gpt2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_layer": 12, "vocab_size": 50257}, "ibm/MoLM-700M-8B": {"architectures": 
["ModuleFormerForCausalLM"], "n_embd": 1024, "n_head": 1, "n_layer": 48, "vocab_size": 50295}, "tangger/Qwen-7B-Chat": {"architectures": ["QWenLMHeadModel"], "hidden_size": 4096, "intermediate_size": 22016, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 151936}, "baichuan-inc/Baichuan2-7B-Chat-4bits": {"architectures": ["BaichuanForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 125696}, "lyogavin/Anima-7B-100K": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "baichuan-inc/Baichuan2-13B-Base": {"architectures": ["BaichuanForCausalLM"], "hidden_size": 5120, "intermediate_size": 13696, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 125696}, "baichuan-inc/Baichuan-7B": {"architectures": ["BaiChuanForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 64000}, "TheBloke/MLewd-L2-Chat-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}} \ No newline at end of file +{"NousResearch/Llama-2-13b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "google/flan-t5-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "google/flan-t5-large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "t5-small": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "t5-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "PascalNotin/Tranception_Small": {"architectures": ["TranceptionLMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": 3072, "n_layer": 12, "vocab_size": 25}, "bigscience/bloom-560m": {"architectures": ["BloomForCausalLM"], "n_inner": null, "n_layer": 24, "num_attention_heads": 16, "vocab_size": 250880}, "distilgpt2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_layer": 6, "vocab_size": 50257}, "hf-internal-testing/tiny-random-gpt2": {"intermediate_size": 37, "n_embd": 32, "n_head": 4, "n_inner": null, "n_layer": 5, "vocab_size": 1000}, "tiiuae/falcon-7b": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "t5-large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "bigscience/bloomz-1b1": {"architectures": ["BloomForCausalLM"], "n_inner": null, "n_layer": 24, "num_attention_heads": 16, "vocab_size": 250880}, "gpt2-medium": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_layer": 24, "vocab_size": 50257}, "mrm8488/t5-base-finetuned-common_gen": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "lmsys/fastchat-t5-3b-v1.0": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, 
"vocab_size": 32110}, "gpt2-xl": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1600, "n_head": 25, "n_layer": 48, "vocab_size": 50257}, "meta-llama/Llama-2-7b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "hf-internal-testing/tiny-random-t5": {"d_ff": 37, "d_model": 32, "num_heads": 4, "num_layers": 5, "vocab_size": 1103}, "EleutherAI/pythia-6.9b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "databricks/dolly-v2-3b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50280}, "hf-internal-testing/tiny-random-GPTNeoXForCausalLM": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 32, "intermediate_size": 37, "num_attention_heads": 4, "num_hidden_layers": 5, "vocab_size": 1024}, "meta-llama/Llama-2-7b-chat-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "microsoft/DialoGPT-medium": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_layer": 24, "vocab_size": 50257}, "google/mt5-base": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "TheBloke/Wizard-Vicuna-7B-Uncensored-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "hf-internal-testing/tiny-random-BloomModel": {"architectures": ["BloomModel"], "hidden_size": 32, "n_head": 4, "n_layer": 5, "vocab_size": 1024}, "google/flan-t5-xxl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 10240, "d_model": 4096, "num_heads": 64, "num_layers": 24, "vocab_size": 32128}, "lmsys/vicuna-7b-v1.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "huggyllama/llama-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "mrm8488/t5-base-finetuned-summarize-news": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "google/flan-t5-small": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 32128}, "tiiuae/falcon-40b-instruct": {"architectures": ["RWForCausalLM"], "hidden_size": 8192, "n_head": 128, "n_layer": 60, "vocab_size": 65024}, "ramsrigouthamg/t5_sentence_paraphraser": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "flexudy/t5-base-multi-sentence-doctor": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "lewtun/tiny-random-mt5": {"architectures": ["MT5Model"], "d_ff": 1024, "d_model": 16, "num_heads": 4, "num_layers": 2, "vocab_size": 250112}, "gpt2-large": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_layer": 36, "vocab_size": 50257}, "valhalla/t5-base-e2e-qg": 
{"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32102}, "sshleifer/tiny-gpt2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 2, "n_head": 2, "n_layer": 2, "vocab_size": 50257}, "fxmarty/tiny-llama-fast-tokenizer": {"architectures": ["LlamaForCausalLM"], "hidden_size": 16, "intermediate_size": 64, "num_attention_heads": 4, "num_hidden_layers": 2, "vocab_size": 32000}, "decapoda-research/llama-7b-hf": {"architectures": ["LLaMAForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "stabilityai/StableBeluga2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "syzymon/long_llama_3b": {"architectures": ["LongLlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "NousResearch/Llama-2-7b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/Llama-2-70B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "tiiuae/falcon-7b-instruct": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "google/flan-t5-xl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "meta-llama/Llama-2-13b-chat-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "petals-team/StableBeluga2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "meta-llama/Llama-2-70b-chat-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "EleutherAI/gpt-neox-20b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 6144, "intermediate_size": 24576, "num_attention_heads": 64, "num_hidden_layers": 44, "vocab_size": 50432}, "hf-internal-testing/tiny-random-GPTBigCodeForCausalLM": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 32, "n_head": 4, "n_inner": 37, "n_layer": 5, "vocab_size": 1024}, "nferruz/ProtGPT2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_inner": null, "n_layer": 36, "vocab_size": 50257}, "philschmid/flan-t5-xxl-sharded-fp16": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 10240, "d_model": 4096, "num_heads": 64, "num_layers": 24, "vocab_size": 32128}, "HuggingFaceM4/tiny-random-LlamaForCausalLM": {"architectures": ["LlamaForCausalLM"], "hidden_size": 16, "intermediate_size": 64, "num_attention_heads": 4, "num_hidden_layers": 2, "vocab_size": 32000}, "hf-internal-testing/tiny-random-BloomForCausalLM": {"architectures": ["BloomForCausalLM"], "hidden_size": 32, "n_head": 4, "n_layer": 5, "vocab_size": 1024}, "hf-internal-testing/tiny-random-GPT2LMHeadModel": {"architectures": ["GPT2LMHeadModel"], "n_embd": 32, "n_head": 4, "n_inner": 37, "n_layer": 5, "vocab_size": 1024}, "Vamsi/T5_Paraphrase_Paws": 
{"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "lmsys/vicuna-7b-v1.3": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "meta-llama/Llama-2-13b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "ybelkada/tiny-random-T5ForConditionalGeneration-calibrated": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 37, "d_model": 32, "num_heads": 4, "num_layers": 5, "vocab_size": 32100}, "prithivida/parrot_paraphraser_on_T5": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "hf-internal-testing/tiny-random-GPTBigCodeModel": {"architectures": ["GPTBigCodeModel"], "n_embd": 32, "n_head": 4, "n_inner": 37, "n_layer": 5, "vocab_size": 1024}, "hkunlp/instructor-large": {"architectures": ["T5EncoderModel"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "lmsys/vicuna-7b-v1.5": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "fabiochiu/t5-small-medium-title-generation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "TheBloke/Llama-2-7b-Chat-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "skt/kogpt2-base-v2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 51200}, "google/t5-v1_1-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "Maykeye/TinyLLama-v0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 64, "intermediate_size": 256, "num_attention_heads": 16, "num_hidden_layers": 8, "vocab_size": 32000}, "TheBloke/Llama-2-13B-chat-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "tiiuae/falcon-40b": {"architectures": ["RWForCausalLM"], "hidden_size": 8192, "n_head": 128, "n_layer": 60, "vocab_size": 65024}, "sonoisa/t5-base-japanese-title-generation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "Rostlab/prot_t5_xl_uniref50": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 16384, "d_model": 1024, "num_heads": 32, "num_layers": 24, "vocab_size": 128}, "TheBloke/vicuna-7B-v1.3-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "daryl149/llama-2-7b-chat-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "stabilityai/StableBeluga-7B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "meta-llama/Llama-2-70b-hf": 
{"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "TheBloke/MythoMax-L2-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "czurita/nsql-llama-2-7B-sharded-bf16-2GB": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "vennify/t5-base-grammar-correction": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "czearing/story-to-title": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "google/byt5-large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3840, "d_model": 1536, "num_heads": 16, "num_layers": 36, "vocab_size": 384}, "HuggingFaceH4/starchat-beta": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 40, "vocab_size": 49156}, "codellama/CodeLlama-34b-Instruct-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "openlm-research/open_llama_13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "optimum/t5-small": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "t5-3b": {"architectures": ["T5WithLMHeadModel"], "d_ff": 16384, "d_model": 1024, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "humarin/chatgpt_paraphraser_on_T5_base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "Gustavosta/MagicPrompt-Stable-Diffusion": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "bigscience/bloomz-7b1": {"architectures": ["BloomForCausalLM"], "n_inner": null, "n_layer": 30, "num_attention_heads": 32, "vocab_size": 250880}, "trl-internal-testing/tiny-random-GPTNeoXForCausalLM": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 32, "intermediate_size": 37, "num_attention_heads": 4, "num_hidden_layers": 5, "vocab_size": 1024}, "NousResearch/Yarn-Llama-2-7b-64k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "khhuang/zerofec-qa2claim-t5-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "h2oai/h2ogpt-gm-oasst1-en-2048-falcon-7b-v3": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "khhuang/zerofec-daqa-t5-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "declare-lab/flan-alpaca-gpt4-xl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "codellama/CodeLlama-7b-hf": {"architectures": 
["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32016}, "lmsys/vicuna-13b-v1.3": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Rostlab/prot_t5_xl_half_uniref50-enc": {"architectures": ["T5EncoderModel"], "d_ff": 16384, "d_model": 1024, "num_heads": 32, "num_layers": 24, "vocab_size": 128}, "google/mt5-small": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250112}, "Salesforce/safety-flan-t5-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "patrickvonplaten/t5-tiny-random": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 256, "d_model": 64, "num_heads": 2, "num_layers": 2, "vocab_size": 32128}, "google/flan-ul2": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 16384, "d_model": 4096, "num_heads": 16, "num_layers": 32, "vocab_size": 32128}, "EleutherAI/pythia-70m": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 512, "intermediate_size": 2048, "num_attention_heads": 8, "num_hidden_layers": 6, "vocab_size": 50304}, "bigscience/mt0-large": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 250112}, "stevhliu/my_awesome_billsum_model": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "EleutherAI/pythia-70m-deduped": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 512, "intermediate_size": 2048, "num_attention_heads": 8, "num_hidden_layers": 6, "vocab_size": 50304}, "lmsys/vicuna-13b-v1.5": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "PAIXAI/Astrid-1B": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 8, "num_hidden_layers": 16, "vocab_size": 50304}, "Phind/Phind-CodeLlama-34B-Python-v1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "pszemraj/flan-t5-large-grammar-synthesis": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "Voicelab/vlt5-base-keywords": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 50048}, "togethercomputer/Llama-2-7B-32K-Instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "huggyllama/llama-65b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "ai-forever/ruGPT-3.5-13B": {"architectures": ["GPT2LMHeadModel"], "n_embd": 5120, "n_head": 40, "n_inner": null, "n_layer": 40, "vocab_size": 50272}, "Einmalumdiewelt/T5-Base_GNAD": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "google/t5-v1_1-xl": {"architectures": 
["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "Austism/chronos-hermes-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "upstage/SOLAR-0-70b-16bit": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "bigscience/bloom-7b1": {"architectures": ["BloomForCausalLM"], "hidden_size": 4096, "n_head": 32, "n_inner": null, "n_layer": 30, "vocab_size": 250880}, "nlpai-lab/kullm-polyglot-12.8b-v2": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 30080}, "codellama/CodeLlama-13b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "hf-internal-testing/tiny-random-GPT2Model": {"architectures": ["GPT2Model"], "n_embd": 32, "n_head": 4, "n_inner": 37, "n_layer": 5, "vocab_size": 1024}, "Gryphe/MythoMax-L2-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "openlm-research/open_llama_3b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "TheBloke/Llama-2-70B-chat-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "trl-internal-testing/dummy-GPT2-correct-vocab": {"architectures": ["GPT2LMHeadModel"], "n_embd": 32, "n_head": 4, "n_inner": 37, "n_layer": 5, "vocab_size": 50257}, "charsiu/g2p_multilingual_byT5_small_100": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3584, "d_model": 1472, "num_heads": 6, "num_layers": 12, "vocab_size": 384}, "EleutherAI/pythia-160m": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 768, "intermediate_size": 3072, "num_attention_heads": 12, "num_hidden_layers": 12, "vocab_size": 50304}, "ElnaggarLab/ankh-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 48, "vocab_size": 144}, "trl-internal-testing/tiny-random-GPT2LMHeadModel": {"architectures": ["GPT2LMHeadModel"], "n_embd": 32, "n_head": 4, "n_inner": 37, "n_layer": 5, "vocab_size": 1024}, "openlm-research/open_llama_7b_v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/OpenAssistant-Llama2-13B-Orca-v2-8K-3166-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "codellama/CodeLlama-7b-Instruct-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32016}, "WizardLM/WizardCoder-Python-34B-V1.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32001}, "pszemraj/grammar-synthesis-small": {"architectures": 
["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 32128}, "TheBloke/llama-2-70b-Guanaco-QLoRA-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "openlm-research/open_llama_3b_v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "IDEA-CCNL/Wenzhong-GPT2-110M": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": 3072, "n_layer": 12, "vocab_size": 50304}, "microsoft/DialoGPT-small": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_layer": 12, "vocab_size": 50257}, "trl-internal-testing/tiny-random-BloomForCausalLM": {"architectures": ["BloomForCausalLM"], "hidden_size": 32, "n_head": 4, "n_layer": 5, "vocab_size": 1024}, "trl-internal-testing/tiny-random-T5ForConditionalGeneration": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 37, "d_model": 32, "num_heads": 4, "num_layers": 5, "vocab_size": 1302}, "hf-internal-testing/tiny-random-onnx-mt5": {"architectures": ["MT5Model"], "d_ff": 1024, "d_model": 16, "num_heads": 4, "num_layers": 2, "vocab_size": 250112}, "NousResearch/Llama-2-7b-chat-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "trl-internal-testing/tiny-random-MT5ForConditionalGeneration": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 256, "d_model": 64, "num_heads": 4, "num_layers": 8, "vocab_size": 5100}, "fxmarty/tiny-testing-gpt2-remote-code": {"architectures": ["GPT2CustomLMHeadModel"], "intermediate_size": 37, "n_embd": 32, "n_head": 4, "n_inner": null, "n_layer": 5, "vocab_size": 1000}, "castorini/monot5-base-msmarco-10k": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "microsoft/DialoGPT-large": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_layer": 36, "vocab_size": 50257}, "bigscience/bloomz-560m": {"architectures": ["BloomForCausalLM"], "n_inner": null, "n_layer": 24, "num_attention_heads": 16, "vocab_size": 250880}, "Open-Orca/OpenOrca-Platypus2-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "google/byt5-small": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3584, "d_model": 1472, "num_heads": 6, "num_layers": 12, "vocab_size": 384}, "bigscience/bloom-1b7": {"architectures": ["BloomForCausalLM"], "n_inner": null, "n_layer": 24, "num_attention_heads": 16, "vocab_size": 250880}, "elinas/chronos-13b-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "google/t5-efficient-tiny": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 256, "num_heads": 4, "num_layers": 4, "vocab_size": 32128}, "bigscience/bloom-1b1": {"architectures": ["BloomForCausalLM"], "n_inner": null, "n_layer": 24, "num_attention_heads": 16, "vocab_size": 250880}, "EleutherAI/polyglot-ko-1.3b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 
30080}, "bigscience/bloom-3b": {"architectures": ["BloomForCausalLM"], "n_inner": null, "n_layer": 30, "num_attention_heads": 32, "vocab_size": 250880}, "TinyPixel/Llama-2-7B-bf16-sharded": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "edumunozsala/llama-2-7b-int4-python-code-20k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "yahma/llama-7b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "beomi/KoAlpaca-Polyglot-12.8B": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 30003}, "stanfordnlp/backpack-gpt2": {"architectures": ["BackpackGPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50264}, "prithivida/grammar_error_correcter_v1": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "lvkaokao/llama2-7b-hf-chat-lora-v3": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "google/t5-v1_1-small": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 32128}, "TheBloke/gpt4-alpaca-lora_mlp-65B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "google/mt5-large": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 250112}, "EleutherAI/pythia-2.8b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50304}, "cyberagent/open-calm-7b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 52224}, "lvwerra/gpt2-imdb": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_layer": 12, "vocab_size": 50257}, "WizardLM/WizardLM-13B-V1.2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "KoboldAI/GPT-NeoX-20B-Erebus": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 6144, "intermediate_size": 24576, "num_attention_heads": 64, "num_hidden_layers": 44, "vocab_size": 50432}, "togethercomputer/RedPajama-INCITE-Instruct-3B-v1": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "aditi2222/automatic_title_generation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "shibing624/chinese-alpaca-plus-13b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 49954}, "optimum/gpt2": {"architectures": 
["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "togethercomputer/LLaMA-2-7B-32K": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "amazon/FalconLite": {"architectures": ["RWForCausalLM"], "hidden_size": 8192, "n_head": 128, "n_layer": 60, "vocab_size": 65040}, "EleutherAI/polyglot-ko-5.8b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 16, "num_hidden_layers": 28, "vocab_size": 30080}, "databricks/dolly-v2-7b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50280}, "snrspeaks/t5-one-line-summary": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "lmsys/vicuna-33b-v1.3": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "TheBloke/OpenOrca-Platypus2-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "TheBloke/Llama-2-13B-Chat-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "sdadas/mt5-base-translator-pl-en": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250100}, "TheBloke/Llama-2-7b-chat-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "bigcode/gpt_bigcode-santacoder": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 2048, "n_head": 16, "n_inner": 8192, "n_layer": 24, "vocab_size": 49280}, "h2oai/h2ogpt-gm-oasst1-en-2048-falcon-7b-v2": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "TheBloke/airoboros-l2-70B-GPT4-2.0-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "lmsys/vicuna-13b-v1.5-16k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "bigcode/santacoder": {"architectures": ["GPT2LMHeadCustomModel"], "n_embd": 2048, "n_head": 16, "n_inner": 8192, "n_layer": 24, "vocab_size": 49280}, "togethercomputer/RedPajama-INCITE-Chat-3B-v1": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "ai-forever/mGPT": {"architectures": ["GPT2LMHeadModel"], "n_embd": 2048, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 100000}, "openlm-research/open_llama_7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "decapoda-research/llama-13b-hf": {"architectures": ["LLaMAForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, 
"vocab_size": 32000}, "OpenAssistant/codellama-13b-oasst-sft-v10": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32032}, "rinna/bilingual-gpt-neox-4b-instruction-sft": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2816, "intermediate_size": 11264, "num_attention_heads": 22, "num_hidden_layers": 36, "vocab_size": 65536}, "KoboldAI/LLaMA2-13B-Holomax-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "MBZUAI/LaMini-T5-61M": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "google/t5-v1_1-xxl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 10240, "d_model": 4096, "num_heads": 64, "num_layers": 24, "vocab_size": 32128}, "EleutherAI/pythia-1.4b-deduped": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50304}, "stabilityai/StableBeluga-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "tiiuae/falcon-rw-1b": {"architectures": ["FalconForCausalLM"], "hidden_size": 2048, "num_attention_heads": 32, "num_hidden_layers": 24, "vocab_size": 50304}, "ClueAI/ChatYuan-large-v2": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "af1tang/personaGPT": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50263}, "google/t5-large-lm-adapt": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "vilsonrodrigues/falcon-7b-instruct-sharded": {"architectures": ["FalconForCausalLM"], "hidden_size": 4544, "num_attention_heads": 71, "num_hidden_layers": 32, "vocab_size": 65024}, "petals-team/falcon-rw-1b": {"architectures": ["FalconForCausalLM"], "hidden_size": 2048, "num_attention_heads": 32, "num_hidden_layers": 24, "vocab_size": 50304}, "bigscience/T0_3B": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "TheTravellingEngineer/llama2-7b-hf-guanaco": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Salesforce/codet5-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "EleutherAI/pythia-2.8b-deduped": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50304}, "The-Face-Of-Goonery/Huginn-13b-v1.2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "FredZhang7/distilgpt2-stable-diffusion-v2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 6, "vocab_size": 50257}, "WizardLM/WizardCoder-15B-V1.0": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 6144, "n_head": 48, "n_inner": 
24576, "n_layer": 40, "vocab_size": 49153}, "EleutherAI/pythia-410m-deduped": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 1024, "intermediate_size": 4096, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50304}, "huggyllama/llama-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "ybelkada/falcon-7b-sharded-bf16": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "MingZhong/unieval-sum": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32100}, "NousResearch/Nous-Hermes-Llama2-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32032}, "csebuetnlp/mT5_multilingual_XLSum": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "hkunlp/instructor-xl": {"architectures": ["T5EncoderModel"], "d_ff": 16384, "d_model": 1024, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "h2oai/h2ogpt-4096-llama2-13b-chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "HuggingFaceH4/tiny-random-LlamaForCausalLM": {"architectures": ["LlamaForCausalLM"], "hidden_size": 16, "intermediate_size": 64, "num_attention_heads": 4, "num_hidden_layers": 2, "vocab_size": 32000}, "EleutherAI/polyglot-ko-12.8b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 30080}, "databricks/dolly-v2-12b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 36, "vocab_size": 50280}, "mrm8488/t5-base-finetuned-span-sentiment-extraction": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "WizardLM/WizardLM-70B-V1.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32001}, "codellama/CodeLlama-34b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "Salesforce/codet5-base-multi-sum": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "MBZUAI/LaMini-T5-738M": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "codellama/CodeLlama-13b-Instruct-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "h2oai/h2ogpt-4096-llama2-7b-chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, 
"num_hidden_layers": 36, "vocab_size": 50288}, "bigscience/bloom": {"architectures": ["BloomForCausalLM"], "n_layer": 70, "num_attention_heads": 112, "vocab_size": 250880}, "TigerResearch/tigerbot-13b-chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 60928}, "TheBloke/airoboros-l2-70B-gpt4-1.4.1-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "Austism/chronos-hermes-13b-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32032}, "snrspeaks/KeyPhraseTransformer": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "Rocketknight1/tiny-random-falcon-7b": {"architectures": ["FalconForCausalLM"], "hidden_size": 1136, "num_attention_heads": 71, "num_hidden_layers": 2, "vocab_size": 65024}, "hf-internal-testing/tiny-random-T5Model": {"architectures": ["T5Model"], "d_ff": 37, "d_model": 32, "num_heads": 4, "num_layers": 5, "vocab_size": 32100}, "sambanovasystems/BLOOMChat-176B-v1": {"architectures": ["BloomForCausalLM"], "hidden_size": 14336, "n_head": 112, "n_layer": 70, "vocab_size": 250880}, "huggyllama/llama-30b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "elyza/ELYZA-japanese-Llama-2-7b-instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "lcw99/t5-base-korean-text-summary": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 50358}, "it5/it5-base-news-summarization": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32103}, "uer/gpt2-chinese-cluecorpussmall": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 21128}, "t5-11b": {"architectures": ["T5WithLMHeadModel"], "d_ff": 65536, "d_model": 1024, "num_heads": 128, "num_layers": 24, "vocab_size": 32128}, "KoboldAI/LLaMA2-13B-Holomax": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/Llama-2-7B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "bigscience/bloomz-3b": {"architectures": ["BloomForCausalLM"], "n_inner": null, "n_layer": 30, "num_attention_heads": 32, "vocab_size": 250880}, "lmsys/vicuna-7b-v1.5-16k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "sonoisa/t5-base-japanese": {"architectures": ["T5Model"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "line-corporation/japanese-large-lm-3.6b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 3072, "intermediate_size": 12288, "num_attention_heads": 32, "num_hidden_layers": 30, "vocab_size": 51200}, 
"TheBloke/Llama-2-7B-32K-Instruct-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "EleutherAI/pythia-410m": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 1024, "intermediate_size": 4096, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50304}, "NousResearch/Llama-2-70b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "TheBloke/falcon-7b-instruct-GPTQ": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "eachadea/vicuna-13b-1.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "beomi/llama-2-ko-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 46336}, "TheBloke/falcon-40b-instruct-GPTQ": {"architectures": ["RWForCausalLM"], "hidden_size": 8192, "n_head": 128, "n_layer": 60, "vocab_size": 65024}, "TheBloke/Llama-2-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "garage-bAInd/Platypus2-70B-instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "rajkumarrrk/gpt2-fine-tuned-on-imdb-positive-reviews": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "cerebras/Cerebras-GPT-13B": {"architectures": ["GPT2Model"], "n_embd": 5120, "n_head": 40, "n_inner": 20480, "n_layer": 40, "vocab_size": 50257}, "rinna/japanese-gpt2-medium": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": 4096, "n_layer": 24, "vocab_size": 32000}, "bigscience/T0pp": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 10240, "d_model": 4096, "num_heads": 64, "num_layers": 24, "vocab_size": 32128}, "Phind/Phind-CodeLlama-34B-v1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "beomi/kykim-gpt3-kor-small_based_on_gpt2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 42000}, "Pi3141/DialoGPT-medium-elon-3": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "EleutherAI/pythia-1b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 8, "num_hidden_layers": 16, "vocab_size": 50304}, "ai-forever/rugpt3large_based_on_gpt2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1536, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "jondurbin/airoboros-l2-13b-gpt4-m2.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "codellama/CodeLlama-13b-Python-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 
32000}, "AUTOMATIC/promptgen-lexart": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 6, "vocab_size": 50257}, "Salesforce/codet5-small": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32100}, "h2oai/h2ogpt-oig-oasst1-512-6_9b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "rinna/japanese-gpt-neox-3.6b-instruction-ppo": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2816, "intermediate_size": 11264, "num_attention_heads": 22, "num_hidden_layers": 36, "vocab_size": 32000}, "prithivida/informal_to_formal_styletransfer": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "TheBloke/Wizard-Vicuna-13B-Uncensored-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "matsuo-lab/weblab-10b-instruction-sft": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4864, "intermediate_size": 19456, "num_attention_heads": 38, "num_hidden_layers": 36, "vocab_size": 50277}, "succinctly/text2image-prompt-generator": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "TheBloke/Llama-2-7B-Chat-GGML": {}, "TheBloke/Llama-2-70B-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "sentence-transformers/gtr-t5-large": {"architectures": ["T5EncoderModel"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "h2oai/h2ogpt-gm-oasst1-en-2048-falcon-40b-v2": {"architectures": ["RWForCausalLM"], "hidden_size": 8192, "n_head": 128, "n_layer": 60, "vocab_size": 65024}, "togethercomputer/RedPajama-INCITE-Base-3B-v1": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "rinna/bilingual-gpt-neox-4b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2816, "intermediate_size": 11264, "num_attention_heads": 22, "num_hidden_layers": 36, "vocab_size": 65536}, "TheBloke/Llama-2-13B-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "pankajmathur/orca_mini_v3_70b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "OpenAssistant/llama2-13b-orca-8k-3319": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "TheBloke/StableBeluga-7B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "defog/sqlcoder": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 40, "vocab_size": 49152}, "WizardLM/WizardCoder-Python-13B-V1.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, 
"num_hidden_layers": 40, "vocab_size": 32001}, "stabilityai/stablelm-tuned-alpha-3b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 32, "num_hidden_layers": 16, "vocab_size": 50688}, "cyberagent/open-calm-small": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 768, "intermediate_size": 3072, "num_attention_heads": 12, "num_hidden_layers": 12, "vocab_size": 52096}, "TheBloke/Wizard-Vicuna-30B-Uncensored-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "TheBloke/WizardLM-70B-V1.0-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32001}, "bigscience/bigscience-small-testing": {"architectures": ["BloomModel"], "hidden_size": 64, "n_head": 8, "n_inner": null, "n_layer": 2, "vocab_size": 250880}, "cyberagent/open-calm-1b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 52096}, "lamini/lamini_docs_finetuned": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 512, "intermediate_size": 2048, "num_attention_heads": 8, "num_hidden_layers": 6, "vocab_size": 50304}, "EnglishVoice/t5-base-uk-to-us-english": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "codellama/CodeLlama-7b-Python-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/WizardLM-13B-V1.2-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "EleutherAI/pythia-160m-deduped": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 768, "intermediate_size": 3072, "num_attention_heads": 12, "num_hidden_layers": 12, "vocab_size": 50304}, "jphme/Llama-2-13b-chat-german": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "EleutherAI/pythia-12b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 36, "vocab_size": 50688}, "TheBloke/WizardLM-1.0-Uncensored-Llama2-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Salesforce/codet5p-220m": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "google/mt5-xl": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 250112}, "cerebras/Cerebras-GPT-111M": {"n_inner": 3072, "n_embd": 768, "n_head": 12, "n_layer": 10, "vocab_size": 50257}, "google/t5-v1_1-large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "TheBloke/vicuna-7B-v1.5-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 
32000}, "chavinlo/alpaca-native": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "kimnt93/kmv-7b-03": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "NumbersStation/nsql-llama-2-7B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "cerebras/Cerebras-GPT-1.3B": {"n_inner": 8192, "n_embd": 2048, "n_head": 16, "n_layer": 24, "vocab_size": 50257}, "trl-internal-testing/tiny-T5ForConditionalGeneration-correct-vocab": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 37, "d_model": 32, "num_heads": 4, "num_layers": 5, "vocab_size": 32128}, "akreal/tiny-random-LlamaForCausalLM": {"architectures": ["LlamaForCausalLM"], "hidden_size": 16, "intermediate_size": 64, "num_attention_heads": 4, "num_hidden_layers": 2, "vocab_size": 32000}, "akreal/tiny-random-BloomForCausalLM": {"architectures": ["BloomForCausalLM"], "hidden_size": 32, "n_head": 4, "n_layer": 5, "vocab_size": 1024}, "NousResearch/Nous-Hermes-llama-2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "ai-forever/rugpt3small_based_on_gpt2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50264}, "VMware/open-llama-7b-v2-open-instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "robertmyers/targon-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "TheBloke/Nous-Hermes-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "TheBloke/WizardLM-33B-V1.0-Uncensored-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "TheBloke/WizardLM-7B-uncensored-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "ramsrigouthamg/t5_paraphraser": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "tinkoff-ai/ruDialoGPT-medium": {"n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50261}, "OpenAssistant/falcon-7b-sft-mix-2000": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65040}, "bigcode/tiny_starcoder_py": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 768, "n_head": 12, "n_inner": 3072, "n_layer": 20, "vocab_size": 49152}, "rinna/japanese-gpt-1b": {"architectures": ["GPT2LMHeadModel"], "n_embd": 2048, "n_head": 16, "n_inner": 8192, "n_layer": 24, "vocab_size": 44928}, "TheBloke/orca_mini_v3_70B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, 
"UBC-NLP/turjuman": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 110080}, "h2oai/h2ogpt-4096-llama2-70b-chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "Phind/Phind-CodeLlama-34B-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "elyza/ELYZA-japanese-Llama-2-7b-fast-instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 45043}, "iarfmoose/t5-base-question-generator": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32102}, "TheBloke/Llama-2-7B-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "mrm8488/t5-base-finetuned-emotion": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "hkunlp/instructor-base": {"architectures": ["T5EncoderModel"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "fxmarty/onnx-tiny-random-gpt2-without-merge": {"architectures": ["GPT2Model"], "n_embd": 32, "n_head": 4, "n_inner": 37, "n_layer": 5, "vocab_size": 1024}, "fxmarty/onnx-tiny-random-gpt2-with-merge": {"architectures": ["GPT2Model"], "n_embd": 32, "n_head": 4, "n_inner": 37, "n_layer": 5, "vocab_size": 1024}, "microsoft/GODEL-v1_1-large-seq2seq": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32102}, "rinna/japanese-gpt-neox-3.6b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2816, "intermediate_size": 11264, "num_attention_heads": 22, "num_hidden_layers": 36, "vocab_size": 32000}, "cyberagent/open-calm-3b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 52224}, "eachadea/vicuna-7b-1.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "beomi/KoAlpaca-Polyglot-5.8B": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 16, "num_hidden_layers": 28, "vocab_size": 30080}, "grammarly/coedit-large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32100}, "TheBloke/Platypus2-70B-Instruct-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "codellama/CodeLlama-34b-Python-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "noamwies/llama-test-gqa-with-better-transformer": {"architectures": ["LlamaForCausalLM"], "hidden_size": 128, "intermediate_size": 344, "num_attention_heads": 8, "num_hidden_layers": 2, "vocab_size": 2000}, "bigscience/bloomz-7b1-mt": {"architectures": 
["BloomForCausalLM"], "n_inner": null, "n_layer": 30, "num_attention_heads": 32, "vocab_size": 250880}, "Salesforce/codet5p-770m": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32100}, "OpenAssistant/pythia-12b-sft-v8-7k-steps": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 36, "vocab_size": 50288}, "augtoma/qCammel-70-x": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "NousResearch/Llama-2-13b-chat-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "plguillou/t5-base-fr-sum-cnndm": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32102}, "WeOpenML/PandaLM-7B-v1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "VMware/open-llama-7b-open-instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "pankajmathur/orca_mini_v3_7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "google/t5-xl-lm-adapt": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "LinkSoul/Chinese-Llama-2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "line-corporation/japanese-large-lm-3.6b-instruction-sft": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 3072, "intermediate_size": 12288, "num_attention_heads": 32, "num_hidden_layers": 30, "vocab_size": 51200}, "OpenAssistant/oasst-sft-1-pythia-12b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 36, "vocab_size": 50288}, "ehartford/WizardLM-7B-Uncensored": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "upstage/llama-30b-instruct-2048": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "cyberagent/open-calm-large": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 1536, "intermediate_size": 6144, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 52096}, "Gryphe/MythoLogic-L2-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "eenzeenee/t5-small-korean-summarization": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 50358}, "google/t5-xxl-lm-adapt": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 10240, "d_model": 4096, "num_heads": 64, "num_layers": 24, "vocab_size": 32128}, 
"mywateriswet/ShuanBot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "hf-internal-testing/tiny-random-bloom": {"architectures": ["BloomModel"], "hidden_size": 64, "n_head": 8, "n_inner": null, "n_layer": 2, "vocab_size": 250880}, "TheBloke/Llama-2-13B-chat-GGML": {}, "decapoda-research/llama-30b-hf": {"architectures": ["LLaMAForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "lmsys/longchat-7b-v1.5-32k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "ziqingyang/chinese-alpaca-2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 55296}, "nlpai-lab/kullm-polyglot-5.8b-v2": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 16, "num_hidden_layers": 28, "vocab_size": 30080}, "google/byt5-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3968, "d_model": 1536, "num_heads": 12, "num_layers": 18, "vocab_size": 384}, "stabilityai/stablelm-tuned-alpha-7b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 6144, "intermediate_size": 24576, "num_attention_heads": 48, "num_hidden_layers": 16, "vocab_size": 50432}, "PygmalionAI/pygmalion-1.3b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50304}, "stanford-crfm/BioMedLM": {"architectures": ["GPT2LMHeadModel"], "n_embd": 2560, "n_head": 20, "n_inner": null, "n_layer": 32, "vocab_size": 28896}, "PY007/TinyLlama-1.1B-step-50K-105b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 2048, "intermediate_size": 5632, "num_attention_heads": 32, "num_hidden_layers": 22, "vocab_size": 32000}, "georgesung/llama2_7b_chat_uncensored": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "bigscience/mt0-small": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250112}, "TheBloke/WizardCoder-15B-1.0-GPTQ": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 40, "vocab_size": 49153}, "google/t5-base-lm-adapt": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "OpenAssistant/falcon-40b-sft-top1-560": {"architectures": ["RWForCausalLM"], "hidden_size": 8192, "n_head": 128, "n_layer": 60, "vocab_size": 65040}, "TheBloke/WizardLM-30B-Uncensored-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32001}, "TheBloke/WizardCoder-Python-34B-V1.0-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32001}, "garage-bAInd/Camel-Platypus2-70B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "DeepFloyd/t5-v1_1-xxl": {"architectures": 
["T5EncoderModel"], "d_ff": 10240, "d_model": 4096, "num_heads": 64, "num_layers": 24, "vocab_size": 32128}, "EleutherAI/pythia-1b-deduped": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 8, "num_hidden_layers": 16, "vocab_size": 50304}, "TheBloke/CodeLlama-7B-Instruct-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32016}, "kfkas/Llama-2-ko-7b-Chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 46336}, "valhalla/t5-small-qa-qg-hl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32102}, "FlagAlpha/Llama2-Chinese-13b-Chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Open-Orca/OpenOrcaxOpenChat-Preview2-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "trl-internal-testing/tiny-random-LlamaForCausalLM": {"architectures": ["LlamaForCausalLM"], "hidden_size": 16, "intermediate_size": 64, "num_attention_heads": 4, "num_hidden_layers": 2, "vocab_size": 32000}, "abhishek/llama-2-7b-hf-small-shards": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "togethercomputer/RedPajama-INCITE-7B-Base": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "Salesforce/codegen25-7b-multi": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 51200}, "fabiochiu/t5-base-tag-generation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "MBZUAI/LaMini-Flan-T5-248M": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "bigscience/bloomz-1b7": {"architectures": ["BloomForCausalLM"], "n_inner": null, "n_layer": 24, "num_attention_heads": 16, "vocab_size": 250880}, "valhalla/t5-base-qg-hl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32101}, "Wi/gptp": {"architectures": ["GPTPModel"], "n_embd": 128, "n_head": 4, "n_inner": null, "n_layer": 4, "vocab_size": 1000}, "medalpaca/medalpaca-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "yentinglin/Taiwan-LLaMa-v1.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "rinna/japanese-gpt-neox-small": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 768, "intermediate_size": 3072, "num_attention_heads": 12, "num_hidden_layers": 12, "vocab_size": 44416}, "TheBloke/llama2_7b_chat_uncensored-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, 
"intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "EleutherAI/pythia-1.4b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50304}, "daryl149/llama-2-13b-chat-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "flax-community/gpt-2-spanish": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "KoboldAI/LLAMA2-13B-Holodeck-1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "jondurbin/airoboros-13b-gpt4-1.4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "mrm8488/t5-base-finetuned-question-generation-ap": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "OpenBuddy/openbuddy-llama2-13b-v8.1-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 37632}, "EleutherAI/pythia-6.9b-deduped": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "tscholak/3vnuv1vf": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32102}, "OpenAssistant/llama2-70b-oasst-sft-v10": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32007}, "TheBloke/vicuna-13B-v1.5-16K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "OpenAssistant/falcon-7b-sft-top1-696": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65040}, "sentence-transformers/sentence-t5-large": {"architectures": ["T5EncoderModel"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "TheBloke/Nous-Hermes-Llama2-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32032}, "mesolitica/finetune-translation-t5-super-super-tiny-standard-bahasa-cased": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 512, "d_model": 128, "num_heads": 6, "num_layers": 2, "vocab_size": 32100}, "Henk717/spring-dragon": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "openchat/openchat_v3.2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "WizardLM/WizardMath-70B-V1.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32002}, "potsawee/t5-large-generation-squad-QuestionAnswer": 
{"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "TheBloke/Phind-CodeLlama-34B-v2-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "pankajmathur/orca_mini_3b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "fffrrt/ruGPT-3.5-13B-GPTQ": {"architectures": ["GPT2LMHeadModel"], "n_embd": 5120, "n_head": 40, "n_inner": null, "n_layer": 40, "vocab_size": 50272}, "kykim/gpt3-kor-small_based_on_gpt2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 42000}, "PAIXAI/Astrid-1B-CPU": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 8, "num_hidden_layers": 16, "vocab_size": 50304}, "ElnaggarLab/ankh-large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3840, "d_model": 1536, "num_heads": 16, "num_layers": 48, "vocab_size": 144}, "togethercomputer/RedPajama-INCITE-7B-Chat": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "ramsrigouthamg/t5_squad_v1": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "KETI-AIR/ke-t5-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 64128}, "sentence-transformers/gtr-t5-base": {"architectures": ["T5EncoderModel"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "ramsrigouthamg/t5-large-paraphraser-diverse-high-quality": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "rinna/japanese-gpt2-small": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": 3072, "n_layer": 12, "vocab_size": 32000}, "rinna/bilingual-gpt-neox-4b-instruction-ppo": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2816, "intermediate_size": 11264, "num_attention_heads": 22, "num_hidden_layers": 36, "vocab_size": 65536}, "ramsrigouthamg/t5_boolean_questions": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "philschmid/flan-t5-base-samsum": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "google/t5-small-lm-adapt": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 32128}, "matsuo-lab/weblab-10b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4864, "intermediate_size": 19456, "num_attention_heads": 38, "num_hidden_layers": 36, "vocab_size": 50304}, "stabilityai/stablecode-completion-alpha-3b-4k": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 49152}, "IDEA-CCNL/Ziya-LLaMA-7B-Reward": {"architectures": ["LlamaRewardModel"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, 
"vocab_size": 32001}, "ichitaka/falcon-40b-instruct-8bit": {"architectures": ["RWForCausalLM"], "hidden_size": 8192, "n_head": 128, "n_layer": 60, "vocab_size": 65024}, "TheBloke/WizardCoder-Python-13B-V1.0-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "togethercomputer/Pythia-Chat-Base-7B": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "TheBloke/wizardLM-7B-HF": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "AUTOMATIC/promptgen-majinai-unsafe": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 6, "vocab_size": 50257}, "pinkmanlove/llama-7b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "lmsys/longchat-13b-16k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "togethercomputer/RedPajama-INCITE-7B-Instruct": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "lmsys/vicuna-13b-v1.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Salesforce/codet5-large": {"architectures": ["T5WithLMHeadModel"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32100}, "FredZhang7/anime-anything-promptgen-v2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 6, "vocab_size": 50257}, "Salesforce/xgen-7b-8k-inst": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 51200}, "jojo0217/step3_mk7": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 30008}, "EleutherAI/pythia-14m": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 128, "intermediate_size": 512, "num_attention_heads": 4, "num_hidden_layers": 6, "vocab_size": 50304}, "cerebras/Cerebras-GPT-590M": {"n_inner": 6144, "n_embd": 1536, "n_head": 12, "n_layer": 18, "vocab_size": 50257}, "dbmdz/german-gpt2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50265}, "KoboldAI/GPT-NeoX-20B-Skein": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 6144, "intermediate_size": 24576, "num_attention_heads": 64, "num_hidden_layers": 44, "vocab_size": 50432}, "beomi/polyglot-ko-12.8b-safetensors": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 30080}, "sentence-transformers/sentence-t5-base": {"architectures": ["T5EncoderModel"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "decapoda-research/llama-65b-hf": {"architectures": ["LLaMAForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, 
"num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "mesolitica/finetune-translation-t5-small-standard-bahasa-cased": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32100}, "marcsun13/bloom-1b7_with_lm_head": {"architectures": ["BloomForCausalLM"], "hidden_size": 2048, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 250880}, "MBZUAI/LaMini-Flan-T5-783M": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "medalpaca/medalpaca-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "JulesBelveze/t5-small-headline-generator": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32100}, "Michau/t5-base-en-generate-headline": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "TheBloke/Falcon-180B-Chat-GPTQ": {"architectures": ["FalconForCausalLM"], "hidden_size": 14848, "num_attention_heads": 232, "num_hidden_layers": 80, "vocab_size": 65024}, "Salesforce/xgen-7b-8k-base": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 51200}, "ai-forever/ruT5-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "KRAFTON/KORani-v3-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "bigscience/mt0-xxl-mt": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 10240, "d_model": 4096, "num_heads": 64, "num_layers": 24, "vocab_size": 250112}, "garage-bAInd/Stable-Platypus2-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/Wizard-Vicuna-13B-Uncensored-HF": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "h2oai/h2ogpt-oasst1-512-12b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 36, "vocab_size": 50688}, "Parth/result": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "declare-lab/flan-alpaca-large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "sdadas/mt5-base-translator-en-pl": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250100}, "ziqingyang/chinese-llama-2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 55296}, "NousResearch/Nous-Hermes-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, 
"pragmatic-programs/listener-suffix-idx-300k": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3584, "d_model": 1472, "num_heads": 6, "num_layers": 12, "vocab_size": 384}, "jinaai/jina-embedding-l-en-v1": {"architectures": ["T5EncoderModel"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "stabilityai/stablelm-base-alpha-3b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 32, "num_hidden_layers": 16, "vocab_size": 50688}, "razent/SciFive-base-Pubmed_PMC": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "uer/gpt2-chinese-poem": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 22557}, "openchat/openchat_v3.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "IDEA-CCNL/Ziya-LLaMA-13B-v1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 39424}, "Sao10K/Mythical-Destroyer-V2-L2-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "juierror/text-to-sql-with-table-schema": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "MingZhong/unieval-fact": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32100}, "TheBloke/vicuna-13B-v1.5-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "cerebras/Cerebras-GPT-256M": {"n_inner": 4352, "n_embd": 1088, "n_head": 17, "n_layer": 14, "vocab_size": 50257}, "declare-lab/flan-alpaca-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "ehartford/WizardLM-1.0-Uncensored-Llama2-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "aubmindlab/aragpt2-base": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 64000}, "valhalla/t5-small-e2e-qg": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32102}, "elinas/llama-7b-hf-transformers-4.29": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "lmsys/vicuna-13b-delta-v1.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "garage-bAInd/Platypus2-7B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "PKU-Alignment/beaver-7b-v1.0-cost": {"architectures": ["LlamaModelForScore"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, 
"num_hidden_layers": 32, "vocab_size": 32001}, "allenai/unifiedqa-t5-large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "JackFram/llama-160m": {"architectures": ["LlamaForCausalLM"], "hidden_size": 768, "intermediate_size": 3072, "num_attention_heads": 12, "num_hidden_layers": 12, "vocab_size": 32000}, "daryl149/llama-2-7b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "akreal/tiny-random-t5": {"d_ff": 37, "d_model": 32, "num_heads": 4, "num_layers": 5, "vocab_size": 99}, "cyberagent/open-calm-medium": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 1024, "intermediate_size": 4096, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 52096}, "The-Face-Of-Goonery/Huginn-13b-FP16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "facebook/tart-full-flan-t5-xl": {"architectures": ["EncT5ForSequenceClassification"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "csebuetnlp/banglat5_banglaparaphrase": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "FlagAlpha/Llama2-Chinese-7b-Chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "jerryjalapeno/Llama-2-1b-0-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 32000}, "NousResearch/Redmond-Puffin-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32032}, "bigscience/bloomz": {"architectures": ["BloomForCausalLM"], "n_layer": 70, "num_attention_heads": 112, "vocab_size": 250880}, "allenai/unifiedqa-t5-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "WizardLM/WizardMath-7B-V1.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "pragmatic-programs/speaker-prefix-idx-300k": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3584, "d_model": 1472, "num_heads": 6, "num_layers": 12, "vocab_size": 384}, "TheBloke/CodeLlama-13B-Instruct-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "TheBloke/Upstage-Llama-2-70B-instruct-v2-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "pinkmanlove/llama-13b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "VietAI/envit5-translation": 
{"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 50048}, "cerebras/Cerebras-GPT-2.7B": {"n_inner": 10240, "n_embd": 2560, "n_head": 32, "n_layer": 32, "vocab_size": 50257}, "Open-Orca/LlongOrca-7B-16k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32003}, "hf-internal-testing/tiny-random-T5ForConditionalGeneration": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 37, "d_model": 32, "num_heads": 4, "num_layers": 5, "vocab_size": 32100}, "juierror/flan-t5-text2sql-with-schema-v2": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32101}, "BeIR/query-gen-msmarco-t5-base-v1": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "conceptofmind/LLongMA-2-13b-16k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "NousResearch/Yarn-Llama-2-13b-128k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "wangrongsheng/MiniGPT-4-LLaMA": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "hf-internal-testing/tiny-random-GPT2ForSequenceClassification": {"architectures": ["GPT2ForSequenceClassification"], "n_embd": 32, "n_head": 4, "n_inner": 37, "n_layer": 5, "vocab_size": 1024}, "zenham/wail_m_e4_16h_2k": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "h2oai/h2ogpt-4096-llama2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "ai-forever/FRED-T5-large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 50364}, "FreedomIntelligence/phoenix-inst-chat-7b": {"architectures": ["BloomForCausalLM"], "hidden_size": 4096, "n_head": 32, "n_inner": null, "n_layer": 30, "vocab_size": 250880}, "castorini/monot5-base-msmarco": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "minlik/chinese-alpaca-plus-7b-merged": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 49954}, "joaogante/tiny-random-gpt2-with-generation-config": {"intermediate_size": 37, "n_embd": 32, "n_head": 4, "n_inner": null, "n_layer": 5, "vocab_size": 1000}, "neulab/gpt2-finetuned-wikitext103": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "jarradh/llama2_70b_chat_uncensored": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "TigerResearch/tigerbot-13b-base": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 
60928}, "rinna/japanese-gpt-neox-3.6b-instruction-sft-v2": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2816, "intermediate_size": 11264, "num_attention_heads": 22, "num_hidden_layers": 36, "vocab_size": 32000}, "bofenghuang/vigogne-2-7b-chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/stable-vicuna-13B-HF": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "aiplanet/effi-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "jondurbin/airoboros-33b-gpt4-m2.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "TheBloke/orca_mini_v3_13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "HuggingFaceH4/starchat-alpha": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 40, "vocab_size": 49156}, "WizardLM/WizardMath-13B-V1.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "upstage/Llama-2-70b-instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "anushehchaudry/llama-2-tiny-random": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8, "intermediate_size": 32, "num_attention_heads": 2, "num_hidden_layers": 1, "vocab_size": 32000}, "fangloveskari/ORCA_LLaMA_70B_QLoRA": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "HyperbeeAI/Tulpar-7b-v0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/Llama-2-70B-Chat-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "csebuetnlp/mT5_m2m_crossSum_enhanced": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "TheBloke/Genz-70b-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "cerebras/Cerebras-GPT-6.7B": {"n_embd": 4096, "vocab_size": 50257, "n_layer": 32, "n_head": 32, "n_inner": 16384}, "ziqingyang/chinese-alpaca-2-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 55296}, "google/t5-small-ssm-nq": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 32128}, "EleutherAI/polyglot-ko-3.8b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 3072, "intermediate_size": 12288, "num_attention_heads": 24, "num_hidden_layers": 32, 
"vocab_size": 30080}, "kashif/stack-llama-2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "line-corporation/japanese-large-lm-1.7b": {"vocab_size": 51200, "n_embd": 2304, "n_layer": 24, "n_head": 24, "n_inner": 9216, "architectures": ["GPT2LMHeadModel"]}, "microsoft/codereviewer": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32216}, "TheBloke/guanaco-7B-HF": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "circulus/Llama-2-7b-orca-v1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "FlagAlpha/Atom-7B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 65000}, "Tap-M/Luna-AI-Llama2-Uncensored": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "K024/mt5-zh-ja-en-trimmed": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 85292}, "deep-learning-analytics/automatic-title-generation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "luodian/llama-7b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "stabilityai/stablelm-base-alpha-7b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 6144, "intermediate_size": 24576, "num_attention_heads": 48, "num_hidden_layers": 16, "vocab_size": 50432}, "OpenLemur/lemur-70b-chat-v1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32005}, "rahular/varta-t5": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 128128}, "rinna/japanese-gpt-neox-3.6b-instruction-sft": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2816, "intermediate_size": 11264, "num_attention_heads": 22, "num_hidden_layers": 36, "vocab_size": 32000}, "garage-bAInd/Platypus-30B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "WizardLM/WizardCoder-Python-7B-V1.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "chavinlo/gpt4-x-alpaca": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "sentence-transformers/gtr-t5-xl": {"architectures": ["T5EncoderModel"], "d_ff": 16384, "d_model": 1024, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "wangrongsheng/MiniGPT-4-LLaMA-7B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 
32001}, "EleutherAI/pythia-12b-deduped": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 36, "vocab_size": 50688}, "unicamp-dl/translation-pt-en-t5": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "bigscience/mt0-base": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "Pirr/pythia-13b-deduped-green_devil": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 36, "vocab_size": 50277}, "trl-internal-testing/tiny-random-GPT2Model": {"architectures": ["GPT2Model"], "n_embd": 32, "n_head": 4, "n_inner": 37, "n_layer": 5, "vocab_size": 1024}, "MBZUAI/LaMini-GPT-1.5B": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1600, "n_head": 25, "n_inner": null, "n_layer": 48, "vocab_size": 50258}, "Universal-NER/UniNER-7B-all": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/koala-13B-HF": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Rostlab/prot_t5_xl_bfd": {"architectures": ["T5WithLMHeadModel"], "d_ff": 16384, "d_model": 1024, "num_heads": 32, "num_layers": 24, "vocab_size": 128}, "Voicelab/trurl-2-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "explosion-testing/llama2-kv-sharing": {"architectures": ["LlamaForCausalLM"], "hidden_size": 256, "intermediate_size": 512, "num_attention_heads": 4, "num_hidden_layers": 5, "vocab_size": 1024}, "inpars/monot5-3b-inpars-v2-nq-promptagator": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 16384, "d_model": 1024, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "upstage/llama-65b-instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "microsoft/CodeGPT-small-py": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_layer": 12, "vocab_size": 50001}, "VietAI/vit5-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 36096}, "TheBloke/CodeUp-Llama-2-13B-Chat-HF-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/CodeLlama-34B-Instruct-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "elyza/ELYZA-japanese-Llama-2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "jondurbin/airoboros-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/CodeLlama-7B-Python-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 
11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "FlagAlpha/Llama2-Chinese-13b-Chat-4bit": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/CodeLlama-13B-Python-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Enoch/llama-65b-hf": {"architectures": ["LLaMAForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "chargoddard/platypus-2-22b-relora": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 40, "vocab_size": 32000}, "togethercomputer/GPT-NeoXT-Chat-Base-20B": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 6144, "intermediate_size": 24576, "num_attention_heads": 64, "num_hidden_layers": 44, "vocab_size": 50432}, "porkorbeef/Llama-2-13b-sf": {"architectures": ["LlamaModel"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "ehartford/Wizard-Vicuna-13B-Uncensored": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "doas/test5": {"architectures": ["LlamaModel"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "klosax/open_llama_3b_350bt_preview": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "Writer/camel-5b-hf": {"architectures": ["GPT2LMHeadModel"], "n_embd": 4096, "n_head": 32, "n_inner": 16384, "n_layer": 24, "vocab_size": 50258}, "Filosofas/DialoGPT-medium-PALPATINE2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "nomic-ai/gpt4all-falcon": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "reciprocate/llama2-7b-gsm8k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/CodeLlama-13B-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "pankajmathur/orca_mini_v3_13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "projecte-aina/aguila-7b": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 50257}, "TheBloke/WizardLM-13B-V1.1-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "MBZUAI/LaMini-GPT-124M": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50258}, "google/mt5-xxl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 10240, "d_model": 4096, "num_heads": 64, "num_layers": 24, "vocab_size": 250112}, 
"MaRiOrOsSi/t5-base-finetuned-question-answering": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "satvikag/chatbot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "LMFlow/Robin-7b-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "daryl149/llama-2-70b-chat-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "acrastt/Puma-3B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "TheBloke/orca_mini_v3_7B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "taeminlee/kogpt2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_layer": 12, "vocab_size": 50000}, "NousResearch/Llama-2-70b-chat-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "rinna/japanese-gpt2-xsmall": {"architectures": ["GPT2LMHeadModel"], "n_embd": 512, "n_head": 8, "n_inner": 2304, "n_layer": 6, "vocab_size": 32000}, "ziqingyang/chinese-llama-2-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 55296}, "hf-internal-testing/tiny-random-t5-v1.1": {"d_ff": 37, "d_model": 32, "num_heads": 4, "num_layers": 5, "vocab_size": 1103}, "pankajmathur/Lima_Unchained_70b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "chargoddard/llama2-22b-blocktriangular": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/CodeLlama-7B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32016}, "BeIR/query-gen-msmarco-t5-large-v1": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "TheBloke/CodeLlama-13B-Instruct-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "acrastt/Marx-3B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "PygmalionAI/pygmalion-2-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "shibing624/chinese-alpaca-plus-7b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 49954}, "TheBloke/OpenOrcaxOpenChat-Preview2-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, 
"intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "syzymon/long_llama_3b_instruct": {"architectures": ["LongLlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "bofenghuang/vigogne-2-7b-instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Gustavosta/MagicPrompt-Dalle": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "muchad/idt5-qa-qg": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 30002}, "TheBloke/vicuna-13b-v1.3.0-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TigerResearch/tigerbot-13b-base-v1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 60928}, "ehartford/WizardLM-13B-Uncensored": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "clibrain/Llama-2-7b-ft-instruct-es": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "google/t5_xxl_true_nli_mixture": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 65536, "d_model": 1024, "num_heads": 128, "num_layers": 24, "vocab_size": 32128}, "unikei/t5-base-split-and-rephrase": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "microsoft/Promptist": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_layer": 12, "vocab_size": 50257}, "stas/mt5-tiny-random": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 256, "d_model": 64, "num_heads": 4, "num_layers": 8, "vocab_size": 5100}, "AIDC-ai-business/Luban-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "microsoft/GODEL-v1_1-base-seq2seq": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32102}, "CalderaAI/30B-Lazarus": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "acrastt/Marx-3B-V2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "h2oai/h2ogpt-4096-llama2-70b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "ajibawa-2023/scarlett-33b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "jondurbin/airoboros-l2-70b-2.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, 
"vocab_size": 32000}, "rubentito/vt5-base-spdocvqa": {"architectures": ["HF_VT5"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "aisquared/dlite-v2-774m": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_inner": null, "n_layer": 36, "vocab_size": 50260}, "elyza/ELYZA-japanese-Llama-2-7b-fast": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 45043}, "quantumaikr/llama-2-70b-fb16-korean": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "TheBloke/CodeLlama-34B-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "microsoft/DialogRPT-updown": {"architectures": ["GPT2ForSequenceClassification"], "n_embd": 1024, "n_head": 16, "n_layer": 24, "vocab_size": 50257}, "TheBloke/CodeLlama-34B-Instruct-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "garage-bAInd/Platypus2-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "trl-internal-testing/tiny-BloomForCausalLM-correct-vocab": {"architectures": ["BloomForCausalLM"], "hidden_size": 32, "n_head": 4, "n_layer": 5, "vocab_size": 250880}, "TheBloke/Llama-2-7B-GGML": {}, "TheBloke/Wizard-Vicuna-7B-Uncensored-HF": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "wenge-research/yayi-7b-llama2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32005}, "coffeeee/nsfw-story-generator2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "jondurbin/airoboros-33b-gpt4-2.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "totally-not-an-llm/EverythingLM-13b-16k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "datificate/gpt2-small-spanish": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "mrm8488/t5-base-finetuned-wikiSQL": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "bofenghuang/vigogne-2-13b-instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "OpenAssistant/stablelm-7b-sft-v7-epoch-3": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 6144, "intermediate_size": 24576, "num_attention_heads": 48, "num_hidden_layers": 16, "vocab_size": 50288}, "bhenrym14/airoboros-33b-gpt4-1.4.1-lxctx-PI-16384-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, 
"num_hidden_layers": 60, "vocab_size": 32000}, "flozi00/codellama-34b-german-assistant-v1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "WizardLM/WizardCoder-1B-V1.0": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 2048, "n_head": 16, "n_inner": 8192, "n_layer": 24, "vocab_size": 49153}, "upstage/llama-30b-instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "ehartford/dolphin-llama2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Open-Orca/LlongOrca-13B-16k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32004}, "NousResearch/Nous-Hermes-Llama2-70b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32001}, "ml6team/mt5-small-german-query-generation": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250112}, "bigscience/mt0-xxl": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 10240, "d_model": 4096, "num_heads": 64, "num_layers": 24, "vocab_size": 250112}, "EleutherAI/pythia-2.8b-deduped-v0": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50304}, "TheBloke/wizardLM-7B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "conceptofmind/LLongMA-2-7b-16k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "lmsys/vicuna-7b-delta-v1.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "bofenghuang/vigogne-7b-instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "csebuetnlp/banglat5_nmt_en_bn": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "trl-internal-testing/tiny-random-T5Model": {"architectures": ["T5Model"], "d_ff": 37, "d_model": 32, "num_heads": 4, "num_layers": 5, "vocab_size": 1302}, "OpenBuddy/openbuddy-llama2-70b-v10.1-bf16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 37632}, "TheBloke/wizard-vicuna-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "JosephusCheung/Guanaco": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "openchat/opencoderplus": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 6144, "n_head": 48, 
"n_inner": 24576, "n_layer": 40, "vocab_size": 49153}, "jacobmorrison/tk-instruct-large-lora-experiments": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32100}, "PygmalionAI/metharme-1.3b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50304}, "TheBloke/orca_mini_13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "EleutherAI/pythia-70m-v0": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 512, "intermediate_size": 2048, "num_attention_heads": 8, "num_hidden_layers": 6, "vocab_size": 50304}, "project-baize/baize-v2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "line-corporation/japanese-large-lm-1.7b-instruction-sft": {"architectures": ["GPT2LMHeadModel"], "n_embd": 2304, "n_head": 24, "n_inner": 9216, "n_layer": 24, "vocab_size": 51200}, "TheBloke/WizardLM-13B-V1-0-Uncensored-SuperHOT-8K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/llama-2-70b-Guanaco-QLoRA-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "MBZUAI/LaMini-Flan-T5-77M": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 32128}, "csebuetnlp/banglat5_nmt_bn_en": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "Trelis/Llama-2-7b-chat-hf-function-calling-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "ehartford/Wizard-Vicuna-7B-Uncensored": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "llSourcell/medllama2_7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "WizardLM/WizardLM-13B-V1.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "Gryphe/MythoMix-L2-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/StableBeluga2-70B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "VietAI/vit5-large-vietnews-summarization": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 36096}, "adasnew/t5-small-xsum": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "Intel/t5-small-xsum-int8-dynamic": {"architectures": 
["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32100}, "daspartho/prompt-extend": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 52000}, "EleutherAI/pythia-160m-v0": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 768, "intermediate_size": 3072, "num_attention_heads": 12, "num_hidden_layers": 12, "vocab_size": 50304}, "Ar4ikov/gpt2-650k-stable-diffusion-prompt-generator": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "ehartford/WizardLM-Uncensored-Falcon-7b": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65025}, "CobraMamba/mamba-gpt-3b-v3": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "TheBloke/llama2_70b_chat_uncensored-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "ai-forever/FRED-T5-1.7B": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1536, "num_heads": 24, "num_layers": 24, "vocab_size": 50364}, "MBZUAI/LaMini-Cerebras-590M": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1536, "n_head": 12, "n_inner": 6144, "n_layer": 18, "vocab_size": 50258}, "mrm8488/llama-2-coder-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "guardrail/llama-2-7b-guanaco-instruct-sharded": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "rinna/bilingual-gpt-neox-4b-8k": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2816, "intermediate_size": 11264, "num_attention_heads": 22, "num_hidden_layers": 36, "vocab_size": 65536}, "mrm8488/falcoder-7b": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "circulus/Llama-2-13b-orca-v1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "allenai/tk-instruct-3b-def": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32100}, "pierreguillou/gpt2-small-portuguese": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_layer": 12, "vocab_size": 50257}, "junelee/wizard-vicuna-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "castorini/monot5-3b-msmarco-10k": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 16384, "d_model": 1024, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "TheBloke/Llama-2-70B-Chat-GGML": {}, "TheBloke/CodeLlama-7B-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32016}, "yeontaek/llama-2-13B-ensemble-v5": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, 
"ybelkada/flan-t5-xl-sharded-bf16": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "WizardLM/WizardCoder-3B-V1.0": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 2816, "n_head": 22, "n_inner": 11264, "n_layer": 36, "vocab_size": 49153}, "Langboat/mengzi-t5-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "MBZUAI/LaMini-GPT-774M": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_inner": null, "n_layer": 36, "vocab_size": 50258}, "ToddGoldfarb/Cadet-Tiny": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "TigerResearch/tigerbot-7b-base": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 60928}, "UrukHan/t5-russian-spell": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "LinkSoul/Chinese-Llama-2-7b-4bit": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/Vicuna-13B-CoT-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "EleutherAI/pythia-1.4b-v0": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50304}, "MayaPH/GodziLLa2-70B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "TheBloke/wizardLM-13B-1.0-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "Gryphe/MythoBoros-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "abacusai/Giraffe-v2-13b-32k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "jondurbin/airoboros-l2-13b-gpt4-1.4.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "razent/SciFive-base-Pubmed": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "TehVenom/Pygmalion-13b-Merged": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "garage-bAInd/SuperPlatty-30B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "jondurbin/airoboros-l2-7b-gpt4-2.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Rostlab/ProstT5": {"architectures": 
["T5ForConditionalGeneration"], "d_ff": 16384, "d_model": 1024, "num_heads": 32, "num_layers": 24, "vocab_size": 150}, "TheBloke/guanaco-7B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "JackFram/llama-68m": {"architectures": ["LlamaForCausalLM"], "hidden_size": 768, "intermediate_size": 3072, "num_attention_heads": 12, "num_hidden_layers": 2, "vocab_size": 32000}, "MBZUAI/LaMini-Cerebras-111M": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": 3072, "n_layer": 10, "vocab_size": 50258}, "ehartford/Wizard-Vicuna-30B-Uncensored": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "stockmark/gpt-neox-japanese-1.4b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50000}, "TheBloke/MythoMax-L2-13B-GGML": {}, "MBZUAI/LaMini-Cerebras-256M": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1088, "n_head": 17, "n_inner": 4352, "n_layer": 14, "vocab_size": 50258}, "jondurbin/airoboros-l2-13b-gpt4-2.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "The-Face-Of-Goonery/Chronos-Beluga-v2-13bfp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "lmqg/t5-base-squad-qag": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32101}, "Voicelab/trurl-2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "ehartford/Samantha-1.11-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "clibrain/Llama-2-13b-ft-instruct-es": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "deepse/CodeUp-Llama-2-13b-chat-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "mrm8488/t5-base-finetuned-sarcasm-twitter": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "ToolBench/ToolLLaMA-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "marella/gpt-2-ggml": {}, "Henk717/airochronos-33B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "stanford-crfm/alias-gpt2-small-x21": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "WizardLM/WizardLM-30B-V1.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32001}, 
"timdettmers/guanaco-33b-merged": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "xkianteb/alg_ppo_separate_lr_1e-6_n_epochs_10_v_epochs_10_kl_target_1.0_clip_range_0.2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "TheBloke/wizard-mega-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "bigscience/mt0-xl": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 250112}, "luffycodes/nash-vicuna-13b-v1dot5-ep2-w-rag-w-simple": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "h2oai/h2ogpt-oig-oasst1-256-6_9b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "fabiochiu/t5-base-medium-title-generation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "OpenAssistant/falcon-40b-sft-mix-1226": {"architectures": ["RWForCausalLM"], "hidden_size": 8192, "n_head": 128, "n_layer": 60, "vocab_size": 65040}, "Writer/palmyra-base": {"architectures": ["GPT2LMHeadModel"], "n_embd": 4096, "n_head": 32, "n_inner": 16384, "n_layer": 24, "vocab_size": 50257}, "TheBloke/llama-2-70b-Guanaco-QLoRA-GGML": {}, "Rostlab/prot_t5_base_mt_uniref50": {"architectures": ["T5WithLMHeadModel"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 256}, "Lajonbot/Llama-2-13b-hf-instruct-pl-lora_unload": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "WizardLM/WizardLM-7B-V1.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "pankajmathur/orca_mini_7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "yhyhy3/open_llama_7b_v2_med_instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "NousResearch/CodeLlama-7b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32016}, "OpenBuddy/openbuddy-llama2-13b-v11.1-bf16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 37632}, "hf-internal-testing/tiny-random-GPT2ForQuestionAnswering": {"architectures": ["GPT2ForQuestionAnswering"], "n_embd": 32, "n_head": 4, "n_inner": 37, "n_layer": 5, "vocab_size": 1024}, "explosion-testing/llama2-fewer-kv-heads": {"architectures": ["LlamaForCausalLM"], "hidden_size": 256, "intermediate_size": 512, "num_attention_heads": 4, "num_hidden_layers": 5, "vocab_size": 1024}, "hetpandya/t5-base-tapaco": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 
3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "PygmalionAI/pygmalion-2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "mrm8488/t5-base-finetuned-imdb-sentiment": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "explosion-testing/falcon-test": {"architectures": ["FalconForCausalLM"], "hidden_size": 32, "num_attention_heads": 4, "num_hidden_layers": 5, "vocab_size": 1024}, "ehartford/WizardLM-33B-V1.0-Uncensored": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "TheBloke/StableBeluga-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/WizardLM-Uncensored-SuperCOT-StoryTelling-30B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32001}, "jinaai/jina-embedding-s-en-v1": {"architectures": ["T5EncoderModel"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "FelixChao/vicuna-33b-coder": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "TheBloke/llama-30b-supercot-SuperHOT-8K-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "quantumaikr/llama-2-70b-fb16-orca-chat-10k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "TheBloke/airoboros-l2-13B-gpt4-1.4.1-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "EleutherAI/pythia-31m": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 256, "intermediate_size": 1024, "num_attention_heads": 8, "num_hidden_layers": 6, "vocab_size": 50304}, "hf-internal-testing/tiny-random-GPT2ForTokenClassification": {"architectures": ["GPT2ForTokenClassification"], "n_embd": 32, "n_head": 4, "n_inner": 37, "n_layer": 5, "vocab_size": 1024}, "jondurbin/airoboros-l2-70b-gpt4-1.4.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "kimsan0622/gpt2-medium": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 64007}, "TheBloke/EverythingLM-13B-16K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Linly-AI/Chinese-LLaMA-2-13B-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 40076}, "BlackSamorez/rudialogpt3_medium_based_on_gpt2_2ch": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "EleutherAI/pythia-2.8b-v0": 
{"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50304}, "TheBloke/llama-2-7B-Guanaco-QLoRA-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "google/byt5-xl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 6720, "d_model": 2560, "num_heads": 32, "num_layers": 36, "vocab_size": 384}, "TheBloke/wizard-vicuna-13B-HF": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TehVenom/Pygmalion-Vicuna-1.1-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "openaccess-ai-collective/wizard-mega-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "jondurbin/airoboros-l2-7b-gpt4-m2.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "openchat/openchat_v3.2_super": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "openaccess-ai-collective/manticore-13b-chat-pyg": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Neko-Institute-of-Science/pygmalion-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "unicamp-dl/ptt5-small-portuguese-vocab": {"architectures": ["T5WithLMHeadModel"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "hf-internal-testing/tiny-random-T5ForQuestionAnswering": {"architectures": ["T5ForQuestionAnswering"], "d_ff": 37, "d_model": 32, "num_heads": 4, "num_layers": 5, "vocab_size": 32100}, "microsoft/CodeGPT-small-java-adaptedGPT2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50260}, "unicamp-dl/ptt5-base-portuguese-vocab": {"architectures": ["T5WithLMHeadModel"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "Fredithefish/ScarletPajama-3B-HF": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "hf-internal-testing/tiny-random-T5ForSequenceClassification": {"architectures": ["T5ForSequenceClassification"], "d_ff": 37, "d_model": 32, "num_heads": 4, "num_layers": 5, "vocab_size": 32100}, "TheBloke/Nous-Hermes-Llama-2-7B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "calvindoingstuff/DialoGPT-medium-luffy": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "lvkaokao/llama2-7b-hf-chat-lora-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, 
"num_hidden_layers": 32, "vocab_size": 32000}, "skt/ko-gpt-trinity-1.2B-v0.5": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1920, "n_head": 16, "n_inner": 7680, "n_layer": 24, "vocab_size": 51200}, "saibo/llama-1B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 2, "vocab_size": 32000}, "vonjack/Qwen-LLaMAfied-HFTok-7B-Chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 151936}, "TheBloke/CodeLlama-34B-Python-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "GAIR/rst-all-11b": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 65536, "d_model": 1024, "num_heads": 128, "num_layers": 24, "vocab_size": 32128}, "GeorgiaTechResearchInstitute/starcoder-gpteacher-code-instruct": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 40, "vocab_size": 49153}, "jondurbin/airoboros-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "aisquared/dlite-v2-1_5b": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1600, "n_head": 25, "n_inner": null, "n_layer": 48, "vocab_size": 50260}, "aiassociates/t5-small-grammar-correction-german": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "asi/gpt-fr-cased-small": {"n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50000}, "csebuetnlp/mT5_m2o_chinese_simplified_crossSum": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "openthaigpt/openthaigpt-1.0.0-alpha-7b-chat-ckpt-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "jondurbin/airoboros-l2-13b-2.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "sentence-transformers/sentence-t5-xl": {"architectures": ["T5EncoderModel"], "d_ff": 16384, "d_model": 1024, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "OpenBuddy/openbuddy-openllama-3b-v10-bf16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 37120}, "TheBloke/guanaco-33B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "h2oai/h2ogpt-oasst1-512-20b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 6144, "intermediate_size": 24576, "num_attention_heads": 64, "num_hidden_layers": 44, "vocab_size": 50432}, "Open-Orca/OpenOrca-Preview1-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "WizardLM/WizardLM-13B-V1.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, 
"garage-bAInd/Camel-Platypus2-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "wxjiao/alpaca-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "TheBloke/Wizard-Vicuna-13B-Uncensored-SuperHOT-8K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "FelixChao/vicuna-7B-chemical": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Arc53/docsgpt-14b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "OpenAssistant/llama2-13b-megacode2-oasst": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32007}, "TheBloke/Lemur-70B-Chat-v1-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32005}, "EleutherAI/pythia-6.9b-v0": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "grimpep/L2-MythoMax22b-instruct-Falseblock": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 40, "vocab_size": 32000}, "Austism/chronos-hermes-13b-v2-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32032}, "UBC-NLP/AraT5v2-base-1024": {"d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 110208}, "fireballoon/baichuan-vicuna-chinese-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 64000}, "abeja/gpt2-large-japanese": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_inner": null, "n_layer": 36, "vocab_size": 32000}, "TheBloke/Airoboros-L2-70B-2.1-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "vicgalle/gpt2-alpaca-gpt4": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50260}, "flax-community/gpt2-small-indonesian": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "TheBloke/Nous-Hermes-13B-SuperHOT-8K-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "imone/LLaMA2_13B_with_EOT_token": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "Corianas/111m": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": 3072, "n_layer": 10, "vocab_size": 50257}, 
"The-Face-Of-Goonery/Huginn-v3-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "ehartford/Samantha-1.11-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "heegyu/WizardVicuna-3B-0719": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "acrastt/Griffin-3B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "totally-not-an-llm/EverythingLM-13b-V2-16k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "ikala/bloom-zh-3b-chat": {"architectures": ["BloomForCausalLM"], "hidden_size": 2560, "n_head": 32, "n_inner": null, "n_layer": 30, "vocab_size": 250688}, "Gryphe/MythoLogic-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "AlekseyKorshuk/vicuna-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "flax-community/gpt2-medium-persian": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50000}, "ehartford/samantha-1.1-llama-33b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "garage-bAInd/Platypus2-70B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "OpenLemur/lemur-70b-v1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32024}, "ausboss/llama-30b-supercot": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "jondurbin/airoboros-l2-70b-gpt4-m2.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "lmqg/mt5-small-koquad-qg-ae": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "TheBloke/OpenAssistant-SFT-7-Llama-30B-HF": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32016}, "TheBloke/h2ogpt-gm-oasst1-en-2048-falcon-40b-v2-GPTQ": {"architectures": ["RWForCausalLM"], "hidden_size": 8192, "n_head": 128, "n_layer": 60, "vocab_size": 65024}, "GOAT-AI/GOAT-7B-Community": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "h2oai/h2ogpt-gm-oasst1-en-1024-20b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 6144, "intermediate_size": 24576, "num_attention_heads": 64, 
"num_hidden_layers": 44, "vocab_size": 50432}, "beaugogh/pythia-1.4b-deduped-sharegpt": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50280}, "amurshak/llama-2-7b-miniguanaco": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "psyche/kollama2-7b-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "IlyaGusev/fred_t5_ru_turbo_alpaca": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1536, "num_heads": 24, "num_layers": 24, "vocab_size": 50365}, "potsawee/t5-large-generation-race-Distractor": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "heegyu/WizardVicuna-Uncensored-3B-0719": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "TheBloke/openchat_v2_openorca_preview-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "CalderaAI/13B-Legerdemain-L2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "SebastianSchramm/Cerebras-GPT-111M-instruction": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": 3072, "n_layer": 10, "vocab_size": 50258}, "Mikael110/llama-2-7b-guanaco-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Locutusque/gpt2-large-conversational": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_inner": null, "n_layer": 36, "vocab_size": 50260}, "CalderaAI/13B-Ouroboros": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "chaoyi-wu/MedLLaMA_13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "YeungNLP/firefly-llama2-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "garage-bAInd/GPlatty-30B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "pankajmathur/orca_mini_v2_13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "pankajmathur/model_007_13b_v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "chargoddard/Chronorctypus-Limarobormes-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "timdettmers/guanaco-65b-merged": 
{"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "digitous/13B-HyperMantis": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "ckiplab/gpt2-base-chinese": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 21128}, "ehartford/dolphin-llama-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "jphme/orca_mini_v2_ger_7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "malhajar/Platypus2-70B-instruct-4bit-gptq": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "heegyu/WizardVicuna-open-llama-3b-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "pankajmathur/model_007": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "vicgalle/gpt2-alpaca": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50260}, "h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-7b-preview-300bt-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "stabilityai/stablecode-completion-alpha-3b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 49152}, "aisquared/dlite-v2-355m": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50260}, "google/byt5-xxl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 12352, "d_model": 4672, "num_heads": 64, "num_layers": 36, "vocab_size": 384}, "ehartford/Samantha-1.11-CodeLlama-34b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "h2oai/h2ogpt-gm-oasst1-multilang-1024-20b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 6144, "intermediate_size": 24576, "num_attention_heads": 64, "num_hidden_layers": 44, "vocab_size": 50432}, "TheBloke/koala-7B-HF": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "ehartford/WizardLM-30B-Uncensored": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32001}, "clibrain/Llama-2-ft-instruct-es": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "quantumaikr/llama-2-70b-fb16-guanaco-1k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, 
"num_hidden_layers": 80, "vocab_size": 32000}, "psyche/kogpt": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1536, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 32002}, "wenge-research/yayi-7b": {"architectures": ["BloomForCausalLM"], "hidden_size": 4096, "n_head": 32, "n_inner": null, "n_layer": 30, "vocab_size": 250684}, "Aspik101/WizardVicuna-Uncensored-3B-instruct-PL-lora_unload": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "rombodawg/LosslessMegaCoder-llama2-7b-mini": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32007}, "TurkuNLP/gpt3-finnish-medium": {"architectures": ["BloomModel"], "hidden_size": 1024, "n_head": 16, "n_layer": 24, "vocab_size": 131072}, "pankajmathur/orca_mini_13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Mikael110/llama-2-13b-guanaco-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "totally-not-an-llm/PuddleJumper-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "jondurbin/airoboros-13b-gpt4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "CobraMamba/mamba-gpt-3b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "zarakiquemparte/zarablend-l2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Locutusque/gpt2-conversational-or-qa": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50262}, "frank098/Wizard-Vicuna-13B-juniper": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "jondurbin/airoboros-gpt-3.5-turbo-100k-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "jondurbin/airoboros-33b-gpt4-1.4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "jondurbin/airoboros-l2-70b-gpt4-2.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "MBZUAI/LaMini-Cerebras-1.3B": {"architectures": ["GPT2LMHeadModel"], "n_embd": 2048, "n_head": 16, "n_inner": 8192, "n_layer": 24, "vocab_size": 50258}, "h2oai/h2ogpt-research-oasst1-llama-65b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "nkpz/llama2-22b-daydreamer-v3": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, 
"num_attention_heads": 52, "num_hidden_layers": 40, "vocab_size": 32000}, "Aspik101/trurl-2-13b-pl-instruct_unload": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "OpenAssistant/pythia-12b-pre-v8-12.5k-steps": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 36, "vocab_size": 50288}, "breadlicker45/dough-instruct-base-001": {"architectures": ["LlamaForCausalLM"], "hidden_size": 768, "intermediate_size": 3072, "num_attention_heads": 12, "num_hidden_layers": 12, "vocab_size": 50402}, "OpenBuddy/openbuddy-llama-30b-v7.1-bf16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 37632}, "andreaskoepf/llama2-13b-megacode2_min100": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32007}, "ehartford/Samantha-1.11-70b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "flax-community/t5-recipe-generation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "BreadAi/PM_modelV2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1088, "n_head": 17, "n_inner": 4352, "n_layer": 14, "vocab_size": 50257}, "minlik/chinese-alpaca-33b-merged": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 49954}, "jordiclive/Llama-2-70b-oasst-1-200": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32016}, "Lajonbot/tableBeluga-7B-instruct-pl-lora_unload": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "sia-ai/llama-2-7b-1-percent-open-orca-1000-steps-v0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "h2oai/h2ogpt-gm-oasst1-en-1024-12b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 36, "vocab_size": 50688}, "jondurbin/airoboros-33b-gpt4-1.2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "openchat/openchat_8192": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "TaylorAI/Flash-Llama-3B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "yeontaek/llama-2-13B-ensemble-v1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Kirili4ik/ruDialoGpt3-medium-finetuned-telegram": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, 
"n_inner": null, "n_layer": 24, "vocab_size": 50257}, "WangZeJun/bloom-820m-chat": {"architectures": ["BloomForCausalLM"], "hidden_size": 1536, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 46145}, "4bit/Llama-2-70b-chat-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "chargoddard/llama2-22b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 40, "vocab_size": 32000}, "augtoma/qCammel-13": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "NlpHUST/gpt2-vietnamese": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "Monero/Manticore-13b-Chat-Pyg-Guanaco": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "NousResearch/CodeLlama-34b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "aisquared/dlite-v2-124m": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50260}, "pankajmathur/orca_mini_v2_7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "The-Face-Of-Goonery/Huginn-22b-Prototype": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 40, "vocab_size": 32000}, "DevaMalla/llama7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "openaccess-ai-collective/manticore-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "nkpz/llama2-22b-chat-wizard-uncensored": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 40, "vocab_size": 32000}, "davzoku/cria-llama2-7b-v1.3": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TaylorAI/Flash-Llama-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Fredithefish/ReasonixPajama-3B-HF": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "TheBloke/Platypus-30B-SuperHOT-8K-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "LoupGarou/WizardCoder-Guanaco-15B-V1.1": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 40, "vocab_size": 49153}, "TheBloke/guanaco-65B-HF": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, 
"num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "hakurei/lotus-12B": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 36, "vocab_size": 50688}, "bofenghuang/vigogne-33b-instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "grimpep/llama2-22B-GPLATTY": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 40, "vocab_size": 32000}, "concedo/Pythia-70M-ChatSalad": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 512, "intermediate_size": 2048, "num_attention_heads": 8, "num_hidden_layers": 6, "vocab_size": 50278}, "rombodawg/LosslessMegaCoder-llama2-13b-mini": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32007}, "TaylorAI/Flash-Llama-7B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/chronos-wizardlm-uc-scot-st-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "OpenBuddy/openbuddy-llama-65b-v8-bf16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 37632}, "ajibawa-2023/scarlett-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/medalpaca-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "elinas/chronos-33b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "OpenBuddy/openbuddy-atom-13b-v9-bf16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 49953}, "OpenAssistant/pythia-12b-sft-v8-rlhf-2k-steps": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 36, "vocab_size": 50282}, "TheTravellingEngineer/llama2-7b-chat-hf-v4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Ejafa/vicuna_7B_vanilla_1.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "yulan-team/YuLan-Chat-2-13b-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 51200}, "huashiyiqike/testmodel": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": 3072, "n_layer": 10, "vocab_size": 50257}, "TheBloke/WizardLM-30B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, 
"num_hidden_layers": 60, "vocab_size": 32001}, "notstoic/PygmalionCoT-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/Vicuna-33B-1-3-SuperHOT-8K-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "FelixChao/vicuna-7B-physics": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/tulu-30B-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32001}, "jondurbin/airoboros-65b-gpt4-1.4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "uukuguy/speechless-llama2-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "digitous/13B-Chimera": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "jondurbin/airoboros-7b-gpt4-1.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "grimpep/llama2-28B-Airo03": {"architectures": ["LlamaForCausalLM"], "hidden_size": 7296, "intermediate_size": 22016, "num_attention_heads": 57, "num_hidden_layers": 40, "vocab_size": 32000}, "ehartford/CodeLlama-34b-Instruct-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "YeungNLP/firefly-ziya-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 39424}, "TheTravellingEngineer/bloom-560m-RLHF-v2": {"architectures": ["BloomForCausalLM"], "hidden_size": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 250880}, "TheTravellingEngineer/llama2-7b-chat-hf-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "uukuguy/speechless-hermes-coig-lite-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "BreadAi/gpt-Youtube": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 768, "intermediate_size": 3072, "num_attention_heads": 12, "num_hidden_layers": 12, "vocab_size": 50304}, "Aspik101/llama-30b-instruct-2048-PL-lora": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "beaugogh/Llama2-13b-sharegpt4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "gaodrew/gaodrew-gorgonzola-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, 
"vocab_size": 32001}, "OpenBuddy/openbuddy-llama2-13b-v11-bf16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 37632}, "TheBloke/guanaco-13B-HF": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/WizardLM-13B-V1-1-SuperHOT-8K-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "NousResearch/CodeLlama-13b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "BreadAi/MusePy-1-2": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 512, "intermediate_size": 2048, "num_attention_heads": 8, "num_hidden_layers": 6, "vocab_size": 50304}, "jondurbin/airoboros-33b-gpt4-1.3": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "YeungNLP/firefly-bloom-7b1": {"architectures": ["BloomForCausalLM"], "hidden_size": 4096, "n_head": 32, "n_inner": null, "n_layer": 30, "vocab_size": 250880}, "grimpep/llama2-22b-wizard_vicuna": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 40, "vocab_size": 32000}, "Fredithefish/Guanaco-3B-Uncensored": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "digitous/Alpacino13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "mncai/SGPT-1.3B-insurance-epoch10": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 30080}, "TheTravellingEngineer/llama2-7b-chat-hf-dpo": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "yeontaek/Platypus2xOpenOrca-13B-LoRa": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "TheBloke/gpt4-alpaca-lora-30b-HF": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "bhenrym14/airophin-13b-pntk-16k-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/Kimiko-13B-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "porkorbeef/Llama-2-13b-12_153950": {"architectures": ["LlamaModel"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "PSanni/Deer-3b": {"architectures": ["BloomForCausalLM"], "hidden_size": 2560, "n_head": 32, "n_inner": null, "n_layer": 30, "vocab_size": 250683}, 
"IGeniusDev/llama13B-quant8-testv1-openorca-customdataset": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Neko-Institute-of-Science/metharme-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "alibidaran/medical_transcription_generator": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "Panchovix/airoboros-33b-gpt4-1.2-SuperHOT-8k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "digitous/Alpacino30b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "lgaalves/gpt2-dolly": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "TaylorAI/FLAN-Llama-7B-2_Llama2-7B-Flash_868_full_model": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "zarakiquemparte/zarafusionex-1.1-l2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "OpenAssistant/pythia-12b-sft-v8-2.5k-steps": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 36, "vocab_size": 50288}, "TheBloke/airoboros-13B-HF": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/robin-33B-v2-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "Aspik101/trurl-2-7b-pl-instruct_unload": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "llama-anon/petra-13b-instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "TFLai/gpt2-turkish-uncased": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "health360/Healix-3B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "TheBloke/Mythalion-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-7b-preview-300bt": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "pe-nlp/llama-2-13b-vicuna-wizard": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "yeontaek/Platypus2-13B-QLoRa": 
{"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "acrastt/OmegLLaMA-3B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "jslin09/bloom-560m-finetuned-fraud": {"architectures": ["BloomForCausalLM"], "hidden_size": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 250880}, "YeungNLP/firefly-bloom-2b6-v2": {"architectures": ["BloomForCausalLM"], "hidden_size": 2560, "n_head": 32, "n_inner": null, "n_layer": 30, "vocab_size": 46145}, "xzuyn/LLaMa-1-MedicWizard-7B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "Azure99/blossom-v2-3b": {"architectures": ["BloomForCausalLM"], "hidden_size": 2560, "n_head": 32, "n_inner": null, "n_layer": 30, "vocab_size": 250880}, "TheBloke/Airoboros-L2-13B-2.1-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "MetaIX/GPT4-X-Alpasta-30b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32016}, "PocketDoc/Dans-PersonalityEngine-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "vicgalle/alpaca-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Corianas/590m": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1536, "n_head": 12, "n_inner": 6144, "n_layer": 18, "vocab_size": 50257}, "OpenBuddy/openbuddy-openllama-13b-v7-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 38656}, "gywy/llama2-13b-chinese-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 49376}, "Corianas/Quokka_590m": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1536, "n_head": 12, "n_inner": 6144, "n_layer": 18, "vocab_size": 50260}, "aisquared/dlite-v1-355m": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50260}, "aisquared/dlite-v1-774m": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_inner": null, "n_layer": 36, "vocab_size": 50260}, "Fredithefish/RedPajama-INCITE-Chat-3B-Instruction-Tuning-with-GPT-4": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "project-baize/baize-v2-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/Project-Baize-v2-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "FabbriSimo01/GPT_Large_Quantized": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_inner": null, 
"n_layer": 36, "vocab_size": 50257}, "ajibawa-2023/carl-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Azure99/blossom-v1-3b": {"architectures": ["BloomForCausalLM"], "hidden_size": 2560, "n_head": 32, "n_inner": null, "n_layer": 30, "vocab_size": 250880}, "Aspik101/30B-Lazarus-instruct-PL-lora_unload": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "yeontaek/Platypus2xOpenOrca-13B-IA3": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "Enno-Ai/ennodata-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "FabbriSimo01/Cerebras_1.3b_Quantized": {"architectures": ["GPT2LMHeadModel"], "n_embd": 2048, "n_head": 16, "n_inner": 8192, "n_layer": 24, "vocab_size": 50257}, "migtissera/Synthia-7B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "pe-nlp/llama-2-13b-platypus-vicuna-wizard": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "yeontaek/Platypus2xOpenOrca-13B-IA3-ensemble": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "Corianas/1.3b": {"architectures": ["GPT2LMHeadModel"], "n_embd": 2048, "n_head": 16, "n_inner": 8192, "n_layer": 24, "vocab_size": 50257}, "Rachneet/gpt2-xl-alpaca": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1600, "n_head": 25, "n_inner": null, "n_layer": 48, "vocab_size": 50257}, "Aeala/VicUnlocked-alpaca-30b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "TheBloke/VicUnlocked-30B-LoRA-HF": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "Monero/WizardLM-Uncensored-SuperCOT-StoryTelling-30b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32001}, "bavest/fin-llama-33b-merged": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "openchat/openchat_v2_w": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "FabbriSimo01/Bloom_1b_Quantized": {"architectures": ["BloomForCausalLM"], "hidden_size": 1536, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 250880}, "Aspik101/tulu-7b-instruct-pl-lora_unload": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "TheTravellingEngineer/llama2-7b-chat-hf-v3": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, 
"num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "yeontaek/llama-2-70b-IA3-guanaco": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "Kunhao/pile-7b-250b-tokens": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 49152}, "yeontaek/llama-2-13b-QLoRA": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "yeontaek/llama-2-13b-Beluga-QLoRA": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "yeontaek/Platypus2xOpenOrca-13B-IA3-v3": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "ausboss/llama7b-wizardlm-unfiltered": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/h2ogpt-oasst1-512-30B-HF": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "bofenghuang/vigogne-7b-chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "NYTK/PULI-GPTrio": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 150016}, "LLMs/WizardLM-30B-V1.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32001}, "openaccess-ai-collective/minotaur-13b-fixed": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheTravellingEngineer/bloom-1b1-RLHF-v2": {"architectures": ["BloomForCausalLM"], "hidden_size": 1536, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 250880}, "yeontaek/Platypus2xOpenOrca-13B-IA3-v4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "BreadAi/DiscordPy": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1088, "n_head": 17, "n_inner": 4352, "n_layer": 14, "vocab_size": 50257}, "TehVenom/oasst-sft-6-llama-33b-xor-MERGED-16bit": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32016}, "gaodrew/gaodrew-llama-30b-instruct-2048-Open-Platypus-100steps": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "yeontaek/Platypus2xOpenOrca-13B-IA3-v2.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "yeontaek/Platypus2xOpenOrca-13B-LoRa-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, 
"num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "dvruette/oasst-pythia-12b-6000-steps": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 36, "vocab_size": 50288}, "LoupGarou/WizardCoder-Guanaco-15B-V1.0": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 40, "vocab_size": 49153}, "KnutJaegersberg/gpt-2-xl-EvolInstruct": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1600, "n_head": 25, "n_inner": null, "n_layer": 48, "vocab_size": 50257}, "Lajonbot/WizardLM-13B-V1.2-PL-lora_unload": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "yeontaek/Platypus2-13B-IA3": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "zarakiquemparte/zaraxe-l2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "BreadAi/gpt-YA-1-1_70M": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 512, "intermediate_size": 2048, "num_attention_heads": 8, "num_hidden_layers": 6, "vocab_size": 50304}, "dvruette/oasst-pythia-12b-reference": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 36, "vocab_size": 50288}, "Panchovix/WizardLM-33B-V1.0-Uncensored-SuperHOT-8k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "titan087/OpenLlama13B-Guanaco": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "w601sxs/b1ade-1b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 8, "num_hidden_layers": 16, "vocab_size": 50304}, "Andron00e/YetAnother_Open-Llama-3B-LoRA": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "quantumaikr/QuantumLM": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "jondurbin/airoboros-65b-gpt4-2.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "Aspik101/llama-30b-2048-instruct-PL-lora_unload": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "yeontaek/Platypus2-13B-LoRa": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "zarakiquemparte/zarafusionix-l2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "huggingtweets/gladosystem": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, 
"eachadea/legacy-vicuna-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "Aeala/GPT4-x-AlpacaDente2-30b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32016}, "shibing624/chinese-llama-plus-13b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 49953}, "euclaise/gpt-neox-122m-minipile-digits": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 768, "intermediate_size": 3072, "num_attention_heads": 12, "num_hidden_layers": 12, "vocab_size": 48262}, "TheBloke/UltraLM-13B-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "lvkaokao/llama2-7b-hf-instruction-lora": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "BreadAi/StoryPy": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 512, "intermediate_size": 2048, "num_attention_heads": 8, "num_hidden_layers": 6, "vocab_size": 50304}, "dvruette/oasst-pythia-12b-flash-attn-5000-steps": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 36, "vocab_size": 50288}, "aisquared/dlite-v1-124m": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "ewof/koishi-instruct-3b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "h2oai/h2ogpt-gm-oasst1-en-1024-open-llama-7b-preview-400bt": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/airoboros-7b-gpt4-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "jondurbin/airoboros-13b-gpt4-1.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/tulu-13B-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "yhyhy3/med-orca-instruct-33b": {"architectures": ["LlamaModel"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "heegyu/LIMA-13b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "abhishek/llama2guanacotest": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "heegyu/LIMA2-13b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Corianas/Quokka_256m": {"architectures": 
["GPT2LMHeadModel"], "n_embd": 1088, "n_head": 17, "n_inner": 4352, "n_layer": 14, "vocab_size": 50260}, "golaxy/gogpt-560m": {"architectures": ["BloomForCausalLM"], "hidden_size": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 250880}, "OptimalScale/robin-7b-v2-delta": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "bofenghuang/vigogne-13b-chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "klosax/pythia-160m-deduped-step92k-193bt": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 768, "intermediate_size": 3072, "num_attention_heads": 12, "num_hidden_layers": 12, "vocab_size": 50304}, "golaxy/gogpt2-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 68420}, "YeungNLP/firefly-llama2-13b-v1.2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "WhoTookMyAmogusNickname/NewHope_HF_not_official": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "ehartford/CodeLlama-34b-Python-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "KnutJaegersberg/megatron-GPT-2-345m-EvolInstruct": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": 4096, "n_layer": 24, "vocab_size": 50257}, "Aeala/Alpaca-elina-65b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "Monero/WizardLM-30B-Uncensored-Guanaco-SuperCOT-30b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32001}, "csitfun/llama-7b-logicot": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "OptimalScale/robin-65b-v2-delta": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "LLMs/WizardLM-13B-V1.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "CobraMamba/mamba-gpt-3b-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "aisquared/dlite-v1-1_5b": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1600, "n_head": 25, "n_inner": null, "n_layer": 48, "vocab_size": 50257}, "nthngdy/pythia-owt2-70m-100k": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 512, "intermediate_size": 2048, "num_attention_heads": 8, "num_hidden_layers": 6, "vocab_size": 50304}, "LLMs/AlpacaGPT4-7B-elina": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, 
"Yhyu13/oasst-rlhf-2-llama-30b-7k-steps-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32006}, "jondurbin/airoboros-7b-gpt4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "grantprice/Cerebras-GPT-590M-finetuned-DND": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1536, "n_head": 12, "n_inner": 6144, "n_layer": 18, "vocab_size": 50257}, "TheBloke/robin-13B-v2-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/robin-65b-v2-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "FPHam/Free_Sydney_13b_HF": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32032}, "acrastt/RedPajama-INCITE-Chat-Instruct-3B-V1": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "jondurbin/airoboros-65b-gpt4-m2.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "heegyu/LIMA2-7b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "BreadAi/MuseCan": {"architectures": ["GPT2LMHeadModel"], "n_embd": 960, "n_head": 15, "n_inner": 9, "n_layer": 5, "vocab_size": 50304}, "ausboss/llama-13b-supercot": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "openaccess-ai-collective/manticore-30b-chat-pyg-alpha": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "OptimalScale/robin-13b-v2-delta": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "quantumaikr/llama-2-7b-hf-guanaco-1k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Fredithefish/RedPajama-INCITE-Chat-3B-ShareGPT-11K": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "CalderaAI/13B-BlueMethod": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "SaylorTwift/gpt2_test": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_layer": 12, "vocab_size": 50257}, "WeOpenML/PandaLM-Alpaca-7B-v1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "WeOpenML/Alpaca-7B-v1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, 
"num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "sumo43/lora_moe_7b_baseline": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "wenge-research/yayi-13b-llama2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32005}, "golaxy/gowizardlm": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 68420}, "abhiramtirumala/DialoGPT-sarcastic-medium": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "Corianas/Quokka_2.7b": {"architectures": ["GPT2LMHeadModel"], "n_embd": 2560, "n_head": 32, "n_inner": 10240, "n_layer": 32, "vocab_size": 50260}, "Corianas/256_5epoch": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1088, "n_head": 17, "n_inner": 4352, "n_layer": 14, "vocab_size": 50257}, "dvruette/llama-13b-pretrained": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "TheBloke/alpaca-lora-65B-HF": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "ashercn97/giraffe-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Aspik101/Vicuzard-30B-Uncensored-instruct-PL-lora_unload": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32001}, "TheBloke/dromedary-65b-lora-HF": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "Yhyu13/chimera-inst-chat-13b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "ehartford/based-30b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "concedo/Vicuzard-30B-Uncensored": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32001}, "64bits/LexPodLM-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "MayaPH/GodziLLa-30B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "Aspik101/vicuna-7b-v1.3-instruct-pl-lora_unload": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "julianweng/Llama-2-7b-chat-orcah": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "heegyu/RedTulu-Uncensored-3B-0719": {"architectures": ["GPTNeoXForCausalLM"], 
"hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "Aspik101/Llama-2-7b-hf-instruct-pl-lora_unload": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "quantumaikr/QuantumLM-70B-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "BreadAi/gpt-YA-1-1_160M": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 768, "intermediate_size": 3072, "num_attention_heads": 12, "num_hidden_layers": 12, "vocab_size": 50304}, "dvruette/oasst-pythia-12b-pretrained-sft": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 36, "vocab_size": 50288}, "Aeala/GPT4-x-AlpacaDente-30b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32016}, "TehVenom/Pygmalion_AlpacaLora-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "LLMs/Stable-Vicuna-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "quantumaikr/open_llama_7b_hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Aeala/GPT4-x-Alpasta-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "Fredithefish/CrimsonPajama": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "openaccess-ai-collective/hippogriff-30b-chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "camel-ai/CAMEL-13B-Role-Playing-Data": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/landmark-attention-llama7b-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32002}, "TheBloke/robin-33B-v2-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "TheBloke/GPlatty-30B-SuperHOT-8K-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "TheBloke/Chinese-Alpaca-33B-SuperHOT-8K-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 49954}, "TheBloke/CAMEL-33B-Combined-Data-SuperHOT-8K-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, 
"klosax/open_llama_13b_600bt_preview": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Aspik101/Nous-Hermes-13b-pl-lora_unload": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "jondurbin/airoboros-l2-7b-gpt4-1.4.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "YeungNLP/firefly-llama-30b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "ashercn97/manatee-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "lizhuang144/starcoder_mirror": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 40, "vocab_size": 49152}, "Aspik101/vicuna-13b-v1.5-PL-lora_unload": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Aspik101/Redmond-Puffin-13B-instruct-PL-lora_unload": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32032}, "Aspik101/StableBeluga-13B-instruct-PL-lora_unload": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "RoversX/llama-2-7b-hf-small-shards-Samantha-V1-SFT": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Corianas/Quokka_1.3b": {"architectures": ["GPT2LMHeadModel"], "n_embd": 2048, "n_head": 16, "n_inner": 8192, "n_layer": 24, "vocab_size": 50260}, "nthngdy/pythia-owt2-70m-50k": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 512, "intermediate_size": 2048, "num_attention_heads": 8, "num_hidden_layers": 6, "vocab_size": 50304}, "danielhanchen/open_llama_3b_600bt_preview": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "TheBloke/VicUnlocked-alpaca-65B-QLoRA-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "IDEA-CCNL/Ziya-LLaMA-13B-Pretrain-v1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 39424}, "kevinpro/Vicuna-13B-CoT": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "wahaha1987/llama_7b_sharegpt94k_fastchat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "openaccess-ai-collective/minotaur-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, 
"vocab_size": 32000}, "TheBloke/tulu-7B-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "golaxy/gogpt-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 68420}, "Aeala/Enterredaas-33b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "kingbri/chronolima-airo-grad-l2-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheTravellingEngineer/bloom-560m-RLHF": {"architectures": ["BloomForCausalLM"], "hidden_size": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 250880}, "HWERI/Llama2-7b-sharegpt4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "l3utterfly/llama2-7b-layla": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "yeontaek/llama-2-13b-Guanaco-QLoRA": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "duliadotio/dulia-13b-8k-alpha": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "yeontaek/llama-2-13B-ensemble-v3": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "dvruette/oasst-gpt-neox-20b-3000-steps": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 6144, "intermediate_size": 24576, "num_attention_heads": 64, "num_hidden_layers": 44, "vocab_size": 50288}, "dvruette/oasst-gpt-neox-20b-1000-steps": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 6144, "intermediate_size": 24576, "num_attention_heads": 64, "num_hidden_layers": 44, "vocab_size": 50288}, "huggingtweets/jerma985": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "Dampish/Dante-2.8B": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "TheBloke/Planner-7B-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "klosax/pythia-70m-deduped-step44k-92bt": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 512, "intermediate_size": 2048, "num_attention_heads": 8, "num_hidden_layers": 6, "vocab_size": 50304}, "klosax/open_llama_7b_400bt_preview": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "golaxy/gogpt2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 68420}, "Lajonbot/Llama-2-7b-chat-hf-instruct-pl-lora_unload": {"architectures": 
["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheTravellingEngineer/llama2-7b-chat-hf-guanaco": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Lajonbot/vicuna-7b-v1.5-PL-lora_unload": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "kingbri/airolima-chronos-grad-l2-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "yeontaek/llama-2-70B-ensemble-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "dvruette/oasst-llama-13b-2-epochs": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "dvruette/llama-13b-pretrained-sft-epoch-1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "dvruette/llama-13b-pretrained-dropout": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "hakurei/instruct-12b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 36, "vocab_size": 50277}, "dvruette/gpt-neox-20b-full-precision": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 6144, "intermediate_size": 24576, "num_attention_heads": 64, "num_hidden_layers": 44, "vocab_size": 50288}, "Monero/WizardLM-13b-OpenAssistant-Uncensored": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "Yhyu13/llama-30B-hf-openassitant": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "jondurbin/airoboros-65b-gpt4-1.2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "camel-ai/CAMEL-33B-Combined-Data": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "MBZUAI/bactrian-x-llama-13b-merged": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "dsvv-cair/alpaca-cleaned-llama-30b-bf16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "YeungNLP/firefly-llama-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "YeungNLP/firefly-llama-13b-v1.2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 
40, "vocab_size": 32000}, "heegyu/WizardVicuna2-13b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "dvruette/oasst-llama-13b-1000-steps": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "dvruette/llama-13b-pretrained-sft-do2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "pillowtalks-ai/delta13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "illuin/test-custom-llama": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "MrNJK/gpt2-xl-sft": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1600, "n_head": 25, "n_inner": null, "n_layer": 48, "vocab_size": 50257}, "PocketDoc/Dans-PileOfSets-Mk1-llama-13b-merged": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "jondurbin/airoboros-65b-gpt4-1.3": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "bhenrym14/airoboros-33b-gpt4-1.4.1-PI-8192-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "frank098/WizardLM_13B_juniper": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "golaxy/goims": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 68420}, "dvruette/oasst-pythia-6.9b-4000-steps": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50288}, "mncai/chatdoctor": {"architectures": ["LLaMAForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "wannaphong/openthaigpt-0.1.0-beta-full-model_for_open_llm_leaderboard": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "golaxy/gogpt-3b-bloom": {"architectures": ["BloomForCausalLM"], "hidden_size": 2560, "n_head": 32, "n_inner": null, "n_layer": 30, "vocab_size": 250880}, "golaxy/gogpt-7b-bloom": {"architectures": ["BloomForCausalLM"], "hidden_size": 4096, "n_head": 32, "n_inner": null, "n_layer": 30, "vocab_size": 250880}, "jondurbin/airoboros-33b-gpt4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "jondurbin/airoboros-13b-gpt4-1.2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "jondurbin/airoboros-7b-gpt4-1.2": {"architectures": 
["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "jondurbin/airoboros-13b-gpt4-1.3": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "jondurbin/airoboros-7b-gpt4-1.4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "jondurbin/airoboros-13b-gpt4-1.4-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "jondurbin/airoboros-7b-gpt4-1.4.1-qlora": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "frank098/orca_mini_3b_juniper": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "Lajonbot/vicuna-13b-v1.3-PL-lora_unload": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "jxhong/CAlign-alpaca-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "quantumaikr/KoreanLM-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "keyfan/vicuna-chinese-replication-v1.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 49953}, "jondurbin/airoboros-7b-gpt4-1.3": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "jerryjalapeno/nart-100k-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "xzuyn/Alpacino-SuperCOT-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "wahaha1987/llama_13b_sharegpt94k_fastchat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "quantumaikr/QuantumLM-7B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Undi95/ReMM-SLERP-L2-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "huggingtweets/bladeecity-jerma985": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "pszemraj/pythia-6.9b-HC3": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "CalderaAI/30B-Epsilon": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, 
"intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "TFLai/OpenOrca-Platypus2-13B-QLoRA-0.80-epoch": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "alpindale/pygmalion-instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "jondurbin/airoboros-c34b-2.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "abacaj/starcoderbase-1b-sft": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 2048, "n_head": 16, "n_inner": 8192, "n_layer": 24, "vocab_size": 49153}, "bongchoi/test-llama2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TinyPixel/lima-test": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "quantumaikr/llama-2-70B-instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "yeontaek/llama-2-13B-ensemble-v4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "cointegrated/rut5-base-absum": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 30000}, "pankajmathur/model_420_preview": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "Fredithefish/Guanaco-3B-Uncensored-v2": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "yeontaek/llama-2-70B-ensemble-v4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "Writer/palmyra-large": {"architectures": ["GPT2LMHeadModel"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 44, "vocab_size": 50257}, "RobbeD/OpenLlama-Platypus-3B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "TFLai/OrcaMini-Platypus2-13B-QLoRA-0.80-epoch": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "NobodyExistsOnTheInternet/PuffedConvo13bLoraE4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Sao10K/Medusa-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/Manticore-13B-Chat-Pyg-Guanaco-SuperHOT-8K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, 
"num_hidden_layers": 40, "vocab_size": 32000}, "TFLai/Nous-Hermes-Platypus2-13B-QLoRA-0.80-epoch": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32032}, "dhmeltzer/llama-7b-SFT_eli5_wiki65k_1024_r_64_alpha_16_merged": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TFLai/MythoMix-Platypus2-13B-QLoRA-0.80-epoch": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "chargoddard/llama-2-34b-uncode": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "zarakiquemparte/zaraxls-l2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TFLai/Stable-Platypus2-13B-QLoRA-0.80-epoch": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Danielbrdz/Barcenas-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "alan-turing-institute/mt5-large-finetuned-mnli-xtreme-xnli": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 250112}, "TFLai/Limarp-Platypus2-13B-QLoRA-0.80-epoch": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TFLai/PuddleJumper-Platypus2-13B-QLoRA-0.80-epoch": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "The-Face-Of-Goonery/Huginn-13b-v4.5": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "jondurbin/airoboros-l2-7b-2.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "allenai/unifiedqa-v2-t5-large-1363200": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "TheBloke/OpenAssistant-Llama2-13B-Orca-8K-3319-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "synapsoft/Llama-2-7b-hf-flan2022-1.2M": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "yeontaek/Platypus2-13B-LoRa-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "KES/T5-KES": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "robowaifudev/megatron-gpt2-345m": {"vocab_size": 50257, "n_embd": 1024, "n_layer": 
24, "n_head": 16, "n_inner": 4096, "architectures": ["GPT2LMHeadModel"]}, "Sao10K/Mythical-Destroyer-L2-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "CHIH-HUNG/llama-2-13b-dolphin_20w": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "The-Face-Of-Goonery/Huginn-13b-V4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "haining/scientific_abstract_simplification": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "ChanonUtupon/openthaigpt-merge-lora-llama-2-7B-3470k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 56554}, "chaoyi-wu/PMC_LLAMA_7B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "CHIH-HUNG/llama-2-13b-OpenOrca_5w": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "clibrain/lince-zero": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "TheBloke/Project-Baize-v2-7B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "uukuguy/speechless-codellama-platypus-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "CHIH-HUNG/llama-2-13b-dolphin_5w": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "yeontaek/airoboros-2.1-llama-2-13B-QLoRa": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "uukuguy/speechless-llama2-luban-orca-platypus-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Norquinal/llama-2-7b-claude-chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TFLai/Luban-Platypus2-13B-QLora-0.80-epoch": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "conceptofmind/Open-LLongMA-3b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "Norquinal/llama-2-7b-claude-chat-rp": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "lgaalves/llama-2-7b-hf_open-platypus": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 
11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "ziqingyang/chinese-llama-2-7b-16k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 55296}, "yeontaek/llama-2-13B-ensemble-v6": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "yeontaek/llama-2-70B-ensemble-v7": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "ubikpt/t5-small-finetuned-cnn": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "rajkumarrrk/t5-base-fine-tuned-on-cnn-dm": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "google/t5-efficient-xl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 16384, "d_model": 1024, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "TFLai/Airboros2.1-Platypus2-13B-QLora-0.80-epoch": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "dhmeltzer/llama-7b-SFT_ds_eli5_1024_r_64_alpha_16_merged": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "h2oai/h2ogpt-4096-llama2-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "dhmeltzer/llama-7b-SFT_ds_wiki65k_1024_r_64_alpha_16_merged": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TFLai/Ensemble5-Platypus2-13B-QLora-0.80-epoch": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "TFLai/Athena-Platypus2-13B-QLora-0.80-epoch": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "4bit/Llama-2-7b-chat-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TFLai/MythicalDestroyerV2-Platypus2-13B-QLora-0.80-epoch": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TFLai/OpenOrcaPlatypus2-Platypus2-13B-QLora-0.80-epoch": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "Salesforce/codegen25-7b-mono": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 51200}, "Sao10K/Stheno-L2-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "yeontaek/WizardCoder-Python-13B-LoRa": {"architectures": 
["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "uukuguy/speechless-orca-platypus-coig-lite-2k-0.6e-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "casperhansen/vicuna-7b-v1.5-awq": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "luffycodes/nash-vicuna-33b-v1dot3-ep2-w-rag-w-simple": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "CHIH-HUNG/llama-2-13b-OpenOrca_20w": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "google/t5-efficient-small": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "TheBloke/orca_mini_v2_7B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "tianyil1/denas-llama2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Sao10K/Stheno-Inverted-L2-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "junelee/ko_vicuna_7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Seungyoun/codellama-7b-instruct-pad": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32025}, "TheBloke/Kimiko-v2-13B-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "uukuguy/speechless-orca-platypus-coig-lite-4k-0.5e-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "uukuguy/speechless-orca-platypus-coig-lite-4k-0.6e-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "Undi95/UndiMix-v1-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "h2oai/h2ogpt-gm-oasst1-en-2048-falcon-7b": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "yeontaek/llama-2-70B-ensemble-v6": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "TheBloke/vicuna-13B-v1.5-16K-GGML": {}, "KnutJaegersberg/black_goo_recipe_a": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, 
"PKU-Alignment/beaver-7b-v1.0-reward": {"architectures": ["LlamaModelForScore"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "KnutJaegersberg/black_goo_recipe_b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "lgaalves/gpt2_open-platypus": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "cointegrated/rut5-base-multitask": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 30000}, "h2oai/h2ogpt-gm-oasst1-multilang-2048-falcon-7b": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "Cheng98/llama-160m": {"architectures": ["LlamaForCausalLM"], "hidden_size": 768, "intermediate_size": 3072, "num_attention_heads": 12, "num_hidden_layers": 12, "vocab_size": 32000}, "Andron00e/YetAnother_Open-Llama-3B-LoRA-OpenOrca": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "lgaalves/gpt2_guanaco-dolly-platypus": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "gagan3012/k2t-base": {"architectures": ["T5WithLMHeadModel"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "marcchew/Platypus-2-7B-LaMini-14K": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "lgaalves/gpt2_platypus-dolly-guanaco": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "czearing/article-title-generator": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "luffycodes/mcq-vicuna-13b-v1.5": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Universal-NER/UniNER-7B-definition": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Meli/GPT2-Prompt": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50259}, "s-nlp/ruT5-base-detox": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "cointegrated/rut5-base-paraphraser": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 30000}, "DevaMalla/llama7b_alpaca_bf16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "Universal-NER/UniNER-7B-type": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/starchat-beta-GPTQ": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 40, "vocab_size": 
49156}, "bigscience/sgpt-bloom-7b1-msmarco": {"architectures": ["BloomForCausalLM"], "n_inner": null, "n_layer": 30, "num_attention_heads": 32, "vocab_size": 250682}, "4bit/Llama-2-13b-chat-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "uukuguy/speechless-llama2-hermes-orca-platypus-wizardlm-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "ClueAI/PromptCLUE-base-v1-5": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "budecosystem/genz-13b-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/LlongOrca-13B-16K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32004}, "ozcangundes/mt5-multitask-qa-qg-turkish": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250102}, "EleutherAI/pythia-410m-v0": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 1024, "intermediate_size": 4096, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50304}, "CHIH-HUNG/llama-2-13b-FINETUNE2_3w": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "sonoisa/t5-base-japanese-v1.1": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "bolbolzaban/gpt2-persian": {"n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 25000, "architectures": ["GPT2LMHeadModel"]}, "google/t5-large-ssm": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "DeepPavlov/rudialogpt3_medium_based_on_gpt2_v2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "Mikivis/xuanxuan": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "uukuguy/speechless-llama2-hermes-orca-platypus-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "KnutJaegersberg/black_goo_recipe_c": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "beaugogh/Llama2-7b-sharegpt4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Salesforce/codet5p-770m-py": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32100}, "codefuse-ai/CodeFuse-CodeLlama-34B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "AUTOMATIC/promptgen-majinai-safe": {"architectures": 
["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 6, "vocab_size": 50257}, "reciprocate/shepherd-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Devio/test-22B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 40, "vocab_size": 32000}, "acrastt/Bean-3B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "TheBloke/L2-MythoMax22b-Instruct-Falseblock-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 40, "vocab_size": 32000}, "vihangd/smartplat-3b-v1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "jinaai/jina-embedding-b-en-v1": {"architectures": ["T5EncoderModel"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "yahma/llama-13b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "uukuguy/speechless-codellama-orca-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "CHIH-HUNG/llama-2-13b-FINETUNE1_17w": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "VMware/open-llama-13b-open-instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "ToolBench/ToolLLaMA-2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "luffycodes/mcq-hal-vicuna-13b-v1.5": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "TheBloke/BigTranslate-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 53613}, "PeanutJar/LLaMa-2-PeanutButter_v18_A-7B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "openbmb/UltraLM-65b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "Mikivis/gpt2-large-lora-sft": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_inner": null, "n_layer": 36, "vocab_size": 50257}, "Devio/test-3b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 16, "vocab_size": 32000}, "akhooli/gpt2-small-arabic": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_layer": 12, "vocab_size": 50257}, "Rardilit/Panther_v1": {"architectures": ["LLaMAForCausalLM"], "hidden_size": 4096, 
"intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "ValiantLabs/ShiningValiant": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "Devio/test100": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 40, "vocab_size": 32000}, "Devio/testC": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/Chronoboros-33B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "TheBloke/Pygmalion-13B-SuperHOT-8K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "vihangd/smartplat-3b-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "laituan245/t5-v1_1-small-smiles2caption-ft-from-pretrained-c4": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 32128}, "4bit/Llama-2-7b-Chat-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "CHIH-HUNG/llama-2-13b-FINETUNE2_3w-gate_up_down_proj": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "CHIH-HUNG/llama-2-13b-FINETUNE2_3w-q_k_v_o_proj": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/vicuna-33B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "Devio/test-1400": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/gpt4-alpaca-lora-30B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "notstoic/pygmalion-13b-4bit-128g": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "NousResearch/Yarn-Llama-2-7b-128k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Den4ikAI/FRED-T5-LARGE_text_qa": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 50364}, "valhalla/t5-base-qa-qg-hl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32102}, "Undi95/ReMM-L2-13B-v1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, 
"vocab_size": 32000}, "TheBloke/Zarablend-L2-7B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "KnutJaegersberg/black_goo_recipe_d": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "ckip-joint/bloom-1b1-zh": {"architectures": ["BloomModel"], "hidden_size": 1536, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 250880}, "seonglae/llama-2-13b-chat-hf-gptq": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Trelis/Llama-2-7b-chat-hf-sharded-bf16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "KnutJaegersberg/LLongMA-3b-LIMA": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "h2oai/h2ogpt-gm-oasst1-en-xgen-7b-8k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 51200}, "PeanutJar/LLaMa-2-PeanutButter_v18_B-7B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "ehartford/WizardLM-1.0-Uncensored-CodeLlama-34b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "uukuguy/speechless-codellama-orca-platypus-13b-0.10e": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "DeepESP/gpt2-spanish": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "paust/pko-flan-t5-large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 50358}, "ThomasNLG/t5-qa_squad2neg-en": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "PharMolix/BioMedGPT-LM-7B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "eenzeenee/t5-base-korean-summarization": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 50358}, "porkorbeef/Llama-2-13b-public": {"architectures": ["LlamaModel"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/WizardLM-Uncensored-Falcon-7B-GPTQ": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65025}, "dahara1/weblab-10b-instruction-sft-GPTQ": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4864, "intermediate_size": 19456, "num_attention_heads": 38, "num_hidden_layers": 36, "vocab_size": 50277}, "CHIH-HUNG/llama-2-13b-FINETUNE2_TEST_2.2w": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, 
"num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "mrm8488/t5-small-finetuned-emotion": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "gurgutan/saiga2-13b-4bit": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "IlyaGusev/rut5_base_sum_gazeta": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 30000}, "TheBloke/Llama-2-13B-German-Assistant-v4-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 37632}, "gaodrew/OpenOrca-Platypus2-13B-thera-1250": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "minlik/chinese-llama-7b-merged": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 49953}, "TheBloke/Stable-Platypus2-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/Luna-AI-Llama2-Uncensored-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "allenai/t5-small-squad2-question-generation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "microsoft/bloom-deepspeed-inference-fp16": {"architectures": ["BloomModel"], "n_layer": 70, "num_attention_heads": 112, "vocab_size": 250880}, "csebuetnlp/banglat5": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "potsawee/t5-large-generation-race-QuestionAnswer": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "grammarly/coedit-xl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32100}, "Narrativaai/bloom-560m-finetuned-totto-table-to-text": {"architectures": ["BloomForCausalLM"], "hidden_size": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 250880}, "jjaaaww/posi_13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "IDEA-CCNL/Randeng-T5-784M-MultiTask-Chinese": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32596}, "Undi95/Nous-Hermes-13B-Code": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32032}, "paust/pko-t5-large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 50358}, "learnanything/llama-7b-huggingface": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 
32, "vocab_size": 32000}, "weiren119/Taiwan-LLaMa-v1.0-4bits-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "ml6team/keyphrase-generation-t5-small-inspec": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32100}, "TheBloke/CodeLlama-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "Undi95/MLewd-L2-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "tscholak/cxmefzzi": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 16384, "d_model": 1024, "num_heads": 32, "num_layers": 24, "vocab_size": 32102}, "Gaivoronsky/ruGPT-3.5-13B-8bit": {"architectures": ["GPT2LMHeadModel"], "n_embd": 5120, "n_head": 40, "n_inner": null, "n_layer": 40, "vocab_size": 50272}, "SatoruDano/llama-2-7b-finetuned_v1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "ClueAI/PromptCLUE-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "uukuguy/speechless-codellama-orca-airoboros-13b-0.10e": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "anonymous-german-nlp/german-gpt2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_layer": 12, "vocab_size": 52000}, "fxmarty/gpt2-tiny-onnx": {"intermediate_size": 37, "n_embd": 32, "n_head": 4, "n_inner": null, "n_layer": 5, "vocab_size": 1000}, "prakharz/DIAL-FLANT5-XL": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32100}, "h2oai/h2ogpt-oasst1-falcon-40b": {"architectures": ["RWForCausalLM"], "hidden_size": 8192, "n_head": 128, "n_layer": 60, "vocab_size": 65024}, "synapsoft/Llama-2-7b-chat-hf-flan2022-1.2M": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Undi95/ReMM-L2-13B-PIPPA": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "CHIH-HUNG/llama-2-13b-FINETUNE1_17w-gate_up_down_proj": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Fredithefish/Guanaco-7B-Uncensored": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "conceptofmind/Yarn-Llama-2-13b-128k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Undi95/LewdEngine": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/CodeLlama-7B-Instruct-fp16": {"architectures": 
["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32016}, "CHIH-HUNG/llama-2-13b-Open_Platypus_and_ccp_2.6w": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "CHIH-HUNG/llama-2-13b-FINETUNE1_17w-q_k_v_o_proj": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "jondurbin/airoboros-33b-2.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "Salesforce/codet5p-220m-py": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "Danielbrdz/CodeBarcenas-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "SJ-Ray/Re-Punctuate": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "EasthShin/Youth_Chatbot_Kogpt2-base": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 51200}, "ThomasNLG/t5-qg_squad1-en": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "EleutherAI/pythia-160m-deduped-v0": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 768, "intermediate_size": 3072, "num_attention_heads": 12, "num_hidden_layers": 12, "vocab_size": 50304}, "MBZUAI/LaMini-T5-223M": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "HooshvareLab/gpt2-fa": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 42001}, "TFLai/Nova-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "conceptofmind/LLongMA-2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TDC2023/trojan-base-pythia-1.4b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50304}, "phpaiola/ptt5-base-summ-xlsum": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "TFLai/SpeechlessV1-Nova-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/stablecode-instruct-alpha-3b-GPTQ": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 49152}, "stanford-crfm/music-small-800k": {"vocab_size": 55028, "n_embd": 768, "n_layer": 12, "n_head": 12, "n_inner": null, "architectures": null}, "TFLai/EnsembleV5-Nova-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 
40, "num_hidden_layers": 40, "vocab_size": 32002}, "declare-lab/flan-alpaca-xl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "jpwahle/t5-large-word-sense-disambiguation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "lizhuang144/flan-t5-large-factual-sg": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "DKYoon/mt5-base-lm-adapt": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "TheBloke/guanaco-65B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "Salesforce/codegen25-7b-instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 51200}, "bigscience-data/sgpt-bloom-1b7-nli": {"architectures": ["BloomModel"], "n_inner": null, "n_layer": 24, "num_attention_heads": 16, "vocab_size": 250880}, "TurkuNLP/gpt3-finnish-small": {"architectures": ["BloomModel"], "hidden_size": 768, "n_head": 12, "n_layer": 12, "vocab_size": 131072}, "jordiclive/flan-t5-3b-summarizer": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "marblyso/DialoGPT-small-what-the-fuck": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "retrieva-jp/t5-small-short": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 32128}, "codeparrot/codeparrot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1600, "n_head": 25, "n_inner": null, "n_layer": 48, "vocab_size": 32768}, "openthaigpt/openthaigpt-1.0.0-beta-7b-chat-ckpt-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 56554}, "Rocketknight1/falcon-rw-1b": {"architectures": ["FalconForCausalLM"], "hidden_size": 2048, "num_attention_heads": 32, "num_hidden_layers": 24, "vocab_size": 50304}, "TaylorAI/Flash-Llama-30M-20001": {"architectures": ["LlamaForCausalLM"], "hidden_size": 384, "intermediate_size": 1024, "num_attention_heads": 12, "num_hidden_layers": 4, "vocab_size": 32000}, "castorini/t5-base-canard": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "r3dhummingbird/DialoGPT-medium-joshua": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "IDEA-CCNL/Wenzhong2.0-GPT2-110M-BertTokenizer-chinese": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": 3072, "n_layer": 12, "vocab_size": 21133}, "TigerResearch/tigerbot-13b-chat-8bit": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 60928}, "pranavpsv/gpt2-genre-story-generator": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_layer": 12, "vocab_size": 50266}, "Photolens/llama-2-7b-langchain-chat": 
{"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "ck46/t5-base-hotpot-qa-qg": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32102}, "castorini/monot5-small-msmarco-10k": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "yujiepan/llama-2-tiny-random": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8, "intermediate_size": 32, "num_attention_heads": 2, "num_hidden_layers": 1, "vocab_size": 32000}, "castorini/doc2query-t5-base-msmarco": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "oliverguhr/spelling-correction-multilingual-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "allenai/unifiedqa-t5-11b": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 65536, "d_model": 1024, "num_heads": 128, "num_layers": 24, "vocab_size": 32128}, "TheBloke/CodeLlama-34B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "snorkelai/sdnet": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "SiberiaSoft/SiberianFRED-T5-XL": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1536, "num_heads": 24, "num_layers": 24, "vocab_size": 50365}, "sultan/ArabicT5-Base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 20, "vocab_size": 32000}, "nikaashpuri/gpt-expt-sp-v3-K-600-MA-Mac-actions-kmeans-v16": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 1913}, "TheBloke/Yarn-Llama-2-13B-128K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "allenai/cosmo-xl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "flax-community/gpt2-bengali": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "EleutherAI/pythia-410m-deduped-v0": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 1024, "intermediate_size": 4096, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50304}, "Writer/palmyra-small": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": 3072, "n_layer": 12, "vocab_size": 50257}, "LukasStankevicius/t5-base-lithuanian-news-summaries-175": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "laituan245/molt5-large-caption2smiles": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "google/ul2": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 16384, "d_model": 4096, "num_heads": 16, "num_layers": 32, "vocab_size": 32128}, "Suva/uptag-keyphrase-model": {"architectures": ["T5ForConditionalGeneration"], 
"d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "TheBloke/orca_mini_7B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TusharJoshi89/title-generator": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "juierror/flan-t5-text2sql-with-schema": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "hf-tiny-model-private/tiny-random-T5ForConditionalGeneration": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 37, "d_model": 32, "num_heads": 4, "num_layers": 5, "vocab_size": 32100}, "stacked-summaries/flan-t5-large-stacked-samsum-1024": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "TheBloke/WizardLM-33B-V1-0-Uncensored-SuperHOT-8K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "persiannlp/mt5-base-parsinlu-opus-translation_fa_en": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "gurgutan/ruGPT-13B-4bit": {"architectures": ["GPT2LMHeadModel"], "n_embd": 5120, "n_head": 40, "n_inner": null, "n_layer": 40, "vocab_size": 50272}, "TheBloke/upstage-llama-30b-instruct-2048-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "sdadas/polish-gpt2-medium": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": 4096, "n_layer": 24, "vocab_size": 51200}, "aubmindlab/aragpt2-medium": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 64000}, "SEBIS/code_trans_t5_large_source_code_summarization_python_multitask_finetune": {"architectures": ["T5Model"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "maximxls/text-normalization-ru-terrible": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 256, "num_heads": 4, "num_layers": 3, "vocab_size": 5120}, "TheBloke/llama-2-13B-Guanaco-QLoRA-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "ziqingyang/chinese-alpaca-2-13b-16k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 55296}, "KETI-AIR/ke-t5-large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 64128}, "ibm/qcpg-sentences": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32158}, "tiiuae/falcon-rw-7b": {"architectures": ["FalconForCausalLM"], "hidden_size": 4096, "num_attention_heads": 64, "num_hidden_layers": 36, "vocab_size": 65024}, "timdettmers/guanaco-13b-merged": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 
32000}, "h2oai/h2ogpt-oig-oasst1-falcon-40b": {"architectures": ["RWForCausalLM"], "hidden_size": 8192, "n_head": 128, "n_layer": 60, "vocab_size": 65024}, "coffeeee/nsfw-story-generator": {"architectures": ["GPT2Model"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "zpn/llama-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "crumb/bloom-560m-RLHF-SD2-prompter-aesthetic": {"architectures": ["BloomForCausalLM"], "hidden_size": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 250880}, "kalpeshk2011/dipper-paraphraser-xxl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 10240, "d_model": 4096, "num_heads": 64, "num_layers": 24, "vocab_size": 32128}, "TheBloke/WizardLM-13B-V1.0-Uncensored-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/WizardLM-13B-V1-1-SuperHOT-8K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "allenai/unifiedqa-t5-small": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "oliverguhr/spelling-correction-german-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "ThomasSimonini/t5-end2end-question-generation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32101}, "asi/gpt-fr-cased-base": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1792, "n_head": 14, "n_inner": null, "n_layer": 24, "vocab_size": 50000}, "lora-x/backpack-gpt2": {"architectures": ["BackpackGPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50264}, "TheBloke/Vigogne-2-13B-Instruct-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "ai-forever/ruT5-large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "ml6team/keyphrase-generation-t5-small-openkp": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32100}, "mrm8488/t5-base-finetuned-e2m-intent": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "nikaashpuri/gpt-expt-sp-v3-K-600-MA-Mac-actions-kmeans-v14": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 1902}, "TheBloke/Marx-3b-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "TheBloke/Dolphin-Llama2-7B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "oscorrea/scores-falcon40b-sm-merged": {"architectures": ["RWForCausalLM"], "hidden_size": 8192, "n_head": 128, "n_layer": 60, "vocab_size": 65024}, 
"lmqg/t5-small-squad-qag": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32101}, "ehartford/WizardLM-Uncensored-Falcon-40b": {"architectures": ["RWForCausalLM"], "hidden_size": 8192, "n_head": 128, "n_layer": 60, "vocab_size": 65025}, "persiannlp/mt5-base-parsinlu-sentiment-analysis": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "VietAI/vit5-large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 36100}, "thanathorn/mt5-cpe-kmutt-thai-sentence-sum": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "Blackroot/Hermes-Kimiko-13B-f16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32032}, "CarperAI/stable-vicuna-13b-delta": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "j5ng/kullm-12.8b-GPTQ-8bit": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 30080}, "TheBloke/ReMM-SLERP-L2-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Weni/WeniGPT-L-70": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "valhalla/t5-small-qg-hl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32102}, "retrieva-jp/t5-xl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "TheBloke/Wizard-Vicuna-30B-Superhot-8K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "openllmplayground/openalpaca_3b_600bt_preview": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "ArmelR/starcoder-gradio-v0": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 40, "vocab_size": 49152}, "chanind/frame-semantic-transformer-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "akreal/tiny-random-gpt2": {"intermediate_size": 37, "n_embd": 32, "n_head": 4, "n_inner": null, "n_layer": 5, "vocab_size": 99}, "Neko-Institute-of-Science/LLaMA-7B-HF": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Writer/palmyra-med-20b": {"architectures": ["GPT2LMHeadModel"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 44, "vocab_size": 50259}, "SiberiaSoft/SiberianPersonaFred": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1536, "num_heads": 24, "num_layers": 24, "vocab_size": 
50364}, "mrm8488/spanish-gpt2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "unicamp-dl/translation-en-pt-t5": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "OFA-Sys/gsm8k-rft-llama7b-u13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "liuhaotian/LLaVA-13b-delta-v0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32003}, "huggingface/falcon-40b-gptq": {"architectures": ["RWForCausalLM"], "hidden_size": 8192, "n_head": 128, "n_layer": 60, "vocab_size": 65024}, "Ravi07bec/llama-qlora-65b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "PKU-Alignment/alpaca-7b-reproduced": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "Unbabel/gec-t5_small": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "TheBloke/Speechless-Llama2-Hermes-Orca-Platypus-WizardLM-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "MIIB-NLP/Arabic-question-generation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 110080}, "google/t5-large-ssm-nq": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "stanford-crfm/arwen-gpt2-medium-x21": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "sentence-transformers/gtr-t5-xxl": {"architectures": ["T5EncoderModel"], "d_ff": 65536, "d_model": 1024, "num_heads": 128, "num_layers": 24, "vocab_size": 32128}, "TheBloke/Nous-Hermes-Llama2-70B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32001}, "paust/pko-t5-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 50358}, "allenai/tk-instruct-11b-def": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 65536, "d_model": 1024, "num_heads": 128, "num_layers": 24, "vocab_size": 32128}, "amphora/FinABSA": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32105}, "TurkuNLP/gpt3-finnish-13B": {"architectures": ["BloomModel"], "hidden_size": 5120, "n_head": 40, "n_layer": 40, "vocab_size": 131072}, "PAIXAI/Astrid-LLama-7B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Aalaa/opt-125m-wikitext2": {"architectures": ["OPTForCausalLM"], "hidden_size": 768, "num_attention_heads": 12, "num_hidden_layers": 12, "vocab_size": 50272}, "hf-internal-testing/tiny-random-GPTNeoXForQuestionAnswering": 
{"architectures": ["GPTNeoXForQuestionAnswering"], "hidden_size": 32, "intermediate_size": 37, "num_attention_heads": 4, "num_hidden_layers": 5, "vocab_size": 1024}, "cssupport/t5-small-awesome-text-to-sql": {"vocab_size": 32128, "d_model": 512, "d_ff": 2048, "num_layers": 6, "num_heads": 8, "architectures": ["T5ForConditionalGeneration"]}, "TheBloke/MythoMix-L2-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "conceptofmind/Hermes-LLongMA-2-13b-8k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "lysandre/arxiv-nlp": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_layer": 12, "vocab_size": 50257}, "Pcik/DialoGPT-medium-Kirby": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "PY007/SLM_1-4B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 2048, "intermediate_size": 5632, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50432}, "ceshine/t5-paraphrase-paws-msrp-opinosis": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "microsoft/bloom-deepspeed-inference-int8": {"architectures": ["BloomModel"], "n_layer": 70, "num_attention_heads": 112, "vocab_size": 250880}, "TheBloke/PuddleJumper-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "gorilla-llm/gorilla-falcon-7b-hf-v0": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "TheBloke/starcoder-GPTQ": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 40, "vocab_size": 49152}, "lmsys/longchat-7b-16k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "DAMO-NLP-MT/polylm-1.7b": {"architectures": ["GPT2LMHeadModel"], "n_embd": 2048, "n_head": 16, "n_inner": 8192, "n_layer": 24, "vocab_size": 256000}, "Salesforce/xgen-7b-4k-base": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 51200}, "DAMO-NLP-MT/polylm-13b": {"architectures": ["PolyLMHeadModel"], "n_embd": 5120, "n_head": 40, "n_inner": 20480, "n_layer": 40, "vocab_size": 256000}, "dbddv01/gpt2-french-small": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "EleutherAI/pythia-70m-deduped-v0": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 512, "intermediate_size": 2048, "num_attention_heads": 8, "num_hidden_layers": 6, "vocab_size": 50304}, "algolet/mt5-base-chinese-qg": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250101}, "hf-internal-testing/tiny-random-BloomForQuestionAnswering": {"architectures": ["BloomForQuestionAnswering"], "hidden_size": 32, "n_head": 4, "n_layer": 5, "vocab_size": 1024}, "hf-internal-testing/tiny-random-BloomForTokenClassification": {"architectures": ["BloomForTokenClassification"], "hidden_size": 32, "n_head": 4, 
"n_layer": 5, "vocab_size": 1024}, "flax-community/t5-base-cnn-dm": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "hf-internal-testing/tiny-random-BloomForSequenceClassification": {"architectures": ["BloomForSequenceClassification"], "hidden_size": 32, "n_head": 4, "n_layer": 5, "vocab_size": 1024}, "tau/t5-v1_1-large-rss": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "TheBloke/airoboros-l2-13b-gpt4-m2.0-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "hf-internal-testing/tiny-random-GPTNeoXForSequenceClassification": {"architectures": ["GPTNeoXForSequenceClassification"], "hidden_size": 32, "intermediate_size": 37, "num_attention_heads": 4, "num_hidden_layers": 5, "vocab_size": 1024}, "allegro/plt5-small": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 50048}, "TheBloke/stable-vicuna-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "hf-internal-testing/tiny-random-GPTNeoXForTokenClassification": {"architectures": ["GPTNeoXForTokenClassification"], "hidden_size": 32, "intermediate_size": 37, "num_attention_heads": 4, "num_hidden_layers": 5, "vocab_size": 1024}, "TheBloke/WizardLM-7B-V1-0-Uncensored-SuperHOT-8K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "HuggingFaceH4/tiny-random-LlamaForSequenceClassification": {"architectures": ["LlamaForSequenceClassification"], "hidden_size": 16, "intermediate_size": 64, "num_attention_heads": 4, "num_hidden_layers": 2, "vocab_size": 32000}, "hf-internal-testing/tiny-random-GPTNeoXModel": {"architectures": ["GPTNeoXModel"], "hidden_size": 32, "intermediate_size": 37, "num_attention_heads": 4, "num_hidden_layers": 5, "vocab_size": 1024}, "IlyaGusev/rut5_base_headline_gen_telegram": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 30000}, "lgaalves/gpt2_camel_physics-platypus": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "lightonai/alfred-40b-0723": {"architectures": ["RWForCausalLM"], "hidden_size": 8192, "n_head": 128, "n_layer": 60, "vocab_size": 65024}, "KETI-AIR/ke-t5-small": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 64128}, "ibm/regen-disambiguation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "vihangd/smartplat-3b-v3": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "TheBloke/OpenBuddy-Llama2-13B-v11.1-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 37632}, "BlinksFly/Harry_Potter-Ai": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": 
null, "n_layer": 24, "vocab_size": 50257}, "conceptofmind/Yarn-Llama-2-7b-128k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "liujch1998/vera": {"architectures": ["T5EncoderModel"], "d_ff": 10240, "d_model": 4096, "num_heads": 64, "num_layers": 24, "vocab_size": 32128}, "kaist-ai/CoT-T5-11B": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 10240, "d_model": 4096, "num_heads": 64, "num_layers": 24, "vocab_size": 32128}, "lintang/t5-v1_1-base-flan": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "sentence-transformers/sentence-t5-xxl": {"architectures": ["T5EncoderModel"], "d_ff": 65536, "d_model": 1024, "num_heads": 128, "num_layers": 24, "vocab_size": 32128}, "TheBloke/vicuna-7B-v1.5-16K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "retrieva-jp/t5-large-long": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "retrieva-jp/t5-base-long": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "upstage/SOLAR-0-70b-8bit": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "jerteh/gpt2-vrabac": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 49152}, "Parth/boolean": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "hf-internal-testing/tiny-random-GPTBigCodeForSequenceClassification": {"architectures": ["GPTBigCodeForSequenceClassification"], "n_embd": 32, "n_head": 4, "n_inner": 37, "n_layer": 5, "vocab_size": 1024}, "hf-internal-testing/tiny-random-GPTBigCodeForTokenClassification": {"architectures": ["GPTBigCodeForTokenClassification"], "n_embd": 32, "n_head": 4, "n_inner": 37, "n_layer": 5, "vocab_size": 1024}, "megagonlabs/t5-base-japanese-web": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32000}, "MisguidedKerbal/DialoGPT-kerbalV3": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "praeclarum/cuneiform": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "uw-hai/polyjuice": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "reciprocate/tiny-llama": {"architectures": ["LlamaForCausalLM"], "hidden_size": 64, "intermediate_size": 64, "num_attention_heads": 1, "num_hidden_layers": 1, "vocab_size": 32000}, "luqh/ClinicalT5-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "stanford-crfm/celebrimbor-gpt2-medium-x81": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "TheBloke/CodeLlama-13B-Python-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 
5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "laituan245/molt5-large-smiles2caption": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "TurkuNLP/gpt3-finnish-8B": {"architectures": ["BloomModel"], "hidden_size": 4096, "n_head": 32, "n_layer": 32, "vocab_size": 131072}, "NeuML/t5-small-txtsql": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "malteos/bloom-6b4-clp-german": {"hidden_size": 4096, "n_head": 32, "n_layer": 30, "vocab_size": 50304}, "GT4SD/multitask-text-and-chemistry-t5-base-augm": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "allenai/open-instruct-stanford-alpaca-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "CarperAI/randomwalks": {"architectures": ["GPT2LMHeadModel"], "n_embd": 144, "n_head": 6, "n_inner": null, "n_layer": 6, "vocab_size": 23}, "unicamp-dl/mt5-13b-mmarco-100k": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 10240, "d_model": 4096, "num_heads": 64, "num_layers": 24, "vocab_size": 250112}, "lmqg/t5-small-squad-qg-ae": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32101}, "naltukhov/joke-generator-rus-t5": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "hf-internal-testing/tiny-random-UMT5Model": {"architectures": ["UMT5Model"], "d_ff": 37, "d_model": 32, "num_heads": 4, "num_layers": 5, "vocab_size": 256300}, "rentcarsAI/falcon-7b-codegenerator-qlora-merged": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "panggi/t5-base-indonesian-summarization-cased": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "hf-internal-testing/tiny-random-UMT5ForQuestionAnswering": {"architectures": ["UMT5ForQuestionAnswering"], "d_ff": 37, "d_model": 32, "num_heads": 4, "num_layers": 5, "vocab_size": 256300}, "UBC-NLP/AraT5-base": {"d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 110080}, "kmewhort/stable-diffusion-prompt-bolster": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 52000}, "TheBloke/Llama-2-13B-GGML": {}, "gaussalgo/T5-LM-Large-text2sql-spider": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "DAMO-NLP-MT/polylm-multialpaca-13b": {"architectures": ["GPT2LMHeadModel"], "n_embd": 5120, "n_head": 40, "n_inner": 20480, "n_layer": 40, "vocab_size": 256000}, "hf-internal-testing/tiny-random-UMT5ForSequenceClassification": {"architectures": ["UMT5ForSequenceClassification"], "d_ff": 37, "d_model": 32, "num_heads": 4, "num_layers": 5, "vocab_size": 256300}, "tinkoff-ai/ruDialoGPT-small": {"n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50261}, "indonesian-nlp/gpt2-medium-indonesian": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, 
"vocab_size": 50257}, "Salesforce/mixqg-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "EleutherAI/pythia-1b-v0": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 8, "num_hidden_layers": 16, "vocab_size": 50304}, "NinedayWang/PolyCoder-2.7B": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50304}, "zanchat/falcon-1b": {"architectures": ["RWForCausalLM"], "hidden_size": 2048, "n_head": 32, "n_layer": 24, "vocab_size": 50304}, "Goodnoway/DialoGPT-nerbalV2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "crumb/llama2-7b-shard-bf16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "sagawa/ReactionT5-retrosynthesis": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 268}, "DKYoon/mt5-large-lm-adapt": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 250112}, "lintang/t5-v1_1-xl-flan": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "castorini/monot5-large-msmarco-10k": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "Ichsan2895/Merak-7B-v3": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "stanford-crfm/caprica-gpt2-small-x81": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "vicgalle/gpt2-open-instruct-v1": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50260}, "philschmid/llama-2-7b-instruction-generator": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "aubmindlab/aragpt2-large": {"architectures": ["GPT2LMHeadModel"], "intermediate_size": 5120, "n_embd": 1280, "n_head": 20, "n_inner": null, "n_layer": 36, "vocab_size": 64000}, "NonzeroCornet34/DialoGPT-small-philbot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "Trelis/Llama-2-7b-chat-hf-sharded-bf16-5GB": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "deep-learning-analytics/wikihow-t5-small": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "JDBN/t5-base-fr-qg-fquad": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32102}, "stanford-crfm/durin-gpt2-medium-x343": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "abjbpi/Dwight_Schrute": {"architectures": 
["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "Spico/Humback-Myx": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "T-Systems-onsite/mt5-small-sum-de-en-v2": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250100}, "kaiyuy/leandojo-lean3-tacgen-byt5-small": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3584, "d_model": 1472, "num_heads": 6, "num_layers": 12, "vocab_size": 384}, "pinkmanlove/llama-33b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "lintang/t5-v1_1-large-flan": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "Naseej/noon-7b": {"architectures": ["BloomForCausalLM"], "hidden_size": 4096, "n_head": 32, "n_inner": null, "n_layer": 30, "vocab_size": 250880}, "chizhikchi/sci-five-radsum23": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32101}, "impyadav/GPT2-FineTuned-Hinglish-Song-Generation": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "elinas/llama-13b-hf-transformers-4.29": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/GodziLLa2-70B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "TheBloke/Llama-2-70B-OASST-1-200-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32016}, "jacobmorrison/tk-instruct-base-lora-experiments": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-3b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "ingen51/DialoGPT-medium-GPT4": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "cointegrated/rut5-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 30000}, "PocketDoc/Dans-CreepingSenseOfDoom": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "tsmatz/mt5_summarize_japanese": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250112}, "domenicrosati/QA2D-t5-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "gorkemgoknar/gpt2chatbotenglish": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_layer": 12, "vocab_size": 50262}, "DeliveryBoy/DiabloGPT-medium-Kurisu": {"architectures": 
["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "philschmid/instruct-igel-001": {"architectures": ["BloomForCausalLM"], "hidden_size": 4096, "n_head": 32, "n_layer": 30, "vocab_size": 50304}, "xDAN2099/xDAN_13B_Zh_Base": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 60928}, "codeparrot/codeparrot-small": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 32768}, "paust/pko-t5-small": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 50358}, "flozi00/Llama-2-13b-german-assistant-v4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 37632}, "doc2query/msmarco-t5-base-v1": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "microsoft/DialogRPT-depth": {"architectures": ["GPT2ForSequenceClassification"], "n_embd": 1024, "n_head": 16, "n_layer": 24, "vocab_size": 50257}, "nomic-ai/gpt4all-13b-snoozy": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "NousResearch/Yarn-Llama-2-13b-64k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "mrm8488/t5-base-e2e-question-generation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "postbot/gpt2-medium-emailgen": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "vanilladucky/Friends_chatting_bot_redefined": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "TheBloke/LlongOrca-7B-16K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32003}, "mutamuta/DialoGPT-spongebob-small": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "Ar4ikov/gpt2-medium-650k-stable-diffusion-prompt-generator": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "TheBloke/HermesLimaRP-L2-7B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "clibrain/Llama-2-7b-ft-instruct-es-gptq-4bit": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/Yarn-Llama-2-7B-128K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "lmqg/mt5-small-jaquad-qg-ae": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "allenai/tk-instruct-base-def-pos": {"architectures": 
["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "davidkim205/komt-Llama-2-7b-chat-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "tangy0/llama-2-7b-dtlpy_v0.4chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TigerResearch/tigerbot-70b-base": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 60928}, "hadifar/eventextraction": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "TintinMeimei/NousResearch-Llama-2-7b-chat-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/airoboros-l2-13b-gpt4-2.0-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Nekochu/Llama-2-13B-fp16-french": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "minhtoan/t5-translation-vietnamese-nom": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 300, "num_heads": 8, "num_layers": 6, "vocab_size": 30100}, "BELLE-2/BELLE-Llama2-13B-chat-0.4M": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "bigscience/T0": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 10240, "d_model": 4096, "num_heads": 64, "num_layers": 24, "vocab_size": 32128}, "andreaskoepf/pythia-1.4b-gpt4all-pretrain": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50288}, "Salesforce/codet5-base-codexglue-clone": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "Chae/scottbot_med": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "TheBloke/LLaMA-7b-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "sagard21/python-code-explainer": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32100}, "stanfordnlp/SteamSHP-flan-t5-xl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "MarinHinawa/DialoGPT-medium-Ene": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "aiautomationlab/german-news-title-gen-mt5": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "TheBloke/vicuna-13B-1.1-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 
13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/Chronos-Hermes-13B-SuperHOT-8K-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "microsoft/DialogRPT-human-vs-machine": {"architectures": ["GPT2ForSequenceClassification"], "n_embd": 1024, "n_head": 16, "n_layer": 24, "vocab_size": 50257}, "uer/gpt2-distil-chinese-cluecorpussmall": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 6, "vocab_size": 21128}, "h2oai/h2ogpt-gm-oasst1-en-2048-falcon-40b-v1": {"architectures": ["RWForCausalLM"], "hidden_size": 8192, "n_head": 128, "n_layer": 60, "vocab_size": 65024}, "davidkim205/komt-Llama-2-13b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "ibm/qcpg-questions": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32155}, "gavin124/gpt2-finetuned-cnn-summarization-v2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50260}, "hogru/MolReactGen-GuacaMol-Molecules": {"architectures": ["GPT2LMHeadModel"], "n_embd": 144, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 93}, "stanford-crfm/darkmatter-gpt2-small-x343": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "conceptofmind/Yarn-Llama-2-7b-64k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Radicalkiddo/DialoGPT-small-Radical": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "Ninja5000/DialoGPT-medium-HarryPotter": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "theblackcat102/alpaca-title-generator-mt0-large": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 250112}, "transfaeries/Twilight-Sparkle-GPT": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "TheBloke/Vigogne-2-7B-Instruct-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "markofhope/DialoGPT-medium-HarringtonBot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "seeksery/DialoGPT-calig3": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "beomi/kcgpt2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 55000}, "vilm/vietcuna-3b": {"architectures": ["BloomForCausalLM"], "hidden_size": 2560, "n_head": 32, "n_inner": null, "n_layer": 30, "vocab_size": 250880}, "IDEA-CCNL/Randeng-T5-784M": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32596}, "vwxyzjn/starcoderbase-triviaqa": {"architectures": 
["GPTBigCodeForCausalLM"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 40, "vocab_size": 49152}, "IDEA-CCNL/Wenzhong2.0-GPT2-3.5B-chinese": {"architectures": ["GPT2LMHeadModel"], "n_embd": 3072, "n_head": 32, "n_inner": 12288, "n_layer": 30, "vocab_size": 50304}, "TheBloke/Llama-2-7b-Chat-GGUF": {}, "MingZhong/unieval-dialog": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32100}, "OpenAssistant/falcon-40b-megacode2-oasst": {"architectures": ["FalconForCausalLM"], "hidden_size": 8192, "num_attention_heads": 128, "num_hidden_layers": 60, "vocab_size": 65152}, "axiong/PMC_LLaMA_13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "codeparrot/codeparrot-small-multi": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": 3072, "n_layer": 12, "vocab_size": 32768}, "EleutherAI/pythia-6.9b-deduped-v0": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "Riiid/sheep-duck-llama-2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "alibaba-pai/pai-bloom-1b1-text2prompt-sd": {"architectures": ["BloomForCausalLM"], "hidden_size": 1536, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 250880}, "TheBloke/Chronos-Beluga-v2-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "malmarjeh/t5-arabic-text-summarization": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 110080}, "GarfExit/DialogGPT-medium-707": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "audreycl/DialoGPT-RPF": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "florentiino/DialoGPT-small-harrypotter": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "JazzyLucas/DialoGPT-small-TonyStark": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "marblyso/DialoGPT-medium-marina": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "polandball/GPT-Polen": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "GarrisonBot/DialoGPT-medium-herbertgarrison": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "XuYipei/kw-cutegpt-13b-ift": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 49954}, "TheBloke/Pygmalion-7B-SuperHOT-8K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "timothykim04/DialoGPT-medium-harrypotter": {"architectures": ["GPT2LMHeadModel"], "n_embd": 
1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "allegro/plt5-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 50048}, "lengoctuong/gpt2-finetuned-wikitext2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "PyaeSoneK/Fine_Tuned_Pythia_smallest_140_legal": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 1024, "intermediate_size": 4096, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50304}, "psyche/KoT5-paraphrase-generation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "microsoft/DialogRPT-width": {"architectures": ["GPT2ForSequenceClassification"], "n_embd": 1024, "n_head": 16, "n_layer": 24, "vocab_size": 50257}, "Dahoas/pythia-1B-static-sft": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50304}, "jerteh/gpt2-orao": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_inner": null, "n_layer": 36, "vocab_size": 49152}, "TheBloke/LosslessMegaCoder-Llama2-13B-Mini-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32007}, "Ngao/DialoGPT-small-ngao": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "4i-ai/Llama-2-7b-alpaca-es": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "asifhugs/open_llama_7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "RajuKandasamy/tamillama_tiny_30m": {"architectures": ["LlamaForCausalLM"], "hidden_size": 256, "intermediate_size": 786, "num_attention_heads": 8, "num_hidden_layers": 16, "vocab_size": 32000}, "stabilityai/StableBeluga1-Delta": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "Linly-AI/Chinese-LLaMA-2-7B-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 40076}, "flax-community/gpt2-base-thai": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "shalomma/llama-7b-embeddings": {"architectures": ["LLaMAForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/llama2-7b-chat-codeCherryPop-qLoRA-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "KhanAdeeb/model-tony-stark": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "spy24/autonlp-UK-to-US-600416931": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "DKYoon/mt5-small-lm-adapt": {"architectures": 
["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250112}, "TheBloke/Llama-2-70B-GGML": {}, "TheBloke/model_007-70B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "abhi-8/DialoGPT-medium-Joshua-twevy": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "camenduru/MiniGPT4-7B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "paripi/Malishka": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "SiberiaSoft/SiberianPersonaFred_large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 50364}, "Alred/t5-small-finetuned-summarization-cnn": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "Leomas/DialoGPT-medium-Leomas": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "TehVenom/Pygmalion-7b-Merged-Safetensors": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "marblyso/DialoGPT-medium-pearl": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "lmqg/mt5-small-dequad-qg-ae": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "TheBloke/WizardLM-Uncensored-Falcon-40B-GPTQ": {"architectures": ["RWForCausalLM"], "hidden_size": 8192, "n_head": 128, "n_layer": 60, "vocab_size": 65025}, "NlpHUST/t5-small-vi-summarization": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250112}, "Elucia/Diluc_Bot_1.3": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "h2oai/h2ogpt-16k-codellama-34b-instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "microsoft/CodeGPT-small-java": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_layer": 12, "vocab_size": 52000}, "Starry/COUNTNARC": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "OpenMEDLab/PULSE-7bv5": {"architectures": ["BloomForCausalLM"], "hidden_size": 4096, "n_head": 32, "n_inner": null, "n_layer": 30, "vocab_size": 250880}, "marblyso/DialoGPT-medium-aubrey": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "Ashypaws/DialoGPT-medium-Ashybot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "YTTD/DialoGPT-medium-sou": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "marblyso/DialoGPT-medium-hero": {"architectures": ["GPT2LMHeadModel"], "n_embd": 
1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "Trelis/Llama-2-7b-chat-hf-function-calling-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "NousResearch/CodeLlama-7b-hf-flash": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32016}, "TheBloke/CodeLlama-34B-Python-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "musabgultekin/functionary-7b-v1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "amasand/gpt2-imdb-pos-ppo": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "bigscience/bloomz-7b1-p3": {"architectures": ["BloomForCausalLM"], "n_inner": null, "n_layer": 30, "num_attention_heads": 32, "vocab_size": 250880}, "rirv938/wizard-vicuna-13b-uncensored-awq-4bit-g128": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "marblyso/DialoGPT-medium-marblesbagel": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "vilm/vietcuna-7b-v3": {"architectures": ["BloomForCausalLM"], "hidden_size": 4096, "n_head": 32, "n_inner": null, "n_layer": 30, "vocab_size": 250880}, "stas/t5-very-small-random": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 256, "d_model": 64, "num_heads": 4, "num_layers": 8, "vocab_size": 32128}, "KeLiu/Title-Gen": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "vampiregirl/DialoGPT-medium-lennoxram": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "sharpbai/Llama-2-7b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "sam2ai/openllama_odia_3b_base": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "lmqg/mt5-small-esquad-qg-ae": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "stanfordnlp/SteamSHP-flan-t5-large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "allenai/tulu-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "JNDankwah/DialoGPT-small-ThorCB": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "lmqg/mt5-small-ruquad-qg": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "lmqg/mt5-small-ruquad-qg-ae": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, 
"num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "Dinocroth/DialoGPT-medium-Trevor-PhilipsV2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "Speedemon/jake-peralta-ai": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "chanind/frame-semantic-transformer-small": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "stanford-crfm/music-medium-800k": {"vocab_size": 55028, "n_embd": 1024, "n_layer": 24, "n_head": 16, "n_inner": null, "architectures": null}, "h2oai/h2ogpt-16k-codellama-7b-instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32016}, "TheBloke/Pygmalion-2-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "huggingface-course/codeparrot-ds": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50000}, "KakoSi/AcciGPT-smol": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "lmqg/mt5-small-itquad-qg-ae": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "EggsInAJar/DialoGPT-small-MerrickBot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "razent/SciFive-large-Pubmed": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "armandnlp/gpt2-TOD_finetuned_SGD": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50271}, "RuterNorway/Llama-2-13b-chat-norwegian": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "AIDC-ai-business/Marcoroni-7B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "deep-learning-analytics/GrammarCorrector": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "redrussianarmy/gpt2-turkish-cased": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "lmqg/mt5-small-frquad-qg-ae": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "psyche/KoT5-summarization": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "loitran/DialoGPT-medium-peppapig": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "openchat/openchat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "saikatc/NatGen": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 
3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "Coderhuynin/DialoGPT-large-TonyStark": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "declare-lab/flan-sharegpt-xl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "TheBloke/Chronos-Hermes-13B-SuperHOT-8K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "usvsnsp/pythia-6.9b-rm-full-hh-rlhf": {"architectures": ["GPTNeoXForSequenceClassification"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50277}, "yujiepan/llama-2-tiny-3layers-random": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8, "intermediate_size": 32, "num_attention_heads": 2, "num_hidden_layers": 3, "vocab_size": 32000}, "allenai/unifiedqa-v2-t5-3b-1363200": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 16384, "d_model": 1024, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "gsarti/it5-small": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 32103}, "simple2312/DialoGPT-Ellie": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "kashif/llama-7b_stack-exchange_RM_peft-adapter-merged": {"architectures": ["LlamaForSequenceClassification"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "larryvrh/mt5-translation-ja_zh": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 250112}, "j5ng/et5-typos-corrector": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 45100}, "vilsonrodrigues/falcon-7b-sharded": {"architectures": ["FalconForCausalLM"], "hidden_size": 4544, "num_attention_heads": 71, "num_hidden_layers": 32, "vocab_size": 65024}, "felinecity/ScaraBot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "persiannlp/mt5-base-parsinlu-translation_en_fa": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "Jonesy/HomersNightOut": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "conceptofmind/LLongMA-2-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/LoKuS-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "shibing624/mengzi-t5-base-chinese-correction": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "Lamia/DialoGPT-small-Sundrop": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "Blizzchor/DialoGPT-medium-gamora": {"architectures": ["GPT2LMHeadModel"], 
"n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "jlsalty9999/DialoGPT-medium-Riddle": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "uer/gpt2-chinese-lyric": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 21128}, "LMFlow/Full-Robin-7b-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "llm-book/t5-base-long-livedoor-news-corpus": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "nuggster/DialoGPT-small-ianbot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "Tristan/gpt2_reward_summarization": {"architectures": ["GPT2ForSequenceClassification"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "MysteriousAmazon/DialoGPT-medium-freddy": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "wdidfau/Pygmalion-13b-Landmark-Attention-Merged": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "kaiyuy/leandojo-lean3-retriever-byt5-small": {"architectures": ["T5EncoderModel"], "d_ff": 3584, "d_model": 1472, "num_heads": 6, "num_layers": 12, "vocab_size": 384}, "kz919/ntk_scaled_open_llama_3b_32k": {"architectures": ["NTKScaledLlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "abhi-8/DialoGPT-medium-Rick": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "polymath707/llama-2-13b-miniguanaco": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Langboat/bloom-389m-zh": {"architectures": ["BloomForCausalLM"], "hidden_size": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 42437}, "Techcs002/DialoGPT-medium-AboTalkTest": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "AIDC-ai-business/Marcoroni-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "ybelkada/t5-3b-sharded": {"architectures": ["T5WithLMHeadModel"], "d_ff": 16384, "d_model": 1024, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "benjamin/gerpt2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "abhi-8/DialoGPT-medium-Michael": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "cahya/gpt2-small-indonesian-522M": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "marianna13/flan-t5-base-summarization": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "Lakoc/fisher_dec_6_layers": 
{"architectures": ["GPT2Model"], "n_embd": 512, "n_head": 4, "n_inner": null, "n_layer": 6, "vocab_size": 5000}, "simple2312/DialoGPT-nayeon": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "sjrhuschlee/flan-t5-base-squad2": {"architectures": ["T5ForQuestionAnswering"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "eqhylxx/full-vicuna-160m": {"architectures": ["LlamaForCausalLM"], "hidden_size": 768, "intermediate_size": 3072, "num_attention_heads": 12, "num_hidden_layers": 12, "vocab_size": 32000}, "Ashypaws/DialoGPT-medium-Kitaibot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "TheBloke/Wizard-Vicuna-7B-Uncensored-SuperHOT-8K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "NHStudios/DialoGPT-small-jake": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "ZipperXYZ/DialoGPT-medium-TheWorldMachineExpressive2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "IIC/mt5-spanish-mlsum": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "mattymchen/gense-base-plus": {"architectures": ["T5Model"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32101}, "DAMO-NLP/SeqGPT-560M": {"architectures": ["BloomForCausalLM"], "hidden_size": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 250880}, "AMHR/T5-for-Adversarial-Paraphrasing": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "Blizzchor/DialoGPT-medium-HarryBotter": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "brianveebee/DialoGPT-medium-bender": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "YTTD/DialoGPT-medium-keiji": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "anon8231489123/gpt4-x-alpaca-13b-native-4bit-128g": {"architectures": ["LLaMAForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "Pcik/DialoGPT-medium-Dante": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "mHossain/bangla-para-v3-500000": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "TheBloke/Llama-2-7B-GGUF": {}, "diwas7777/HarryBot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "seduerr/t5-small-pytorch": {"architectures": ["T5WithLMHeadModel"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "felinecity/DioloGPT-small-KaeyaBot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "lmsys/vicuna-7b-delta-v0": {"architectures": ["LlamaForCausalLM"], 
"hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "inu-ai/dolly-japanese-gpt-1b": {"architectures": ["GPT2LMHeadModel"], "n_embd": 2048, "n_head": 16, "n_inner": 8192, "n_layer": 24, "vocab_size": 44928}, "TheBloke/Vicuna-33B-1-3-SuperHOT-8K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "Dahoas/pythia-125M-static-sft": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 768, "intermediate_size": 3072, "num_attention_heads": 12, "num_hidden_layers": 12, "vocab_size": 50304}, "Blizzchor/DialoGPT-medium-QuillLord": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "beomi/KoAlpaca-llama-1-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "patrickNLP/Graphix-3B": {"architectures": ["Model"], "d_ff": 16384, "d_model": 1024, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "Starry/HELLORUKAS": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "keans/DialoGPT-small-highjacker": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "DoesNoPro/DialoGPT-small-RaidenG": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "ThatSkyFox/DialoGPT-medium-whatsapp": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "EnterNameBros/Senko-san-medium-scl": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "TheBloke/CodeLlama-7B-Python-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "mrm8488/t5-small-finetuned-quora-for-paraphrasing": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "NonzeroCornet34/DialoGPT-small-hansolo": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "d0rj/rut5-base-summ": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "CAIRE-CedarsSinai/falcon-7b-qlora-chat-support-bot-faq-alzkb-version-2": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "el-profesor/code_t5": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "Soumyajit1008/DialoGPT-small-harryPotterssen": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "malteos/bloom-1b5-clp-german": {"architectures": ["BloomForCausalLM"], "hidden_size": 2048, "n_head": 16, "n_layer": 24, "vocab_size": 50304}, "yesuns/DialoGPT-small-yesun": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "Stevo/DiagloGPT-medium-spamton": {"architectures": ["GPT2LMHeadModel"], 
"n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "Vision-CAIR/vicuna-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "TheBloke/airoboros-33B-gpt4-1-4-SuperHOT-8K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "tanishqvashisht/DialoGPT-small-Joshua": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "TurkuNLP/gpt3-finnish-3B": {"architectures": ["BloomModel"], "hidden_size": 2560, "n_head": 32, "n_layer": 32, "vocab_size": 131072}, "lizhuang144/flan-t5-base-VG-factual-sg": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "TheBloke/Athena-v1-GGUF": {}, "xxyyy123/test-28b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "pastlecry/DialoGPT-small-harrypotter": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "DiscordRequestsAPI/NurDeeps-Bot-2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "channashi/DialoGPT-small-rocket": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "ritog/bangla-gpt2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "TheBloke/Redmond-Puffin-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32032}, "Shakerlicious/DialoGPT-small-raquelbot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "lmqg/mt5-base-jaquad-qag": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250101}, "anon8231489123/vicuna-13b-GPTQ-4bit-128g": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "jacobmorrison/tk-instruct-small-lora-experiments": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 32100}, "TheBloke/open-llama-13b-open-instruct-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "cedpsam/chatbot_fr": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "Photolens/llama-2-13b-langchain-chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "avinashshrangee/DialoGPT-small-Ricky": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "YeungNLP/firefly-llama2-7b-pretrain": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, 
"intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 55296}, "efederici/it5-efficient-small-fanpage": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 32, "vocab_size": 32100}, "saikiranmaddukuri/chat_to_sql0.17": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "TheBloke/Llama2-28B-Air03-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 7296, "intermediate_size": 22016, "num_attention_heads": 57, "num_hidden_layers": 40, "vocab_size": 32000}, "crodri/falcon_aguila_meteocat": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 50257}, "Narsil/starcoder-gptq": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 40, "vocab_size": 49152}, "CobraMamba/mamba-gpt-3b-v4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "YeungNLP/firefly-llama2-13b-pretrain": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 55296}, "TheBloke/airoboros-l2-7b-gpt4-1.4.1-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "DecafNosebleed/DialoGPT-small-ScaraBot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "yazdipour/text-to-sparql-t5-small-qald9": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "ClassCat/gpt2-base-french": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50000}, "TheBloke/airoboros-33B-GPT4-m2.0-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "quantumaikr/KoreanLM-1.5b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 1024, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "toyfreak/DialoGPT-small-addy": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "spursyy/mT5_multilingual_XLSum_rust": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "lengoctuong/gpt2-finetuned-chatbot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50261}, "KnutJaegersberg/megatron-gpt2-345m-evol_instruct_v2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": 4096, "n_layer": 24, "vocab_size": 50257}, "zkdtckk/falcon40-instruct-qlora-tta-v1": {"architectures": ["RWForCausalLM"], "hidden_size": 8192, "n_head": 128, "n_layer": 60, "vocab_size": 65024}, "TheBloke/Nous-Hermes-13B-SuperHOT-8K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "TheBloke/Nous-Hermes-Llama2-GGML": {}, "IkariDev/Athena-v1": {"architectures": 
["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/WizardLM-7B-V1.0-Uncensored-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/llama-2-13B-German-Assistant-v2-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "cahya/gpt2-large-indonesian-522M": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_inner": null, "n_layer": 36, "vocab_size": 50257}, "VietAI/envit5-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 50048}, "kam1run/DialoGPT-large-kami": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_inner": null, "n_layer": 36, "vocab_size": 50257}, "uukuguy/speechless-codellama-dolphin-orca-platypus-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "aluserhuggingface/DialoGPT-small-harrypotter": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "TheBloke/gpt4-x-vicuna-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "Pcik/DialoGPT-medium-Ruby": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "TheBloke/LLaMA-30b-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "sdadas/polish-gpt2-small": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": 3072, "n_layer": 12, "vocab_size": 51200}, "ahxt/llama2_xs_460M_experimental": {"architectures": ["LlamaForCausalLM"], "hidden_size": 1024, "intermediate_size": 4096, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50304}, "lemon234071/t5-base-Chinese": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 35364}, "4bit/pyg-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "squarelike/Gugugo-koen-1.3B-V1.0": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 30080}, "lvwerra/t5-imdb": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "psymon/KoLlama2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Maxwere/DiabloGPT-medium-maxbot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "nafisehNik/mt5-persian-summary": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250112}, "nams/nams-bot": 
{"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "lmqg/mt5-small-esquad-qag": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "mattbit/gpt2wb": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "ghazikhanihamed/TooT-PLM-P2S": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 48, "vocab_size": 144}, "lonewanderer27/YoshinoriBot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "VinVanGogh/Llama-2-7b-Aixiety-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "GroNLP/gpt2-medium-italian-embeddings": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 30001}, "IDEA-CCNL/Randeng-T5-784M-QA-Chinese": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32601}, "kingbri/airo-llongma-2-13B-16k-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "lvwerra/starcoderbase-gsm8k": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 40, "vocab_size": 49152}, "mofawzy/gpt2-arabic-sentence-generator": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50000}, "lmqg/mt5-small-itquad-qag": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "sharpbai/Llama-2-13b-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "lomahony/eleuther-pythia70m-hh-sft": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 512, "intermediate_size": 2048, "num_attention_heads": 8, "num_hidden_layers": 6, "vocab_size": 50304}, "Salesforce/codet5-large-ntp-py": {"architectures": ["T5WithLMHeadModel"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32100}, "TheBloke/Samantha-1.11-CodeLlama-34B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "Lenza/DialoGPT-medium-Kobayashi": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "davidviriato/DialoGPT-small-joshua": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "Shakerlicious/DialoGPT-small-descentbot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "TurkuNLP/gpt3-finnish-xl": {"architectures": ["BloomModel"], "hidden_size": 2064, "n_head": 24, "n_layer": 24, "vocab_size": 131072}, "TheBloke/starcoderplus-GPTQ": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 40, "vocab_size": 49152}, 
"TheBloke/Airoboros-L2-7B-2.1-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Mikivis/gpt2-large-lora-sft1": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_inner": null, "n_layer": 36, "vocab_size": 50257}, "gagan3012/k2t": {"architectures": ["T5WithLMHeadModel"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "MerlynMind/merlyn-education-safety": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 36, "vocab_size": 50688}, "explosion-testing/refined-web-model-test": {"architectures": ["RWForCausalLM"], "hidden_size": 32, "n_head": 4, "n_layer": 5, "vocab_size": 1024}, "explosion-testing/falcon-no-parallel-attn-test": {"architectures": ["RWForCausalLM"], "hidden_size": 32, "n_head": 4, "n_layer": 5, "vocab_size": 1024}, "Marxav/frpron": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 268}, "AmbricJohnson5888/claura": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "TheBloke/CodeLlama-7B-Instruct-GGUF": {}, "felinecity/DioloGPT-small-LisaBot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "lmqg/mt5-small-frquad-qag": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "RobiKenobi/DialoGPT-medium-pete": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "TheBloke/Vicuna-13B-CoT-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/airoboros-33B-gpt4-1.4-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "SEBIS/code_trans_t5_base_code_documentation_generation_java_multitask": {"architectures": ["T5Model"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "retrieva-jp/t5-base-medium": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "elinas/chronos-13b-4bit": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "abhinavkulkarni/meta-llama-Llama-2-7b-chat-hf-w4-g128-awq": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/Luban-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "uer/t5-base-chinese-cluecorpussmall": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 21228}, "ClueAI/ChatYuan-large-v1": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "helenai/gpt2-ov": {"architectures": 
["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "fireballoon/baichuan-vicuna-chinese-7b-gptq": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 64000}, "Trelis/Llama-2-7b-chat-hf-hosted-inference-8bit": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Starry/KARENTRIES": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "umm-maybe/SportsFanGhost": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "TheBloke/airoboros-13B-gpt4-1.4-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TabbyML/StarCoder-1B": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 2048, "n_head": 16, "n_inner": 8192, "n_layer": 24, "vocab_size": 49152}, "TFLai/Nova-13B-50-step": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "Mikivis/gpt2-large-lora-sft2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_inner": null, "n_layer": 36, "vocab_size": 50257}, "CHIH-HUNG/llama-2-13b-Open_Platypus_and_ccp_2.6w-3_epoch": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "BlackSamorez/falcon-40b-tiny-testing": {"architectures": ["RWForCausalLM"], "hidden_size": 256, "n_head": 4, "n_layer": 2, "vocab_size": 65024}, "Rocketknight1/tiny-random-falcon-40b": {"architectures": ["FalconForCausalLM"], "hidden_size": 1024, "num_attention_heads": 128, "num_hidden_layers": 2, "vocab_size": 65024}, "TheBloke/WizardLM-1.0-Uncensored-Llama2-13B-GGML": {}, "TheBloke/Zarafusionex-1.1-L2-7B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "lmqg/t5-large-squad-qg": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32101}, "casperhansen/falcon-7b-awq": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "Azure99/blossom-v2-llama2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "DeepESP/gpt2-spanish-medium": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "StudentLLM/Alpagasus-2-13b-QLoRA-merged": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Weni/WeniGPT": {"architectures": ["RWForCausalLM"], "hidden_size": 8192, "n_head": 128, "n_layer": 60, "vocab_size": 65024}, "niicovila/llama-v2-tst-law": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Undi95/CreativityEngine": {"architectures": 
["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "DB13067/Peterbot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "EleutherAI/pythia-12b-v0": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 36, "vocab_size": 50688}, "allenai/tulu-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "TheBloke/airoboros-l2-13b-gpt4-m2.0-GGML": {}, "TheBloke/Griffin-3B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "imthanhlv/vigpt2medium": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "m3hrdadfi/gpt2-persian-qa": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50000}, "TheBloke/MythoMax-L2-Kimiko-v2-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "CHIH-HUNG/llama-2-13b-FINETUNE1_17w-r16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "ppn/DialoGPT-small-harrypotter": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "lmqg/mt5-base-ruquad-qag": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250101}, "TheBloke/Firefly-Llama2-13B-v1.2-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "persiannlp/mt5-large-parsinlu-opus-translation_fa_en": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 250112}, "simple2312/DialoGPT-Twice": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "declare-lab/flan-alpaca-xxl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 10240, "d_model": 4096, "num_heads": 64, "num_layers": 24, "vocab_size": 32128}, "ChanceFocus/finma-7b-nlp": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "osunlp/attrscore-flan-t5-xl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "likenneth/honest_llama2_chat_7B": {"architectures": ["LLaMAForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Hugherinit/hi": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 32119}, "vaibhav9/GPT2-qa": {"architectures": ["GPT2ModelForQuestionAnswering"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, 
"st3rl4nce/t5-small-finetuned-pubmed": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "uonlp/okapi-ro-llama": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "ThomasNLG/t5-weighter_cnndm-en": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32100}, "google/t5-11b-ssm-tqa": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 65536, "d_model": 1024, "num_heads": 128, "num_layers": 24, "vocab_size": 32128}, "lizhuang144/flan-t5-small-VG-factual-sg": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 32128}, "hyunjae/skt-kogpt2-kullm-v2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 51200}, "Mirage-Studio/llama-gaan-2-7b-chat-hf-dutch": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/LosslessMegaCoder-Llama2-7B-Mini-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32007}, "lmqg/t5-small-squad-qg": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32101}, "castorini/doc2query-t5-large-msmarco": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "TheBloke/manticore-13b-chat-pyg-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "22h/open-cabrita3b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 52000}, "alzoubi36/priva_t5-small": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "TheBloke/vicuna-7B-v0-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "TheBloke/falcon-7b-instruct-GGML": {}, "Rozi05/QuoteVibes_Model_Trained": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "Tidum/DialoGPT-large-Michael": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "valhalla/t5-small-qg-prepend": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32102}, "lmqg/t5-large-squad-qag": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32101}, "abhiramtirumala/DialoGPT-sarcastic": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "mindrage/Manticore-13B-Chat-Pyg-Guanaco-GGML": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, 
"num_hidden_layers": 40, "vocab_size": 32000}, "h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Salesforce/dialogstudio-t5-base-v1.0": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32103}, "allenai/unifiedqa-v2-t5-base-1363200": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "kleinay/qanom-seq2seq-model-joint": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32101}, "puugz/DialoGPT-small-spiderman": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "UrukHan/t5-russian-summarization": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "HuggingFaceH4/tiny-random-LlamaForSeqClass": {"architectures": ["LlamaForSequenceClassification"], "hidden_size": 16, "intermediate_size": 64, "num_attention_heads": 4, "num_hidden_layers": 2, "vocab_size": 32000}, "JosephusCheung/Qwen-LLaMAfied-7B-Chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 151936}, "Abzu/orca-mini-v3-70b-gptq-q4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "wnic00/t5-small-finetune-bilingual-summarization": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32100}, "ChukSamuels/DialoGPT-small-Dr.FauciBot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "macavaney/doc2query-t5-base-msmarco": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "nlp-waseda/comet-t5-base-japanese": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32000}, "stjiris/t5-portuguese-legal-summarization": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "Icaruas/V2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "imxly/t5-pegasus": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 50000}, "stefan-it/german-gpt2-larger": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50265}, "noahkim/KoT5_news_summarization": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 50358}, "hoskinson-center/proofGPT-v0.1": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50304}, "TheBloke/WizardMath-7B-V1.0-GPTQ": {"architectures": ["LlamaForCausalLM"], 
"hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "FieldSu/distil_student_24": {"architectures": ["RWForCausalLM"], "hidden_size": 1136, "n_head": 71, "n_layer": 8, "vocab_size": 65024}, "shyamsn97/Mario-GPT2-700-context-length": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 6, "vocab_size": 50257}, "dgnk007/eagle": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "sharpbai/Llama-2-7b-chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "jackyv/DialoGPT-small-pinocchio": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "felinecity/DioloGPT-small-KaeyaBot2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "toyfreak/DialoGPT-small-shy": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "chavinlo/alpaca-13b": {"architectures": ["LLaMAForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "TehVenom/Pygmalion-7b-4bit-GPTQ-Safetensors": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "georgesung/open_llama_7b_qlora_uncensored": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "ostorc/rick-sanchez-chatbot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "polymath707/llama-2-7b-miniguanaco": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "KBlueLeaf/guanaco-7b-leh-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/Chronos-Hermes-13B-v2-GGML": {}, "approach0/mathy-vicuna-13B-FFT-phase2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "gorilla-llm/gorilla-7b-hf-delta-v1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "j5ng/kullm-5.8b-GPTQ-8bit": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 16, "num_hidden_layers": 28, "vocab_size": 30080}, "bitadin/checkpoint-230167": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "NekoPunchBBB/Llama2-13b-hf-Open-Platypus-QLoRA-att": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "mrm8488/t5-small-finetuned-wikiSQL": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, 
"vocab_size": 32128}, "ozcangundes/T5-base-for-BioQA": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "AriakimTaiyo/gpt2-chat": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_layer": 36, "vocab_size": 50257}, "TheBloke/WizardLM-13B-V1.2-GGML": {}, "TheBloke/Trurl-2-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "ccore/opt-125-smart-test": {"architectures": ["OPTForCausalLM"], "hidden_size": 768, "num_attention_heads": 12, "num_hidden_layers": 12, "vocab_size": 50272}, "James-WYang/BigTranslate": {"architectures": ["LLaMAForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 53613}, "Trelis/Llama-2-7b-chat-hf-function-calling": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Wikidepia/IndoT5-base-paraphrase": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "csebuetnlp/mT5_m2m_crossSum": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "seanmor5/tiny-llama-test": {"architectures": ["LlamaForCausalLM"], "hidden_size": 32, "intermediate_size": 64, "num_attention_heads": 2, "num_hidden_layers": 2, "vocab_size": 32000}, "explosion-testing/refined-web-model-new-decoder-test": {"architectures": ["RWModel"], "hidden_size": 256, "n_head": 4, "n_layer": 5, "vocab_size": 1024}, "jondurbin/airocoder-34b-2.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "lmqg/t5-base-squad-qg": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32101}, "PORTULAN/gervasio-ptpt-base": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 8, "num_hidden_layers": 16, "vocab_size": 50304}, "UWB-AIR/barticzech-1.0": {"architectures": ["MBartForConditionalGeneration"], "d_model": 1024, "num_hidden_layers": 12, "vocab_size": 50265}, "TokenBender/llama2-7b-chat-hf-codeCherryPop-qLoRA-merged": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Voicelab/trurl-2-7b-8bit": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "TheBloke/Llama-2-13B-chat-GGUF": {}, "VietAI/vit5-base-vietnews-summarization": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 36096}, "lmqg/t5-small-squad-ae": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32101}, "retrieva-jp/t5-base-short": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "grammarly/coedit-xxl": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 10240, "d_model": 4096, 
"num_heads": 64, "num_layers": 24, "vocab_size": 32100}, "heack/HeackMT5-ZhSum100k": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "TheBloke/LLaMA-13b-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TFMC/ELYZA-japanese-Llama-2-7b-instruct-GPTQ-4bit-64g": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "mxmax/Chinese_Chat_T5_Base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "elinas/chronos-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "kajdun/iubaris-13b-v3_GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "jmeadows17/MathT5-large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32104}, "TheBloke/Kimiko-7B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "nlp-waseda/gpt2-small-japanese": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 32000}, "rshrott/description-together-ai": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "noah-ai/mt5-base-question-generation-vi": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "AI4PD/ZymCTRL": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_inner": null, "n_layer": 36, "vocab_size": 458}, "bitadin/gpt-4-long-titles-v2-flan-t5-base-llm-12": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "shorthillsai/flan-t5-large-absa": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "TheBloke/CodeLlama-13B-oasst-sft-v10-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32032}, "prithivida/active_to_passive_styletransfer": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "lcw99/t5-large-korean-text-summary": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 50358}, "EleutherAI/pythia-1.4b-deduped-v0": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50304}, "sdadas/polish-gpt2-large": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_inner": 5120, "n_layer": 36, "vocab_size": 51200}, "uonlp/okapi-vi-bloom": {"architectures": ["BloomForCausalLM"], 
"hidden_size": 4096, "n_head": 32, "n_inner": null, "n_layer": 30, "vocab_size": 250880}, "cenkersisman/gpt2-turkish-900m": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "IlyaGusev/rugpt_large_turbo_instructed": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1536, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50258}, "Waterhorse/chessgpt-base-v1": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "jondurbin/spicyboros-13b-2.2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "echarlaix/t5-small-openvino": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "mrm8488/santacoder-finetuned-the-stack-bash-shell": {"architectures": ["GPT2LMHeadCustomModel"], "n_embd": 2048, "n_head": 16, "n_inner": 8192, "n_layer": 24, "vocab_size": 49280}, "ckip-joint/bloom-3b-zh": {"architectures": ["BloomForCausalLM"], "hidden_size": 2560, "n_head": 32, "n_inner": null, "n_layer": 30, "vocab_size": 250880}, "Dawnstarhunter/DialoGPT-medium-Eveline": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "lmqg/t5-base-squad-ae": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32101}, "mesolitica/finetune-translation-t5-small-standard-bahasa-cased-v2": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32100}, "liuhaotian/LLaVA-7b-delta-v0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32004}, "yzhuang/autotree_llama_small_snxor_l1_2_vit": {"architectures": ["LlamaForAutoTree"], "hidden_size": 768, "intermediate_size": 3072, "num_attention_heads": 12, "num_hidden_layers": 6, "vocab_size": 32000}, "mrm8488/t5-base-finetuned-wikiSQL-sql-to-en": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "aleksickx/llama-7b-hf": {"architectures": ["LLaMAForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "yongzx/pythia-70m-sft-hh": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 512, "intermediate_size": 2048, "num_attention_heads": 8, "num_hidden_layers": 6, "vocab_size": 50304}, "sonoisa/t5-base-english-japanese": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "BramVanroy/Llama-2-13b-chat-dutch": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Bhuvana/t5-base-spellchecker": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "PlanTL-GOB-ES/gpt2-base-bne": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_layer": 12, "vocab_size": 50261}, "lmqg/mt5-small-jaquad-qg": {"architectures": 
["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "Mirage-Studio/llama-gaan-2-7b-chat-hf-dutch-epoch-5": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "microsoft/DialogRPT-human-vs-rand": {"architectures": ["GPT2ForSequenceClassification"], "n_embd": 1024, "n_head": 16, "n_layer": 24, "vocab_size": 50257}, "aubmindlab/aragpt2-mega": {"architectures": ["GPT2LMHeadModel"], "intermediate_size": 6144, "n_embd": 1536, "n_head": 24, "n_inner": null, "n_layer": 48, "vocab_size": 64000}, "liyuesen/druggpt": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 53083}, "conceptofmind/Hermes-LLongMA-2-7b-8k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/scarlett-33B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "TheBloke/EverythingLM-13b-V2-16K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "sartmis1/starcoder-v2-openapi-special-tokens": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 40, "vocab_size": 49152}, "TheBloke/Phind-CodeLlama-34B-v1-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "TheBloke/Yarn-Llama-2-7B-64K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/Dolphin-Llama-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "kfkas/Legal-Llama-2-ko-7b-Chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 46336}, "Ichsan2895/Merak-7B-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "allenai/unifiedqa-v2-t5-base-1251000": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "sagawa/ReactionT5-product-prediction": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 268}, "lmqg/mt5-small-jaquad-ae": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "Narrativa/mT5-base-finetuned-tydiQA-xqa": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "allenai/macaw-large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "gagan3012/k2t-new": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, 
"num_layers": 6, "vocab_size": 32128}, "google/t5-efficient-tiny-nl2": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 256, "num_heads": 4, "num_layers": 2, "vocab_size": 32128}, "sam2ai/open_llama_3b_odia_gptq_128_4bit": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "lmqg/mt5-small-dequad-qg": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "mrm8488/mT5-small-finetuned-tydiqa-for-xqa": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250112}, "zjunlp/knowlm-13b-zhixi": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "h2oai/h2ogpt-16k-codellama-13b-instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "mymusise/gpt2-medium-chinese": {"architectures": ["TFGPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 8021}, "ai-forever/mGPT-13B": {"architectures": ["GPT2LMHeadModel"], "n_embd": 5120, "n_head": 40, "n_inner": null, "n_layer": 40, "vocab_size": 100000}, "TinaLiHF/fined-tuned-T5small": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "TheBloke/airoboros-l2-7B-gpt4-2.0-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Mihakram/AraT5-base-question-generation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 110080}, "fjungstedt/t5-criteria-text-to-json": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "luqh/ClinicalT5-large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "h2oai/h2ogpt-16k-codellama-13b-python": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "masakhane/afri-mt5-base": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "YeungNLP/bloom-1b4-zh": {"architectures": ["BloomForCausalLM"], "n_inner": null, "n_layer": 24, "num_attention_heads": 16, "vocab_size": 46145}, "shekharchatterjee/temp-model-174": {}, "TheBloke/Kimiko-v2-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "jeffwan/vicuna-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "kz919/ntk_scaled_open_llama_13b_32k": {"architectures": ["NTKScaledLlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "lmqg/t5-base-squad-qg-ae": {"architectures": 
["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32101}, "r3dhummingbird/DialoGPT-small-harrypotter": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "camenduru/MiniGPT4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "TheBloke/open-llama-7b-open-instruct-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "MoinFaisal/Llama-2-7b-chat-finetune": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/CodeLlama-13B-Instruct-GGUF": {}, "fbellame/llama2-pdf-to-quizz-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "fractalego/fact-checking": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "michelecafagna26/gpt2-medium-finetuned-sst2-sentiment": {"architectures": ["GPT2ForSequenceClassification"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "TheBloke/WizardLM-Uncensored-SuperCOT-StoryTelling-30B-SuperHOT-8K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32001}, "TheBloke/Airoboros-7B-GPT4-1-4-SuperHOT-8K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/Airoboros-L2-70B-GPT4-m2.0-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "Arc53/docsgpt-7b-falcon": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "alenusch/mt5large-ruparaphraser": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 250112}, "ApoTro/slovak-t5-small": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 32100}, "microsoft/dolly-v2-7b-olive-optimized": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50280}, "huggingtweets/gordonramsay": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "prithivida/formal_to_informal_styletransfer": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "model-attribution-challenge/gpt2-xl": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1600, "n_head": 25, "n_layer": 48, "vocab_size": 50257}, "saiful9379/Bangla_GPT2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 33391}, "deepse/CodeUp-Llama-2-7b-chat-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 
11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "ziqingyang/chinese-llama-2-13b-16k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 55296}, "ChandlerU11/t5_fine": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "TheBloke/Guanaco-3B-Uncensored-v2-GPTQ": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "mamiksik/T5-commit-message-generation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32104}, "conceptofmind/Yarn-Llama-2-13b-64k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "mesolitica/llama-13b-hf-16384-fpf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Sao10K/Stheno-1.2-L2-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "gsarti/it5-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32103}, "Den4ikAI/FRED-T5-XL-interpreter": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1536, "num_heads": 24, "num_layers": 24, "vocab_size": 50364}, "TheBloke/WizardCoder-Guanaco-15B-V1.1-GPTQ": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 40, "vocab_size": 49153}, "seonglae/llama-2-7b-chat-hf-gptq": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/llama2_7b_chat_uncensored-GGML": {}, "ecosumit/gpt-model": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "allegro/plt5-large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 50048}, "cointegrated/rut5-small": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 20100}, "it5/it5-large-question-answering": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32103}, "tscholak/1zha5ono": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32102}, "optible/unifiedqa-t5-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "CleverShovel/falcon-7b-instruct-sharded-bf16": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "TheBloke/Pygmalion-13B-SuperHOT-8K-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "BlackSamorez/llama-2-tiny-testing": {"architectures": 
["LlamaForCausalLM"], "hidden_size": 128, "intermediate_size": 11008, "num_attention_heads": 8, "num_hidden_layers": 2, "vocab_size": 2000}, "ianagra/Llama-2-7b-ALLM-virtual-sales-assistant": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "quantumaikr/KoreanLM-3B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 2048, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "quantumaikr/llama-2-70B-chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "Deniskin/gpt3_medium": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1536, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50264}, "ozcangundes/mt5-small-turkish-summarization": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250112}, "EleutherAI/pythia-1b-deduped-v0": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 8, "num_hidden_layers": 16, "vocab_size": 50304}, "flozi00/Llama-2-7b-german-assistant-v3": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Mikivis/gpt2-large-lora-stf4": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_inner": null, "n_layer": 36, "vocab_size": 50257}, "AK270802/DialoGPT-small-harrypotter": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "EleutherAI/pythia-12b-deduped-v0": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 36, "vocab_size": 50688}, "EricPeter/Llama-2-multilingual": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/Pygmalion-2-7B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "miguelvictor/python-gpt2-large": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_layer": 36, "vocab_size": 50257}, "h2oai/h2ogpt-16k-codellama-7b-python": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32016}, "ammarinjtkrbh/llama-2-7b-food-search": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "GroNLP/gpt2-small-dutch": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 40000}, "pszemraj/opt-350m-email-generation": {"architectures": ["OPTForCausalLM"], "hidden_size": 1024, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50265}, "caffsean/t5-small-finetuned-keyword-to-text-generation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "lmqg/mt5-small-dequad-ae": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, 
"num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "yuyijiong/T5-large-sentiment-analysis-Chinese-MultiTask": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32596}, "sonoisa/t5-qiita-title-generation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "YeungNLP/firefly-bloom-1b4": {"architectures": ["BloomForCausalLM"], "hidden_size": 2048, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 46145}, "samwit/koala-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/Vicuna-13B-1-3-SuperHOT-8K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Photolens/OpenOrcaxOpenChat-2-13b-langchain-chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "Ichsan2895/Merak-7B-v1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "flozi00/Llama-2-7b-german-assistant-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "ss1612/loki-chat": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "OpenBuddy/openbuddy-falcon-7b-v5-fp16": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 70144}, "wellecks/llmstep-mathlib4-pythia2.8b": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50278}, "dariolopez/llama-2-7b-oasst1-es": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/WizardLM-1.0-Uncensored-CodeLlama-34B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "brad1141/gpt2-finetuned-comp2": {"architectures": ["GPT2ForTokenClassification"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "TheBloke/chronos-hermes-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "lizhuang144/flan-t5-large-VG-factual-sg": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "nivos/pythia-410m-deduped-finetuned-final-activity-text-10epoch": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 1024, "intermediate_size": 4096, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50304}, "HamidRezaAttar/gpt2-product-description-generator": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "TheBloke/ORCA_LLaMA_70B_QLoRA-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, 
"intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "lmsys/vicuna-13b-delta-v0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "jacobmorrison/tk-instruct-xl-lora-experiments": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32100}, "GroNLP/gpt2-small-italian": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 30001}, "yihsuan/mt5_chinese_small": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250100}, "YTTD/DialoGPT-medium-souv2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "arubenruben/ptt5-portuguese-cnn-dailymail-azure-pt-pt": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "localmodels/Llama-2-7B-Chat-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "lgaalves/llama-2-13b-chat-platypus": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "it5/it5-large-question-generation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32103}, "psyche/KoT5": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "TheBloke/Llama2-70B-OASST-SFT-v10-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32007}, "deepparag/Aeona": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "lmqg/mt5-small-koquad-qg": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "lmqg/mt5-small-esquad-qg": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "NinedayWang/PolyCoder-0.4B": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 1024, "intermediate_size": 4096, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50304}, "ConvLab/t5-small-nlu-multiwoz21": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32100}, "SIC98/GPT2-python-code-generator": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "lmqg/mt5-small-itquad-qg": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "kaiyuy/leandojo-lean4-tacgen-byt5-small": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3584, "d_model": 1472, "num_heads": 6, "num_layers": 12, "vocab_size": 384}, "usvsnsp/pythia-6.9b-sft": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 
32, "num_hidden_layers": 32, "vocab_size": 50432}, "PlanTL-GOB-ES/gpt2-large-bne": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_layer": 36, "vocab_size": 50261}, "jordiclive/flan-t5-11b-summarizer-filtered": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 10240, "d_model": 4096, "num_heads": 64, "num_layers": 24, "vocab_size": 32128}, "Jordine/scpoo": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "behnamsh/gpt2_camel_physics": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "lmqg/mt5-small-esquad-ae": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "MerlynMind/merlyn-education-teacher-assistant": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 36, "vocab_size": 50688}, "mesolitica/llama-7b-hf-16384-fpf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "MatthisHoules/rat-t5-qdmr-grounded-with-db": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "persiannlp/mt5-small-parsinlu-qqp-query-paraphrasing": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250112}, "lmqg/mt5-small-koquad-ae": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "lmqg/mt5-small-itquad-ae": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "openthaigpt/openthaigpt-gpt2-instructgpt-poc-0.0.4": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50268}, "ChanceFocus/finma-7b-full": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "vivekraina/Llama-2-7b-hf-8bit": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "dpml/vicuna_mt_450s": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "burberg92/resume_summary": {"architectures": ["BloomForCausalLM"], "hidden_size": 2048, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 250880}, "Monero/Pygmalion-Metharme-7b-4bit-TopScore": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Icaruas/7bill8k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32002}, "dahara1/ELYZA-japanese-Llama-2-7b-fast-instruct-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 45043}, "TheBloke/Yarn-Llama-2-13B-64K-GPTQ": {"architectures": ["LlamaForCausalLM"], 
"hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "prithivida/passive_to_active_styletransfer": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "lmqg/mt5-small-frquad-qg": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "JamesStratford/PLord-bot-DialoGPT-medium": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "yizhangliu/prompt-extend": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 52000}, "lmqg/mt5-small-frquad-ae": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250101}, "Beltenebros/DialoGPT-small-PerionOfGaul": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "sominw/rel23_conll": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "mncai/SGPT-5.8B-wiki-mirae-bank_securities-epoch5": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 16, "num_hidden_layers": 28, "vocab_size": 30080}, "MickyMike/VulRepair": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32105}, "ybelkada/t5-11b-sharded": {"architectures": ["T5WithLMHeadModel"], "d_ff": 65536, "d_model": 1024, "num_heads": 128, "num_layers": 24, "vocab_size": 32128}, "Einmalumdiewelt/T5-Base_GNAD_MaxSamples": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "digitous/13B-HyperMantis_GPTQ_4bit-128g": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "weqweasdas/hh_rlhf_rm_open_llama_3b": {"architectures": ["LlamaForSequenceClassification"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "TheBloke/WizardMath-13B-V1.0-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "ziqingyang/chinese-alpaca-2-7b-16k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 55296}, "valhalla/t5-base-squad": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "ELiRF/mt5-base-dacsa-es": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "abhitopia/question-answer-generation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32102}, "TurkuNLP/gpt3-finnish-large": {"architectures": ["BloomModel"], "hidden_size": 1536, "n_head": 16, "n_layer": 24, "vocab_size": 131072}, "Abyss-fyf/DialoGPT-small-discord": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, 
"n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "TheBloke/OpenOrca-Platypus2-13B-GGML": {}, "TheBloke/Airoboros-L2-7B-2.1-GGUF": {}, "huggingtweets/googleai": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "it5/it5-base-question-answering": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32103}, "woodmtaylor/DialoGPT-medium-Heej": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "kimdwan/t5-base-korean-summarize-LOGAN": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 50358}, "Narrativa/mT5-base-finetuned-tydiQA-question-generation": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "huggingtweets/normmacdonald": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "r3dhummingbird/DialoGPT-medium-neku": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "yhavinga/t5-v1.1-base-dutch-cnn-test": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32103}, "kennethhendricks/DialoGPT-medium-jared-hendricks-gen1": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "retrieva-jp/t5-small-long": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 32128}, "TheBloke/Vigogne-2-7B-Chat-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TigerResearch/tigerbot-7b-chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 60928}, "Fredithefish/Guanaco-13B-Uncensored": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "consciousAI/question-answering-generative-t5-v1-base-s-q-c": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "TheBloke/open-llama-7B-v2-open-instruct-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "mosama/Llama-2-Medical-Merged-LoRA": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "bullmount/quanIta_t5": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32103}, "YeungNLP/bloomz-396m-zh": {"architectures": ["BloomForCausalLM"], "hidden_size": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 46145}, "GreenBitAI/LLaMA-7B-2bit": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 
32000}, "chgk13/decicoder-1b-openvino-int8": {"architectures": ["DeciCoderForCausalLM"], "hidden_size": 2048, "intermediate_size": 5888, "num_attention_heads": 32, "num_hidden_layers": 20, "vocab_size": 49152}, "bigscience/bloomz-mt": {"architectures": ["BloomForCausalLM"], "n_layer": 70, "num_attention_heads": 112, "vocab_size": 250880}, "LarkAI/codet5p-770m_nl2sql_oig": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32100}, "Linly-AI/Chinese-Falcon-7B": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 90046}, "ckip-joint/bloom-3b-zh-instruct": {"architectures": ["BloomForCausalLM"], "hidden_size": 2560, "n_head": 32, "n_inner": null, "n_layer": 30, "vocab_size": 250880}, "sgr23/llama2-fine-tuned-dolly-15k-dto": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "edbeeching/gpt2-imdb": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "cardiffnlp/flan-t5-small-tweet-emotion": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 32128}, "TheBloke/airoboros-7B-gpt4-1.4-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/CodeLlama-7B-GGUF": {}, "TheBloke/Airoboros-c34B-2.1-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "byeongal/Ko-DialoGPT": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 51200}, "ismaelfaro/gpt2-poems.en": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "tuner007/t5_abs_qa": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "kennethhendricks/DialoGPT-medium-PowPowGaming": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "DunnBC22/flan-t5-base-text_summarization_data": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "zarakiquemparte/hermeslimarp-l2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "MagicLEMP/llamavocat_13B_mixed_16K": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "4bit/ELYZA-japanese-Llama-2-7b-instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "EnglishVoice/t5-base-us-to-uk-english": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "devanshipatel/t5-gec-english-125k": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, 
"helloollel/vicuna-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "nferroukhi/WizardLM-Uncensored-Falcon-7b-sharded-bf16": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65025}, "dacorvo/tiny-random-gpt2-neuronx": {"intermediate_size": 37, "n_embd": 32, "n_head": 4, "n_inner": null, "n_layer": 5, "vocab_size": 1000}, "JamesStratford/Pidrow-bot-DialoGPT-Large-Feb2023": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_inner": null, "n_layer": 36, "vocab_size": 50257}, "tsuyuan/Llama-2-7b-unit": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 41218}, "OFA-Sys/gsm8k-rft-llama7b2-u13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "uer/gpt2-chinese-ancient": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 25370}, "YTTD/DialoGPT-medium-safv3": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "Neko-Institute-of-Science/LLaMA-65B-HF": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "TheBloke/Spicyboros-13B-2.2-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "IDEA-CCNL/Randeng-T5-77M-MultiTask-Chinese": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 32596}, "coreml-projects/Llama-2-7b-chat-coreml": {"architectures": ["LlamaForCausalLM"], "vocab_size": 32000}, "oscorrea/scores-lince-sm": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "morzecrew/FRED-T5-RefinedPersonaChat": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1536, "num_heads": 24, "num_layers": 24, "vocab_size": 50364}, "anjakuzev/harry_7": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/Mythalion-13B-GGUF": {}, "Kryptone/monikAI": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "TheBloke/Luna-AI-Llama2-Uncensored-GGML": {}, "mlabonne/llama-2-7b-miniguanaco": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/Hermes-LLongMA-2-7B-8K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "zlsl/l_erotic_kink_chat": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1536, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50259}, "Sao10K/Stheno-Inverted-1.2-L2-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, 
"castorini/duot5-base-msmarco": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "mrm8488/t5-base-finetuned-qasc": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "entropy/gpt2_zinc_87m": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 2707}, "MarkyMarx/DialoGPT-medium-jimmybot2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "stefan-it/secret-gpt2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "Narrativa/byt5-base-tweet-hate-detection": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3968, "d_model": 1536, "num_heads": 12, "num_layers": 18, "vocab_size": 384}, "nicholasKluge/Aira-2-124M": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50261}, "TheBloke/Samantha-1.11-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "castorini/monot5-large-msmarco": {"architectures": ["T5Model"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "PoloHuggingface/French_grammar_error_corrector": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32102}, "cambridgeltl/magic_mscoco": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50259}, "Gatozu35/tortoise-tts": {"architectures": ["GPT2InferenceModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 30, "vocab_size": 604}, "abacusai/Giraffe-v1-delta-13b-scaled-16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "flozi00/Llama-2-13B-german-assistant-v3-4bit-autogptq": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "HAERAE-HUB/tulu_13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32001}, "doc2query/msmarco-14langs-mt5-base-v1": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "Maciel/T5Corrector-base-v2": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "vilm/vietcuna-3b-v2": {"architectures": ["BloomForCausalLM"], "hidden_size": 2560, "n_head": 32, "n_inner": null, "n_layer": 30, "vocab_size": 250880}, "TitanML/ct2-int8-falcon-7b-instruct": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "ybelkada/llama-7b-GPTQ-test": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "h2oai/h2ogpt-16k-codellama-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, 
"num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32016}, "TigerResearch/tigerbot-70b-chat-v1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 60928}, "Supiri/t5-base-conversation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "msterbentz/t5-base-break-high": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "igorktech/rut5-small-chit-chat-intelligent": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 20100}, "kuleshov/llama-7b-4bit": {"architectures": ["LLaMAForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "hipnologo/gpt2-imdb-finetune": {"architectures": ["GPT2ForSequenceClassification"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "qwopqwop/danbooru-llama-gptq": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "t-dai-con/gpt-fine-tuned-v2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/Platypus2-70B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "KETI-AIR/ke-t5-base-ko": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 64128}, "doc2query/all-t5-base-v1": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "GT4SD/multitask-text-and-chemistry-t5-base-standard": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "uer/gpt2-medium-chinese-cluecorpussmall": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 21128}, "UBC-NLP/AraT5-base-title-generation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 110080}, "dsivakumar/text2sql": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "power-greg/super-fast-llm": {"architectures": ["GPT2LMHeadModel"], "n_embd": 2048, "n_head": 16, "n_inner": 2048, "n_layer": 4, "vocab_size": 2048}, "AlexWortega/instruct_rugptMedium": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50263}, "hiyouga/Llama-2-Chinese-13b-chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "frank098/llama2-13b-8k-vnf-virtualization": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "OFA-Sys/gsm8k-rft-llama7b-sample100": {"architectures": ["LlamaForCausalLM"], 
"hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "EnterNameBros/Senko-ai-medium": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "PeanutJar/LLaMa-2-PeanutButter_v19_R8-7B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Sao10K/Medusa-1.1-L2-7B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "ChrisVCB/DialoGPT-medium-cmjs": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "indonesian-nlp/gpt2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "persiannlp/mt5-small-parsinlu-squad-reading-comprehension": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250112}, "stmnk/codet5-small-code-summarization-python": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32100}, "emozilla/LLongMA-2-13b-16k-flash": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "bigscience/bloom-petals": {"architectures": ["BloomForCausalLM"], "hidden_size": 14336, "n_head": 112, "n_layer": 70, "vocab_size": 250880}, "procesaur/gpt2-srlat": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 52000}, "ashwinR/CodeExplainer": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32100}, "Chirayu/nl2pandas": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "OpenBuddy/openbuddy-falcon-7b-v6-bf16": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 70144}, "swbaek/tulu_65b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32001}, "huggingtweets/wallstreetbets": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "Sultannn/gpt2-ft-id-puisi": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 35000}, "sonoisa/sentence-t5-base-ja-mean-tokens": {"architectures": ["T5Model"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "sdadas/polish-gpt2-xl": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1600, "n_head": 25, "n_inner": 6400, "n_layer": 48, "vocab_size": 51200}, "sjrhuschlee/flan-t5-large-squad2": {"architectures": ["T5ForQuestionAnswering"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "Hnabil/t5-address-standardizer": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "Gryphe/MythoLogic-Mini-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, 
"intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/Athena-v1-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Undi95/MythoMax-L2-Kimiko-v2-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "magnifi/llama-augmented-contextual-2-epoch-6-merged": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "doc2query/msmarco-chinese-mt5-base-v1": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "Sakuna/t5_grammar_checker": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "Dahoas/pythia-1B-response-full-static-sft": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50304}, "localmodels/Vicuna-7B-v1.3-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Sao10K/Stheno-1.1-L2-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "mlabonne/drllama-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "IlyaGusev/rugpt3medium_sum_gazeta": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "describeai/gemini": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "mojians/E2E-QA-Mining": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32102}, "TheBloke/Wizard-Vicuna-13B-Uncensored-SuperHOT-8K-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "dnagpt/human_gpt2-v1": {"architectures": ["GPT2Model"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 25000}, "heegyu/WizardVicuna-Uncensored-pythia-160m-deduped": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 768, "intermediate_size": 3072, "num_attention_heads": 12, "num_hidden_layers": 12, "vocab_size": 50304}, "maximuslee07/llama-2-7b-rockwell": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "DylanJHJ/fidt5-base-nq": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "laituan245/molt5-large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "DancingIguana/music-generation": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 6, 
"vocab_size": 25000}, "Qiliang/flan-t5-large-summarization-finetuned-xsum": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "TheBloke/Vicuna-7B-CoT-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "hpcaitech/openmoe-base": {"architectures": ["OpenMoeForCausalLM"], "hidden_size": 768, "intermediate_size": 2048, "num_attention_heads": 12, "num_hidden_layers": 12, "vocab_size": 256384}, "CalderaAI/13B-Thorns-l2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32032}, "CHIH-HUNG/llama-2-13b-FINETUNE1_17w-r4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "IlyaGusev/rugpt_medium_turbo_instructed": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50258}, "pankajmathur/orca_alpaca_3b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 3200, "intermediate_size": 8640, "num_attention_heads": 32, "num_hidden_layers": 26, "vocab_size": 32000}, "TheBloke/Wizard-Vicuna-7B-Uncensored-SuperHOT-8K-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "abacusai/Giraffe-v1-delta-13b-scaled-4": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/Huginn-v3-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "bloom-testing/test-bloomd-350m-main": {"architectures": ["BloomModel"], "n_inner": null, "n_layer": 24, "num_attention_heads": 16, "vocab_size": 250880}, "AI-Sweden/gpt-sw3-356m": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": 4096, "n_layer": 24, "vocab_size": 64000}, "raymondho/DialoGPT-small-harry": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "TheBloke/airochronos-33B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "TheBloke/OpenChat_v3.2-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "ahnyeonchan/OpenOrca-AYT-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "stanford-crfm/expanse-gpt2-small-x777": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "doc2query/msmarco-german-mt5-base-v1": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "ku-nlp/gpt2-medium-japanese-char": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 6000}, "llm-blender/gen_fuser_3b": 
{"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32100}, "lomahony/eleuther-pythia2.8b-hh-sft": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50304}, "TheBloke/Llama2-22B-GPLATTY-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 40, "vocab_size": 32000}, "grammarly/coedit-xl-composite": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32100}, "imuncomfortable/DiabloGPT-small-CocoAtarashi": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "kaiyuy/leandojo-lean3-retriever-tacgen-byt5-small": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3584, "d_model": 1472, "num_heads": 6, "num_layers": 12, "vocab_size": 384}, "michaelwzhu/Chinese-LlaMA2-13B-chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 55296}, "Xenova/llama2.c-stories110M": {"architectures": ["LlamaForCausalLM"], "hidden_size": 768, "intermediate_size": 2048, "num_attention_heads": 12, "num_hidden_layers": 12, "vocab_size": 32000}, "Youngwoo9/T5_Pyeongsan": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 50358}, "literallywood/DialoGPT-small-ekansh": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "jondurbin/spicyboros-7b-2.2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "indobenchmark/indogpt": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_layer": 12, "vocab_size": 40005}, "it5/it5-efficient-small-el32-news-summarization": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 32, "vocab_size": 32100}, "mesolitica/finetune-translation-t5-base-standard-bahasa-cased-v2": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "Den4ikAI/FRED-T5-XL_instructor": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1536, "num_heads": 24, "num_layers": 24, "vocab_size": 50365}, "mlabonne/gpt2-GPTQ-4bit": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "steerapi/Llama-2-7b-chat-hf-onnx": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Langboat/bloom-1b4-zh": {"architectures": ["BloomForCausalLM"], "hidden_size": 2048, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 46145}, "neulab/docprompting-codet5-python-doc-retriever": {"architectures": ["BERTScorerForCL"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "AI-Sweden/gpt-sw3-20b": {"architectures": ["GPT2LMHeadModel"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 44, "vocab_size": 64000}, "syndi-models/article-title-generator": 
{"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "vgaraujov/Dummy5": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "TFLai/Orca-Nova-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32002}, "allenai/tk-instruct-11b-def-pos": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 65536, "d_model": 1024, "num_heads": 128, "num_layers": 24, "vocab_size": 32128}, "aspis/gpt2-genre-story-generation": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50267}, "lcw99/t5-base-korean-paraphrase": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 50358}, "Celestinian/TopicGPT": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_inner": null, "n_layer": 36, "vocab_size": 50257}, "TheBloke/Redmond-Hermes-Coder-GPTQ": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 40, "vocab_size": 49153}, "TheBloke/h2ogpt-gm-oasst1-en-2048-falcon-7b-v3-GPTQ": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "abhinavkulkarni/meta-llama-Llama-2-13b-chat-hf-w4-g128-awq": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "jypppp/llama-2-7b-manual_GPT_ver2": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/Llama-2-7B-32K-Instruct-GGML": {}, "TheBloke/Yarn-Llama-2-7B-128K-GGML": {}, "quantumaikr/KoreanLM-llama-2-7B-finetuned": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 46336}, "google/t5-xl-ssm-nq": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "nikokons/gpt2-greek": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_layer": 12, "vocab_size": 22000}, "NYTK/PULI-GPT-3SX": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50048}, "Futyn-Maker/rugpt3small_based_on_gpt2-finetuned_teachers_quotes_small": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50264}, "localmodels/Llama-2-13B-Chat-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "SebastianSchramm/UniNER-7B-all-GPTQ-4bit-128g-actorder_True": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/Airoboros-L2-70B-2.1-Creative-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "casperhansen/vicuna-7b-v1.5-awq-gemv": {"architectures": 
["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "IDEA-CCNL/Wenzhong-GPT2-3.5B": {"architectures": ["GPT2LMHeadModel"], "n_embd": 3072, "n_head": 32, "n_inner": 12288, "n_layer": 30, "vocab_size": 50304}, "antoinelouis/belgpt2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_layer": 12, "vocab_size": 50257}, "atkh6673/DialoGPT-small-trump": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "huggingface-course/mt5-small-finetuned-amazon-en-es": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250112}, "malteos/gpt2-xl-wechsel-german": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1600, "n_head": 25, "n_inner": 6400, "n_layer": 48, "vocab_size": 50304}, "KES/caribe-capitalise": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "pszemraj/flan-t5-large-instruct-dolly_hhrlhf": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "Tanmay09516/StableBeluga-7B-sharded-bf16-5GB": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "abhinavkulkarni/codellama-CodeLlama-7b-Python-hf-w4-g128-awq": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/Spicyboros-7B-2.2-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "huggingtweets/elonmusk": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "BelleGroup/BELLE-7B-2M": {"architectures": ["BloomModel"], "n_inner": null, "n_layer": 30, "num_attention_heads": 32, "vocab_size": 250880}, "snoop2head/Gomoku-GPT2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 256, "n_head": 4, "n_inner": null, "n_layer": 4, "vocab_size": 404}, "AnimusOG/pygmalion-7b-4bit-128g-cuda-2048Token": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/airoboros-l2-7B-gpt4-m2.0-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Juniplayground/Mist_LLaMA-2-7B-1024_V3": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "DataLinguistic/DataLinguistic-34B-V1.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32001}, "erikycd/chatbot_hadita": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50262}, "medicalai/ClinicalGPT-base-zh": {"architectures": ["BloomForCausalLM"], "hidden_size": 4096, "n_head": 32, "n_inner": null, "n_layer": 30, "vocab_size": 250880}, "TheBloke/orca_mini_v2_13b-GPTQ": {"architectures": 
["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "NIRVANA/T5_academic_paraphraser": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "josmunpen/mt5-small-spanish-summarization": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250112}, "shahp7575/gpt2-horoscopes": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50260}, "yihsuan/best_model_0427_small_long": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250100}, "malteos/bloom-6b4-clp-german-oasst-v0.1": {"architectures": ["BloomForCausalLM"], "hidden_size": 4096, "n_head": 32, "n_layer": 30, "vocab_size": 50272}, "openllmplayground/openalpaca_7b_700bt_preview": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Gaivoronsky/ruGPT-3.5-13B-fp16": {"architectures": ["GPT2LMHeadModel"], "n_embd": 5120, "n_head": 40, "n_inner": null, "n_layer": 40, "vocab_size": 50272}, "universeTBD/astrollama": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "gorkemgoknar/gpt2-small-turkish": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "huggingtweets/joejoinerr": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "Prarabdha/T5-Transformer-RickBot": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "beomi/kollama-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 52000}, "mohammadtaghizadeh/flan-t5-base-imdb-text-classification": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "nicholasKluge/Aira-Instruct-774M": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1280, "n_head": 20, "n_inner": null, "n_layer": 36, "vocab_size": 50259}, "bhenrym14/airoboros-7b-gpt4-1.4.1-lxctx-PI-16384-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Alireza1044/michael_bert_lm": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 6, "vocab_size": 50257}, "shibing624/gpt2-dialogbot-base-chinese": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 10, "vocab_size": 13317}, "mesolitica/finetune-summarization-ms-t5-base-standard-bahasa-cased": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "lmqg/flan-t5-large-squad-qg": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32101}, "circulus/alpaca-7b": {"architectures": ["LlaMAForCausalLM"], "hidden_size": 4096, 
"intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "reeducator/vicuna-13b-free": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "flozi00/Llama-2-13b-german-assistant-v6-4bit-autogptq": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 37632}, "SasnayaLetovka/tinkoff-zhientaev-model": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50262}, "mesolitica/t5-base-standard-bahasa-cased": {"architectures": ["T5Model"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "EllyPony/flutterbot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "pszemraj/flan-t5-xl-grammar-synthesis": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 5120, "d_model": 2048, "num_heads": 32, "num_layers": 24, "vocab_size": 32128}, "jinxuewen/vicuna-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "fireballoon/baichuan-llama-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 64000}, "TheBloke/Vicuna-7B-v1-3-SuperHOT-8K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "scural/arxiv_model": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "Undi95/CodeEngine": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "Codexister/DialoGPT-medium-KafkaBotV1": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "google/t5-xxl-ssm-nq": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 10240, "d_model": 4096, "num_heads": 64, "num_layers": 24, "vocab_size": 32128}, "uer/gpt2-chinese-couplet": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 21128}, "nicholasKluge/Aira-Instruct-355M": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50259}, "HIT-SCIR/huozi-7b-sft": {"architectures": ["BloomForCausalLM"], "hidden_size": 4096, "n_head": 32, "n_inner": null, "n_layer": 30, "vocab_size": 250880}, "NousResearch/CodeLlama-13b-Instruct-hf-flash": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32016}, "Enno-Ai/vigogne2-enno-13b-sft-lora-4bit": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "sonoisa/t5-base-japanese-article-generation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "Kyrmasch/t5-kazakh-qa": {"architectures": 
["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 18947}, "TheBloke/airoboros-13b-gpt4-1.4-SuperHOT-8K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TheBloke/Kimiko-13B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "arya555/vicuna-7b-v1.5-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Geo/gpt2_custom_c_q_and_a": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "laituan245/molt5-small-smiles2caption": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 32128}, "cloudqi/cqi_brain_memory_summarizer_large_pt_v0": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "ybelkada/bloom-1b7-8bit": {"architectures": ["BloomForCausalLM"], "hidden_size": 2048, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 250880}, "snipaid/snip-igel-500-v2-adapter-merged": {"architectures": ["BloomForCausalLM"], "hidden_size": 4096, "n_head": 32, "n_layer": 30, "vocab_size": 50304}, "TabbyML/SantaCoder-1B": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 2048, "n_head": 16, "n_inner": 8192, "n_layer": 24, "vocab_size": 49280}, "TheBloke/Guanaco-33B-SuperHOT-8K-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 60, "vocab_size": 32000}, "hanseokhyeon/kullm-polyglot-5.8b-v2-GPTQ": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 4096, "intermediate_size": 16384, "num_attention_heads": 16, "num_hidden_layers": 28, "vocab_size": 30080}, "CAIRE-CedarsSinai/falcon-7b-qlora-chat-support-bot-faq-alzkb-version-1": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "pranavpsv/genre-story-generator-v2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_layer": 12, "vocab_size": 50266}, "nandakishormpai/t5-small-machine-articles-tag-generation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "ITG/DialoGPT-medium-spanish-chitchat": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "4bit/falcon-7b-instruct-GPTQ": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65024}, "OpenBuddy/openbuddy-openllama-7b-v5-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 38449}, "papahawk/keya-560m": {"architectures": ["BloomForCausalLM"], "n_inner": null, "n_layer": 24, "num_attention_heads": 16, "vocab_size": 250880}, "abhinavkulkarni/tiiuae-falcon-40b-instruct-w4-g128-awq": {"architectures": ["RWForCausalLM"], "hidden_size": 8192, "n_head": 128, "n_layer": 60, "vocab_size": 65024}, "funstoryai/immersiveL-exp": {"architectures": ["BloomForCausalLM"], "hidden_size": 1536, 
"n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 250880}, "Benson/llama-2-7b-miniguanaco-hf": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "clancystudios/DialoGPT-medium-Morty": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "huggingtweets/realdonaldtrump": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "charanhu/text_to_sql_2": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32102}, "beomi/kollama-13b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 52000}, "IDEA-CCNL/Ziya-LLaMA-13B-v1.1": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 39424}, "nicholasKluge/Aira-Instruct-PT-1B7": {"architectures": ["BloomForCausalLM"], "hidden_size": 2048, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 250683}, "TheBloke/Llama2-22B-Daydreamer-v3-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 6656, "intermediate_size": 17920, "num_attention_heads": 52, "num_hidden_layers": 40, "vocab_size": 32000}, "yongzx/pythia-160m-sft-hh": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 768, "intermediate_size": 3072, "num_attention_heads": 12, "num_hidden_layers": 12, "vocab_size": 50304}, "h2oai/h2ogpt-16k-codellama-34b-python": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "nedima68/author_articles_GPT2_textgen_TR": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 52001}, "IronChef/MascotAI_Open_LLaMA_FINAL": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "unionai/pythia-1B-deduped-wikipedia-8bit": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2560, "intermediate_size": 10240, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 50432}, "Chirayu/nl2cql": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32100}, "TheBloke/Nous-Puffin-70B-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "TheBloke/Llama-2-70B-Orca-200k-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 28672, "num_attention_heads": 64, "num_hidden_layers": 80, "vocab_size": 32000}, "TheBloke/Llama-2-70B-chat-GGUF": {}, "sartmis1/CodeLlama-34b-instruct-openapi": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "flax-community/bengali-t5-base": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32103}, "csebuetnlp/mT5_m2o_hindi_crossSum": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 
768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "huggingtweets/fabrizioromano": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "yshen99/ZhiGuoLiZheng-GPT2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 21128}, "malalejandra/putinspeaks": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "Intel/fid_flan_t5_base_nq": {"architectures": ["FusionInDecoderForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "sjrhuschlee/flan-t5-base-mnli": {"architectures": ["T5ForSequenceClassification"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "TheBloke/Codegen25-7B-mono-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 51200}, "frank098/starcoder-vyatta": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 40, "vocab_size": 49152}, "Xenova/llama2.c-stories42M": {"architectures": ["LlamaForCausalLM"], "hidden_size": 512, "intermediate_size": 1376, "num_attention_heads": 8, "num_hidden_layers": 8, "vocab_size": 32000}, "flozi00/Llama-2-13b-german-assistant-v5": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 37632}, "Andrei-Alex/Fine-Tuned-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/vicuna-7B-1.1-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "sharpbai/alpaca-7b-merged": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}, "Clakmann/t5-base-Clakmann-thesis": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "RuterNorway/Llama-2-13b-chat-norwegian-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "osieosie/bloom-560m-4bit": {"architectures": ["BloomForCausalLM"], "hidden_size": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 250880}, "paulowoicho/t5-podcast-summarisation": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "liujch1998/rainier-large": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "gsdas/qct5": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "nicholasKluge/Aira-Instruct-1B5": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1600, "n_head": 25, "n_inner": null, "n_layer": 48, "vocab_size": 50259}, "kajdun/iubaris-13b-v3_GGML": {}, "csebuetnlp/mT5_m2o_english_crossSum": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, 
"num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "dehio/german-qg-t5-quad": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32101}, "emil2000/dialogpt-for-french-language": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "khalidsaifullaah/bengali-lyricist-gpt2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "thinhda/chatbot": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50257}, "Finnish-NLP/llama-7b-finnish": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 64256}, "ehartford/WizardLM-7B-V1.0-Uncensored": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/WizardCoder-Guanaco-15B-V1.0-GPTQ": {"architectures": ["GPTBigCodeForCausalLM"], "n_embd": 6144, "n_head": 48, "n_inner": 24576, "n_layer": 40, "vocab_size": 49153}, "DUOMO-Lab/TransGPT-v0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 49954}, "TheBloke/Platypus2-70B-Instruct-GGUF": {}, "lmqg/t5-large-squad-qg-ae": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32101}, "rubentito/hivt5-base-mpdocvqa": {"architectures": ["HiVT5"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "cosimoiaia/Loquace-70m": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 512, "intermediate_size": 2048, "num_attention_heads": 8, "num_hidden_layers": 6, "vocab_size": 50304}, "metamyth/jennyNew": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "AlexWortega/LLama2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "totally-not-an-llm/AlpacaCielo2-7b-8k": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/orca_mini_v3_7B-GGML": {}, "zjunlp/knowlm-13b-base-v1.0": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "ticoAg/gpt2-tigerbot-pt-zh": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "akshat3492/mT5": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250112}, "TheBloke/Falcon-180B-Chat-GGUF": {}, "unicamp-dl/mt5-base-mmarco-v2": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "malteos/gpt2-wechsel-german-ds-meg": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": 3072, "n_layer": 12, "vocab_size": 50304}, "phpaiola/ptt5-base-summ-temario": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 
3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "mesolitica/finetune-translation-t5-super-tiny-standard-bahasa-cased": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 256, "num_heads": 6, "num_layers": 2, "vocab_size": 32100}, "ademfatnassi/bonjourGPT-small": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "pr1me/llama2_13b_eros_instruct": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32032}, "Xenova/llama2.c-stories15M": {"architectures": ["LlamaForCausalLM"], "hidden_size": 288, "intermediate_size": 768, "num_attention_heads": 6, "num_hidden_layers": 6, "vocab_size": 32000}, "sekarmulyani/gpt2-ulasan-beauty-products-gen": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "akhooli/gpt2-small-arabic-poetry": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_layer": 12, "vocab_size": 50257}, "mrm8488/spanish-t5-small-sqac-for-qa": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32103}, "flozi00/falcon-7b-german-assistant-v2": {"architectures": ["RWForCausalLM"], "hidden_size": 4544, "n_head": 71, "n_layer": 32, "vocab_size": 65040}, "TheBloke/llama-2-13B-chat-limarp-v2-merged-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "ticoAg/gpt2-tiger-sft-zh": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "thiagomf/Llama-2-7b-hf-sharded-bf16-1GB": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "THUMT/mGPT": {"architectures": ["GPT2LMHeadModel"], "vocab_size": 250100, "n_embd": 1024, "n_layer": 24, "n_head": 16, "n_inner": 4096}, "lmqg/flan-t5-base-squad-qg": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32101}, "h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-7b-preview-700bt": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "TheBloke/Phind-CodeLlama-34B-Python-v1-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 8192, "intermediate_size": 22016, "num_attention_heads": 64, "num_hidden_layers": 48, "vocab_size": 32000}, "arogov/llama2_13b_chat_uncensored": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "ai-forever/mGPT-1.3B-bulgarian": {"architectures": ["GPT2LMHeadModel"], "n_embd": 2048, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 100000}, "davesoma/SageBeluga13": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "pssubitha/llama-2-7b-sales-force-chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "PyaeSoneK/pythia_70m_legalQA": 
{"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 512, "intermediate_size": 2048, "num_attention_heads": 8, "num_hidden_layers": 6, "vocab_size": 50304}, "hidude562/OpenMusenet-2.1-L": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1024, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50259}, "abeiler/huggingface-goatLora-goatV9-testData-morePushes": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "abinayam/gpt-2-tamil": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "persiannlp/mt5-base-parsinlu-squad-reading-comprehension": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 250112}, "pierreguillou/t5-base-qa-squad-v1.1-portuguese": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "lchaloupsky/czech-gpt2-oscar": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_layer": 12, "vocab_size": 50257}, "OpenHust/viet-gpt2": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "tiansz/ChatYuan-7B-merge": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "voidful/llama-v2-unit-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 8195}, "taaredikahan23/Llama-2-7b-chat-finetune": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "deutsche-telekom/mt5-small-sum-de-en-v1": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250100}, "hetpandya/t5-small-tapaco": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "sunhao666/chi-sum2": {"architectures": ["T5Model"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 21228}, "smartik/mt5-small-finetuned-gec-0.2": {"architectures": ["MT5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 250112}, "PORTULAN/gervasio-ptbr-base": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, "intermediate_size": 8192, "num_attention_heads": 8, "num_hidden_layers": 16, "vocab_size": 50304}, "clibrain/Llama-2-13b-ft-instruct-es-gptq-4bit": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "soketlabs/bhasha-7b-2k-hi": {"architectures": ["MPTForCausalLM"], "d_model": 4096, "vocab_size": 61772}, "codefuse-ai/CodeFuse-13B": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 5120, "intermediate_size": 20480, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 100831}, "Sentdex/GPyT": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 52000}, "it5/it5-large-news-summarization": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2816, "d_model": 
1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32103}, "FredZhang7/distilgpt2-stable-diffusion": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 6, "vocab_size": 50257}, "Rostlab/ProstT5_fp16": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 16384, "d_model": 1024, "num_heads": 32, "num_layers": 24, "vocab_size": 150}, "approach0/mathy-vicuna-13B-FFT": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "lighteternal/gpt2-finetuned-greek": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "stanford-crfm/battlestar-gpt2-small-x49": {"architectures": ["GPT2LMHeadModel"], "n_embd": 768, "n_head": 12, "n_inner": null, "n_layer": 12, "vocab_size": 50257}, "stacked-summaries/flan-t5-small-stacked-samsum-1024": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 1024, "d_model": 512, "num_heads": 6, "num_layers": 8, "vocab_size": 32128}, "TigerResearch/tigerbot-7b-base-v1": {"architectures": ["BloomForCausalLM"], "hidden_size": 4096, "n_head": 32, "n_inner": null, "n_layer": 30, "vocab_size": 250680}, "Chang-Su/llama-2-13b-chat-ko": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 39478}, "Clakmann/t5-base-Clakmann-thesis-epoch10": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "yekaraoglann/results": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 512, "num_heads": 8, "num_layers": 6, "vocab_size": 32128}, "bitadin/gpt-4-medium-titles-v2-flan-t5-base-llm-6": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 2048, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "google/t5_11b_trueteacher_and_anli": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 65536, "d_model": 1024, "num_heads": 128, "num_layers": 24, "vocab_size": 32128}, "TaylorAI/Flash-Llama-30M": {"architectures": ["LlamaForCausalLM"], "hidden_size": 384, "intermediate_size": 1024, "num_attention_heads": 12, "num_hidden_layers": 4, "vocab_size": 32000}, "flax-community/t5-base-wikisplit": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 3072, "d_model": 768, "num_heads": 12, "num_layers": 12, "vocab_size": 32128}, "razent/SciFive-large-Pubmed_PMC": {"architectures": ["T5ForConditionalGeneration"], "d_ff": 4096, "d_model": 1024, "num_heads": 16, "num_layers": 24, "vocab_size": 32128}, "inkoziev/rugpt_chitchat": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1536, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50258}, "lomahony/eleuther-pythia410m-hh-sft": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 1024, "intermediate_size": 4096, "num_attention_heads": 16, "num_hidden_layers": 24, "vocab_size": 50304}, "TheBloke/Vicuna-13B-v1.3-German-GPTQ": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "emozilla/LLongMA-2-13b-storysummarizer": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32032}, "yongzx/pythia-1b-sft-hh": {"architectures": ["GPTNeoXForCausalLM"], "hidden_size": 2048, 
"intermediate_size": 8192, "num_attention_heads": 8, "num_hidden_layers": 16, "vocab_size": 50304}, "TheBloke/airoboros-13b-gpt4-1.4-SuperHOT-8K-fp16": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "heegyu/llama-2-ko-7b-chat": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 46336}, "flozi00/Llama-2-7b-german-assistant-v3-4bit-autogptq": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "zarakiquemparte/zararp-l2-7b": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32000}, "Sao10K/Stheno-1.3-L2-13B": {"architectures": ["LlamaForCausalLM"], "hidden_size": 5120, "intermediate_size": 13824, "num_attention_heads": 40, "num_hidden_layers": 40, "vocab_size": 32000}, "TsinghuaAI/CPM-Generate": {"architectures": ["GPT2LMHeadModel"], "n_embd": 2560, "n_head": 32, "n_inner": null, "n_layer": 32, "vocab_size": 30000}, "AlexWortega/instruct_rugptlarge": {"architectures": ["GPT2LMHeadModel"], "n_embd": 1536, "n_head": 16, "n_inner": null, "n_layer": 24, "vocab_size": 50263}, "tatsu-lab/alpaca-7b-wdiff": {"architectures": ["LlamaForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, "num_attention_heads": 32, "num_hidden_layers": 32, "vocab_size": 32001}} \ No newline at end of file diff --git a/asset-manifest.json b/asset-manifest.json index 21303c8..0dcf5e5 100644 --- a/asset-manifest.json +++ b/asset-manifest.json @@ -1,15 +1,15 @@ { "files": { - "main.css": "/gpu_poor/static/css/main.66e7bbdc.css", - "main.js": "/gpu_poor/static/js/main.8b24599d.js", + "main.css": "/gpu_poor/static/css/main.456c3b59.css", + "main.js": "/gpu_poor/static/js/main.48479645.js", "static/js/787.dccdf937.chunk.js": "/gpu_poor/static/js/787.dccdf937.chunk.js", "index.html": "/gpu_poor/index.html", - "main.66e7bbdc.css.map": "/gpu_poor/static/css/main.66e7bbdc.css.map", - "main.8b24599d.js.map": "/gpu_poor/static/js/main.8b24599d.js.map", + "main.456c3b59.css.map": "/gpu_poor/static/css/main.456c3b59.css.map", + "main.48479645.js.map": "/gpu_poor/static/js/main.48479645.js.map", "787.dccdf937.chunk.js.map": "/gpu_poor/static/js/787.dccdf937.chunk.js.map" }, "entrypoints": [ - "static/css/main.66e7bbdc.css", - "static/js/main.8b24599d.js" + "static/css/main.456c3b59.css", + "static/js/main.48479645.js" ] } \ No newline at end of file diff --git a/index.html b/index.html index 3279e2e..443b649 100644 --- a/index.html +++ b/index.html @@ -1 +1 @@ -
a||125d?(a.sortIndex=c,f(t,a),null===h(r)&&a===h(t)&&(B?(E(L),L=-1):B=!0,K(H,c-d))):(a.sortIndex=e,f(r,a),A||z||(A=!0,I(J)));return a};\nexports.unstable_shouldYield=M;exports.unstable_wrapCallback=function(a){var b=y;return function(){var c=y;y=b;try{return a.apply(this,arguments)}finally{y=c}}};\n","'use strict';\n\nif (process.env.NODE_ENV === 'production') {\n module.exports = require('./cjs/scheduler.production.min.js');\n} else {\n module.exports = require('./cjs/scheduler.development.js');\n}\n","/**\n * Copyright (c) 2014-present, Facebook, Inc.\n *\n * This source code is licensed under the MIT license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n'use strict';\n\n/**\n * Similar to invariant but only logs a warning if the condition is not met.\n * This can be used to log issues in development environments in critical\n * paths. Removing the logging code for production environments will keep the\n * same logic and follow the same code paths.\n */\n\nvar __DEV__ = process.env.NODE_ENV !== 'production';\n\nvar warning = function() {};\n\nif (__DEV__) {\n var printWarning = function printWarning(format, args) {\n var len = arguments.length;\n args = new Array(len > 1 ? len - 1 : 0);\n for (var key = 1; key < len; key++) {\n args[key - 1] = arguments[key];\n }\n var argIndex = 0;\n var message = 'Warning: ' +\n format.replace(/%s/g, function() {\n return args[argIndex++];\n });\n if (typeof console !== 'undefined') {\n console.error(message);\n }\n try {\n // --- Welcome to debugging React ---\n // This error was thrown as a convenience so that you can use this stack\n // to find the callsite that caused this warning to fire.\n throw new Error(message);\n } catch (x) {}\n }\n\n warning = function(condition, format, args) {\n var len = arguments.length;\n args = new Array(len > 2 ? 
len - 2 : 0);\n for (var key = 2; key < len; key++) {\n args[key - 2] = arguments[key];\n }\n if (format === undefined) {\n throw new Error(\n '`warning(condition, format, ...args)` requires a warning ' +\n 'message argument'\n );\n }\n if (!condition) {\n printWarning.apply(null, [format].concat(args));\n }\n };\n}\n\nmodule.exports = warning;\n","// The module cache\nvar __webpack_module_cache__ = {};\n\n// The require function\nfunction __webpack_require__(moduleId) {\n\t// Check if module is in cache\n\tvar cachedModule = __webpack_module_cache__[moduleId];\n\tif (cachedModule !== undefined) {\n\t\treturn cachedModule.exports;\n\t}\n\t// Create a new module (and put it into the cache)\n\tvar module = __webpack_module_cache__[moduleId] = {\n\t\t// no module.id needed\n\t\t// no module.loaded needed\n\t\texports: {}\n\t};\n\n\t// Execute the module function\n\t__webpack_modules__[moduleId](module, module.exports, __webpack_require__);\n\n\t// Return the exports of the module\n\treturn module.exports;\n}\n\n// expose the modules object (__webpack_modules__)\n__webpack_require__.m = __webpack_modules__;\n\n","// getDefaultExport function for compatibility with non-harmony modules\n__webpack_require__.n = function(module) {\n\tvar getter = module && module.__esModule ?\n\t\tfunction() { return module['default']; } :\n\t\tfunction() { return module; };\n\t__webpack_require__.d(getter, { a: getter });\n\treturn getter;\n};","// define getter functions for harmony exports\n__webpack_require__.d = function(exports, definition) {\n\tfor(var key in definition) {\n\t\tif(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n\t\t\tObject.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n\t\t}\n\t}\n};","__webpack_require__.f = {};\n// This file contains only the entry chunk.\n// The chunk loading function for additional chunks\n__webpack_require__.e = function(chunkId) {\n\treturn Promise.all(Object.keys(__webpack_require__.f).reduce(function(promises, key) {\n\t\t__webpack_require__.f[key](chunkId, promises);\n\t\treturn promises;\n\t}, []));\n};","// This function allow to reference async chunks\n__webpack_require__.u = function(chunkId) {\n\t// return url for filenames based on template\n\treturn \"static/js/\" + chunkId + \".\" + \"dccdf937\" + \".chunk.js\";\n};","// This function allow to reference async chunks\n__webpack_require__.miniCssF = function(chunkId) {\n\t// return url for filenames based on template\n\treturn undefined;\n};","__webpack_require__.o = function(obj, prop) { return Object.prototype.hasOwnProperty.call(obj, prop); }","var inProgress = {};\nvar dataWebpackPrefix = \"gpu_mem:\";\n// loadScript function to load a script via script tag\n__webpack_require__.l = function(url, done, key, chunkId) {\n\tif(inProgress[url]) { inProgress[url].push(done); return; }\n\tvar script, needAttach;\n\tif(key !== undefined) {\n\t\tvar scripts = document.getElementsByTagName(\"script\");\n\t\tfor(var i = 0; i < scripts.length; i++) {\n\t\t\tvar s = scripts[i];\n\t\t\tif(s.getAttribute(\"src\") == url || s.getAttribute(\"data-webpack\") == dataWebpackPrefix + key) { script = s; break; }\n\t\t}\n\t}\n\tif(!script) {\n\t\tneedAttach = true;\n\t\tscript = document.createElement('script');\n\n\t\tscript.charset = 'utf-8';\n\t\tscript.timeout = 120;\n\t\tif (__webpack_require__.nc) {\n\t\t\tscript.setAttribute(\"nonce\", __webpack_require__.nc);\n\t\t}\n\t\tscript.setAttribute(\"data-webpack\", dataWebpackPrefix + key);\n\n\t\tscript.src = 
url;\n\t}\n\tinProgress[url] = [done];\n\tvar onScriptComplete = function(prev, event) {\n\t\t// avoid mem leaks in IE.\n\t\tscript.onerror = script.onload = null;\n\t\tclearTimeout(timeout);\n\t\tvar doneFns = inProgress[url];\n\t\tdelete inProgress[url];\n\t\tscript.parentNode && script.parentNode.removeChild(script);\n\t\tdoneFns && doneFns.forEach(function(fn) { return fn(event); });\n\t\tif(prev) return prev(event);\n\t}\n\tvar timeout = setTimeout(onScriptComplete.bind(null, undefined, { type: 'timeout', target: script }), 120000);\n\tscript.onerror = onScriptComplete.bind(null, script.onerror);\n\tscript.onload = onScriptComplete.bind(null, script.onload);\n\tneedAttach && document.head.appendChild(script);\n};","// define __esModule on exports\n__webpack_require__.r = function(exports) {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","__webpack_require__.p = \"/gpu_poor/\";","// no baseURI\n\n// object to store loaded and loading chunks\n// undefined = chunk not loaded, null = chunk preloaded/prefetched\n// [resolve, reject, Promise] = chunk loading, 0 = chunk loaded\nvar installedChunks = {\n\t179: 0\n};\n\n__webpack_require__.f.j = function(chunkId, promises) {\n\t\t// JSONP chunk loading for javascript\n\t\tvar installedChunkData = __webpack_require__.o(installedChunks, chunkId) ? installedChunks[chunkId] : undefined;\n\t\tif(installedChunkData !== 0) { // 0 means \"already installed\".\n\n\t\t\t// a Promise means \"currently loading\".\n\t\t\tif(installedChunkData) {\n\t\t\t\tpromises.push(installedChunkData[2]);\n\t\t\t} else {\n\t\t\t\tif(true) { // all chunks have JS\n\t\t\t\t\t// setup Promise in chunk cache\n\t\t\t\t\tvar promise = new Promise(function(resolve, reject) { installedChunkData = installedChunks[chunkId] = [resolve, reject]; });\n\t\t\t\t\tpromises.push(installedChunkData[2] = promise);\n\n\t\t\t\t\t// start chunk loading\n\t\t\t\t\tvar url = __webpack_require__.p + __webpack_require__.u(chunkId);\n\t\t\t\t\t// create error before stack unwound to get useful stacktrace later\n\t\t\t\t\tvar error = new Error();\n\t\t\t\t\tvar loadingEnded = function(event) {\n\t\t\t\t\t\tif(__webpack_require__.o(installedChunks, chunkId)) {\n\t\t\t\t\t\t\tinstalledChunkData = installedChunks[chunkId];\n\t\t\t\t\t\t\tif(installedChunkData !== 0) installedChunks[chunkId] = undefined;\n\t\t\t\t\t\t\tif(installedChunkData) {\n\t\t\t\t\t\t\t\tvar errorType = event && (event.type === 'load' ? 
'missing' : event.type);\n\t\t\t\t\t\t\t\tvar realSrc = event && event.target && event.target.src;\n\t\t\t\t\t\t\t\terror.message = 'Loading chunk ' + chunkId + ' failed.\\n(' + errorType + ': ' + realSrc + ')';\n\t\t\t\t\t\t\t\terror.name = 'ChunkLoadError';\n\t\t\t\t\t\t\t\terror.type = errorType;\n\t\t\t\t\t\t\t\terror.request = realSrc;\n\t\t\t\t\t\t\t\tinstalledChunkData[1](error);\n\t\t\t\t\t\t\t}\n\t\t\t\t\t\t}\n\t\t\t\t\t};\n\t\t\t\t\t__webpack_require__.l(url, loadingEnded, \"chunk-\" + chunkId, chunkId);\n\t\t\t\t}\n\t\t\t}\n\t\t}\n};\n\n// no prefetching\n\n// no preloaded\n\n// no HMR\n\n// no HMR manifest\n\n// no on chunks loaded\n\n// install a JSONP callback for chunk loading\nvar webpackJsonpCallback = function(parentChunkLoadingFunction, data) {\n\tvar chunkIds = data[0];\n\tvar moreModules = data[1];\n\tvar runtime = data[2];\n\t// add \"moreModules\" to the modules object,\n\t// then flag all \"chunkIds\" as loaded and fire callback\n\tvar moduleId, chunkId, i = 0;\n\tif(chunkIds.some(function(id) { return installedChunks[id] !== 0; })) {\n\t\tfor(moduleId in moreModules) {\n\t\t\tif(__webpack_require__.o(moreModules, moduleId)) {\n\t\t\t\t__webpack_require__.m[moduleId] = moreModules[moduleId];\n\t\t\t}\n\t\t}\n\t\tif(runtime) var result = runtime(__webpack_require__);\n\t}\n\tif(parentChunkLoadingFunction) parentChunkLoadingFunction(data);\n\tfor(;i < chunkIds.length; i++) {\n\t\tchunkId = chunkIds[i];\n\t\tif(__webpack_require__.o(installedChunks, chunkId) && installedChunks[chunkId]) {\n\t\t\tinstalledChunks[chunkId][0]();\n\t\t}\n\t\tinstalledChunks[chunkId] = 0;\n\t}\n\n}\n\nvar chunkLoadingGlobal = self[\"webpackChunkgpu_mem\"] = self[\"webpackChunkgpu_mem\"] || [];\nchunkLoadingGlobal.forEach(webpackJsonpCallback.bind(null, 0));\nchunkLoadingGlobal.push = webpackJsonpCallback.bind(null, chunkLoadingGlobal.push.bind(chunkLoadingGlobal));","export default function _typeof(o) {\n \"@babel/helpers - typeof\";\n\n return _typeof = \"function\" == typeof Symbol && \"symbol\" == typeof Symbol.iterator ? function (o) {\n return typeof o;\n } : function (o) {\n return o && \"function\" == typeof Symbol && o.constructor === Symbol && o !== Symbol.prototype ? \"symbol\" : typeof o;\n }, _typeof(o);\n}","import _typeof from \"./typeof.js\";\nimport toPrimitive from \"./toPrimitive.js\";\nexport default function _toPropertyKey(arg) {\n var key = toPrimitive(arg, \"string\");\n return _typeof(key) === \"symbol\" ? key : String(key);\n}","import _typeof from \"./typeof.js\";\nexport default function _toPrimitive(input, hint) {\n if (_typeof(input) !== \"object\" || input === null) return input;\n var prim = input[Symbol.toPrimitive];\n if (prim !== undefined) {\n var res = prim.call(input, hint || \"default\");\n if (_typeof(res) !== \"object\") return res;\n throw new TypeError(\"@@toPrimitive must return a primitive value.\");\n }\n return (hint === \"string\" ? 
String : Number)(input);\n}","import toPropertyKey from \"./toPropertyKey.js\";\nexport default function _defineProperty(obj, key, value) {\n key = toPropertyKey(key);\n if (key in obj) {\n Object.defineProperty(obj, key, {\n value: value,\n enumerable: true,\n configurable: true,\n writable: true\n });\n } else {\n obj[key] = value;\n }\n return obj;\n}","import defineProperty from \"./defineProperty.js\";\nfunction ownKeys(e, r) {\n var t = Object.keys(e);\n if (Object.getOwnPropertySymbols) {\n var o = Object.getOwnPropertySymbols(e);\n r && (o = o.filter(function (r) {\n return Object.getOwnPropertyDescriptor(e, r).enumerable;\n })), t.push.apply(t, o);\n }\n return t;\n}\nexport default function _objectSpread2(e) {\n for (var r = 1; r < arguments.length; r++) {\n var t = null != arguments[r] ? arguments[r] : {};\n r % 2 ? ownKeys(Object(t), !0).forEach(function (r) {\n defineProperty(e, r, t[r]);\n }) : Object.getOwnPropertyDescriptors ? Object.defineProperties(e, Object.getOwnPropertyDescriptors(t)) : ownKeys(Object(t)).forEach(function (r) {\n Object.defineProperty(e, r, Object.getOwnPropertyDescriptor(t, r));\n });\n }\n return e;\n}","export default function _arrayLikeToArray(arr, len) {\n if (len == null || len > arr.length) len = arr.length;\n for (var i = 0, arr2 = new Array(len); i < len; i++) arr2[i] = arr[i];\n return arr2;\n}","import arrayLikeToArray from \"./arrayLikeToArray.js\";\nexport default function _unsupportedIterableToArray(o, minLen) {\n if (!o) return;\n if (typeof o === \"string\") return arrayLikeToArray(o, minLen);\n var n = Object.prototype.toString.call(o).slice(8, -1);\n if (n === \"Object\" && o.constructor) n = o.constructor.name;\n if (n === \"Map\" || n === \"Set\") return Array.from(o);\n if (n === \"Arguments\" || /^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(n)) return arrayLikeToArray(o, minLen);\n}","import arrayWithHoles from \"./arrayWithHoles.js\";\nimport iterableToArrayLimit from \"./iterableToArrayLimit.js\";\nimport unsupportedIterableToArray from \"./unsupportedIterableToArray.js\";\nimport nonIterableRest from \"./nonIterableRest.js\";\nexport default function _slicedToArray(arr, i) {\n return arrayWithHoles(arr) || iterableToArrayLimit(arr, i) || unsupportedIterableToArray(arr, i) || nonIterableRest();\n}","export default function _arrayWithHoles(arr) {\n if (Array.isArray(arr)) return arr;\n}","export default function _iterableToArrayLimit(r, l) {\n var t = null == r ? null : \"undefined\" != typeof Symbol && r[Symbol.iterator] || r[\"@@iterator\"];\n if (null != t) {\n var e,\n n,\n i,\n u,\n a = [],\n f = !0,\n o = !1;\n try {\n if (i = (t = t.call(r)).next, 0 === l) {\n if (Object(t) !== t) return;\n f = !1;\n } else for (; !(f = (e = i.call(t)).done) && (a.push(e.value), a.length !== l); f = !0);\n } catch (r) {\n o = !0, n = r;\n } finally {\n try {\n if (!f && null != t[\"return\"] && (u = t[\"return\"](), Object(u) !== u)) return;\n } finally {\n if (o) throw n;\n }\n }\n return a;\n }\n}","export default function _nonIterableRest() {\n throw new TypeError(\"Invalid attempt to destructure non-iterable instance.\\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method.\");\n}","import _typeof from \"./typeof.js\";\nexport default function _regeneratorRuntime() {\n \"use strict\"; /*! regenerator-runtime -- Copyright (c) 2014-present, Facebook, Inc. 
-- license (MIT): https://github.com/facebook/regenerator/blob/main/LICENSE */\n _regeneratorRuntime = function _regeneratorRuntime() {\n return e;\n };\n var t,\n e = {},\n r = Object.prototype,\n n = r.hasOwnProperty,\n o = Object.defineProperty || function (t, e, r) {\n t[e] = r.value;\n },\n i = \"function\" == typeof Symbol ? Symbol : {},\n a = i.iterator || \"@@iterator\",\n c = i.asyncIterator || \"@@asyncIterator\",\n u = i.toStringTag || \"@@toStringTag\";\n function define(t, e, r) {\n return Object.defineProperty(t, e, {\n value: r,\n enumerable: !0,\n configurable: !0,\n writable: !0\n }), t[e];\n }\n try {\n define({}, \"\");\n } catch (t) {\n define = function define(t, e, r) {\n return t[e] = r;\n };\n }\n function wrap(t, e, r, n) {\n var i = e && e.prototype instanceof Generator ? e : Generator,\n a = Object.create(i.prototype),\n c = new Context(n || []);\n return o(a, \"_invoke\", {\n value: makeInvokeMethod(t, r, c)\n }), a;\n }\n function tryCatch(t, e, r) {\n try {\n return {\n type: \"normal\",\n arg: t.call(e, r)\n };\n } catch (t) {\n return {\n type: \"throw\",\n arg: t\n };\n }\n }\n e.wrap = wrap;\n var h = \"suspendedStart\",\n l = \"suspendedYield\",\n f = \"executing\",\n s = \"completed\",\n y = {};\n function Generator() {}\n function GeneratorFunction() {}\n function GeneratorFunctionPrototype() {}\n var p = {};\n define(p, a, function () {\n return this;\n });\n var d = Object.getPrototypeOf,\n v = d && d(d(values([])));\n v && v !== r && n.call(v, a) && (p = v);\n var g = GeneratorFunctionPrototype.prototype = Generator.prototype = Object.create(p);\n function defineIteratorMethods(t) {\n [\"next\", \"throw\", \"return\"].forEach(function (e) {\n define(t, e, function (t) {\n return this._invoke(e, t);\n });\n });\n }\n function AsyncIterator(t, e) {\n function invoke(r, o, i, a) {\n var c = tryCatch(t[r], t, o);\n if (\"throw\" !== c.type) {\n var u = c.arg,\n h = u.value;\n return h && \"object\" == _typeof(h) && n.call(h, \"__await\") ? e.resolve(h.__await).then(function (t) {\n invoke(\"next\", t, i, a);\n }, function (t) {\n invoke(\"throw\", t, i, a);\n }) : e.resolve(h).then(function (t) {\n u.value = t, i(u);\n }, function (t) {\n return invoke(\"throw\", t, i, a);\n });\n }\n a(c.arg);\n }\n var r;\n o(this, \"_invoke\", {\n value: function value(t, n) {\n function callInvokeWithMethodAndArg() {\n return new e(function (e, r) {\n invoke(t, n, e, r);\n });\n }\n return r = r ? r.then(callInvokeWithMethodAndArg, callInvokeWithMethodAndArg) : callInvokeWithMethodAndArg();\n }\n });\n }\n function makeInvokeMethod(e, r, n) {\n var o = h;\n return function (i, a) {\n if (o === f) throw new Error(\"Generator is already running\");\n if (o === s) {\n if (\"throw\" === i) throw a;\n return {\n value: t,\n done: !0\n };\n }\n for (n.method = i, n.arg = a;;) {\n var c = n.delegate;\n if (c) {\n var u = maybeInvokeDelegate(c, n);\n if (u) {\n if (u === y) continue;\n return u;\n }\n }\n if (\"next\" === n.method) n.sent = n._sent = n.arg;else if (\"throw\" === n.method) {\n if (o === h) throw o = s, n.arg;\n n.dispatchException(n.arg);\n } else \"return\" === n.method && n.abrupt(\"return\", n.arg);\n o = f;\n var p = tryCatch(e, r, n);\n if (\"normal\" === p.type) {\n if (o = n.done ? 
s : l, p.arg === y) continue;\n return {\n value: p.arg,\n done: n.done\n };\n }\n \"throw\" === p.type && (o = s, n.method = \"throw\", n.arg = p.arg);\n }\n };\n }\n function maybeInvokeDelegate(e, r) {\n var n = r.method,\n o = e.iterator[n];\n if (o === t) return r.delegate = null, \"throw\" === n && e.iterator[\"return\"] && (r.method = \"return\", r.arg = t, maybeInvokeDelegate(e, r), \"throw\" === r.method) || \"return\" !== n && (r.method = \"throw\", r.arg = new TypeError(\"The iterator does not provide a '\" + n + \"' method\")), y;\n var i = tryCatch(o, e.iterator, r.arg);\n if (\"throw\" === i.type) return r.method = \"throw\", r.arg = i.arg, r.delegate = null, y;\n var a = i.arg;\n return a ? a.done ? (r[e.resultName] = a.value, r.next = e.nextLoc, \"return\" !== r.method && (r.method = \"next\", r.arg = t), r.delegate = null, y) : a : (r.method = \"throw\", r.arg = new TypeError(\"iterator result is not an object\"), r.delegate = null, y);\n }\n function pushTryEntry(t) {\n var e = {\n tryLoc: t[0]\n };\n 1 in t && (e.catchLoc = t[1]), 2 in t && (e.finallyLoc = t[2], e.afterLoc = t[3]), this.tryEntries.push(e);\n }\n function resetTryEntry(t) {\n var e = t.completion || {};\n e.type = \"normal\", delete e.arg, t.completion = e;\n }\n function Context(t) {\n this.tryEntries = [{\n tryLoc: \"root\"\n }], t.forEach(pushTryEntry, this), this.reset(!0);\n }\n function values(e) {\n if (e || \"\" === e) {\n var r = e[a];\n if (r) return r.call(e);\n if (\"function\" == typeof e.next) return e;\n if (!isNaN(e.length)) {\n var o = -1,\n i = function next() {\n for (; ++o < e.length;) if (n.call(e, o)) return next.value = e[o], next.done = !1, next;\n return next.value = t, next.done = !0, next;\n };\n return i.next = i;\n }\n }\n throw new TypeError(_typeof(e) + \" is not iterable\");\n }\n return GeneratorFunction.prototype = GeneratorFunctionPrototype, o(g, \"constructor\", {\n value: GeneratorFunctionPrototype,\n configurable: !0\n }), o(GeneratorFunctionPrototype, \"constructor\", {\n value: GeneratorFunction,\n configurable: !0\n }), GeneratorFunction.displayName = define(GeneratorFunctionPrototype, u, \"GeneratorFunction\"), e.isGeneratorFunction = function (t) {\n var e = \"function\" == typeof t && t.constructor;\n return !!e && (e === GeneratorFunction || \"GeneratorFunction\" === (e.displayName || e.name));\n }, e.mark = function (t) {\n return Object.setPrototypeOf ? Object.setPrototypeOf(t, GeneratorFunctionPrototype) : (t.__proto__ = GeneratorFunctionPrototype, define(t, u, \"GeneratorFunction\")), t.prototype = Object.create(g), t;\n }, e.awrap = function (t) {\n return {\n __await: t\n };\n }, defineIteratorMethods(AsyncIterator.prototype), define(AsyncIterator.prototype, c, function () {\n return this;\n }), e.AsyncIterator = AsyncIterator, e.async = function (t, r, n, o, i) {\n void 0 === i && (i = Promise);\n var a = new AsyncIterator(wrap(t, r, n, o), i);\n return e.isGeneratorFunction(r) ? a : a.next().then(function (t) {\n return t.done ? 
t.value : a.next();\n });\n }, defineIteratorMethods(g), define(g, u, \"Generator\"), define(g, a, function () {\n return this;\n }), define(g, \"toString\", function () {\n return \"[object Generator]\";\n }), e.keys = function (t) {\n var e = Object(t),\n r = [];\n for (var n in e) r.push(n);\n return r.reverse(), function next() {\n for (; r.length;) {\n var t = r.pop();\n if (t in e) return next.value = t, next.done = !1, next;\n }\n return next.done = !0, next;\n };\n }, e.values = values, Context.prototype = {\n constructor: Context,\n reset: function reset(e) {\n if (this.prev = 0, this.next = 0, this.sent = this._sent = t, this.done = !1, this.delegate = null, this.method = \"next\", this.arg = t, this.tryEntries.forEach(resetTryEntry), !e) for (var r in this) \"t\" === r.charAt(0) && n.call(this, r) && !isNaN(+r.slice(1)) && (this[r] = t);\n },\n stop: function stop() {\n this.done = !0;\n var t = this.tryEntries[0].completion;\n if (\"throw\" === t.type) throw t.arg;\n return this.rval;\n },\n dispatchException: function dispatchException(e) {\n if (this.done) throw e;\n var r = this;\n function handle(n, o) {\n return a.type = \"throw\", a.arg = e, r.next = n, o && (r.method = \"next\", r.arg = t), !!o;\n }\n for (var o = this.tryEntries.length - 1; o >= 0; --o) {\n var i = this.tryEntries[o],\n a = i.completion;\n if (\"root\" === i.tryLoc) return handle(\"end\");\n if (i.tryLoc <= this.prev) {\n var c = n.call(i, \"catchLoc\"),\n u = n.call(i, \"finallyLoc\");\n if (c && u) {\n if (this.prev < i.catchLoc) return handle(i.catchLoc, !0);\n if (this.prev < i.finallyLoc) return handle(i.finallyLoc);\n } else if (c) {\n if (this.prev < i.catchLoc) return handle(i.catchLoc, !0);\n } else {\n if (!u) throw new Error(\"try statement without catch or finally\");\n if (this.prev < i.finallyLoc) return handle(i.finallyLoc);\n }\n }\n }\n },\n abrupt: function abrupt(t, e) {\n for (var r = this.tryEntries.length - 1; r >= 0; --r) {\n var o = this.tryEntries[r];\n if (o.tryLoc <= this.prev && n.call(o, \"finallyLoc\") && this.prev < o.finallyLoc) {\n var i = o;\n break;\n }\n }\n i && (\"break\" === t || \"continue\" === t) && i.tryLoc <= e && e <= i.finallyLoc && (i = null);\n var a = i ? i.completion : {};\n return a.type = t, a.arg = e, i ? (this.method = \"next\", this.next = i.finallyLoc, y) : this.complete(a);\n },\n complete: function complete(t, e) {\n if (\"throw\" === t.type) throw t.arg;\n return \"break\" === t.type || \"continue\" === t.type ? this.next = t.arg : \"return\" === t.type ? 
(this.rval = this.arg = t.arg, this.method = \"return\", this.next = \"end\") : \"normal\" === t.type && e && (this.next = e), y;\n },\n finish: function finish(t) {\n for (var e = this.tryEntries.length - 1; e >= 0; --e) {\n var r = this.tryEntries[e];\n if (r.finallyLoc === t) return this.complete(r.completion, r.afterLoc), resetTryEntry(r), y;\n }\n },\n \"catch\": function _catch(t) {\n for (var e = this.tryEntries.length - 1; e >= 0; --e) {\n var r = this.tryEntries[e];\n if (r.tryLoc === t) {\n var n = r.completion;\n if (\"throw\" === n.type) {\n var o = n.arg;\n resetTryEntry(r);\n }\n return o;\n }\n }\n throw new Error(\"illegal catch attempt\");\n },\n delegateYield: function delegateYield(e, r, n) {\n return this.delegate = {\n iterator: values(e),\n resultName: r,\n nextLoc: n\n }, \"next\" === this.method && (this.arg = t), y;\n }\n }, e;\n}","function asyncGeneratorStep(gen, resolve, reject, _next, _throw, key, arg) {\n try {\n var info = gen[key](arg);\n var value = info.value;\n } catch (error) {\n reject(error);\n return;\n }\n if (info.done) {\n resolve(value);\n } else {\n Promise.resolve(value).then(_next, _throw);\n }\n}\nexport default function _asyncToGenerator(fn) {\n return function () {\n var self = this,\n args = arguments;\n return new Promise(function (resolve, reject) {\n var gen = fn.apply(self, args);\n function _next(value) {\n asyncGeneratorStep(gen, resolve, reject, _next, _throw, \"next\", value);\n }\n function _throw(err) {\n asyncGeneratorStep(gen, resolve, reject, _next, _throw, \"throw\", err);\n }\n _next(undefined);\n });\n };\n}","import React from 'react';\n\nfunction TextInput(props) {\n// const [value, setValue] = useState(''); // useState hook to manage the input value\n \n const fun = props.setValue;\n\n return (\n fun(e.target.value)}\n placeholder={props.placeholder}\n />\n );\n}\n\nexport default TextInput;\n","import React, { useState, useEffect } from \"react\";\nimport TextInput from \"./textBox\";\nimport Modal from \"react-modal\";\n\nconst billion = 1000000000;\nlet configPath = \"/gpu_poor/all_configs.json\";\nif (\n window.location.hostname === \"localhost\" ||\n window.location.hostname === \"127.0.0.1\"\n) {\n configPath = \"/gpu_poor/all_configs.json\";\n}\nconst MAX_FILE_SIZE = 500000;\n\n// console.log(configPath);\n\n/*\ndropdownTrnOrNot: 'inf', 'trn', 'inf_vLLM','inf_exL','inf_ggml'\ndropdownFullOrNot: 'no_trn', 'lora_trn, 'full_trn'\ndropdownOpt: 'no_opt', 'sgd_opt','adam_opt'\ndropdownQuant: 'no_quant','bnb_int8','bnb_q4'\n*/\nconst specialNamesMapping = {\n \"meta-llama/Llama-2-7b\": \"meta-llama/Llama-2-7b-hf\",\n \"meta-llama/Llama-13-7b\": \"meta-llama/Llama-13-7b-hf\",\n \"meta-llama/Llama-2-70b\": \"meta-llama/Llama-13-70b-hf\",\n};\n\nfunction specialMapping(name) {\n if (name in specialNamesMapping) {\n return specialNamesMapping[name];\n }\n return name;\n}\n\nfunction getKey(keys, obj, defaultVal) {\n let toReturn = null;\n for (const key of keys) {\n if (obj.hasOwnProperty(key)) {\n // console.log(\"found: \",key);\n toReturn = obj[key];\n break;\n }\n }\n if (toReturn == null) {\n return defaultVal;\n }\n return toReturn;\n}\n\nfunction computeOverheadGGML(contextLen) {\n return 0.1 * contextLen;\n}\n\nfunction computeInferenceOnlyActivationMemory(contextLen, parsedConfig) {\n const hiddenDim = parsedConfig[\"hiddenDim\"];\n const heads = parsedConfig[\"heads\"];\n\n //return ((1000*4096*5)*2 + (1000*1000*32*2))/(1024*1024)\n return (\n (contextLen * hiddenDim * 5 * 2 + contextLen * contextLen * heads * 2) 
/\n (1024 * 1024)\n );\n}\n\n//floatBytes, quant\nfunction computeModelSizeGGML(parsedConfig, quant) {\n const vocab = parsedConfig[\"vocab\"],\n heads = parsedConfig[\"heads\"],\n numLayers = parsedConfig[\"num_layers\"],\n hiddenDim = parsedConfig[\"hiddenDim\"],\n interDim = parsedConfig[\"interDim\"];\n\n const totalParams =\n vocab * hiddenDim * 2 +\n numLayers * 4 * hiddenDim * hiddenDim +\n numLayers * 3 * interDim * hiddenDim;\n\n const other_v_down_params =\n numLayers * hiddenDim * hiddenDim + numLayers * hiddenDim * interDim;\n\n const other_params_Q2K =\n totalParams -\n (hiddenDim * hiddenDim * numLayers * 2 + 2 * vocab * hiddenDim);\n\n const mult_factor_dic = {\n ggml_QK4_0: 18,\n ggml_QK4_1: 20,\n ggml_QK5_0: 22,\n ggml_QK5_1: 24,\n ggml_QK8_0: 34,\n ggml_QK8_1: 40,\n };\n\n const mult_factor_dic_64 = {\n ggml_Q6_K: 54.0,\n ggml_Q3: 26.0,\n ggml_Q4: 38.0,\n ggml_Q5: 46.0,\n };\n\n //Q2_K is 22.0\n\n const mult_factor_dic_combination = {\n ggml_Q3_K_L: [38.0, 26.0],\n ggml_Q3_K_M: [46.0, 26.0],\n ggml_QK4_K_S: [46.0, 38.0],\n ggml_QK4_K_M: [54.0, 38.0],\n ggml_QK5_K_M: [54.0, 46.0],\n ggml_Q2_K: [26.0, 22.0],\n };\n\n let total = 0;\n if (mult_factor_dic.hasOwnProperty(quant)) {\n total = (mult_factor_dic[quant] * totalParams) / (32 * 1024 * 1024);\n }\n if (mult_factor_dic_64.hasOwnProperty(quant)) {\n total = (mult_factor_dic_64[quant] * totalParams) / (64 * 1024 * 1024);\n }\n if (mult_factor_dic_combination.hasOwnProperty(quant)) {\n const factors = mult_factor_dic_combination[quant];\n\n if (quant === \"ggml_Q2_K\") {\n total =\n ((totalParams - other_params_Q2K) * factors[1] +\n other_params_Q2K * factors[0]) /\n (64 * 1024 * 1024);\n } else {\n total =\n ((totalParams - other_v_down_params) * factors[1] +\n other_v_down_params * factors[0]) /\n (64 * 1024 * 1024);\n }\n }\n\n return total;\n}\n\nfunction computeModelSize(parsedConfig) {\n const vocab = parsedConfig[\"vocab\"],\n heads = parsedConfig[\"heads\"],\n numLayers = parsedConfig[\"num_layers\"],\n hiddenDim = parsedConfig[\"hiddenDim\"],\n interDim = parsedConfig[\"interDim\"];\n\n // console.log(vocab, heads, numLayers, hiddenDim, interDim);\n // let fB = floatBytes;\n // if (quant === 'bnb_int8'){fB = 1;}\n // if (quant === 'bnb_q4'){fB = 0.5;}\n\n const out =\n vocab * hiddenDim * 2 +\n numLayers * 4 * hiddenDim * hiddenDim +\n numLayers * 3 * interDim * hiddenDim;\n // console.log(\"this is out: \", out)\n\n return out;\n}\n\nfunction getGradOptMemory(\n dropdownFullOrNot,\n dropdownOpt,\n dropdownQuant,\n modelSize,\n floatBytes,\n parsedConfig,\n batchSize = 1\n) {\n const full = dropdownFullOrNot,\n opt = dropdownOpt,\n quant = dropdownQuant;\n console.log(full, opt, quant);\n\n if (full === \"full_trn\" && opt === \"adam_opt\" && quant === \"no_quant\") {\n return modelSize * 3 * floatBytes;\n }\n\n if (full === \"full_trn\" && opt === \"adam_opt\" && quant === \"bnb_int8\") {\n return (\n modelSize * 3 * 1 + getExtraMemory(parsedConfig, quant) * batchSize\n ); //Some extra mmeory that bnb int8 takes\n }\n\n if (full === \"full_trn\" && opt === \"adam_opt\" && quant === \"bnb_q4\") {\n //Need to check if q4 also takes extra memory\n return (\n modelSize * 3 * 0.5 +\n getExtraMemory(parsedConfig, quant) * batchSize\n );\n }\n //------------\n if (full === \"full_trn\" && opt === \"sgd_opt\" && quant === \"no_quant\") {\n return modelSize * 1 * floatBytes;\n }\n\n if (full === \"full_trn\" && opt === \"sgd_opt\" && quant === \"bnb_int8\") {\n return (\n modelSize * 1 * 1 + getExtraMemory(parsedConfig, 
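The computeModelSize helper above estimates a parameter count from the parsed config alone: embedding plus LM-head weights, four h×h attention projections per layer, and three h×d_ff MLP projections per layer. A minimal standalone sketch of that arithmetic follows; the function name and the example numbers are illustrative only, not taken from any particular config.

function approxParamCount({ vocab, hiddenDim, interDim, num_layers }) {
  return (
    vocab * hiddenDim * 2 +                  // input embedding + LM head
    num_layers * 4 * hiddenDim * hiddenDim + // Q, K, V, O projections per layer
    num_layers * 3 * interDim * hiddenDim    // gate/up/down MLP projections per layer
  );
}
// approxParamCount({ vocab: 32000, hiddenDim: 4096, interDim: 11008, num_layers: 32 })
// ≈ 6.74e9, i.e. roughly a "7B" model.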
quant) * batchSize\n );\n }\n\n if (full === \"full_trn\" && opt === \"sgd_opt\" && quant === \"bnb_q4\") {\n return (\n modelSize * 1 * 0.5 +\n getExtraMemory(parsedConfig, quant) * batchSize\n );\n }\n\n //4*layer*8*hid*4*2\n\n //------------\n if (full === \"lora_trn\" && opt === \"adam_opt\" && quant === \"no_quant\") {\n return (\n parsedConfig.num_layers * 8 * parsedConfig.hiddenDim * 2 * 4 * 3 * 2\n );\n }\n\n if (full === \"lora_trn\" && opt === \"adam_opt\" && quant === \"bnb_int8\") {\n console.log(\"here!\");\n return (\n parsedConfig.num_layers * 8 * parsedConfig.hiddenDim * 2 * 4 * 3 +\n getExtraMemory(parsedConfig, quant) * batchSize\n );\n }\n\n if (full === \"lora_trn\" && opt === \"adam_opt\" && quant === \"bnb_q4\") {\n return (\n parsedConfig.num_layers * 8 * parsedConfig.hiddenDim * 2 * 4 * 3 +\n getExtraMemory(parsedConfig, quant) * batchSize\n );\n }\n //------------\n if (full === \"lora_trn\" && opt === \"sgd_opt\" && quant === \"no_quant\") {\n return parsedConfig.num_layers * 8 * parsedConfig.hiddenDim * 2 * 4 * 2;\n }\n\n if (full === \"lora_trn\" && opt === \"sgd_opt\" && quant === \"bnb_int8\") {\n return (\n parsedConfig.num_layers * 8 * parsedConfig.hiddenDim * 2 * 4 * 1 +\n getExtraMemory(parsedConfig, quant) * batchSize\n );\n }\n\n if (full === \"lora_trn\" && opt === \"sgd_opt\" && quant === \"bnb_q4\") {\n return (\n parsedConfig.num_layers * 8 * parsedConfig.hiddenDim * 2 * 4 * 1 +\n getExtraMemory(parsedConfig, quant) * batchSize\n );\n }\n\n console.log(full, opt, quant);\n throw new Error(\"Invalid combination of values\");\n}\n\nfunction getExtraMemory(parsedConfig, quant) {\n const constant_8_overhead = 200.0,\n constant_8_extra = 350.0;\n const constant_4_overhead = 350.0,\n constant_4_extra = 550.0;\n\n const common =\n (10 * parsedConfig.hiddenDim +\n 5 * parsedConfig.hiddenDim +\n 4 * parsedConfig.interDim +\n 2 * parsedConfig.interDim) *\n parsedConfig.num_layers;\n\n let extra_mem = 0;\n\n if (quant === \"bnb_int8\") {\n extra_mem = constant_8_overhead * common + constant_8_extra * common;\n }\n\n if (quant === \"bnb_q4\") {\n extra_mem = constant_4_overhead * common + constant_4_extra * common;\n }\n\n console.log(\"extra mem\", extra_mem);\n return extra_mem;\n}\n\nfunction getActivationMemory(\n parsedConfig,\n contextLen,\n floatBytes,\n quant,\n dropdownFullOrNot,\n batchSize = 1\n) {\n const heads = parsedConfig[\"heads\"],\n numLayers = parsedConfig[\"num_layers\"],\n hiddenDim = parsedConfig[\"hiddenDim\"],\n interDim = parsedConfig[\"interDim\"];\n\n let fB = floatBytes;\n const len = contextLen;\n\n // if (quant==='bnb_int8'){fB=1;}\n // if (quant==='bnb_q4'){fB=0.5;}\n\n console.log(\"activation: \", heads, numLayers, hiddenDim, interDim);\n\n //const attn_per_layer = qkv + qk (transpose) + attn mat + attn mat convert tp fp32 + attn mat divided by sqrt +\n const attn_per_layer =\n len * hiddenDim * 3 * fB +\n len * hiddenDim * 2 * fB +\n len * len * heads * fB +\n len * len * heads * 4 +\n len * len * heads * fB +\n len * hiddenDim * fB +\n len * hiddenDim * fB +\n len * hiddenDim * fB;\n\n // heads*len*len*4 + heads*len*len*fB + 3*hiddenDim*len*fB + hiddenDim*len*fB + hiddenDim*len*fB\n\n const ffn_per_layer =\n hiddenDim * len * fB +\n hiddenDim * len * fB +\n fB * 5 * len * interDim +\n interDim * len * fB;\n\n const norm = len * 4 * 2 + len * hiddenDim * fB * 6;\n\n let lora = 0;\n // if (dropdownFullOrNot==='lora_trn'){\n // lora = (8*len*2 + hiddenDim*len*2)*4;\n // }\n\n const total_per_layer = attn_per_layer + ffn_per_layer 
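getGradOptMemory above encodes a common rule of thumb: Adam keeps gradients plus two moment buffers (about three extra copies of the trainable weights), SGD keeps roughly one, and with LoRA only the rank-8 adapter matrices are trainable. A hedged sketch of that rule, independent of the UI wiring (the extra bnb quantization terms are omitted here):

// Sketch of the gradient + optimizer-state estimate, mirroring the
// multipliers used above; bytesPerParam = 2 corresponds to fp16 weights.
function gradOptBytes(trainableParams, optimizer, bytesPerParam = 2) {
  const copies = optimizer === "adam_opt" ? 3 : 1; // grad + 2 Adam moments vs. grad only
  return trainableParams * copies * bytesPerParam;
}
// For LoRA, the trainable parameters are just the adapter matrices:
function loraTrainableParams(num_layers, hiddenDim, rank = 8) {
  return num_layers * 2 * rank * hiddenDim; // an A and a B matrix per layer
}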
+ norm + lora;\n console.log(\n \"total per layer: \",\n convertToMB(attn_per_layer),\n convertToMB(ffn_per_layer),\n convertToMB(norm),\n convertToMB(lora)\n );\n\n //total per layer: 4.2724609375 5.55419921875 6.409454345703125 8.02001953125\n let total = total_per_layer * numLayers;\n total = total * batchSize;\n\n console.log(\"this is total: \", total, attn_per_layer + ffn_per_layer);\n\n return total;\n}\n\nfunction checkCombinationInference(\n trnType,\n quantType,\n setErrorMessage,\n openModal\n) {\n const ggml_quants = [\n \"ggml_QK4_0\",\n \"ggml_QK4_1\",\n \"ggml_QK5_0\",\n \"ggml_QK5_1\",\n \"ggml_QK8_0\",\n \"ggml_QK8_1\",\n\n \"ggml_Q2_K\",\n \n \"ggml_Q3_K_L\",\n \"ggml_Q3_K_M\",\n\n \"ggml_QK4_K_M\",\n \"ggml_QK4_K_S\",\n\n \"ggml_QK5_K_M\",\n \"ggml_Q6_K\",\n ];\n\n if (ggml_quants.includes(quantType)) {\n if (trnType != \"inf_ggml\") {\n setErrorMessage(\n \"Invalid combination of inference type/quantization\"\n );\n openModal();\n return false;\n }\n }\n if (quantType != \"no_quant\" && trnType === \"inf_vLLM\") {\n setErrorMessage(\"vLLm doesn't support quant (maybe)\");\n openModal();\n return false;\n }\n if (\n trnType === \"inf_ggml\" &&\n (quantType === \"bnb_int8\" || quantType === \"bnb_q4\")\n ) {\n setErrorMessage(\"ggml doesn't support bnb\");\n openModal();\n return false;\n }\n if (\n trnType === \"inf_ggml\" &&\n (quantType === \"no_quant\")\n ) {\n setErrorMessage(\"If you want no quant then pick vLLM/HF inference framework\");\n openModal();\n return false;\n }\n\n if (trnType === \"inf_exL\") {\n setErrorMessage(\"exLlama hasn't been added yet :)\");\n openModal();\n return false;\n }\n return true;\n}\n\nfunction sanityUploadedConfig(jsonUploadedData, setErrorMessage, openModal) {\n function uploadError() {\n setErrorMessage(\n \"upload config doesn't have correct keys. make sure your config has the keys present in https://huggingface.co/codellama/CodeLlama-7b-hf/blob/main/config.json\"\n );\n openModal();\n return null;\n }\n\n if (Object.keys(jsonUploadedData).length === 0) {\n setErrorMessage(\"Uploaded json is empty :)\");\n openModal();\n return null; // JSON is empty\n }\n\n console.log(jsonUploadedData);\n\n let vocab = 0,\n hiddenDim = 0,\n heads = 0,\n interDim = 0,\n num_layers = 0;\n\n if (jsonUploadedData.hasOwnProperty(\"vocab_size\")) {\n vocab = jsonUploadedData[\"vocab_size\"];\n } else {\n uploadError();\n return null;\n }\n\n if (jsonUploadedData.hasOwnProperty(\"hidden_size\")) {\n hiddenDim = jsonUploadedData[\"hidden_size\"];\n } else {\n uploadError();\n return null;\n }\n\n if (jsonUploadedData.hasOwnProperty(\"num_attention_heads\")) {\n heads = jsonUploadedData[\"num_attention_heads\"];\n } else {\n uploadError();\n return null;\n }\n\n if (jsonUploadedData.hasOwnProperty(\"intermediate_size\")) {\n interDim = jsonUploadedData[\"intermediate_size\"];\n } else {\n uploadError();\n return null;\n }\n\n if (jsonUploadedData.hasOwnProperty(\"num_hidden_layers\")) {\n num_layers = jsonUploadedData[\"num_hidden_layers\"];\n } else {\n uploadError();\n return null;\n }\n\n return {\n vocab: vocab,\n hiddenDim: hiddenDim,\n heads: heads,\n interDim: interDim,\n num_layers: num_layers,\n };\n}\n\nfunction getParseConfig(parsedJSONData, setErrorMessage, openModal) {\n console.log(Object.keys(parsedJSONData).length);\n if (Object.keys(parsedJSONData).length == 0) {\n setErrorMessage(\n \"Huggingface config of this id doesn't have correct keys. e.g. this is a ggml model. 
Please upload your config in correct format\"\n );\n openModal();\n return null;\n }\n\n const vocab = getKey([\"vocab_size\"], parsedJSONData, 32000);\n const hiddenDim = getKey(\n [\"hidden_size\", \"d_model\", \"n_embd\"],\n parsedJSONData,\n 64\n );\n const heads = getKey(\n [\"num_attention_heads\", \"num_heads\", \"n_head\"],\n parsedJSONData,\n 12\n );\n const interDim = getKey(\n [\"intermediate_size\", \"n_inner\", \"d_ff\"],\n parsedJSONData,\n 12\n );\n const num_layers = getKey(\n [\"num_layers\", \"num_hidden_layers\", \"n_layer\"],\n parsedJSONData,\n 12\n );\n\n return {\n vocab: vocab,\n hiddenDim: hiddenDim,\n heads: heads,\n interDim: interDim,\n num_layers: num_layers,\n };\n}\n\nfunction getDefault(modelSize) {\n //If only model size is provided. Guess the values\n\n let vocab = 32000;\n let heads = 32;\n let numLayers = 32;\n\n //vocab*h + numLayers*4*h*h + 3*4*h*h*numLayers = modelSize*10^9\n const A = numLayers * 4 + 3 * 4 * numLayers;\n const B = vocab;\n const C = -1 * modelSize * billion;\n\n let h = (-B + Math.sqrt(B * B - 4 * A * C)) / (2 * A);\n h = Math.ceil(h);\n\n return {\n vocab: vocab,\n hiddenDim: h,\n heads: heads,\n interDim: 4 * h,\n num_layers: numLayers,\n };\n}\n\nfunction convertToMB(value) {\n return value / (1024 * 1024);\n}\n\nfunction convertToMBModelSize(value, quant) {\n let extra = 0;\n let fB = 2;\n let size = (value * fB) / (1024 * 1024);\n if (quant === \"bnb_int8\" || quant === \"bnb_q4\") {\n extra = 0.06 * size;\n }\n\n if (quant === \"bnb_int8\") {\n size = size / 2;\n }\n if (quant === \"bnb_q4\") {\n size = size / 4;\n }\n return size + extra;\n}\n\nfunction convertToBytes(floatType) {\n return 2.0;\n}\n\nfunction getAllComputedData(\n parsedJSONData,\n jsonUploadedData,\n modelSize,\n contextLen,\n floatType,\n selections,\n setErrorMessage,\n openModal,\n batchSize\n) {\n let parsedConfig = null,\n modelSizeinB = null;\n let activationMemory = 0,\n gradAndOptMemory = 0;\n let inferenceMemory = 0;\n let totalMemory = 0;\n const floatBytes = convertToBytes(floatType);\n const quantType = selections.dropdownQuant;\n const trnType = selections.dropdownTrnOrNot;\n\n if (batchSize === \"\") {\n batchSize = \"1\";\n }\n\n let overHead = 650;\n if (!isValidPositiveInteger(contextLen)) {\n setErrorMessage(\n \"Context len can't be blank or have non numeric or negative/zero values.\"\n );\n openModal();\n return null;\n }\n\n if (!isValidPositiveInteger(batchSize)) {\n setErrorMessage(\n \"Batch size cant have non numeric or negative/zero values\"\n );\n openModal();\n return null;\n }\n\n if (parsedJSONData == null) {\n if (jsonUploadedData != null) {\n parsedConfig = sanityUploadedConfig(\n jsonUploadedData,\n setErrorMessage,\n openModal\n );\n console.log(parsedConfig, \"uploaded\");\n if (parsedConfig == null) {\n return null;\n }\n modelSizeinB = computeModelSize(parsedConfig);\n } else {\n if (!isNumberOrFloat(modelSize)) {\n console.log(\"error with model size\");\n setErrorMessage(\n \"Hugginface model id not available, enter model size(>0) or upload config\"\n );\n openModal();\n return null;\n }\n\n parsedConfig = getDefault(modelSize);\n modelSizeinB = modelSize * billion;\n }\n } else {\n parsedConfig = getParseConfig(\n parsedJSONData,\n setErrorMessage,\n openModal\n );\n if (parsedConfig == null) {\n return null;\n }\n console.log(parsedConfig);\n modelSizeinB = computeModelSize(parsedConfig);\n }\n\n let fB = floatBytes;\n if (quantType === \"bnb_int8\") {\n fB = 1;\n }\n if (quantType === \"bnb_q4\") {\n fB = 0.5;\n }\n 
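When no config is found and only a size in billions is given, getDefault above inverts the parameter-count formula: it fixes vocab, heads and layer count to typical values, assumes interDim = 4·h, and solves the quadratic 16·L·h² + vocab·h − params = 0 for the hidden size h. A small sketch of that inversion with illustrative defaults:

// Sketch of the hidden-size guess made in getDefault (interDim assumed 4*h).
function guessHiddenDim(paramCount, vocab = 32000, num_layers = 32) {
  const A = 16 * num_layers; // 4*h*h attention + 12*h*h MLP terms per layer
  const B = vocab;           // embedding term (another copy of this code uses 2*vocab)
  const C = -paramCount;
  return Math.ceil((-B + Math.sqrt(B * B - 4 * A * C)) / (2 * A));
}
// guessHiddenDim(7e9) ≈ 3667 under these assumptions; real 7B models use
// h = 4096 with a narrower interDim, so this is only a rough default.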
let modelSizeinMB = convertToMBModelSize(modelSizeinB, quantType);\n // console.log(modelSizeinB);\n\n //!Inference\n if (trnType != \"trn\") {\n const checkSanity = checkCombinationInference(\n trnType,\n quantType,\n setErrorMessage,\n openModal\n );\n if (!checkSanity) {\n return null;\n }\n\n if (trnType === \"inf\" || trnType === \"inf_vLLM\") {\n let fB = 2;\n //If bnb quant\n if (quantType === \"bnb_int8\") {\n fB = 1;\n }\n if (quantType === \"bnb_q4\") {\n fB = 0.5;\n }\n\n inferenceMemory = convertToMB(\n 2 *\n contextLen *\n 2 *\n 2 *\n parsedConfig[\"hiddenDim\"] *\n parsedConfig[\"num_layers\"]\n );\n\n activationMemory = computeInferenceOnlyActivationMemory(\n contextLen,\n parsedConfig\n );\n\n console.log(\n \"HERE!!!\",\n inferenceMemory,\n modelSizeinMB,\n overHead,\n activationMemory\n );\n }\n if (trnType === \"inf_ggml\") {\n modelSizeinMB = computeModelSizeGGML(parsedConfig, quantType);\n inferenceMemory = convertToMB(\n 1 *\n contextLen *\n 2 *\n 2 *\n parsedConfig[\"hiddenDim\"] *\n parsedConfig[\"num_layers\"]\n );\n activationMemory = computeInferenceOnlyActivationMemory(\n contextLen,\n parsedConfig\n );\n overHead = overHead + computeOverheadGGML(contextLen);\n }\n\n totalMemory =\n inferenceMemory + modelSizeinMB + overHead + activationMemory;\n } else {\n //! Train\n activationMemory = getActivationMemory(\n parsedConfig,\n contextLen,\n floatBytes,\n quantType,\n selections.dropdownFullOrNot,\n batchSize\n );\n\n activationMemory = convertToMB(activationMemory);\n console.log(\"got activation\", activationMemory);\n\n gradAndOptMemory = getGradOptMemory(\n selections.dropdownFullOrNot,\n selections.dropdownOpt,\n quantType,\n modelSizeinB,\n floatBytes,\n parsedConfig\n );\n\n console.log(\"got gradOpt\", gradAndOptMemory);\n\n gradAndOptMemory = convertToMB(gradAndOptMemory);\n totalMemory = modelSizeinMB + gradAndOptMemory + activationMemory;\n\n console.log(\"got total\", totalMemory);\n\n totalMemory = totalMemory + overHead;\n }\n\n return {\n Total: Math.ceil(totalMemory),\n \"KV Cache\": Math.ceil(inferenceMemory),\n \"Model Size\": Math.ceil(modelSizeinMB),\n \"Activation Memory\": Math.ceil(activationMemory),\n \"Grad & Optimizer memory\": Math.ceil(gradAndOptMemory),\n \"cuda + other overhead\": overHead,\n };\n}\n\n// function getAllNamesFromJsonCache(jsonData){\n// return Object.keys(jsonData)\n// }\n\n///Users/rahulchand/gpu_mem/public/all_configs.json\nasync function fetchParams(name) {\n // let output = fetch('https://huggingface.co/meta-llama/Llama-2-7b/raw/main/params.json');\n\n let response = await fetch(configPath);\n response = await response.json();\n // console.log(response.hasOwnProperty(name));\n\n return response.hasOwnProperty(name) ? 
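For the plain HF/vLLM inference path, getAllComputedData above sums four pieces: the (possibly quantized) weights, a KV cache that grows linearly with context length, a small activation buffer, and a fixed ~650 MB CUDA overhead. A hedged sketch of the KV-cache term as it is computed here:

// Sketch of the KV-cache estimate from the inference branch above:
// K and V tensors per layer, contextLen tokens of hiddenDim values each,
// bytesPerVal = 2 for fp16, times the extra factor of 2 the app applies.
function kvCacheMB(contextLen, hiddenDim, num_layers, bytesPerVal = 2, extra = 2) {
  const bytes = 2 * num_layers * contextLen * hiddenDim * bytesPerVal * extra;
  return bytes / (1024 * 1024);
}
// kvCacheMB(4096, 4096, 32) = 4096 MB for a 7B-style model at 4k context.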
response[name] : null;\n}\n\n// function isNumberOrFloat(value) {\n// return /^-?\\d+(\\.\\d+)?$/.test(value);\n// }\n\nfunction isNumberOrFloat(value) {\n const num = parseFloat(value);\n return !isNaN(num) && num > 0;\n}\n\nfunction isValidPositiveInteger(input) {\n const num = parseFloat(input);\n console.log(num, input);\n return Number.isInteger(num) && num > 0 && input.trim() !== \"\";\n}\n\n\n\nfunction App() {\n // let subtitle;\n const [modelSize, setModelSize] = useState(\"\");\n const [modelName, setModelName] = useState(\"\");\n const [contextLen, setContextLen] = useState(\"\");\n const [batchSize, setBatchSize] = useState(\"\");\n const [totalMemoryShown, setTotalMemoryShown] = useState(\" \");\n const [breakDownMemory, setBreakDownMemory] = useState(\" \");\n const [errorMessage, setErrorMessage] = useState(\"\");\n\n\n const [fileNameUpload, setFileNameUpload] = useState(\"\");\n\n const [modalIsOpen, setIsOpen] = React.useState(false);\n\n const [responseCache, setResponseCache] = useState(null); \n const [responseCacheKeys, setResponseCacheKeys] = useState(null); \n\n const [suggestions, setSuggestions] = useState([]);\n\n const [jsonData, setJsonData] = useState(null);\n\n function openModal() {\n setIsOpen(true);\n }\n\n function closeModal() {\n setIsOpen(false);\n }\n\n const handleFileClear = (event) => {\n setFileNameUpload(\"\");\n setJsonData(null);\n setTotalMemoryShown(\"\");\n setBreakDownMemory(\"\");\n };\n\n const handleFileChange = (event) => {\n const file = event.target.files[0];\n if (file) {\n // Check file size\n if (file.size > MAX_FILE_SIZE) {\n alert(\"File is too large. Please upload a smaller JSON file.\");\n return;\n }\n\n const reader = new FileReader();\n reader.onload = (e) => {\n try {\n const json = JSON.parse(e.target.result);\n setJsonData(json);\n event.target.value = null;\n } catch (error) {\n console.error(\"Error parsing JSON:\", error);\n alert(\"Invalid JSON file.\");\n }\n };\n setFileNameUpload(file.name);\n reader.readAsText(file);\n console.log(jsonData);\n }\n };\n\n const [selections, setSelections] = useState({\n dropdownTrnOrNot: \"inf\",\n dropdownFullOrNot: \"full_trn\",\n dropdownOpt: \"adam_opt\",\n dropdownQuant: \"no_quant\",\n dropdownGPU: \"rtx_3090\",\n });\n\n const handleChangeSelection = (e) => {\n const { name, value } = e.target;\n setSelections((prevState) => ({\n ...prevState,\n [name]: value,\n }));\n };\n\n // const handleChangeInText1 = (event) => {\n // setModelSize(event.target.value);\n // };\n\n const [output1, setOutput1] = useState(\"\");\n\n async function handleClickTokS() {\n setErrorMessage(\"To be added\");\n openModal();\n return;\n }\n\n async function handleReset() {\n setFileNameUpload(\"\");\n setJsonData(null);\n setTotalMemoryShown(\"\");\n setBreakDownMemory(\"\");\n setContextLen(\"\");\n setBatchSize(\"\");\n setModelSize(\"\");\n setModelName(\"\");\n }\n\n async function handleClick() {\n // let parsedConfig = await fetchParams(specialMapping(modelName));\n let parsedConfig = responseCache.hasOwnProperty(modelName) ? 
responseCache[modelName] : null; \n const out = getAllComputedData(\n parsedConfig,\n jsonData,\n modelSize,\n contextLen,\n 2,\n selections,\n setErrorMessage,\n openModal,\n batchSize\n );\n\n if (out == null) {\n return;\n }\n\n setTotalMemoryShown(`Total Memory: ${out[\"Total\"]} MB`);\n const jsonOut = JSON.stringify(out);\n setBreakDownMemory(`Breakdown(in MB): ${jsonOut}`);\n }\n\n // const handleClick = () => {\n\n // const trnVal = selections.dropdownTrnOrNot;\n // let totalMemory = 0;\n // let size = parseFloat(modelSize);\n // if (trnVal==='trn'){\n\n // }\n\n // console.log(modelSize);\n // console.log(isNumberOrFloat(modelSize));\n\n // // console.log(\"clicking\");\n // // setOutput1(selections.dropdownTrnOrNot + ' ' + selections.dropdownFullOrNot);\n\n // // console.log()\n\n // };\n\n useEffect(() => {\n // Your function here to populate myVariable\n const fetchData = async () => {\n // Fetch data or perform some other operation\n let response = await fetch(configPath);\n response = await response.json();\n setResponseCache(response);\n setResponseCacheKeys(Object.keys(response));\n };\n \n fetchData();\n }, []); \n\n useEffect(() => {\n if (modelName) {\n if (modelName.length>2){\n const filtered = responseCacheKeys.filter(item => item.startsWith(modelName));\n setSuggestions(filtered.slice(0,10));\n }\n else{\n setSuggestions([]);\n }\n }\n else{\n setSuggestions([]);\n }\n }, [modelName]);\n\n console.log(responseCache);\n\n return (\n OR a||125d?(a.sortIndex=c,f(t,a),null===h(r)&&a===h(t)&&(B?(E(L),L=-1):B=!0,K(H,c-d))):(a.sortIndex=e,f(r,a),A||z||(A=!0,I(J)));return a};\nexports.unstable_shouldYield=M;exports.unstable_wrapCallback=function(a){var b=y;return function(){var c=y;y=b;try{return a.apply(this,arguments)}finally{y=c}}};\n","'use strict';\n\nif (process.env.NODE_ENV === 'production') {\n module.exports = require('./cjs/scheduler.production.min.js');\n} else {\n module.exports = require('./cjs/scheduler.development.js');\n}\n","/**\n * Copyright (c) 2014-present, Facebook, Inc.\n *\n * This source code is licensed under the MIT license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n'use strict';\n\n/**\n * Similar to invariant but only logs a warning if the condition is not met.\n * This can be used to log issues in development environments in critical\n * paths. Removing the logging code for production environments will keep the\n * same logic and follow the same code paths.\n */\n\nvar __DEV__ = process.env.NODE_ENV !== 'production';\n\nvar warning = function() {};\n\nif (__DEV__) {\n var printWarning = function printWarning(format, args) {\n var len = arguments.length;\n args = new Array(len > 1 ? len - 1 : 0);\n for (var key = 1; key < len; key++) {\n args[key - 1] = arguments[key];\n }\n var argIndex = 0;\n var message = 'Warning: ' +\n format.replace(/%s/g, function() {\n return args[argIndex++];\n });\n if (typeof console !== 'undefined') {\n console.error(message);\n }\n try {\n // --- Welcome to debugging React ---\n // This error was thrown as a convenience so that you can use this stack\n // to find the callsite that caused this warning to fire.\n throw new Error(message);\n } catch (x) {}\n }\n\n warning = function(condition, format, args) {\n var len = arguments.length;\n args = new Array(len > 2 ? 
len - 2 : 0);\n for (var key = 2; key < len; key++) {\n args[key - 2] = arguments[key];\n }\n if (format === undefined) {\n throw new Error(\n '`warning(condition, format, ...args)` requires a warning ' +\n 'message argument'\n );\n }\n if (!condition) {\n printWarning.apply(null, [format].concat(args));\n }\n };\n}\n\nmodule.exports = warning;\n","// The module cache\nvar __webpack_module_cache__ = {};\n\n// The require function\nfunction __webpack_require__(moduleId) {\n\t// Check if module is in cache\n\tvar cachedModule = __webpack_module_cache__[moduleId];\n\tif (cachedModule !== undefined) {\n\t\treturn cachedModule.exports;\n\t}\n\t// Create a new module (and put it into the cache)\n\tvar module = __webpack_module_cache__[moduleId] = {\n\t\t// no module.id needed\n\t\t// no module.loaded needed\n\t\texports: {}\n\t};\n\n\t// Execute the module function\n\t__webpack_modules__[moduleId](module, module.exports, __webpack_require__);\n\n\t// Return the exports of the module\n\treturn module.exports;\n}\n\n// expose the modules object (__webpack_modules__)\n__webpack_require__.m = __webpack_modules__;\n\n","// getDefaultExport function for compatibility with non-harmony modules\n__webpack_require__.n = function(module) {\n\tvar getter = module && module.__esModule ?\n\t\tfunction() { return module['default']; } :\n\t\tfunction() { return module; };\n\t__webpack_require__.d(getter, { a: getter });\n\treturn getter;\n};","// define getter functions for harmony exports\n__webpack_require__.d = function(exports, definition) {\n\tfor(var key in definition) {\n\t\tif(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n\t\t\tObject.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n\t\t}\n\t}\n};","__webpack_require__.f = {};\n// This file contains only the entry chunk.\n// The chunk loading function for additional chunks\n__webpack_require__.e = function(chunkId) {\n\treturn Promise.all(Object.keys(__webpack_require__.f).reduce(function(promises, key) {\n\t\t__webpack_require__.f[key](chunkId, promises);\n\t\treturn promises;\n\t}, []));\n};","// This function allow to reference async chunks\n__webpack_require__.u = function(chunkId) {\n\t// return url for filenames based on template\n\treturn \"static/js/\" + chunkId + \".\" + \"dccdf937\" + \".chunk.js\";\n};","// This function allow to reference async chunks\n__webpack_require__.miniCssF = function(chunkId) {\n\t// return url for filenames based on template\n\treturn undefined;\n};","__webpack_require__.o = function(obj, prop) { return Object.prototype.hasOwnProperty.call(obj, prop); }","var inProgress = {};\nvar dataWebpackPrefix = \"gpu_mem:\";\n// loadScript function to load a script via script tag\n__webpack_require__.l = function(url, done, key, chunkId) {\n\tif(inProgress[url]) { inProgress[url].push(done); return; }\n\tvar script, needAttach;\n\tif(key !== undefined) {\n\t\tvar scripts = document.getElementsByTagName(\"script\");\n\t\tfor(var i = 0; i < scripts.length; i++) {\n\t\t\tvar s = scripts[i];\n\t\t\tif(s.getAttribute(\"src\") == url || s.getAttribute(\"data-webpack\") == dataWebpackPrefix + key) { script = s; break; }\n\t\t}\n\t}\n\tif(!script) {\n\t\tneedAttach = true;\n\t\tscript = document.createElement('script');\n\n\t\tscript.charset = 'utf-8';\n\t\tscript.timeout = 120;\n\t\tif (__webpack_require__.nc) {\n\t\t\tscript.setAttribute(\"nonce\", __webpack_require__.nc);\n\t\t}\n\t\tscript.setAttribute(\"data-webpack\", dataWebpackPrefix + key);\n\n\t\tscript.src = 
url;\n\t}\n\tinProgress[url] = [done];\n\tvar onScriptComplete = function(prev, event) {\n\t\t// avoid mem leaks in IE.\n\t\tscript.onerror = script.onload = null;\n\t\tclearTimeout(timeout);\n\t\tvar doneFns = inProgress[url];\n\t\tdelete inProgress[url];\n\t\tscript.parentNode && script.parentNode.removeChild(script);\n\t\tdoneFns && doneFns.forEach(function(fn) { return fn(event); });\n\t\tif(prev) return prev(event);\n\t}\n\tvar timeout = setTimeout(onScriptComplete.bind(null, undefined, { type: 'timeout', target: script }), 120000);\n\tscript.onerror = onScriptComplete.bind(null, script.onerror);\n\tscript.onload = onScriptComplete.bind(null, script.onload);\n\tneedAttach && document.head.appendChild(script);\n};","// define __esModule on exports\n__webpack_require__.r = function(exports) {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","__webpack_require__.p = \"/gpu_poor/\";","// no baseURI\n\n// object to store loaded and loading chunks\n// undefined = chunk not loaded, null = chunk preloaded/prefetched\n// [resolve, reject, Promise] = chunk loading, 0 = chunk loaded\nvar installedChunks = {\n\t179: 0\n};\n\n__webpack_require__.f.j = function(chunkId, promises) {\n\t\t// JSONP chunk loading for javascript\n\t\tvar installedChunkData = __webpack_require__.o(installedChunks, chunkId) ? installedChunks[chunkId] : undefined;\n\t\tif(installedChunkData !== 0) { // 0 means \"already installed\".\n\n\t\t\t// a Promise means \"currently loading\".\n\t\t\tif(installedChunkData) {\n\t\t\t\tpromises.push(installedChunkData[2]);\n\t\t\t} else {\n\t\t\t\tif(true) { // all chunks have JS\n\t\t\t\t\t// setup Promise in chunk cache\n\t\t\t\t\tvar promise = new Promise(function(resolve, reject) { installedChunkData = installedChunks[chunkId] = [resolve, reject]; });\n\t\t\t\t\tpromises.push(installedChunkData[2] = promise);\n\n\t\t\t\t\t// start chunk loading\n\t\t\t\t\tvar url = __webpack_require__.p + __webpack_require__.u(chunkId);\n\t\t\t\t\t// create error before stack unwound to get useful stacktrace later\n\t\t\t\t\tvar error = new Error();\n\t\t\t\t\tvar loadingEnded = function(event) {\n\t\t\t\t\t\tif(__webpack_require__.o(installedChunks, chunkId)) {\n\t\t\t\t\t\t\tinstalledChunkData = installedChunks[chunkId];\n\t\t\t\t\t\t\tif(installedChunkData !== 0) installedChunks[chunkId] = undefined;\n\t\t\t\t\t\t\tif(installedChunkData) {\n\t\t\t\t\t\t\t\tvar errorType = event && (event.type === 'load' ? 
'missing' : event.type);\n\t\t\t\t\t\t\t\tvar realSrc = event && event.target && event.target.src;\n\t\t\t\t\t\t\t\terror.message = 'Loading chunk ' + chunkId + ' failed.\\n(' + errorType + ': ' + realSrc + ')';\n\t\t\t\t\t\t\t\terror.name = 'ChunkLoadError';\n\t\t\t\t\t\t\t\terror.type = errorType;\n\t\t\t\t\t\t\t\terror.request = realSrc;\n\t\t\t\t\t\t\t\tinstalledChunkData[1](error);\n\t\t\t\t\t\t\t}\n\t\t\t\t\t\t}\n\t\t\t\t\t};\n\t\t\t\t\t__webpack_require__.l(url, loadingEnded, \"chunk-\" + chunkId, chunkId);\n\t\t\t\t}\n\t\t\t}\n\t\t}\n};\n\n// no prefetching\n\n// no preloaded\n\n// no HMR\n\n// no HMR manifest\n\n// no on chunks loaded\n\n// install a JSONP callback for chunk loading\nvar webpackJsonpCallback = function(parentChunkLoadingFunction, data) {\n\tvar chunkIds = data[0];\n\tvar moreModules = data[1];\n\tvar runtime = data[2];\n\t// add \"moreModules\" to the modules object,\n\t// then flag all \"chunkIds\" as loaded and fire callback\n\tvar moduleId, chunkId, i = 0;\n\tif(chunkIds.some(function(id) { return installedChunks[id] !== 0; })) {\n\t\tfor(moduleId in moreModules) {\n\t\t\tif(__webpack_require__.o(moreModules, moduleId)) {\n\t\t\t\t__webpack_require__.m[moduleId] = moreModules[moduleId];\n\t\t\t}\n\t\t}\n\t\tif(runtime) var result = runtime(__webpack_require__);\n\t}\n\tif(parentChunkLoadingFunction) parentChunkLoadingFunction(data);\n\tfor(;i < chunkIds.length; i++) {\n\t\tchunkId = chunkIds[i];\n\t\tif(__webpack_require__.o(installedChunks, chunkId) && installedChunks[chunkId]) {\n\t\t\tinstalledChunks[chunkId][0]();\n\t\t}\n\t\tinstalledChunks[chunkId] = 0;\n\t}\n\n}\n\nvar chunkLoadingGlobal = self[\"webpackChunkgpu_mem\"] = self[\"webpackChunkgpu_mem\"] || [];\nchunkLoadingGlobal.forEach(webpackJsonpCallback.bind(null, 0));\nchunkLoadingGlobal.push = webpackJsonpCallback.bind(null, chunkLoadingGlobal.push.bind(chunkLoadingGlobal));","export default function _typeof(o) {\n \"@babel/helpers - typeof\";\n\n return _typeof = \"function\" == typeof Symbol && \"symbol\" == typeof Symbol.iterator ? function (o) {\n return typeof o;\n } : function (o) {\n return o && \"function\" == typeof Symbol && o.constructor === Symbol && o !== Symbol.prototype ? \"symbol\" : typeof o;\n }, _typeof(o);\n}","import _typeof from \"./typeof.js\";\nimport toPrimitive from \"./toPrimitive.js\";\nexport default function _toPropertyKey(arg) {\n var key = toPrimitive(arg, \"string\");\n return _typeof(key) === \"symbol\" ? key : String(key);\n}","import _typeof from \"./typeof.js\";\nexport default function _toPrimitive(input, hint) {\n if (_typeof(input) !== \"object\" || input === null) return input;\n var prim = input[Symbol.toPrimitive];\n if (prim !== undefined) {\n var res = prim.call(input, hint || \"default\");\n if (_typeof(res) !== \"object\") return res;\n throw new TypeError(\"@@toPrimitive must return a primitive value.\");\n }\n return (hint === \"string\" ? 
String : Number)(input);\n}","import toPropertyKey from \"./toPropertyKey.js\";\nexport default function _defineProperty(obj, key, value) {\n key = toPropertyKey(key);\n if (key in obj) {\n Object.defineProperty(obj, key, {\n value: value,\n enumerable: true,\n configurable: true,\n writable: true\n });\n } else {\n obj[key] = value;\n }\n return obj;\n}","import defineProperty from \"./defineProperty.js\";\nfunction ownKeys(e, r) {\n var t = Object.keys(e);\n if (Object.getOwnPropertySymbols) {\n var o = Object.getOwnPropertySymbols(e);\n r && (o = o.filter(function (r) {\n return Object.getOwnPropertyDescriptor(e, r).enumerable;\n })), t.push.apply(t, o);\n }\n return t;\n}\nexport default function _objectSpread2(e) {\n for (var r = 1; r < arguments.length; r++) {\n var t = null != arguments[r] ? arguments[r] : {};\n r % 2 ? ownKeys(Object(t), !0).forEach(function (r) {\n defineProperty(e, r, t[r]);\n }) : Object.getOwnPropertyDescriptors ? Object.defineProperties(e, Object.getOwnPropertyDescriptors(t)) : ownKeys(Object(t)).forEach(function (r) {\n Object.defineProperty(e, r, Object.getOwnPropertyDescriptor(t, r));\n });\n }\n return e;\n}","import _typeof from \"./typeof.js\";\nexport default function _regeneratorRuntime() {\n \"use strict\"; /*! regenerator-runtime -- Copyright (c) 2014-present, Facebook, Inc. -- license (MIT): https://github.com/facebook/regenerator/blob/main/LICENSE */\n _regeneratorRuntime = function _regeneratorRuntime() {\n return e;\n };\n var t,\n e = {},\n r = Object.prototype,\n n = r.hasOwnProperty,\n o = Object.defineProperty || function (t, e, r) {\n t[e] = r.value;\n },\n i = \"function\" == typeof Symbol ? Symbol : {},\n a = i.iterator || \"@@iterator\",\n c = i.asyncIterator || \"@@asyncIterator\",\n u = i.toStringTag || \"@@toStringTag\";\n function define(t, e, r) {\n return Object.defineProperty(t, e, {\n value: r,\n enumerable: !0,\n configurable: !0,\n writable: !0\n }), t[e];\n }\n try {\n define({}, \"\");\n } catch (t) {\n define = function define(t, e, r) {\n return t[e] = r;\n };\n }\n function wrap(t, e, r, n) {\n var i = e && e.prototype instanceof Generator ? e : Generator,\n a = Object.create(i.prototype),\n c = new Context(n || []);\n return o(a, \"_invoke\", {\n value: makeInvokeMethod(t, r, c)\n }), a;\n }\n function tryCatch(t, e, r) {\n try {\n return {\n type: \"normal\",\n arg: t.call(e, r)\n };\n } catch (t) {\n return {\n type: \"throw\",\n arg: t\n };\n }\n }\n e.wrap = wrap;\n var h = \"suspendedStart\",\n l = \"suspendedYield\",\n f = \"executing\",\n s = \"completed\",\n y = {};\n function Generator() {}\n function GeneratorFunction() {}\n function GeneratorFunctionPrototype() {}\n var p = {};\n define(p, a, function () {\n return this;\n });\n var d = Object.getPrototypeOf,\n v = d && d(d(values([])));\n v && v !== r && n.call(v, a) && (p = v);\n var g = GeneratorFunctionPrototype.prototype = Generator.prototype = Object.create(p);\n function defineIteratorMethods(t) {\n [\"next\", \"throw\", \"return\"].forEach(function (e) {\n define(t, e, function (t) {\n return this._invoke(e, t);\n });\n });\n }\n function AsyncIterator(t, e) {\n function invoke(r, o, i, a) {\n var c = tryCatch(t[r], t, o);\n if (\"throw\" !== c.type) {\n var u = c.arg,\n h = u.value;\n return h && \"object\" == _typeof(h) && n.call(h, \"__await\") ? 
e.resolve(h.__await).then(function (t) {\n invoke(\"next\", t, i, a);\n }, function (t) {\n invoke(\"throw\", t, i, a);\n }) : e.resolve(h).then(function (t) {\n u.value = t, i(u);\n }, function (t) {\n return invoke(\"throw\", t, i, a);\n });\n }\n a(c.arg);\n }\n var r;\n o(this, \"_invoke\", {\n value: function value(t, n) {\n function callInvokeWithMethodAndArg() {\n return new e(function (e, r) {\n invoke(t, n, e, r);\n });\n }\n return r = r ? r.then(callInvokeWithMethodAndArg, callInvokeWithMethodAndArg) : callInvokeWithMethodAndArg();\n }\n });\n }\n function makeInvokeMethod(e, r, n) {\n var o = h;\n return function (i, a) {\n if (o === f) throw new Error(\"Generator is already running\");\n if (o === s) {\n if (\"throw\" === i) throw a;\n return {\n value: t,\n done: !0\n };\n }\n for (n.method = i, n.arg = a;;) {\n var c = n.delegate;\n if (c) {\n var u = maybeInvokeDelegate(c, n);\n if (u) {\n if (u === y) continue;\n return u;\n }\n }\n if (\"next\" === n.method) n.sent = n._sent = n.arg;else if (\"throw\" === n.method) {\n if (o === h) throw o = s, n.arg;\n n.dispatchException(n.arg);\n } else \"return\" === n.method && n.abrupt(\"return\", n.arg);\n o = f;\n var p = tryCatch(e, r, n);\n if (\"normal\" === p.type) {\n if (o = n.done ? s : l, p.arg === y) continue;\n return {\n value: p.arg,\n done: n.done\n };\n }\n \"throw\" === p.type && (o = s, n.method = \"throw\", n.arg = p.arg);\n }\n };\n }\n function maybeInvokeDelegate(e, r) {\n var n = r.method,\n o = e.iterator[n];\n if (o === t) return r.delegate = null, \"throw\" === n && e.iterator[\"return\"] && (r.method = \"return\", r.arg = t, maybeInvokeDelegate(e, r), \"throw\" === r.method) || \"return\" !== n && (r.method = \"throw\", r.arg = new TypeError(\"The iterator does not provide a '\" + n + \"' method\")), y;\n var i = tryCatch(o, e.iterator, r.arg);\n if (\"throw\" === i.type) return r.method = \"throw\", r.arg = i.arg, r.delegate = null, y;\n var a = i.arg;\n return a ? a.done ? 
(r[e.resultName] = a.value, r.next = e.nextLoc, \"return\" !== r.method && (r.method = \"next\", r.arg = t), r.delegate = null, y) : a : (r.method = \"throw\", r.arg = new TypeError(\"iterator result is not an object\"), r.delegate = null, y);\n }\n function pushTryEntry(t) {\n var e = {\n tryLoc: t[0]\n };\n 1 in t && (e.catchLoc = t[1]), 2 in t && (e.finallyLoc = t[2], e.afterLoc = t[3]), this.tryEntries.push(e);\n }\n function resetTryEntry(t) {\n var e = t.completion || {};\n e.type = \"normal\", delete e.arg, t.completion = e;\n }\n function Context(t) {\n this.tryEntries = [{\n tryLoc: \"root\"\n }], t.forEach(pushTryEntry, this), this.reset(!0);\n }\n function values(e) {\n if (e || \"\" === e) {\n var r = e[a];\n if (r) return r.call(e);\n if (\"function\" == typeof e.next) return e;\n if (!isNaN(e.length)) {\n var o = -1,\n i = function next() {\n for (; ++o < e.length;) if (n.call(e, o)) return next.value = e[o], next.done = !1, next;\n return next.value = t, next.done = !0, next;\n };\n return i.next = i;\n }\n }\n throw new TypeError(_typeof(e) + \" is not iterable\");\n }\n return GeneratorFunction.prototype = GeneratorFunctionPrototype, o(g, \"constructor\", {\n value: GeneratorFunctionPrototype,\n configurable: !0\n }), o(GeneratorFunctionPrototype, \"constructor\", {\n value: GeneratorFunction,\n configurable: !0\n }), GeneratorFunction.displayName = define(GeneratorFunctionPrototype, u, \"GeneratorFunction\"), e.isGeneratorFunction = function (t) {\n var e = \"function\" == typeof t && t.constructor;\n return !!e && (e === GeneratorFunction || \"GeneratorFunction\" === (e.displayName || e.name));\n }, e.mark = function (t) {\n return Object.setPrototypeOf ? Object.setPrototypeOf(t, GeneratorFunctionPrototype) : (t.__proto__ = GeneratorFunctionPrototype, define(t, u, \"GeneratorFunction\")), t.prototype = Object.create(g), t;\n }, e.awrap = function (t) {\n return {\n __await: t\n };\n }, defineIteratorMethods(AsyncIterator.prototype), define(AsyncIterator.prototype, c, function () {\n return this;\n }), e.AsyncIterator = AsyncIterator, e.async = function (t, r, n, o, i) {\n void 0 === i && (i = Promise);\n var a = new AsyncIterator(wrap(t, r, n, o), i);\n return e.isGeneratorFunction(r) ? a : a.next().then(function (t) {\n return t.done ? 
t.value : a.next();\n });\n }, defineIteratorMethods(g), define(g, u, \"Generator\"), define(g, a, function () {\n return this;\n }), define(g, \"toString\", function () {\n return \"[object Generator]\";\n }), e.keys = function (t) {\n var e = Object(t),\n r = [];\n for (var n in e) r.push(n);\n return r.reverse(), function next() {\n for (; r.length;) {\n var t = r.pop();\n if (t in e) return next.value = t, next.done = !1, next;\n }\n return next.done = !0, next;\n };\n }, e.values = values, Context.prototype = {\n constructor: Context,\n reset: function reset(e) {\n if (this.prev = 0, this.next = 0, this.sent = this._sent = t, this.done = !1, this.delegate = null, this.method = \"next\", this.arg = t, this.tryEntries.forEach(resetTryEntry), !e) for (var r in this) \"t\" === r.charAt(0) && n.call(this, r) && !isNaN(+r.slice(1)) && (this[r] = t);\n },\n stop: function stop() {\n this.done = !0;\n var t = this.tryEntries[0].completion;\n if (\"throw\" === t.type) throw t.arg;\n return this.rval;\n },\n dispatchException: function dispatchException(e) {\n if (this.done) throw e;\n var r = this;\n function handle(n, o) {\n return a.type = \"throw\", a.arg = e, r.next = n, o && (r.method = \"next\", r.arg = t), !!o;\n }\n for (var o = this.tryEntries.length - 1; o >= 0; --o) {\n var i = this.tryEntries[o],\n a = i.completion;\n if (\"root\" === i.tryLoc) return handle(\"end\");\n if (i.tryLoc <= this.prev) {\n var c = n.call(i, \"catchLoc\"),\n u = n.call(i, \"finallyLoc\");\n if (c && u) {\n if (this.prev < i.catchLoc) return handle(i.catchLoc, !0);\n if (this.prev < i.finallyLoc) return handle(i.finallyLoc);\n } else if (c) {\n if (this.prev < i.catchLoc) return handle(i.catchLoc, !0);\n } else {\n if (!u) throw new Error(\"try statement without catch or finally\");\n if (this.prev < i.finallyLoc) return handle(i.finallyLoc);\n }\n }\n }\n },\n abrupt: function abrupt(t, e) {\n for (var r = this.tryEntries.length - 1; r >= 0; --r) {\n var o = this.tryEntries[r];\n if (o.tryLoc <= this.prev && n.call(o, \"finallyLoc\") && this.prev < o.finallyLoc) {\n var i = o;\n break;\n }\n }\n i && (\"break\" === t || \"continue\" === t) && i.tryLoc <= e && e <= i.finallyLoc && (i = null);\n var a = i ? i.completion : {};\n return a.type = t, a.arg = e, i ? (this.method = \"next\", this.next = i.finallyLoc, y) : this.complete(a);\n },\n complete: function complete(t, e) {\n if (\"throw\" === t.type) throw t.arg;\n return \"break\" === t.type || \"continue\" === t.type ? this.next = t.arg : \"return\" === t.type ? 
(this.rval = this.arg = t.arg, this.method = \"return\", this.next = \"end\") : \"normal\" === t.type && e && (this.next = e), y;\n },\n finish: function finish(t) {\n for (var e = this.tryEntries.length - 1; e >= 0; --e) {\n var r = this.tryEntries[e];\n if (r.finallyLoc === t) return this.complete(r.completion, r.afterLoc), resetTryEntry(r), y;\n }\n },\n \"catch\": function _catch(t) {\n for (var e = this.tryEntries.length - 1; e >= 0; --e) {\n var r = this.tryEntries[e];\n if (r.tryLoc === t) {\n var n = r.completion;\n if (\"throw\" === n.type) {\n var o = n.arg;\n resetTryEntry(r);\n }\n return o;\n }\n }\n throw new Error(\"illegal catch attempt\");\n },\n delegateYield: function delegateYield(e, r, n) {\n return this.delegate = {\n iterator: values(e),\n resultName: r,\n nextLoc: n\n }, \"next\" === this.method && (this.arg = t), y;\n }\n }, e;\n}","function asyncGeneratorStep(gen, resolve, reject, _next, _throw, key, arg) {\n try {\n var info = gen[key](arg);\n var value = info.value;\n } catch (error) {\n reject(error);\n return;\n }\n if (info.done) {\n resolve(value);\n } else {\n Promise.resolve(value).then(_next, _throw);\n }\n}\nexport default function _asyncToGenerator(fn) {\n return function () {\n var self = this,\n args = arguments;\n return new Promise(function (resolve, reject) {\n var gen = fn.apply(self, args);\n function _next(value) {\n asyncGeneratorStep(gen, resolve, reject, _next, _throw, \"next\", value);\n }\n function _throw(err) {\n asyncGeneratorStep(gen, resolve, reject, _next, _throw, \"throw\", err);\n }\n _next(undefined);\n });\n };\n}","export default function _arrayLikeToArray(arr, len) {\n if (len == null || len > arr.length) len = arr.length;\n for (var i = 0, arr2 = new Array(len); i < len; i++) arr2[i] = arr[i];\n return arr2;\n}","import arrayLikeToArray from \"./arrayLikeToArray.js\";\nexport default function _unsupportedIterableToArray(o, minLen) {\n if (!o) return;\n if (typeof o === \"string\") return arrayLikeToArray(o, minLen);\n var n = Object.prototype.toString.call(o).slice(8, -1);\n if (n === \"Object\" && o.constructor) n = o.constructor.name;\n if (n === \"Map\" || n === \"Set\") return Array.from(o);\n if (n === \"Arguments\" || /^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(n)) return arrayLikeToArray(o, minLen);\n}","import arrayWithHoles from \"./arrayWithHoles.js\";\nimport iterableToArrayLimit from \"./iterableToArrayLimit.js\";\nimport unsupportedIterableToArray from \"./unsupportedIterableToArray.js\";\nimport nonIterableRest from \"./nonIterableRest.js\";\nexport default function _slicedToArray(arr, i) {\n return arrayWithHoles(arr) || iterableToArrayLimit(arr, i) || unsupportedIterableToArray(arr, i) || nonIterableRest();\n}","export default function _arrayWithHoles(arr) {\n if (Array.isArray(arr)) return arr;\n}","export default function _iterableToArrayLimit(r, l) {\n var t = null == r ? 
null : \"undefined\" != typeof Symbol && r[Symbol.iterator] || r[\"@@iterator\"];\n if (null != t) {\n var e,\n n,\n i,\n u,\n a = [],\n f = !0,\n o = !1;\n try {\n if (i = (t = t.call(r)).next, 0 === l) {\n if (Object(t) !== t) return;\n f = !1;\n } else for (; !(f = (e = i.call(t)).done) && (a.push(e.value), a.length !== l); f = !0);\n } catch (r) {\n o = !0, n = r;\n } finally {\n try {\n if (!f && null != t[\"return\"] && (u = t[\"return\"](), Object(u) !== u)) return;\n } finally {\n if (o) throw n;\n }\n }\n return a;\n }\n}","export default function _nonIterableRest() {\n throw new TypeError(\"Invalid attempt to destructure non-iterable instance.\\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method.\");\n}","import React from 'react';\n\nfunction TextInput(props) {\n// const [value, setValue] = useState(''); // useState hook to manage the input value\n \n const fun = props.setValue;\n\n return (\n fun(e.target.value)}\n placeholder={props.placeholder}\n />\n );\n}\n\nexport default TextInput;\n","import React, { useState } from \"react\";\nimport TextInput from \"./textBox\";\nimport Modal from \"react-modal\";\n\nconst billion = 1000000000;\nlet configPath = \"/gpu_poor/all_configs.json\";\nif (\n window.location.hostname === \"localhost\" ||\n window.location.hostname === \"127.0.0.1\"\n) {\n configPath = \"/gpu_poor/all_configs.json\";\n}\nconst MAX_FILE_SIZE = 500000;\nconst ggml_quants = [\n \"ggml_QK4_0\",\n \"ggml_QK4_1\",\n \"ggml_QK5_0\",\n \"ggml_QK5_1\",\n \"ggml_QK8_0\",\n \"ggml_QK8_1\",\n\n \"ggml_Q2_K\",\n\n \"ggml_Q3_K_L\",\n \"ggml_Q3_K_M\",\n\n \"ggml_QK4_K_M\",\n \"ggml_QK4_K_S\",\n\n \"ggml_QK5_K_M\",\n \"ggml_Q6_K\",\n];\n// console.log(configPath);\n\n/*\ndropdownTrnOrNot: 'inf', 'trn', 'inf_vLLM','inf_exL','inf_ggml'\ndropdownFullOrNot: 'lora_trn, 'full_trn', 'qlora'\ndropdownOpt: 'no_opt', 'sgd_opt','adam_opt'\ndropdownQuant: 'no_quant','bnb_int8','bnb_q4', \n*/\nconst specialNamesMapping = {\n \"meta-llama/Llama-2-7b\": \"meta-llama/Llama-2-7b-hf\",\n \"meta-llama/Llama-13-7b\": \"meta-llama/Llama-13-7b-hf\",\n \"meta-llama/Llama-2-70b\": \"meta-llama/Llama-13-70b-hf\",\n};\n\nfunction specialMapping(name) {\n if (name in specialNamesMapping) {\n return specialNamesMapping[name];\n }\n return name;\n}\n\nfunction getKey(keys, obj, defaultVal) {\n let toReturn = null;\n for (const key of keys) {\n if (obj.hasOwnProperty(key)) {\n // console.log(\"found: \",key);\n toReturn = obj[key];\n break;\n }\n }\n if (toReturn == null) {\n return defaultVal;\n }\n return toReturn;\n}\n\nfunction computeOverheadGGML(contextLen) {\n return 0.1 * contextLen;\n}\n\nfunction computeInferenceOnlyActivationMemory(contextLen, parsedConfig) {\n const hiddenDim = parsedConfig[\"hiddenDim\"];\n const heads = parsedConfig[\"heads\"];\n\n //return ((1000*4096*5)*2 + (1000*1000*32*2))/(1024*1024)\n return (\n (contextLen * hiddenDim * 5 * 2 + contextLen * contextLen * heads * 2) /\n (1024 * 1024)\n );\n}\n\n//floatBytes, quant\nfunction computeModelSizeGGML(parsedConfig, quant) {\n const vocab = parsedConfig[\"vocab\"],\n heads = parsedConfig[\"heads\"],\n numLayers = parsedConfig[\"num_layers\"],\n hiddenDim = parsedConfig[\"hiddenDim\"],\n interDim = parsedConfig[\"interDim\"];\n\n const totalParams =\n vocab * hiddenDim * 2 +\n numLayers * 4 * hiddenDim * hiddenDim +\n numLayers * 3 * interDim * hiddenDim;\n\n const other_v_down_params =\n numLayers * hiddenDim * hiddenDim + numLayers * hiddenDim * interDim;\n\n const other_params_Q2K =\n totalParams 
-\n (hiddenDim * hiddenDim * numLayers * 2 + 2 * vocab * hiddenDim);\n\n const mult_factor_dic = {\n ggml_QK4_0: 18,\n ggml_QK4_1: 20,\n ggml_QK5_0: 22,\n ggml_QK5_1: 24,\n ggml_QK8_0: 34,\n ggml_QK8_1: 40,\n };\n\n const mult_factor_dic_64 = {\n ggml_Q6_K: 54.0,\n ggml_Q3: 26.0,\n ggml_Q4: 38.0,\n ggml_Q5: 46.0,\n };\n\n //Q2_K is 22.0\n\n const mult_factor_dic_combination = {\n ggml_Q3_K_L: [38.0, 26.0],\n ggml_Q3_K_M: [46.0, 26.0],\n ggml_QK4_K_S: [46.0, 38.0],\n ggml_QK4_K_M: [54.0, 38.0],\n ggml_QK5_K_M: [54.0, 46.0],\n ggml_Q2_K: [26.0, 22.0],\n };\n\n let total = 0;\n if (mult_factor_dic.hasOwnProperty(quant)) {\n total = (mult_factor_dic[quant] * totalParams) / (32 * 1024 * 1024);\n }\n if (mult_factor_dic_64.hasOwnProperty(quant)) {\n total = (mult_factor_dic_64[quant] * totalParams) / (64 * 1024 * 1024);\n }\n if (mult_factor_dic_combination.hasOwnProperty(quant)) {\n const factors = mult_factor_dic_combination[quant];\n\n if (quant === \"ggml_Q2_K\") {\n total =\n ((totalParams - other_params_Q2K) * factors[1] +\n other_params_Q2K * factors[0]) /\n (64 * 1024 * 1024);\n } else {\n total =\n ((totalParams - other_v_down_params) * factors[1] +\n other_v_down_params * factors[0]) /\n (64 * 1024 * 1024);\n }\n }\n\n return total;\n}\n\nfunction computeModelSize(parsedConfig) {\n const vocab = parsedConfig[\"vocab\"],\n heads = parsedConfig[\"heads\"],\n numLayers = parsedConfig[\"num_layers\"],\n hiddenDim = parsedConfig[\"hiddenDim\"],\n interDim = parsedConfig[\"interDim\"];\n\n // console.log(vocab, heads, numLayers, hiddenDim, interDim);\n // let fB = floatBytes;\n // if (quant === 'bnb_int8'){fB = 1;}\n // if (quant === 'bnb_q4'){fB = 0.5;}\n\n const out =\n vocab * hiddenDim * 2 +\n numLayers * 4 * hiddenDim * hiddenDim +\n numLayers * 3 * interDim * hiddenDim;\n // console.log(\"this is out: \", out)\n\n return out;\n}\n\nfunction getGradOptMemory(\n dropdownFullOrNot,\n dropdownOpt,\n dropdownQuant,\n modelSize,\n floatBytes,\n parsedConfig,\n contextLen,\n batchSize = 1\n) {\n const full = dropdownFullOrNot,\n opt = dropdownOpt,\n quant = dropdownQuant;\n console.log(full, opt, quant);\n\n //QLora start\n // console.log(\"full: \", full);\n if (full === \"qlora\" && opt === \"adam_opt\") {\n //Need to check if q4 also takes extra memory\n console.log(\"calculating qlora\");\n return (\n parsedConfig.num_layers * 8 * parsedConfig.hiddenDim * 0.5 * 4 * 3 +\n getExtraMemory(parsedConfig, \"qlora\", contextLen) * batchSize\n );\n }\n if (full === \"qlora\" && opt === \"sgd_opt\") {\n //Need to check if q4 also takes extra memory\n return (\n parsedConfig.num_layers * 8 * parsedConfig.hiddenDim * 0.5 * 4 * 1 +\n getExtraMemory(parsedConfig, \"qlora\", contextLen) * batchSize\n );\n }\n //QLora end\n\n if (full === \"full_trn\" && opt === \"adam_opt\" && quant === \"no_quant\") {\n return modelSize * 3 * floatBytes;\n }\n\n if (full === \"full_trn\" && opt === \"adam_opt\" && quant === \"bnb_int8\") {\n return (\n modelSize * 3 * 1 +\n getExtraMemory(parsedConfig, quant, contextLen) * batchSize\n ); //Some extra mmeory that bnb int8 takes\n }\n\n if (full === \"full_trn\" && opt === \"adam_opt\" && quant === \"bnb_q4\") {\n //Need to check if q4 also takes extra memory\n return (\n modelSize * 3 * 0.5 +\n getExtraMemory(parsedConfig, quant, contextLen) * batchSize\n );\n }\n\n \n //------------\n if (full === \"full_trn\" && opt === \"sgd_opt\" && quant === \"no_quant\") {\n return modelSize * 1 * floatBytes;\n }\n\n if (full === \"full_trn\" && opt === \"sgd_opt\" && quant === 
\"bnb_int8\") {\n return (\n modelSize * 1 * 1 +\n getExtraMemory(parsedConfig, quant, contextLen) * batchSize\n );\n }\n\n if (full === \"full_trn\" && opt === \"sgd_opt\" && quant === \"bnb_q4\") {\n return (\n modelSize * 1 * 0.5 +\n getExtraMemory(parsedConfig, quant, contextLen) * batchSize\n );\n }\n\n\n //4*layer*8*hid*4*2\n\n //------------\n if (full === \"lora_trn\" && opt === \"adam_opt\" && quant === \"no_quant\") {\n return (\n parsedConfig.num_layers * 8 * parsedConfig.hiddenDim * 2 * 4 * 3 * 2\n );\n }\n\n if (full === \"lora_trn\" && opt === \"adam_opt\" && quant === \"bnb_int8\") {\n console.log(\"here!\");\n return (\n parsedConfig.num_layers * 8 * parsedConfig.hiddenDim * 2 * 4 * 3 +\n getExtraMemory(parsedConfig, quant, contextLen) * batchSize\n );\n }\n\n if (full === \"lora_trn\" && opt === \"adam_opt\" && quant === \"bnb_q4\") {\n return (\n parsedConfig.num_layers * 8 * parsedConfig.hiddenDim * 2 * 4 * 3 +\n getExtraMemory(parsedConfig, quant, contextLen) * batchSize\n );\n }\n\n //------------\n if (full === \"lora_trn\" && opt === \"sgd_opt\" && quant === \"no_quant\") {\n return parsedConfig.num_layers * 8 * parsedConfig.hiddenDim * 2 * 4 * 2;\n }\n\n if (full === \"lora_trn\" && opt === \"sgd_opt\" && quant === \"bnb_int8\") {\n return (\n parsedConfig.num_layers * 8 * parsedConfig.hiddenDim * 2 * 4 * 1 +\n getExtraMemory(parsedConfig, quant, contextLen) * batchSize\n );\n }\n\n if (full === \"lora_trn\" && opt === \"sgd_opt\" && quant === \"bnb_q4\") {\n return (\n parsedConfig.num_layers * 8 * parsedConfig.hiddenDim * 2 * 4 * 1 +\n getExtraMemory(parsedConfig, quant, contextLen) * batchSize\n );\n }\n\n console.log(full, opt, quant);\n throw new Error(\"Invalid combination of values\");\n}\n\nfunction getExtraMemory(parsedConfig, quant, contextLen) {\n const constant_8_extra = 0.75;\n const constant_4_extra = 1.5;\n const constant_qlora = 0.75;\n\n const common =\n (10 * parsedConfig.hiddenDim +\n 5 * parsedConfig.hiddenDim +\n 4 * parsedConfig.interDim +\n 2 * parsedConfig.interDim) *\n parsedConfig.num_layers;\n\n let extra_mem = 0;\n\n if (quant === \"bnb_int8\") {\n extra_mem = constant_8_extra * common * contextLen;\n }\n\n if (quant === \"bnb_q4\") {\n extra_mem = constant_4_extra * common * contextLen;\n \n }\n\n if (quant === \"qlora\") {\n extra_mem = constant_qlora * common * contextLen;\n \n }\n\n console.log(\"extra mem\", extra_mem);\n return extra_mem;\n}\n\nfunction getExtraMemoryOld(parsedConfig, quant) {\n const constant_8_overhead = 200.0,\n constant_8_extra = 350.0;\n const constant_4_overhead = 350.0,\n constant_4_extra = 550.0;\n\n const common =\n (10 * parsedConfig.hiddenDim +\n 5 * parsedConfig.hiddenDim +\n 4 * parsedConfig.interDim +\n 2 * parsedConfig.interDim) *\n parsedConfig.num_layers;\n\n let extra_mem = 0;\n\n if (quant === \"bnb_int8\") {\n extra_mem = constant_8_overhead * common + constant_8_extra * common;\n }\n\n if (quant === \"bnb_q4\") {\n extra_mem = constant_4_overhead * common + constant_4_extra * common;\n }\n\n console.log(\"extra mem\", extra_mem);\n return extra_mem;\n}\n\nfunction getActivationMemory(\n parsedConfig,\n contextLen,\n floatBytes,\n quant,\n dropdownFullOrNot,\n batchSize = 1\n) {\n const heads = parsedConfig[\"heads\"],\n numLayers = parsedConfig[\"num_layers\"],\n hiddenDim = parsedConfig[\"hiddenDim\"],\n interDim = parsedConfig[\"interDim\"];\n\n let fB = floatBytes;\n const len = contextLen;\n\n // if (quant==='bnb_int8'){fB=1;}\n // if (quant==='bnb_q4'){fB=0.5;}\n\n console.log(\"fb\", 
fB);\n\n console.log(\"activation: \", heads, numLayers, hiddenDim, interDim);\n\n //const attn_per_layer = qkv + qk (transpose) + attn mat + attn mat convert tp fp32 + attn mat divided by sqrt +\n const attn_per_layer =\n len * hiddenDim * 3 * fB +\n len * hiddenDim * 2 * fB +\n len * len * heads * fB +\n len * len * heads * 4 +\n len * len * heads * fB +\n len * hiddenDim * fB +\n len * hiddenDim * fB +\n len * hiddenDim * fB;\n\n // heads*len*len*4 + heads*len*len*fB + 3*hiddenDim*len*fB + hiddenDim*len*fB + hiddenDim*len*fB\n\n const ffn_per_layer =\n hiddenDim * len * fB +\n hiddenDim * len * fB +\n fB * 5 * len * interDim +\n interDim * len * fB;\n\n const norm = len * 4 * 2 + len * hiddenDim * fB * 6;\n\n let lora = 0;\n // if (dropdownFullOrNot==='lora_trn'){\n // lora = (8*len*2 + hiddenDim*len*2)*4;\n // }\n\n const total_per_layer = attn_per_layer + ffn_per_layer + norm + lora;\n console.log(\n \"total per layer: \",\n convertToMB(attn_per_layer),\n convertToMB(ffn_per_layer),\n convertToMB(norm),\n convertToMB(lora)\n );\n\n //total per layer: 4.2724609375 5.55419921875 6.409454345703125 8.02001953125\n let total = total_per_layer * numLayers;\n total = total * batchSize;\n\n console.log(\"this is total: \", total, attn_per_layer + ffn_per_layer);\n\n return total;\n}\n\nfunction checkCombinationTrainInference(\n quantType,\n setErrorMessage,\n openModal,\n typeOfTrn\n ){\n\n //! Can't train full with QLoRA\n if ((typeOfTrn==='full_trn') && ggml_quants.includes(quantType)){\n setErrorMessage(\"Can't use GGML for training\");\n openModal();\n return false;\n }\n if (typeOfTrn===\"qlora\" && quantType!='no_quant'){\n setErrorMessage(\"QLoRA is 4bit explicit. No need to select a quant type if you are training using QLoRA. Set it to 'None'\");\n openModal();\n return false;\n }\n return true;\n \n\n\n}\n\nfunction checkCombinationInference(\n trnType,\n quantType,\n setErrorMessage,\n openModal\n) {\n \n\n if (ggml_quants.includes(quantType)) {\n if (trnType != \"inf_ggml\") {\n setErrorMessage(\n \"Invalid combination of inference type/quantization\"\n );\n openModal();\n return false;\n }\n }\n if (quantType != \"no_quant\" && trnType === \"inf_vLLM\") {\n setErrorMessage(\"vLLm doesn't support quant (maybe)\");\n openModal();\n return false;\n }\n if (\n trnType === \"inf_ggml\" &&\n (quantType === \"bnb_int8\" ||\n quantType === \"bnb_q4\")\n ) {\n setErrorMessage(\"ggml doesn't support bnb\");\n openModal();\n return false;\n }\n if (trnType === \"inf_ggml\" && quantType === \"no_quant\") {\n setErrorMessage(\n \"If you want no quant then pick vLLM/HF inference framework\"\n );\n openModal();\n return false;\n }\n\n if (trnType === \"inf_exL\") {\n setErrorMessage(\"exLlama hasn't been added yet :)\");\n openModal();\n return false;\n }\n return true;\n}\n\nfunction sanityUploadedConfig(jsonUploadedData, setErrorMessage, openModal) {\n function uploadError() {\n setErrorMessage(\n \"upload config doesn't have correct keys. 
Make sure your config has the keys present in https://huggingface.co/codellama/CodeLlama-7b-hf/blob/main/config.json\"\n );\n openModal();\n return null;\n }\n\n if (Object.keys(jsonUploadedData).length === 0) {\n setErrorMessage(\"Uploaded JSON is empty :)\");\n openModal();\n return null; // JSON is empty\n }\n\n console.log(jsonUploadedData);\n\n let vocab = 0,\n hiddenDim = 0,\n heads = 0,\n interDim = 0,\n num_layers = 0;\n\n if (jsonUploadedData.hasOwnProperty(\"vocab_size\")) {\n vocab = jsonUploadedData[\"vocab_size\"];\n } else {\n uploadError();\n return null;\n }\n\n if (jsonUploadedData.hasOwnProperty(\"hidden_size\")) {\n hiddenDim = jsonUploadedData[\"hidden_size\"];\n } else {\n uploadError();\n return null;\n }\n\n if (jsonUploadedData.hasOwnProperty(\"num_attention_heads\")) {\n heads = jsonUploadedData[\"num_attention_heads\"];\n } else {\n uploadError();\n return null;\n }\n\n if (jsonUploadedData.hasOwnProperty(\"intermediate_size\")) {\n interDim = jsonUploadedData[\"intermediate_size\"];\n } else {\n uploadError();\n return null;\n }\n\n if (jsonUploadedData.hasOwnProperty(\"num_hidden_layers\")) {\n num_layers = jsonUploadedData[\"num_hidden_layers\"];\n } else {\n uploadError();\n return null;\n }\n\n return {\n vocab: vocab,\n hiddenDim: hiddenDim,\n heads: heads,\n interDim: interDim,\n num_layers: num_layers,\n };\n}\n\nfunction getParseConfig(parsedJSONData, setErrorMessage, openModal) {\n console.log(Object.keys(parsedJSONData).length);\n if (Object.keys(parsedJSONData).length === 0) {\n setErrorMessage(\n \"Huggingface config for this model ID doesn't have the expected keys (e.g. it is a GGML model). Please upload your config in the correct format\"\n );\n openModal();\n return null;\n }\n\n const vocab = getKey([\"vocab_size\"], parsedJSONData, 32000);\n const hiddenDim = getKey(\n [\"hidden_size\", \"d_model\", \"n_embd\"],\n parsedJSONData,\n 768\n );\n const heads = getKey(\n [\"num_attention_heads\", \"num_heads\", \"n_head\"],\n parsedJSONData,\n 12\n );\n const interDim = getKey(\n [\"intermediate_size\", \"n_inner\", \"d_ff\"],\n parsedJSONData,\n hiddenDim * 4\n );\n const num_layers = getKey(\n [\"num_layers\", \"num_hidden_layers\", \"n_layer\"],\n parsedJSONData,\n 12\n );\n\n return {\n vocab: vocab,\n hiddenDim: hiddenDim,\n heads: heads,\n interDim: interDim,\n num_layers: num_layers,\n };\n}\n\nfunction getDefault(modelSize) {\n //If only model size is provided. 
Guess typical values (vocab, heads, layers) and solve for the hidden dim\n let vocab = null;\n let heads = null;\n let numLayers = null;\n\n function getApprox(modelSize){\n let vocabR=null, headsR=null, numLayersR = null;\n if (modelSize<5){\n vocabR = 32000;\n headsR = 32;\n numLayersR = 24;\n return [vocabR, headsR, numLayersR];\n }\n if (modelSize<10){\n vocabR = 32000;\n headsR = 32;\n numLayersR = 32;\n return [vocabR, headsR, numLayersR];\n }\n if (modelSize<24){\n vocabR = 32000;\n headsR = 40;\n numLayersR = 40;\n return [vocabR, headsR, numLayersR];\n }\n\n if (modelSize<55){\n vocabR = 32000;\n headsR = 64;\n numLayersR = 48;\n return [vocabR, headsR, numLayersR];\n }\n\n vocabR = 32000;\n headsR = 64;\n numLayersR = 80;\n return [vocabR, headsR, numLayersR];\n }\n\n [vocab,heads,numLayers] = getApprox(modelSize);\n\n //2*vocab*h + numLayers*4*h*h + 3*4*h*h*numLayers = modelSize*10^9\n const A = numLayers * 4 + 3 * 4 * numLayers;\n const B = 2*vocab;\n const C = -1 * modelSize * billion;\n\n let h = (-B + Math.sqrt(B * B - 4 * A * C)) / (2 * A);\n h = Math.ceil(h);\n\n return {\n vocab: vocab,\n hiddenDim: h,\n heads: heads,\n interDim: 4 * h,\n num_layers: numLayers,\n };\n}\n\nfunction convertToMB(value) {\n return value / (1024 * 1024);\n}\n\nfunction convertToMBModelSize(value, quant, typeOfTrn) {\n let extra = 0;\n let fB = 2;\n let size = (value * fB) / (1024 * 1024);\n if (quant === \"bnb_int8\" || quant === \"bnb_q4\" || typeOfTrn === \"qlora\") {\n extra = 0.06 * size;\n }\n\n if (quant === \"bnb_int8\") {\n size = size / 2;\n }\n if (quant === \"bnb_q4\") {\n size = size / 4;\n }\n\n if (typeOfTrn === \"qlora\") {\n size = size / 4 - (value * 2) / (64 * 1024 * 1024);\n }\n\n return size + extra;\n}\n\nfunction convertToBytes(floatType) {\n // fp16/bf16 assumed; quantization is applied separately\n return 2.0;\n}\n\nfunction getAllComputedData(\n parsedJSONData,\n jsonUploadedData,\n modelSize,\n contextLen,\n floatType,\n selections,\n setErrorMessage,\n openModal,\n batchSize\n) {\n let parsedConfig = null,\n modelSizeinB = null;\n let activationMemory = 0,\n gradAndOptMemory = 0;\n let inferenceMemory = 0;\n let totalMemory = 0;\n const floatBytes = convertToBytes(floatType);\n const quantType = selections.dropdownQuant;\n const trnType = selections.dropdownTrnOrNot;\n const typeOfTrn = selections.dropdownFullOrNot;\n\n //trnType should be trnOrNot\n\n if (batchSize === \"\") {\n batchSize = \"1\";\n }\n\n let overHead = 650;\n if (!isValidPositiveInteger(contextLen)) {\n setErrorMessage(\n \"Context len can't be blank or have non-numeric or negative/zero values.\"\n );\n openModal();\n return null;\n }\n\n if (!isValidPositiveInteger(batchSize)) {\n setErrorMessage(\n \"Batch size can't have non-numeric or negative/zero values\"\n );\n openModal();\n return null;\n }\n\n if (parsedJSONData == null) {\n if (jsonUploadedData != null) {\n parsedConfig = sanityUploadedConfig(\n jsonUploadedData,\n setErrorMessage,\n openModal\n );\n console.log(parsedConfig, \"uploaded\");\n if (parsedConfig == null) {\n return null;\n }\n modelSizeinB = computeModelSize(parsedConfig);\n } else {\n if (!isNumberOrFloat(modelSize)) {\n console.log(\"error with model size\");\n setErrorMessage(\n \"Huggingface model ID not available; enter a model size (>0) or upload a config\"\n );\n openModal();\n return null;\n }\n\n parsedConfig = getDefault(modelSize);\n modelSizeinB = modelSize * billion;\n }\n } else {\n parsedConfig = getParseConfig(\n parsedJSONData,\n setErrorMessage,\n openModal\n );\n if (parsedConfig == null) {\n return null;\n }\n console.log(parsedConfig);\n modelSizeinB = 
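/* estimate the total parameter count from the fetched config */ 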
computeModelSize(parsedConfig);\n }\n\n let fB = floatBytes;\n if (quantType === \"bnb_int8\") {\n fB = 1;\n }\n if (quantType === \"bnb_q4\" || typeOfTrn === \"qlora\") {\n fB = 0.5;\n }\n let modelSizeinMB = convertToMBModelSize(modelSizeinB, quantType, typeOfTrn);\n // console.log(modelSizeinB);\n\n //!Inference\n if (trnType != \"trn\") {\n let checkSanity = checkCombinationInference(\n trnType,\n quantType,\n setErrorMessage,\n openModal\n );\n if (!checkSanity) {\n return null;\n }\n\n if (trnType === \"inf\" || trnType === \"inf_vLLM\") {\n let fB = 2;\n //If bnb quant\n if (quantType === \"bnb_int8\") {\n fB = 1;\n }\n if (quantType === \"bnb_q4\" || typeOfTrn === \"qlora\") {\n fB = 0.5;\n }\n\n inferenceMemory = convertToMB(\n 2 *\n contextLen *\n 2 *\n 2 *\n parsedConfig[\"hiddenDim\"] *\n parsedConfig[\"num_layers\"]\n );\n\n activationMemory = computeInferenceOnlyActivationMemory(\n contextLen,\n parsedConfig\n );\n\n console.log(\n \"HERE!!!\",\n inferenceMemory,\n modelSizeinMB,\n overHead,\n activationMemory\n );\n }\n if (trnType === \"inf_ggml\") {\n modelSizeinMB = computeModelSizeGGML(parsedConfig, quantType);\n inferenceMemory = convertToMB(\n 1 *\n contextLen *\n 2 *\n 2 *\n parsedConfig[\"hiddenDim\"] *\n parsedConfig[\"num_layers\"]\n );\n activationMemory = computeInferenceOnlyActivationMemory(\n contextLen,\n parsedConfig\n );\n overHead = overHead + computeOverheadGGML(contextLen);\n }\n\n totalMemory =\n inferenceMemory + modelSizeinMB + overHead + activationMemory;\n } else {\n\n // console.log(\"training!\");\n\n let checkSanity = checkCombinationTrainInference(quantType, setErrorMessage, openModal, typeOfTrn);\n if (!checkSanity) {\n return null;\n }\n //! Train\n activationMemory = getActivationMemory(\n parsedConfig,\n contextLen,\n floatBytes,\n quantType,\n typeOfTrn,\n batchSize\n );\n\n activationMemory = convertToMB(activationMemory);\n console.log(\"got activation\", activationMemory);\n\n gradAndOptMemory = getGradOptMemory(\n typeOfTrn,\n selections.dropdownOpt,\n quantType,\n modelSizeinB,\n floatBytes,\n parsedConfig,\n contextLen,\n batchSize\n );\n\n // console.log(\"got gradOpt\", gradAndOptMemory);\n\n gradAndOptMemory = convertToMB(gradAndOptMemory);\n totalMemory = modelSizeinMB + gradAndOptMemory + activationMemory;\n\n console.log(\"got total\", totalMemory);\n\n totalMemory = totalMemory + overHead;\n }\n\n return {\n Total: Math.ceil(totalMemory),\n \"KV Cache\": Math.ceil(inferenceMemory),\n \"Model Size\": Math.ceil(modelSizeinMB),\n \"Activation Memory\": Math.ceil(activationMemory),\n \"Grad & Optimizer memory\": Math.ceil(gradAndOptMemory),\n \"cuda + other overhead\": overHead,\n };\n}\n\n///Users/rahulchand/gpu_mem/public/all_configs.json\nasync function fetchParams(name) {\n // let output = fetch('https://huggingface.co/meta-llama/Llama-2-7b/raw/main/params.json');\n\n let response = await fetch(configPath);\n response = await response.json();\n // console.log(response.hasOwnProperty(name));\n\n return response.hasOwnProperty(name) ? 
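/* return the stored config for this model id, else null */ 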
response[name] : null;\n}\n\n// function isNumberOrFloat(value) {\n// return /^-?\\d+(\\.\\d+)?$/.test(value);\n// }\n\nfunction isNumberOrFloat(value) {\n const num = parseFloat(value);\n return !isNaN(num) && num > 0;\n}\n\nfunction isValidPositiveInteger(input) {\n const num = parseFloat(input);\n console.log(num, input);\n return Number.isInteger(num) && num > 0 && input.trim() !== \"\";\n}\n\nfunction App() {\n // let subtitle;\n const [modelSize, setModelSize] = useState(\"\");\n const [modelName, setModelName] = useState(\"\");\n const [contextLen, setContextLen] = useState(\"\");\n const [batchSize, setBatchSize] = useState(\"\");\n const [totalMemoryShown, setTotalMemoryShown] = useState(\" \");\n const [breakDownMemory, setBreakDownMemory] = useState(\" \");\n const [errorMessage, setErrorMessage] = useState(\"\");\n\n const [fileNameUpload, setFileNameUpload] = useState(\"\");\n\n const [modalIsOpen, setIsOpen] = React.useState(false);\n\n const [jsonData, setJsonData] = useState(null);\n\n function openModal() {\n setIsOpen(true);\n }\n\n function closeModal() {\n setIsOpen(false);\n }\n\n const handleFileClear = (event) => {\n setFileNameUpload(\"\");\n setJsonData(null);\n setTotalMemoryShown(\"\");\n setBreakDownMemory(\"\");\n };\n\n const handleFileChange = (event) => {\n const file = event.target.files[0];\n if (file) {\n // Check file size\n if (file.size > MAX_FILE_SIZE) {\n alert(\"File is too large. Please upload a smaller JSON file.\");\n return;\n }\n\n const reader = new FileReader();\n reader.onload = (e) => {\n try {\n const json = JSON.parse(e.target.result);\n setJsonData(json);\n event.target.value = null;\n } catch (error) {\n console.error(\"Error parsing JSON:\", error);\n alert(\"Invalid JSON file.\");\n }\n };\n setFileNameUpload(file.name);\n reader.readAsText(file);\n console.log(jsonData);\n }\n };\n\n const [selections, setSelections] = useState({\n dropdownTrnOrNot: \"inf\",\n dropdownFullOrNot: \"full_trn\",\n dropdownOpt: \"adam_opt\",\n dropdownQuant: \"no_quant\",\n dropdownGPU: \"rtx_3090\",\n });\n\n const handleChangeSelection = (e) => {\n const { name, value } = e.target;\n setSelections((prevState) => ({\n ...prevState,\n [name]: value,\n }));\n };\n\n // const handleChangeInText1 = (event) => {\n // setModelSize(event.target.value);\n // };\n\n const [output1, setOutput1] = useState(\"\");\n\n async function handleClickTokS() {\n setErrorMessage(\"To be added\");\n openModal();\n return;\n }\n\n async function handleReset() {\n setFileNameUpload(\"\");\n setJsonData(null);\n setTotalMemoryShown(\"\");\n setBreakDownMemory(\"\");\n setContextLen(\"\");\n setBatchSize(\"\");\n setModelSize(\"\");\n setModelName(\"\");\n }\n\n async function handleClick() {\n if (modelName.includes(\"GGML\") || modelName.includes(\"GGUF\")) {\n setErrorMessage(\n \"If you want info about GGML/GGUF models then enter the normal name & select GGML inference & quant type below. 
For example, if you want info about llama-2-7b.Q3_K_L.gguf then enter meta-llama/Llama-2-7b in the model name\"\n );\n openModal();\n return;\n }\n let parsedConfig = await fetchParams(specialMapping(modelName));\n const out = getAllComputedData(\n parsedConfig,\n jsonData,\n modelSize,\n contextLen,\n 2,\n selections,\n setErrorMessage,\n openModal,\n batchSize\n );\n\n if (out == null) {\n return;\n }\n\n setTotalMemoryShown(`Total Memory: ${out[\"Total\"]} MB`);\n const jsonOut = JSON.stringify(out);\n setBreakDownMemory(`Breakdown(in MB): ${jsonOut}`);\n }\n\n // const handleClick = () => {\n\n // const trnVal = selections.dropdownTrnOrNot;\n // let totalMemory = 0;\n // let size = parseFloat(modelSize);\n // if (trnVal==='trn'){\n\n // }\n\n // console.log(modelSize);\n // console.log(isNumberOrFloat(modelSize));\n\n // // console.log(\"clicking\");\n // // setOutput1(selections.dropdownTrnOrNot + ' ' + selections.dropdownFullOrNot);\n\n // // console.log()\n\n // };\n\n return (\n OR\n {suggestions.map((item, index) => (\n
\n {/* JSX layout elided: model name/size inputs, config upload, context length and batch size fields, training/inference and quantization dropdowns, the compute button, the memory breakdown output, and the error modal. */}\n