diff --git a/python/mlc_llm/model/model_preset.py b/python/mlc_llm/model/model_preset.py
index 22a96da010..90fe11d60a 100644
--- a/python/mlc_llm/model/model_preset.py
+++ b/python/mlc_llm/model/model_preset.py
@@ -636,51 +636,99 @@
         "use_cache": True,
         "vocab_size": 103168,
     },
-    # TODO(mlc-team): enable the model presets when stabilized.
-    # "gemma_2b": {
-    #     "architectures": ["GemmaForCausalLM"],
-    #     "attention_bias": False,
-    #     "bos_token_id": 2,
-    #     "eos_token_id": 1,
-    #     "head_dim": 256,
-    #     "hidden_act": "gelu",
-    #     "hidden_size": 2048,
-    #     "initializer_range": 0.02,
-    #     "intermediate_size": 16384,
-    #     "max_position_embeddings": 8192,
-    #     "model_type": "gemma",
-    #     "num_attention_heads": 8,
-    #     "num_hidden_layers": 18,
-    #     "num_key_value_heads": 1,
-    #     "pad_token_id": 0,
-    #     "rms_norm_eps": 1e-06,
-    #     "rope_theta": 10000.0,
-    #     "torch_dtype": "bfloat16",
-    #     "transformers_version": "4.38.0.dev0",
-    #     "vocab_size": 256000,
-    # },
-    # "gemma_7b": {
-    #     "architectures": ["GemmaForCausalLM"],
-    #     "attention_bias": False,
-    #     "bos_token_id": 2,
-    #     "eos_token_id": 1,
-    #     "head_dim": 256,
-    #     "hidden_act": "gelu",
-    #     "hidden_size": 3072,
-    #     "initializer_range": 0.02,
-    #     "intermediate_size": 24576,
-    #     "max_position_embeddings": 8192,
-    #     "model_type": "gemma",
-    #     "num_attention_heads": 16,
-    #     "num_hidden_layers": 28,
-    #     "num_key_value_heads": 16,
-    #     "pad_token_id": 0,
-    #     "rms_norm_eps": 1e-06,
-    #     "rope_theta": 10000.0,
-    #     "torch_dtype": "bfloat16",
-    #     "transformers_version": "4.38.0.dev0",
-    #     "vocab_size": 256000,
-    # },
+    "gemma2_2b": {
+        "architectures": ["Gemma2ForCausalLM"],
+        "attention_bias": False,
+        "attention_dropout": 0.0,
+        "attn_logit_softcapping": 50.0,
+        "bos_token_id": 2,
+        "cache_implementation": "hybrid",
+        "eos_token_id": [1, 107],
+        "final_logit_softcapping": 30.0,
+        "head_dim": 256,
+        "hidden_act": "gelu_pytorch_tanh",
+        "hidden_activation": "gelu_pytorch_tanh",
+        "hidden_size": 2304,
+        "initializer_range": 0.02,
+        "intermediate_size": 9216,
+        "max_position_embeddings": 8192,
+        "model_type": "gemma2",
+        "num_attention_heads": 8,
+        "num_hidden_layers": 26,
+        "num_key_value_heads": 4,
+        "pad_token_id": 0,
+        "query_pre_attn_scalar": 256,
+        "rms_norm_eps": 1e-06,
+        "rope_theta": 10000.0,
+        "sliding_window": 4096,
+        "torch_dtype": "bfloat16",
+        "transformers_version": "4.42.4",
+        "use_cache": True,
+        "vocab_size": 256000,
+    },
+    "gemma2_9b": {
+        "architectures": ["Gemma2ForCausalLM"],
+        "attention_bias": False,
+        "attention_dropout": 0.0,
+        "attn_logit_softcapping": 50.0,
+        "bos_token_id": 2,
+        "cache_implementation": "hybrid",
+        "eos_token_id": 1,
+        "final_logit_softcapping": 30.0,
+        "head_dim": 256,
+        "hidden_act": "gelu_pytorch_tanh",
+        "hidden_activation": "gelu_pytorch_tanh",
+        "hidden_size": 3584,
+        "initializer_range": 0.02,
+        "intermediate_size": 14336,
+        "max_position_embeddings": 8192,
+        "model_type": "gemma2",
+        "num_attention_heads": 16,
+        "num_hidden_layers": 42,
+        "num_key_value_heads": 8,
+        "pad_token_id": 0,
+        "query_pre_attn_scalar": 256,
+        "rms_norm_eps": 1e-06,
+        "rope_theta": 10000.0,
+        "sliding_window": 4096,
+        "sliding_window_size": 4096,
+        "torch_dtype": "bfloat16",
+        "transformers_version": "4.42.0.dev0",
+        "use_cache": True,
+        "vocab_size": 256000,
+    },
+    "gemma2_27b": {
+        "architectures": ["Gemma2ForCausalLM"],
+        "attention_bias": False,
+        "attention_dropout": 0.0,
+        "attn_logit_softcapping": 50.0,
+        "bos_token_id": 2,
+        "cache_implementation": "hybrid",
+        "eos_token_id": 1,
+        "final_logit_softcapping": 30.0,
+        "head_dim": 128,
+        "hidden_act": "gelu_pytorch_tanh",
+        "hidden_activation": "gelu_pytorch_tanh",
+        "hidden_size": 4608,
+        "initializer_range": 0.02,
+        "intermediate_size": 36864,
+        "max_position_embeddings": 8192,
+        "model_type": "gemma2",
+        "num_attention_heads": 32,
+        "num_hidden_layers": 46,
+        "num_key_value_heads": 16,
+        "pad_token_id": 0,
+        "query_pre_attn_scalar": 144,
+        "rms_norm_eps": 1e-06,
+        "rope_theta": 10000.0,
+        "sliding_window": 4096,
+        "sliding_window_size": 4096,
+        "torch_dtype": "bfloat16",
+        "transformers_version": "4.42.0.dev0",
+        "use_cache": True,
+        "vocab_size": 256000,
+        "_attn_implementation": "eager",
+    },
     "rwkv5_3b": {
         "architectures": ["RwkvForCausalLM"],
         "auto_map": {