From 66215433495521fc02174b6ae849b9d0c69ff2da Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Sat, 22 Jul 2023 01:52:38 -0400
Subject: [PATCH] don't resize embeddings to multiples of 32x by default

---
 README.md                   | 3 +++
 src/axolotl/utils/models.py | 6 +++++-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 2e94d3230..4a5ec155c 100644
--- a/README.md
+++ b/README.md
@@ -322,6 +322,9 @@ tokenizer_type: AutoTokenizer
 trust_remote_code:
 # use_fast option for tokenizer loading from_pretrained, default to True
 tokenizer_use_fast:
+# resize the model embeddings when new tokens are added to multiples of 32
+# this is reported to improve training speed on some models
+resize_token_embeddings_to_32x:

 # whether you are training a 4-bit GPTQ quantized model
 gptq: true

diff --git a/src/axolotl/utils/models.py b/src/axolotl/utils/models.py
index a88a3807e..afe1632ea 100644
--- a/src/axolotl/utils/models.py
+++ b/src/axolotl/utils/models.py
@@ -301,7 +301,11 @@ def load_model(
         **model_kwargs,
     )

-    embeddings_len = math.ceil(len(tokenizer) / 32) * 32
+    embeddings_len = (
+        math.ceil(len(tokenizer) / 32) * 32
+        if cfg.resize_token_embeddings_to_32x
+        else len(tokenizer)
+    )
     model.resize_token_embeddings(embeddings_len)

     if (
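
For context, the conditional this patch introduces is equivalent to the standalone sketch below. This is a minimal illustration, not part of the patch itself; `target_embedding_size` and `round_to_32x` are hypothetical names chosen for the example.

```python
import math

def target_embedding_size(vocab_size: int, round_to_32x: bool) -> int:
    # With rounding enabled, pad the embedding table up to the next
    # multiple of 32; otherwise match the tokenizer vocab exactly.
    if round_to_32x:
        return math.ceil(vocab_size / 32) * 32
    return vocab_size

# A 32001-entry vocab (e.g. a 32000-token tokenizer plus one added
# special token) pads up to 32032 when rounding is on, and stays at
# 32001 when it is off (the new default behavior in this patch).
assert target_embedding_size(32001, round_to_32x=True) == 32032
assert target_embedding_size(32001, round_to_32x=False) == 32001
```

To opt back in to the old padding behavior, set `resize_token_embeddings_to_32x: true` in the config YAML; when the option is unset, its falsy default leaves the embedding table sized at `len(tokenizer)`.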