Commit

Merge pull request axolotl-ai-cloud#313 from OpenAccess-AI-Collective/tokenizer-llama2-embeddings

don't resize embeddings to multiples of 32x by default
winglian committed Jul 22, 2023
2 parents 96d89e3 + 6621543 commit 3539732
Showing 2 changed files with 8 additions and 1 deletion.
3 changes: 3 additions & 0 deletions README.md
@@ -322,6 +322,9 @@ tokenizer_type: AutoTokenizer
 trust_remote_code:
 # use_fast option for tokenizer loading from_pretrained, default to True
 tokenizer_use_fast:
+# resize the model embeddings when new tokens are added to multiples of 32
+# this is reported to improve training speed on some models
+resize_token_embeddings_to_32x:
 # whether you are training a 4-bit GPTQ quantized model
 gptq: true
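For context, the option documented above is a top-level config key. A hedged, hypothetical excerpt of an axolotl YAML config enabling it might look like this (model and tokenizer values are illustrative, not from this commit):

```yaml
# hypothetical axolotl config excerpt; only the last key comes from this commit
base_model: meta-llama/Llama-2-7b-hf
tokenizer_type: AutoTokenizer
# opt in to rounding the embedding table up to a multiple of 32
resize_token_embeddings_to_32x: true
```

Leaving the key unset (or false) now keeps the embedding table at the exact tokenizer length, which is the new default behavior this commit introduces.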
6 changes: 5 additions & 1 deletion src/axolotl/utils/models.py
@@ -301,7 +301,11 @@ def load_model(
     **model_kwargs,
 )

-embeddings_len = math.ceil(len(tokenizer) / 32) * 32
+embeddings_len = (
+    math.ceil(len(tokenizer) / 32) * 32
+    if cfg.resize_token_embeddings_to_32x
+    else len(tokenizer)
+)
 model.resize_token_embeddings(embeddings_len)

if (
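The models.py change above makes the round-up-to-32 behavior opt-in. The sizing logic can be sketched as a standalone helper (the function name `embeddings_len` as a free function is hypothetical; in the commit this is an inline expression in `load_model`):

```python
import math


def embeddings_len(tokenizer_len: int, resize_to_32x: bool) -> int:
    """Return the embedding table size for a given tokenizer vocab size.

    When resize_to_32x is True, round up to the next multiple of 32
    (reported to speed up training on some models); otherwise use the
    exact tokenizer length, which is the new default in this commit.
    """
    if resize_to_32x:
        return math.ceil(tokenizer_len / 32) * 32
    return tokenizer_len


print(embeddings_len(32000, False))  # 32000 — unchanged by default
print(embeddings_len(32001, True))   # 32032 — rounded up to a multiple of 32
```

Note that a vocab size already divisible by 32 (such as Llama's 32000) is unaffected either way; the flag only matters when added tokens push the length off a 32 boundary.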
