From 66215433495521fc02174b6ae849b9d0c69ff2da Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Sat, 22 Jul 2023 01:52:38 -0400
Subject: [PATCH] don't resize embeddings to multiples of 32x by default

---
 README.md                   | 3 +++
 src/axolotl/utils/models.py | 6 +++++-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 2e94d3230..4a5ec155c 100644
--- a/README.md
+++ b/README.md
@@ -322,6 +322,9 @@ tokenizer_type: AutoTokenizer
 trust_remote_code:
 # use_fast option for tokenizer loading from_pretrained, default to True
 tokenizer_use_fast:
+# resize the model embeddings when new tokens are added to multiples of 32
+# this is reported to improve training speed on some models
+resize_token_embeddings_to_32x:

 # whether you are training a 4-bit GPTQ quantized model
 gptq: true

diff --git a/src/axolotl/utils/models.py b/src/axolotl/utils/models.py
index a88a3807e..afe1632ea 100644
--- a/src/axolotl/utils/models.py
+++ b/src/axolotl/utils/models.py
@@ -301,7 +301,11 @@ def load_model(
         **model_kwargs,
     )

-    embeddings_len = math.ceil(len(tokenizer) / 32) * 32
+    embeddings_len = (
+        math.ceil(len(tokenizer) / 32) * 32
+        if cfg.resize_token_embeddings_to_32x
+        else len(tokenizer)
+    )
     model.resize_token_embeddings(embeddings_len)

     if (
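
For context, the conditional this patch introduces is equivalent to the standalone sketch below. This is a minimal illustration, not part of the patch itself; `target_embedding_size` and `round_to_32x` are hypothetical names chosen for the example.

```python
import math

def target_embedding_size(vocab_size: int, round_to_32x: bool) -> int:
    # With rounding enabled, pad the embedding table up to the next
    # multiple of 32; otherwise match the tokenizer vocab exactly.
    if round_to_32x:
        return math.ceil(vocab_size / 32) * 32
    return vocab_size

# A 32001-entry vocab (e.g. a 32000-token tokenizer plus one added
# special token) pads up to 32032 when rounding is on, and stays at
# 32001 when it is off (the new default behavior in this patch).
assert target_embedding_size(32001, round_to_32x=True) == 32032
assert target_embedding_size(32001, round_to_32x=False) == 32001
```

To opt back in to the old padding behavior, set `resize_token_embeddings_to_32x: true` in the config YAML; when the option is unset, its falsy default leaves the embedding table sized at `len(tokenizer)`.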