From 669f1d052c996a6b6c12bd146e15be30edb9be9d Mon Sep 17 00:00:00 2001
From: NanoCode012
Date: Sat, 7 Oct 2023 01:33:43 +0900
Subject: [PATCH] Fix: Higher vram usage for mistral and sample_packing (#691)

* Fix: Higher vram usage for mistral and sample_packing

* chore: update comment

* chore: lint
---
 examples/mistral/qlora.yml  | 8 ++++----
 src/axolotl/utils/models.py | 3 ++-
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/examples/mistral/qlora.yml b/examples/mistral/qlora.yml
index 5a131c5f3..9c64a8c2d 100644
--- a/examples/mistral/qlora.yml
+++ b/examples/mistral/qlora.yml
@@ -36,10 +36,10 @@ lora_target_modules:
   - k_proj
   - o_proj
 
-wandb_project: 
-wandb_entity: 
+wandb_project:
+wandb_entity:
 wandb_watch:
-wandb_run_id: 
+wandb_run_id:
 wandb_log_model:
 
 gradient_accumulation_steps: 4
@@ -76,4 +76,4 @@ fsdp_config:
 special_tokens:
   bos_token: "<s>"
   eos_token: "</s>"
-  unk_token: "<unk>"
\ No newline at end of file
+  unk_token: "<unk>"
diff --git a/src/axolotl/utils/models.py b/src/axolotl/utils/models.py
index aa6049bd3..2c60f00c2 100644
--- a/src/axolotl/utils/models.py
+++ b/src/axolotl/utils/models.py
@@ -81,7 +81,8 @@ def load_tokenizer(cfg):
         tokenizer.add_special_tokens({"pad_token": "[PAD]"})
     os.environ["TOKENIZERS_PARALLELISM"] = "false"
 
-    if cfg.is_mistral_derived_model:
+    # Mistral's official FA implementation requires left padding
+    if cfg.is_mistral_derived_model and cfg.flash_attention and not cfg.sample_packing:
         tokenizer.padding_side = "left"
 
     if cfg.special_tokens:
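
The Python sketch below restates the gating logic this patch adds to load_tokenizer, for readers skimming the diff. It is an illustrative stand-alone snippet, not axolotl code: mistral_padding_side and the SimpleNamespace config are assumed names standing in for axolotl's config object.

from types import SimpleNamespace


def mistral_padding_side(cfg):
    """Mirror the patched condition: return "left" only when a Mistral-derived
    model uses flash attention without sample packing; otherwise return None,
    meaning the tokenizer's padding_side is left untouched, as the patch does."""
    if cfg.is_mistral_derived_model and cfg.flash_attention and not cfg.sample_packing:
        return "left"
    return None


if __name__ == "__main__":
    # With sample_packing enabled, padding is no longer forced to "left",
    # which is the behavior change addressing the higher VRAM usage in #691.
    cfg = SimpleNamespace(
        is_mistral_derived_model=True, flash_attention=True, sample_packing=True
    )
    print(mistral_padding_side(cfg))  # prints: None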