From a67c7d0ff4a33fbf375567a7a2ce389899357a75 Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Tue, 9 Jan 2024 10:57:59 -0500
Subject: [PATCH 1/9] attempt to also run e2e tests that need gpus

---
 .github/workflows/tests-docker.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/tests-docker.yml b/.github/workflows/tests-docker.yml
index 380729637..bee483b0f 100644
--- a/.github/workflows/tests-docker.yml
+++ b/.github/workflows/tests-docker.yml
@@ -43,4 +43,4 @@ jobs:
             --tag test-axolotl
       - name: Unit Tests w docker image
         run: |
-          docker run --rm test-axolotl pytest --ignore=tests/e2e/ /workspace/axolotl/tests/
+          docker run --privileged --gpus '"all" --rm test-axolotl pytest /workspace/axolotl/tests/

From 94639383aa5e4bb4e91d04ebc67b983775791ca9 Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Tue, 9 Jan 2024 11:02:20 -0500
Subject: [PATCH 2/9] fix stray quote

---
 .github/workflows/tests-docker.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/tests-docker.yml b/.github/workflows/tests-docker.yml
index bee483b0f..699a4cf10 100644
--- a/.github/workflows/tests-docker.yml
+++ b/.github/workflows/tests-docker.yml
@@ -43,4 +43,4 @@ jobs:
             --tag test-axolotl
       - name: Unit Tests w docker image
         run: |
-          docker run --privileged --gpus '"all" --rm test-axolotl pytest /workspace/axolotl/tests/
+          docker run --privileged --gpus "all" --rm test-axolotl pytest /workspace/axolotl/tests/

From a811e6cd29012fcd643996c09e0bb9acf651e093 Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Tue, 9 Jan 2024 15:19:06 -0500
Subject: [PATCH 3/9] checkout specific github ref

---
 .github/workflows/tests-docker.yml | 1 +
 docker/Dockerfile                  | 3 ++-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/tests-docker.yml b/.github/workflows/tests-docker.yml
index 699a4cf10..988244335 100644
--- a/.github/workflows/tests-docker.yml
+++ b/.github/workflows/tests-docker.yml
@@ -39,6 +39,7 @@ jobs:
             --file ./docker/Dockerfile \
             --build-arg BASE_TAG=$BASE_TAG \
             --build-arg CUDA=$CUDA \
+            --build-arg GITHUB_REF=$GITHUB_REF \
             --build-arg PYTORCH_VERSION=$PYTORCH_VERSION \
             --tag test-axolotl
       - name: Unit Tests w docker image
diff --git a/docker/Dockerfile b/docker/Dockerfile
index f8e052856..267f8418f 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -6,6 +6,7 @@ ARG AXOLOTL_EXTRAS=""
 ARG CUDA="118"
 ENV BNB_CUDA_VERSION=$CUDA
 ARG PYTORCH_VERSION="2.0.1"
+ARG GITHUB_REF="main"
 
 ENV PYTORCH_VERSION=$PYTORCH_VERSION
 
@@ -14,7 +15,7 @@ RUN apt-get update && \
 
 WORKDIR /workspace
 
-RUN git clone --depth=1 https://github.com/OpenAccess-AI-Collective/axolotl.git
+RUN git clone --depth=1 -b $GITHUB_REF https://github.com/OpenAccess-AI-Collective/axolotl.git
 
 WORKDIR /workspace/axolotl

From bd57059d68432c5b4179c1e4b4f5df3a453601f5 Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Tue, 9 Jan 2024 15:36:40 -0500
Subject: [PATCH 4/9] dockerfile for tests with proper checkout

ensure wandb is disabled for docker pytests
clear wandb env after testing
make sure to provide a default val for pop
try skipping wandb validation tests
explicitly disable wandb in the e2e tests
explicitly report_to None to see if that fixes the docker e2e tests
split gpu from non-gpu unit tests
skip bf16 check in test for now
build docker w/o cache since it uses branch name ref
revert some changes now that caching is fixed
skip bf16 check if on gpu w/ support
---
 .github/workflows/tests-docker.yml  | 10 +++++---
 docker/Dockerfile                   |  3 +--
 docker/Dockerfile-tests             | 40 +++++++++++++++++++++++++++++
 src/axolotl/core/trainer_builder.py |  2 +-
 tests/e2e/test_lora_llama.py        |  8 +++++-
 tests/e2e/test_phi.py               | 12 +++++++--
 tests/test_validation.py            | 17 ++++++++++++
 7 files changed, 83 insertions(+), 9 deletions(-)
 create mode 100644 docker/Dockerfile-tests

diff --git a/.github/workflows/tests-docker.yml b/.github/workflows/tests-docker.yml
index 988244335..935dbf76f 100644
--- a/.github/workflows/tests-docker.yml
+++ b/.github/workflows/tests-docker.yml
@@ -36,12 +36,16 @@ jobs:
           PYTORCH_VERSION="${{ matrix.pytorch }}"
           # Build the Docker image
           docker build . \
-            --file ./docker/Dockerfile \
+            --file ./docker/Dockerfile-tests \
             --build-arg BASE_TAG=$BASE_TAG \
             --build-arg CUDA=$CUDA \
             --build-arg GITHUB_REF=$GITHUB_REF \
             --build-arg PYTORCH_VERSION=$PYTORCH_VERSION \
-            --tag test-axolotl
+            --tag test-axolotl \
+            --no-cache
       - name: Unit Tests w docker image
         run: |
-          docker run --privileged --gpus "all" --rm test-axolotl pytest /workspace/axolotl/tests/
+          docker run --rm test-axolotl pytest --ignore=tests/e2e/ /workspace/axolotl/tests/
+      - name: GPU Unit Tests w docker image
+        run: |
+          docker run --privileged --gpus "all" --env WANDB_DISABLED=true --rm test-axolotl pytest /workspace/axolotl/tests/e2e/
diff --git a/docker/Dockerfile b/docker/Dockerfile
index 267f8418f..f8e052856 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -6,7 +6,6 @@ ARG AXOLOTL_EXTRAS=""
 ARG CUDA="118"
 ENV BNB_CUDA_VERSION=$CUDA
 ARG PYTORCH_VERSION="2.0.1"
-ARG GITHUB_REF="main"
 
 ENV PYTORCH_VERSION=$PYTORCH_VERSION
 
@@ -15,7 +14,7 @@ RUN apt-get update && \
 
 WORKDIR /workspace
 
-RUN git clone --depth=1 -b $GITHUB_REF https://github.com/OpenAccess-AI-Collective/axolotl.git
+RUN git clone --depth=1 https://github.com/OpenAccess-AI-Collective/axolotl.git
 
 WORKDIR /workspace/axolotl
diff --git a/docker/Dockerfile-tests b/docker/Dockerfile-tests
new file mode 100644
index 000000000..2ec94f868
--- /dev/null
+++ b/docker/Dockerfile-tests
@@ -0,0 +1,40 @@
+ARG BASE_TAG=main-base
+FROM winglian/axolotl-base:$BASE_TAG
+
+ARG TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6+PTX"
+ARG AXOLOTL_EXTRAS=""
+ARG CUDA="118"
+ENV BNB_CUDA_VERSION=$CUDA
+ARG PYTORCH_VERSION="2.0.1"
+ARG GITHUB_REF="main"
+
+ENV PYTORCH_VERSION=$PYTORCH_VERSION
+
+RUN apt-get update && \
+    apt-get install -y --allow-change-held-packages vim curl nano libnccl2 libnccl-dev
+
+WORKDIR /workspace
+
+RUN git clone --depth=1 https://github.com/OpenAccess-AI-Collective/axolotl.git
+
+WORKDIR /workspace/axolotl
+
+RUN git fetch origin +$GITHUB_REF && \
+    git checkout FETCH_HEAD
+
+# If AXOLOTL_EXTRAS is set, append it in brackets
+RUN if [ "$AXOLOTL_EXTRAS" != "" ] ; then \
+        pip install -e .[deepspeed,flash-attn,mamba-ssm,$AXOLOTL_EXTRAS]; \
+    else \
+        pip install -e .[deepspeed,flash-attn,mamba-ssm]; \
+    fi
+
+# So we can test the Docker image
+RUN pip install pytest
+
+# fix so that git fetch/pull from remote works
+RUN git config remote.origin.fetch "+refs/heads/*:refs/remotes/origin/*" && \
+    git config --get remote.origin.fetch
+
+# helper for huggingface-login cli
+RUN git config --global credential.helper store
diff --git a/src/axolotl/core/trainer_builder.py b/src/axolotl/core/trainer_builder.py
index 7798ca455..883d1d1e8 100644
--- a/src/axolotl/core/trainer_builder.py
+++ b/src/axolotl/core/trainer_builder.py
@@ -752,7 +752,7 @@ def build(self, total_num_steps):
             report_to = "wandb"
         if self.cfg.use_mlflow:
             report_to = "mlflow"
-        training_arguments_kwargs["report_to"] = report_to
+        training_arguments_kwargs["report_to"] = None
         training_arguments_kwargs["run_name"] = (
             self.cfg.wandb_name if self.cfg.use_wandb else None
         )
diff --git a/tests/e2e/test_lora_llama.py b/tests/e2e/test_lora_llama.py
index 9d795601a..dc9e31c5e 100644
--- a/tests/e2e/test_lora_llama.py
+++ b/tests/e2e/test_lora_llama.py
@@ -7,6 +7,8 @@
 import unittest
 from pathlib import Path
 
+from transformers.utils import is_torch_bf16_gpu_available
+
 from axolotl.cli import load_datasets
 from axolotl.common.cli import TrainerCliArgs
 from axolotl.train import train
@@ -101,9 +103,13 @@ def test_lora_packing(self, temp_dir):
                 "learning_rate": 0.00001,
                 "optimizer": "adamw_torch",
                 "lr_scheduler": "cosine",
-                "bf16": True,
             }
         )
+        if is_torch_bf16_gpu_available():
+            cfg.bf16 = True
+        else:
+            cfg.fp16 = True
+
         normalize_config(cfg)
         cli_args = TrainerCliArgs()
         dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
diff --git a/tests/e2e/test_phi.py b/tests/e2e/test_phi.py
index b21fc14ff..80c748cc9 100644
--- a/tests/e2e/test_phi.py
+++ b/tests/e2e/test_phi.py
@@ -8,6 +8,7 @@
 from pathlib import Path
 
 import pytest
+from transformers.utils import is_torch_bf16_gpu_available
 
 from axolotl.cli import load_datasets
 from axolotl.common.cli import TrainerCliArgs
@@ -59,7 +60,6 @@ def test_phi2_ft(self, temp_dir):
                 "learning_rate": 0.00001,
                 "optimizer": "paged_adamw_8bit",
                 "lr_scheduler": "cosine",
-                "bf16": True,
                 "flash_attention": True,
                 "max_steps": 10,
                 "save_steps": 10,
@@ -67,6 +67,10 @@
                 "save_safetensors": True,
             }
         )
+        if is_torch_bf16_gpu_available():
+            cfg.bf16 = True
+        else:
+            cfg.fp16 = True
         normalize_config(cfg)
         cli_args = TrainerCliArgs()
         dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
@@ -110,9 +114,13 @@ def test_ft_packed(self, temp_dir):
                 "learning_rate": 0.00001,
                 "optimizer": "adamw_bnb_8bit",
                 "lr_scheduler": "cosine",
-                "bf16": True,
             }
         )
+        if is_torch_bf16_gpu_available():
+            cfg.bf16 = True
+        else:
+            cfg.fp16 = True
+
         normalize_config(cfg)
         cli_args = TrainerCliArgs()
         dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
diff --git a/tests/test_validation.py b/tests/test_validation.py
index 12997b023..d2518a7df 100644
--- a/tests/test_validation.py
+++ b/tests/test_validation.py
@@ -6,6 +6,7 @@
 from typing import Optional
 
 import pytest
+from transformers.utils import is_torch_bf16_gpu_available
 
 from axolotl.utils.config import validate_config
 from axolotl.utils.dict import DictDefault
@@ -354,6 +355,10 @@ def test_packing(self):
         with pytest.raises(ValueError, match=regex_exp):
             validate_config(cfg)
 
+    @pytest.mark.skipif(
+        is_torch_bf16_gpu_available(),
+        reason="test should only run on gpus w/o bf16 support",
+    )
     def test_merge_lora_no_bf16_fail(self):
         """
         This is assumed to be run on a CPU machine, so bf16 is not supported.
@@ -778,6 +783,15 @@ def test_wandb_sets_env(self):
         assert os.environ.get("WANDB_LOG_MODEL", "") == "checkpoint"
         assert os.environ.get("WANDB_DISABLED", "") != "true"
 
+        os.environ.pop("WANDB_PROJECT", None)
+        os.environ.pop("WANDB_NAME", None)
+        os.environ.pop("WANDB_RUN_ID", None)
+        os.environ.pop("WANDB_ENTITY", None)
+        os.environ.pop("WANDB_MODE", None)
+        os.environ.pop("WANDB_WATCH", None)
+        os.environ.pop("WANDB_LOG_MODEL", None)
+        os.environ.pop("WANDB_DISABLED", None)
+
     def test_wandb_set_disabled(self):
         cfg = DictDefault({})
 
@@ -798,3 +812,6 @@ def test_wandb_set_disabled(self):
         setup_wandb_env_vars(cfg)
 
         assert os.environ.get("WANDB_DISABLED", "") != "true"
+
+        os.environ.pop("WANDB_PROJECT", None)
+        os.environ.pop("WANDB_DISABLED", None)

From 501cb0c3c83114f1252d5c4330a8e4d73cd7715a Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Tue, 9 Jan 2024 18:40:00 -0500
Subject: [PATCH 5/9] pytest skip for auto-gptq requirements

---
 src/axolotl/core/trainer_builder.py | 2 +-
 tests/e2e/test_lora_llama.py        | 4 +++-
 tests/e2e/test_mamba.py             | 4 ++--
 3 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/axolotl/core/trainer_builder.py b/src/axolotl/core/trainer_builder.py
index 883d1d1e8..7798ca455 100644
--- a/src/axolotl/core/trainer_builder.py
+++ b/src/axolotl/core/trainer_builder.py
@@ -752,7 +752,7 @@ def build(self, total_num_steps):
             report_to = "wandb"
         if self.cfg.use_mlflow:
             report_to = "mlflow"
-        training_arguments_kwargs["report_to"] = None
+        training_arguments_kwargs["report_to"] = report_to
         training_arguments_kwargs["run_name"] = (
             self.cfg.wandb_name if self.cfg.use_wandb else None
         )
diff --git a/tests/e2e/test_lora_llama.py b/tests/e2e/test_lora_llama.py
index dc9e31c5e..21142a95c 100644
--- a/tests/e2e/test_lora_llama.py
+++ b/tests/e2e/test_lora_llama.py
@@ -7,7 +7,8 @@
 import unittest
 from pathlib import Path
 
-from transformers.utils import is_torch_bf16_gpu_available
+import pytest
+from transformers.utils import is_auto_gptq_available, is_torch_bf16_gpu_available
 
 from axolotl.cli import load_datasets
 from axolotl.common.cli import TrainerCliArgs
@@ -117,6 +118,7 @@ def test_lora_packing(self, temp_dir):
         train(cfg=cfg, cli_args=cli_args, dataset_meta=dataset_meta)
         assert (Path(temp_dir) / "adapter_model.bin").exists()
 
+    @pytest.mark.skipif(not is_auto_gptq_available(), reason="auto-gptq not available")
     @with_temp_dir
     def test_lora_gptq(self, temp_dir):
         # pylint: disable=duplicate-code
diff --git a/tests/e2e/test_mamba.py b/tests/e2e/test_mamba.py
index 463b0ddac..65a6d7d3d 100644
--- a/tests/e2e/test_mamba.py
+++ b/tests/e2e/test_mamba.py
@@ -19,9 +19,9 @@
 os.environ["WANDB_DISABLED"] = "true"
 
 
-class TestMistral(unittest.TestCase):
+class TestMamba(unittest.TestCase):
     """
-    Test case for Llama models using LoRA
+    Test case for Mamba models
     """
 
     @with_temp_dir

From ecfea1e2cf913dd257aee38fe45b8b447f46d295 Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Tue, 9 Jan 2024 18:54:38 -0500
Subject: [PATCH 6/9] skip mamba tests for now, split multipack and non-packed
 lora llama tests

---
 tests/e2e/test_lora_llama.py           | 101 --------------------
 tests/e2e/test_lora_llama_multipack.py | 126 +++++++++++++++++++++++++
 tests/e2e/test_mamba.py                |   3 +
 3 files changed, 129 insertions(+), 101 deletions(-)
 create mode 100644 tests/e2e/test_lora_llama_multipack.py

diff --git a/tests/e2e/test_lora_llama.py b/tests/e2e/test_lora_llama.py
index 21142a95c..c79652bef 100644
--- a/tests/e2e/test_lora_llama.py
+++ b/tests/e2e/test_lora_llama.py
@@ -7,9 +7,6 @@
 import unittest
 from pathlib import Path
 
-import pytest
-from transformers.utils import is_auto_gptq_available, is_torch_bf16_gpu_available
-
 from axolotl.cli import load_datasets
 from axolotl.common.cli import TrainerCliArgs
 from axolotl.train import train
@@ -68,101 +65,3 @@ def test_lora(self, temp_dir):
 
         train(cfg=cfg, cli_args=cli_args, dataset_meta=dataset_meta)
         assert (Path(temp_dir) / "adapter_model.bin").exists()
-
-    @with_temp_dir
-    def test_lora_packing(self, temp_dir):
-        # pylint: disable=duplicate-code
-        cfg = DictDefault(
-            {
-                "base_model": "JackFram/llama-68m",
-                "tokenizer_type": "LlamaTokenizer",
-                "sequence_len": 1024,
-                "sample_packing": True,
-                "flash_attention": True,
-                "load_in_8bit": True,
-                "adapter": "lora",
-                "lora_r": 32,
-                "lora_alpha": 64,
-                "lora_dropout": 0.05,
-                "lora_target_linear": True,
-                "val_set_size": 0.1,
-                "special_tokens": {
-                    "unk_token": "<unk>",
-                    "bos_token": "<s>",
-                    "eos_token": "</s>",
-                },
-                "datasets": [
-                    {
-                        "path": "mhenrichsen/alpaca_2k_test",
-                        "type": "alpaca",
-                    },
-                ],
-                "num_epochs": 2,
-                "micro_batch_size": 8,
-                "gradient_accumulation_steps": 1,
-                "output_dir": temp_dir,
-                "learning_rate": 0.00001,
-                "optimizer": "adamw_torch",
-                "lr_scheduler": "cosine",
-            }
-        )
-        if is_torch_bf16_gpu_available():
-            cfg.bf16 = True
-        else:
-            cfg.fp16 = True
-
-        normalize_config(cfg)
-        cli_args = TrainerCliArgs()
-        dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
-
-        train(cfg=cfg, cli_args=cli_args, dataset_meta=dataset_meta)
-        assert (Path(temp_dir) / "adapter_model.bin").exists()
-
-    @pytest.mark.skipif(not is_auto_gptq_available(), reason="auto-gptq not available")
-    @with_temp_dir
-    def test_lora_gptq(self, temp_dir):
-        # pylint: disable=duplicate-code
-        cfg = DictDefault(
-            {
-                "base_model": "TheBlokeAI/jackfram_llama-68m-GPTQ",
-                "model_type": "AutoModelForCausalLM",
-                "tokenizer_type": "LlamaTokenizer",
-                "sequence_len": 1024,
-                "sample_packing": True,
-                "flash_attention": True,
-                "load_in_8bit": True,
-                "adapter": "lora",
-                "gptq": True,
-                "gptq_disable_exllama": True,
-                "lora_r": 32,
-                "lora_alpha": 64,
-                "lora_dropout": 0.05,
-                "lora_target_linear": True,
-                "val_set_size": 0.1,
-                "special_tokens": {
-                    "unk_token": "<unk>",
-                    "bos_token": "<s>",
-                    "eos_token": "</s>",
-                },
-                "datasets": [
-                    {
-                        "path": "mhenrichsen/alpaca_2k_test",
-                        "type": "alpaca",
-                    },
-                ],
-                "num_epochs": 2,
-                "save_steps": 0.5,
-                "micro_batch_size": 8,
-                "gradient_accumulation_steps": 1,
-                "output_dir": temp_dir,
-                "learning_rate": 0.00001,
-                "optimizer": "adamw_torch",
-                "lr_scheduler": "cosine",
-            }
-        )
-        normalize_config(cfg)
-        cli_args = TrainerCliArgs()
-        dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
-
-        train(cfg=cfg, cli_args=cli_args, dataset_meta=dataset_meta)
-        assert (Path(temp_dir) / "adapter_model.bin").exists()
diff --git a/tests/e2e/test_lora_llama_multipack.py b/tests/e2e/test_lora_llama_multipack.py
new file mode 100644
index 000000000..04c9fb4f4
--- /dev/null
+++ b/tests/e2e/test_lora_llama_multipack.py
@@ -0,0 +1,126 @@
+"""
+E2E tests for lora llama
+"""
+
+import logging
+import os
+import unittest
+from pathlib import Path
+
+import pytest
+from transformers.utils import is_auto_gptq_available, is_torch_bf16_gpu_available
+
+from axolotl.cli import load_datasets
+from axolotl.common.cli import TrainerCliArgs
+from axolotl.train import train
+from axolotl.utils.config import normalize_config
+from axolotl.utils.dict import DictDefault
+
+from .utils import with_temp_dir
+
+LOG = logging.getLogger("axolotl.tests.e2e")
+os.environ["WANDB_DISABLED"] = "true"
+
+
+class TestLoraLlama(unittest.TestCase):
+    """
+    Test case for Llama models using LoRA w multipack
+    """
+
+    @with_temp_dir
+    def test_lora_packing(self, temp_dir):
+        # pylint: disable=duplicate-code
+        cfg = DictDefault(
+            {
+                "base_model": "JackFram/llama-68m",
+                "tokenizer_type": "LlamaTokenizer",
+                "sequence_len": 1024,
+                "sample_packing": True,
+                "flash_attention": True,
+                "load_in_8bit": True,
+                "adapter": "lora",
+                "lora_r": 32,
+                "lora_alpha": 64,
+                "lora_dropout": 0.05,
+                "lora_target_linear": True,
+                "val_set_size": 0.1,
+                "special_tokens": {
+                    "unk_token": "<unk>",
+                    "bos_token": "<s>",
+                    "eos_token": "</s>",
+                },
+                "datasets": [
+                    {
+                        "path": "mhenrichsen/alpaca_2k_test",
+                        "type": "alpaca",
+                    },
+                ],
+                "num_epochs": 2,
+                "micro_batch_size": 8,
+                "gradient_accumulation_steps": 1,
+                "output_dir": temp_dir,
+                "learning_rate": 0.00001,
+                "optimizer": "adamw_torch",
+                "lr_scheduler": "cosine",
+            }
+        )
+        if is_torch_bf16_gpu_available():
+            cfg.bf16 = True
+        else:
+            cfg.fp16 = True
+
+        normalize_config(cfg)
+        cli_args = TrainerCliArgs()
+        dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
+
+        train(cfg=cfg, cli_args=cli_args, dataset_meta=dataset_meta)
+        assert (Path(temp_dir) / "adapter_model.bin").exists()
+
+    @pytest.mark.skipif(not is_auto_gptq_available(), reason="auto-gptq not available")
+    @with_temp_dir
+    def test_lora_gptq_packed(self, temp_dir):
+        # pylint: disable=duplicate-code
+        cfg = DictDefault(
+            {
+                "base_model": "TheBlokeAI/jackfram_llama-68m-GPTQ",
+                "model_type": "AutoModelForCausalLM",
+                "tokenizer_type": "LlamaTokenizer",
+                "sequence_len": 1024,
+                "sample_packing": True,
+                "flash_attention": True,
+                "load_in_8bit": True,
+                "adapter": "lora",
+                "gptq": True,
+                "gptq_disable_exllama": True,
+                "lora_r": 32,
+                "lora_alpha": 64,
+                "lora_dropout": 0.05,
+                "lora_target_linear": True,
+                "val_set_size": 0.1,
+                "special_tokens": {
+                    "unk_token": "<unk>",
+                    "bos_token": "<s>",
+                    "eos_token": "</s>",
+                },
+                "datasets": [
+                    {
+                        "path": "mhenrichsen/alpaca_2k_test",
+                        "type": "alpaca",
+                    },
+                ],
+                "num_epochs": 2,
+                "save_steps": 0.5,
+                "micro_batch_size": 8,
+                "gradient_accumulation_steps": 1,
+                "output_dir": temp_dir,
+                "learning_rate": 0.00001,
+                "optimizer": "adamw_torch",
+                "lr_scheduler": "cosine",
+            }
+        )
+        normalize_config(cfg)
+        cli_args = TrainerCliArgs()
+        dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
+
+        train(cfg=cfg, cli_args=cli_args, dataset_meta=dataset_meta)
+        assert (Path(temp_dir) / "adapter_model.bin").exists()
diff --git a/tests/e2e/test_mamba.py b/tests/e2e/test_mamba.py
index 65a6d7d3d..8755fa4d5 100644
--- a/tests/e2e/test_mamba.py
+++ b/tests/e2e/test_mamba.py
@@ -7,6 +7,8 @@
 import unittest
 from pathlib import Path
 
+import pytest
+
 from axolotl.cli import load_datasets
 from axolotl.common.cli import TrainerCliArgs
 from axolotl.train import train
@@ -19,6 +21,7 @@
 os.environ["WANDB_DISABLED"] = "true"
 
 
+@pytest.mark.skip(reason="skipping until upstreamed into transformers")
 class TestMamba(unittest.TestCase):
     """
     Test case for Mamba models

From 68bce8916061a6398f3eb2add9c426eeedce4270 Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Tue, 9 Jan 2024 19:40:31 -0500
Subject: [PATCH 7/9] split tests that use monkeypatches

---
 .github/workflows/tests-docker.yml                   | 5 ++++-
 tests/e2e/patched/__init__.py                        | 0
 tests/e2e/{ => patched}/test_lora_llama_multipack.py | 0
 tests/e2e/{ => patched}/test_mistral_samplepack.py   | 0
 tests/e2e/{ => patched}/test_mixtral_samplepack.py   | 0
 tests/e2e/{ => patched}/test_model_patches.py        | 0
 6 files changed, 4 insertions(+), 1 deletion(-)
 create mode 100644 tests/e2e/patched/__init__.py
 rename tests/e2e/{ => patched}/test_lora_llama_multipack.py (100%)
 rename tests/e2e/{ => patched}/test_mistral_samplepack.py (100%)
 rename tests/e2e/{ => patched}/test_mixtral_samplepack.py (100%)
 rename tests/e2e/{ => patched}/test_model_patches.py (100%)

diff --git a/.github/workflows/tests-docker.yml b/.github/workflows/tests-docker.yml
index 935dbf76f..6059946fc 100644
--- a/.github/workflows/tests-docker.yml
+++ b/.github/workflows/tests-docker.yml
@@ -48,4 +48,7 @@ jobs:
           docker run --rm test-axolotl pytest --ignore=tests/e2e/ /workspace/axolotl/tests/
       - name: GPU Unit Tests w docker image
         run: |
-          docker run --privileged --gpus "all" --env WANDB_DISABLED=true --rm test-axolotl pytest /workspace/axolotl/tests/e2e/
+          docker run --privileged --gpus "all" --env WANDB_DISABLED=true --rm test-axolotl pytest --ignore=tests/e2e/patched/ /workspace/axolotl/tests/e2e/
+      - name: GPU Unit Tests monkeypatched w docker image
+        run: |
+          docker run --privileged --gpus "all" --env WANDB_DISABLED=true --rm test-axolotl pytest /workspace/axolotl/tests/e2e/patched/
diff --git a/tests/e2e/patched/__init__.py b/tests/e2e/patched/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/e2e/test_lora_llama_multipack.py b/tests/e2e/patched/test_lora_llama_multipack.py
similarity index 100%
rename from tests/e2e/test_lora_llama_multipack.py
rename to tests/e2e/patched/test_lora_llama_multipack.py
diff --git a/tests/e2e/test_mistral_samplepack.py b/tests/e2e/patched/test_mistral_samplepack.py
similarity index 100%
rename from tests/e2e/test_mistral_samplepack.py
rename to tests/e2e/patched/test_mistral_samplepack.py
diff --git a/tests/e2e/test_mixtral_samplepack.py b/tests/e2e/patched/test_mixtral_samplepack.py
similarity index 100%
rename from tests/e2e/test_mixtral_samplepack.py
rename to tests/e2e/patched/test_mixtral_samplepack.py
diff --git a/tests/e2e/test_model_patches.py b/tests/e2e/patched/test_model_patches.py
similarity index 100%
rename from tests/e2e/test_model_patches.py
rename to tests/e2e/patched/test_model_patches.py

From ad9b7e3d4642838b0ff9c93b9febd672b9450533 Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Tue, 9 Jan 2024 20:18:28 -0500
Subject: [PATCH 8/9] fix relative import for prev commit

---
 tests/e2e/patched/test_lora_llama_multipack.py | 2 +-
 tests/e2e/patched/test_mistral_samplepack.py   | 2 +-
 tests/e2e/patched/test_mixtral_samplepack.py   | 2 +-
 tests/e2e/patched/test_model_patches.py        | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/e2e/patched/test_lora_llama_multipack.py b/tests/e2e/patched/test_lora_llama_multipack.py
index 04c9fb4f4..079a8e924 100644
--- a/tests/e2e/patched/test_lora_llama_multipack.py
+++ b/tests/e2e/patched/test_lora_llama_multipack.py
@@ -16,7 +16,7 @@
 from axolotl.utils.config import normalize_config
 from axolotl.utils.dict import DictDefault
 
-from .utils import with_temp_dir
+from ..utils import with_temp_dir
 
 LOG = logging.getLogger("axolotl.tests.e2e")
 os.environ["WANDB_DISABLED"] = "true"
diff --git a/tests/e2e/patched/test_mistral_samplepack.py b/tests/e2e/patched/test_mistral_samplepack.py
index cefbd7dc0..c0327d7ef 100644
--- a/tests/e2e/patched/test_mistral_samplepack.py
+++ b/tests/e2e/patched/test_mistral_samplepack.py
@@ -15,7 +15,7 @@
 from axolotl.utils.config import normalize_config
 from axolotl.utils.dict import DictDefault
 
-from .utils import with_temp_dir
+from ..utils import with_temp_dir
 
 LOG = logging.getLogger("axolotl.tests.e2e")
 os.environ["WANDB_DISABLED"] = "true"
diff --git a/tests/e2e/patched/test_mixtral_samplepack.py b/tests/e2e/patched/test_mixtral_samplepack.py
index b43702a51..4eff3825a 100644
--- a/tests/e2e/patched/test_mixtral_samplepack.py
+++ b/tests/e2e/patched/test_mixtral_samplepack.py
@@ -15,7 +15,7 @@
 from axolotl.utils.config import normalize_config
 from axolotl.utils.dict import DictDefault
 
-from .utils import with_temp_dir
+from ..utils import with_temp_dir
 
 LOG = logging.getLogger("axolotl.tests.e2e")
 os.environ["WANDB_DISABLED"] = "true"
diff --git a/tests/e2e/patched/test_model_patches.py b/tests/e2e/patched/test_model_patches.py
index eb1124464..65d372c73 100644
--- a/tests/e2e/patched/test_model_patches.py
+++ b/tests/e2e/patched/test_model_patches.py
@@ -9,7 +9,7 @@
 from axolotl.utils.dict import DictDefault
 from axolotl.utils.models import load_model, load_tokenizer
 
-from .utils import with_temp_dir
+from ..utils import with_temp_dir
 
 
 class TestModelPatches(unittest.TestCase):

From 3bf3e08b9f706c9ac9f6b14756a4e03eb155d010 Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Tue, 9 Jan 2024 20:38:35 -0500
Subject: [PATCH 9/9] move other tests using monkeypatches to the correct run

---
 tests/e2e/{ => patched}/test_fused_llama.py | 2 +-
 tests/e2e/{ => patched}/test_resume.py      | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)
 rename tests/e2e/{ => patched}/test_fused_llama.py (98%)
 rename tests/e2e/{ => patched}/test_resume.py (96%)

diff --git a/tests/e2e/test_fused_llama.py b/tests/e2e/patched/test_fused_llama.py
similarity index 98%
rename from tests/e2e/test_fused_llama.py
rename to tests/e2e/patched/test_fused_llama.py
index 513df69f9..96ff5eee8 100644
--- a/tests/e2e/test_fused_llama.py
+++ b/tests/e2e/patched/test_fused_llama.py
@@ -15,7 +15,7 @@
 from axolotl.utils.config import normalize_config
 from axolotl.utils.dict import DictDefault
 
-from .utils import with_temp_dir
+from ..utils import with_temp_dir
 
 LOG = logging.getLogger("axolotl.tests.e2e")
 os.environ["WANDB_DISABLED"] = "true"
diff --git a/tests/e2e/test_resume.py b/tests/e2e/patched/test_resume.py
similarity index 96%
rename from tests/e2e/test_resume.py
rename to tests/e2e/patched/test_resume.py
index 98ec3ac6b..dfe9e8625 100644
--- a/tests/e2e/test_resume.py
+++ b/tests/e2e/patched/test_resume.py
@@ -17,7 +17,7 @@
 from axolotl.utils.config import normalize_config
 from axolotl.utils.dict import DictDefault
 
-from .utils import most_recent_subdir, with_temp_dir
+from ..utils import most_recent_subdir, with_temp_dir
 
 LOG = logging.getLogger("axolotl.tests.e2e")
 os.environ["WANDB_DISABLED"] = "true"
@@ -29,7 +29,7 @@ class TestResumeLlama(unittest.TestCase):
     """
 
     @with_temp_dir
-    def test_resume_qlora(self, temp_dir):
+    def test_resume_qlora_packed(self, temp_dir):
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
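
Note: with the full series applied, the CI steps above amount to the following local run — a sketch only, assuming a checkout of this branch, Docker with the NVIDIA container runtime, and the Dockerfile defaults for the BASE_TAG/CUDA/PYTORCH_VERSION build args:

# Build the test image; Dockerfile-tests checks out the ref named by GITHUB_REF,
# so the workflow builds with --no-cache to avoid stale branch contents.
docker build . \
  --file ./docker/Dockerfile-tests \
  --build-arg GITHUB_REF=main \
  --tag test-axolotl \
  --no-cache

# CPU-only unit tests (e2e excluded).
docker run --rm test-axolotl pytest --ignore=tests/e2e/ /workspace/axolotl/tests/

# GPU e2e tests: the unpatched suite first, then the monkeypatched suite in
# isolation so its model patches cannot leak into other tests.
docker run --privileged --gpus "all" --env WANDB_DISABLED=true --rm test-axolotl \
  pytest --ignore=tests/e2e/patched/ /workspace/axolotl/tests/e2e/
docker run --privileged --gpus "all" --env WANDB_DISABLED=true --rm test-axolotl \
  pytest /workspace/axolotl/tests/e2e/patched/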