From a67c7d0ff4a33fbf375567a7a2ce389899357a75 Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Tue, 9 Jan 2024 10:57:59 -0500
Subject: [PATCH 1/9] attempt to also run e2e tests that need gpus

---
 .github/workflows/tests-docker.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/tests-docker.yml b/.github/workflows/tests-docker.yml
index 380729637..bee483b0f 100644
--- a/.github/workflows/tests-docker.yml
+++ b/.github/workflows/tests-docker.yml
@@ -43,4 +43,4 @@ jobs:
             --tag test-axolotl
       - name: Unit Tests w docker image
         run: |
-          docker run --rm test-axolotl pytest --ignore=tests/e2e/ /workspace/axolotl/tests/
+          docker run --privileged --gpus '"all" --rm test-axolotl pytest /workspace/axolotl/tests/

From 94639383aa5e4bb4e91d04ebc67b983775791ca9 Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Tue, 9 Jan 2024 11:02:20 -0500
Subject: [PATCH 2/9] fix stray quote

---
 .github/workflows/tests-docker.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/tests-docker.yml b/.github/workflows/tests-docker.yml
index bee483b0f..699a4cf10 100644
--- a/.github/workflows/tests-docker.yml
+++ b/.github/workflows/tests-docker.yml
@@ -43,4 +43,4 @@ jobs:
             --tag test-axolotl
       - name: Unit Tests w docker image
         run: |
-          docker run --privileged --gpus '"all" --rm test-axolotl pytest /workspace/axolotl/tests/
+          docker run --privileged --gpus "all" --rm test-axolotl pytest /workspace/axolotl/tests/

From a811e6cd29012fcd643996c09e0bb9acf651e093 Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Tue, 9 Jan 2024 15:19:06 -0500
Subject: [PATCH 3/9] checkout specific github ref

---
 .github/workflows/tests-docker.yml | 1 +
 docker/Dockerfile                  | 3 ++-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/tests-docker.yml b/.github/workflows/tests-docker.yml
index 699a4cf10..988244335 100644
--- a/.github/workflows/tests-docker.yml
+++ b/.github/workflows/tests-docker.yml
@@ -39,6 +39,7 @@ jobs:
             --file ./docker/Dockerfile \
             --build-arg BASE_TAG=$BASE_TAG \
             --build-arg CUDA=$CUDA \
+            --build-arg GITHUB_REF=$GITHUB_REF \
             --build-arg PYTORCH_VERSION=$PYTORCH_VERSION \
             --tag test-axolotl
       - name: Unit Tests w docker image
diff --git a/docker/Dockerfile b/docker/Dockerfile
index f8e052856..267f8418f 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -6,6 +6,7 @@ ARG AXOLOTL_EXTRAS=""
 ARG CUDA="118"
 ENV BNB_CUDA_VERSION=$CUDA
 ARG PYTORCH_VERSION="2.0.1"
+ARG GITHUB_REF="main"
 
 ENV PYTORCH_VERSION=$PYTORCH_VERSION
 
@@ -14,7 +15,7 @@ RUN apt-get update && \
 
 WORKDIR /workspace
 
-RUN git clone --depth=1 https://github.com/OpenAccess-AI-Collective/axolotl.git
+RUN git clone --depth=1 -b $GITHUB_REF https://github.com/OpenAccess-AI-Collective/axolotl.git
 
 WORKDIR /workspace/axolotl

From bd57059d68432c5b4179c1e4b4f5df3a453601f5 Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Tue, 9 Jan 2024 15:36:40 -0500
Subject: [PATCH 4/9] dockerfile for tests with proper checkout

ensure wandb is disabled for docker pytests
clear wandb env after testing
make sure to provide a default val for pop
try skipping wandb validation tests
explicitly disable wandb in the e2e tests
explicitly report_to None to see if that fixes the docker e2e tests
split gpu from non-gpu unit tests
skip bf16 check in test for now
build docker w/o cache since it uses branch name ref
revert some changes now that caching is fixed
skip bf16 check if on gpu w/ support
---
 .github/workflows/tests-docker.yml  | 10 +++++---
 docker/Dockerfile                   |  3 +--
 docker/Dockerfile-tests             | 40 +++++++++++++++++++++++++++++
 src/axolotl/core/trainer_builder.py |  2 +-
 tests/e2e/test_lora_llama.py        |  8 +++++-
 tests/e2e/test_phi.py               | 12 +++++++--
 tests/test_validation.py            | 17 ++++++++++++
 7 files changed, 83 insertions(+), 9 deletions(-)
 create mode 100644 docker/Dockerfile-tests

diff --git a/.github/workflows/tests-docker.yml b/.github/workflows/tests-docker.yml
index 988244335..935dbf76f 100644
--- a/.github/workflows/tests-docker.yml
+++ b/.github/workflows/tests-docker.yml
@@ -36,12 +36,16 @@ jobs:
           PYTORCH_VERSION="${{ matrix.pytorch }}"
           # Build the Docker image
           docker build . \
-            --file ./docker/Dockerfile \
+            --file ./docker/Dockerfile-tests \
             --build-arg BASE_TAG=$BASE_TAG \
             --build-arg CUDA=$CUDA \
             --build-arg GITHUB_REF=$GITHUB_REF \
             --build-arg PYTORCH_VERSION=$PYTORCH_VERSION \
-            --tag test-axolotl
+            --tag test-axolotl \
+            --no-cache
       - name: Unit Tests w docker image
         run: |
-          docker run --privileged --gpus "all" --rm test-axolotl pytest /workspace/axolotl/tests/
+          docker run --rm test-axolotl pytest --ignore=tests/e2e/ /workspace/axolotl/tests/
+      - name: GPU Unit Tests w docker image
+        run: |
+          docker run --privileged --gpus "all" --env WANDB_DISABLED=true --rm test-axolotl pytest /workspace/axolotl/tests/e2e/
diff --git a/docker/Dockerfile b/docker/Dockerfile
index 267f8418f..f8e052856 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -6,7 +6,6 @@ ARG AXOLOTL_EXTRAS=""
 ARG CUDA="118"
 ENV BNB_CUDA_VERSION=$CUDA
 ARG PYTORCH_VERSION="2.0.1"
-ARG GITHUB_REF="main"
 
 ENV PYTORCH_VERSION=$PYTORCH_VERSION
 
@@ -15,7 +14,7 @@ RUN apt-get update && \
 
 WORKDIR /workspace
 
-RUN git clone --depth=1 -b $GITHUB_REF https://github.com/OpenAccess-AI-Collective/axolotl.git
+RUN git clone --depth=1 https://github.com/OpenAccess-AI-Collective/axolotl.git
 
 WORKDIR /workspace/axolotl
diff --git a/docker/Dockerfile-tests b/docker/Dockerfile-tests
new file mode 100644
index 000000000..2ec94f868
--- /dev/null
+++ b/docker/Dockerfile-tests
@@ -0,0 +1,40 @@
+ARG BASE_TAG=main-base
+FROM winglian/axolotl-base:$BASE_TAG
+
+ARG TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6+PTX"
+ARG AXOLOTL_EXTRAS=""
+ARG CUDA="118"
+ENV BNB_CUDA_VERSION=$CUDA
+ARG PYTORCH_VERSION="2.0.1"
+ARG GITHUB_REF="main"
+
+ENV PYTORCH_VERSION=$PYTORCH_VERSION
+
+RUN apt-get update && \
+    apt-get install -y --allow-change-held-packages vim curl nano libnccl2 libnccl-dev
+
+WORKDIR /workspace
+
+RUN git clone --depth=1 https://github.com/OpenAccess-AI-Collective/axolotl.git
+
+WORKDIR /workspace/axolotl
+
+RUN git fetch origin +$GITHUB_REF && \
+    git checkout FETCH_HEAD
+
+# If AXOLOTL_EXTRAS is set, append it in brackets
+RUN if [ "$AXOLOTL_EXTRAS" != "" ] ; then \
+        pip install -e .[deepspeed,flash-attn,mamba-ssm,$AXOLOTL_EXTRAS]; \
+    else \
+        pip install -e .[deepspeed,flash-attn,mamba-ssm]; \
+    fi
+
+# So we can test the Docker image
+RUN pip install pytest
+
+# fix so that git fetch/pull from remote works
+RUN git config remote.origin.fetch "+refs/heads/*:refs/remotes/origin/*" && \
+    git config --get remote.origin.fetch
+
+# helper for huggingface-login cli
+RUN git config --global credential.helper store
diff --git a/src/axolotl/core/trainer_builder.py b/src/axolotl/core/trainer_builder.py
index 7798ca455..883d1d1e8 100644
--- a/src/axolotl/core/trainer_builder.py
+++ b/src/axolotl/core/trainer_builder.py
@@ -752,7 +752,7 @@ def build(self, total_num_steps):
             report_to = "wandb"
         if self.cfg.use_mlflow:
             report_to = "mlflow"
-        training_arguments_kwargs["report_to"] = report_to
+        training_arguments_kwargs["report_to"] = None
         training_arguments_kwargs["run_name"] = (
             self.cfg.wandb_name if self.cfg.use_wandb else None
         )
diff --git a/tests/e2e/test_lora_llama.py b/tests/e2e/test_lora_llama.py
index 9d795601a..dc9e31c5e 100644
--- a/tests/e2e/test_lora_llama.py
+++ b/tests/e2e/test_lora_llama.py
@@ -7,6 +7,8 @@
 import unittest
 from pathlib import Path
 
+from transformers.utils import is_torch_bf16_gpu_available
+
 from axolotl.cli import load_datasets
 from axolotl.common.cli import TrainerCliArgs
 from axolotl.train import train
@@ -101,9 +103,13 @@ def test_lora_packing(self, temp_dir):
                 "learning_rate": 0.00001,
                 "optimizer": "adamw_torch",
                 "lr_scheduler": "cosine",
-                "bf16": True,
             }
         )
+        if is_torch_bf16_gpu_available():
+            cfg.bf16 = True
+        else:
+            cfg.fp16 = True
+
         normalize_config(cfg)
         cli_args = TrainerCliArgs()
         dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
diff --git a/tests/e2e/test_phi.py b/tests/e2e/test_phi.py
index b21fc14ff..80c748cc9 100644
--- a/tests/e2e/test_phi.py
+++ b/tests/e2e/test_phi.py
@@ -8,6 +8,7 @@
 from pathlib import Path
 
 import pytest
+from transformers.utils import is_torch_bf16_gpu_available
 
 from axolotl.cli import load_datasets
 from axolotl.common.cli import TrainerCliArgs
@@ -59,7 +60,6 @@ def test_phi2_ft(self, temp_dir):
                 "learning_rate": 0.00001,
                 "optimizer": "paged_adamw_8bit",
                 "lr_scheduler": "cosine",
-                "bf16": True,
                 "flash_attention": True,
                 "max_steps": 10,
                 "save_steps": 10,
@@ -67,6 +67,10 @@
                 "save_safetensors": True,
             }
         )
+        if is_torch_bf16_gpu_available():
+            cfg.bf16 = True
+        else:
+            cfg.fp16 = True
         normalize_config(cfg)
         cli_args = TrainerCliArgs()
         dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
@@ -110,9 +114,13 @@ def test_ft_packed(self, temp_dir):
                 "learning_rate": 0.00001,
                 "optimizer": "adamw_bnb_8bit",
                 "lr_scheduler": "cosine",
-                "bf16": True,
             }
         )
+        if is_torch_bf16_gpu_available():
+            cfg.bf16 = True
+        else:
+            cfg.fp16 = True
+
         normalize_config(cfg)
         cli_args = TrainerCliArgs()
         dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
diff --git a/tests/test_validation.py b/tests/test_validation.py
index 12997b023..d2518a7df 100644
--- a/tests/test_validation.py
+++ b/tests/test_validation.py
@@ -6,6 +6,7 @@
 from typing import Optional
 
 import pytest
+from transformers.utils import is_torch_bf16_gpu_available
 
 from axolotl.utils.config import validate_config
 from axolotl.utils.dict import DictDefault
@@ -354,6 +355,10 @@ def test_packing(self):
         with pytest.raises(ValueError, match=regex_exp):
             validate_config(cfg)
 
+    @pytest.mark.skipif(
+        is_torch_bf16_gpu_available(),
+        reason="test should only run on gpus w/o bf16 support",
+    )
     def test_merge_lora_no_bf16_fail(self):
         """
         This is assumed to be run on a CPU machine, so bf16 is not supported.
@@ -778,6 +783,15 @@ def test_wandb_sets_env(self):
         assert os.environ.get("WANDB_LOG_MODEL", "") == "checkpoint"
         assert os.environ.get("WANDB_DISABLED", "") != "true"
 
+        os.environ.pop("WANDB_PROJECT", None)
+        os.environ.pop("WANDB_NAME", None)
+        os.environ.pop("WANDB_RUN_ID", None)
+        os.environ.pop("WANDB_ENTITY", None)
+        os.environ.pop("WANDB_MODE", None)
+        os.environ.pop("WANDB_WATCH", None)
+        os.environ.pop("WANDB_LOG_MODEL", None)
+        os.environ.pop("WANDB_DISABLED", None)
+
     def test_wandb_set_disabled(self):
         cfg = DictDefault({})
 
@@ -798,3 +812,6 @@ def test_wandb_set_disabled(self):
         setup_wandb_env_vars(cfg)
 
         assert os.environ.get("WANDB_DISABLED", "") != "true"
+
+        os.environ.pop("WANDB_PROJECT", None)
+        os.environ.pop("WANDB_DISABLED", None)

From 501cb0c3c83114f1252d5c4330a8e4d73cd7715a Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Tue, 9 Jan 2024 18:40:00 -0500
Subject: [PATCH 5/9] pytest skip for auto-gptq requirements

---
 src/axolotl/core/trainer_builder.py | 2 +-
 tests/e2e/test_lora_llama.py        | 4 +++-
 tests/e2e/test_mamba.py             | 4 ++--
 3 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/axolotl/core/trainer_builder.py b/src/axolotl/core/trainer_builder.py
index 883d1d1e8..7798ca455 100644
--- a/src/axolotl/core/trainer_builder.py
+++ b/src/axolotl/core/trainer_builder.py
@@ -752,7 +752,7 @@ def build(self, total_num_steps):
             report_to = "wandb"
         if self.cfg.use_mlflow:
             report_to = "mlflow"
-        training_arguments_kwargs["report_to"] = None
+        training_arguments_kwargs["report_to"] = report_to
         training_arguments_kwargs["run_name"] = (
             self.cfg.wandb_name if self.cfg.use_wandb else None
         )
diff --git a/tests/e2e/test_lora_llama.py b/tests/e2e/test_lora_llama.py
index dc9e31c5e..21142a95c 100644
--- a/tests/e2e/test_lora_llama.py
+++ b/tests/e2e/test_lora_llama.py
@@ -7,7 +7,8 @@
 import unittest
 from pathlib import Path
 
-from transformers.utils import is_torch_bf16_gpu_available
+import pytest
+from transformers.utils import is_auto_gptq_available, is_torch_bf16_gpu_available
 
 from axolotl.cli import load_datasets
 from axolotl.common.cli import TrainerCliArgs
@@ -117,6 +118,7 @@ def test_lora_packing(self, temp_dir):
         train(cfg=cfg, cli_args=cli_args, dataset_meta=dataset_meta)
         assert (Path(temp_dir) / "adapter_model.bin").exists()
 
+    @pytest.mark.skipif(not is_auto_gptq_available(), reason="auto-gptq not available")
     @with_temp_dir
     def test_lora_gptq(self, temp_dir):
         # pylint: disable=duplicate-code
diff --git a/tests/e2e/test_mamba.py b/tests/e2e/test_mamba.py
index 463b0ddac..65a6d7d3d 100644
--- a/tests/e2e/test_mamba.py
+++ b/tests/e2e/test_mamba.py
@@ -19,9 +19,9 @@
 os.environ["WANDB_DISABLED"] = "true"
 
 
-class TestMistral(unittest.TestCase):
+class TestMamba(unittest.TestCase):
     """
-    Test case for Llama models using LoRA
+    Test case for Mamba models
     """
 
     @with_temp_dir

From ecfea1e2cf913dd257aee38fe45b8b447f46d295 Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Tue, 9 Jan 2024 18:54:38 -0500
Subject: [PATCH 6/9] skip mamba tests for now, split multipack and non-packed
 lora llama tests

---
 tests/e2e/test_lora_llama.py           | 101 --------------------
 tests/e2e/test_lora_llama_multipack.py | 126 +++++++++++++++++++++++++
 tests/e2e/test_mamba.py                |   3 +
 3 files changed, 129 insertions(+), 101 deletions(-)
 create mode 100644 tests/e2e/test_lora_llama_multipack.py

diff --git a/tests/e2e/test_lora_llama.py b/tests/e2e/test_lora_llama.py
index 21142a95c..c79652bef 100644
--- a/tests/e2e/test_lora_llama.py
+++ b/tests/e2e/test_lora_llama.py
@@ -7,9 +7,6 @@
 import unittest
 from pathlib import Path
 
-import pytest
-from transformers.utils import is_auto_gptq_available, is_torch_bf16_gpu_available
-
 from axolotl.cli import load_datasets
 from axolotl.common.cli import TrainerCliArgs
 from axolotl.train import train
@@ -68,101 +65,3 @@ def test_lora(self, temp_dir):
 
         train(cfg=cfg, cli_args=cli_args, dataset_meta=dataset_meta)
         assert (Path(temp_dir) / "adapter_model.bin").exists()
-
-    @with_temp_dir
-    def test_lora_packing(self, temp_dir):
-        # pylint: disable=duplicate-code
-        cfg = DictDefault(
-            {
-                "base_model": "JackFram/llama-68m",
-                "tokenizer_type": "LlamaTokenizer",
-                "sequence_len": 1024,
-                "sample_packing": True,
-                "flash_attention": True,
-                "load_in_8bit": True,
-                "adapter": "lora",
-                "lora_r": 32,
-                "lora_alpha": 64,
-                "lora_dropout": 0.05,
-                "lora_target_linear": True,
-                "val_set_size": 0.1,
-                "special_tokens": {
-                    "unk_token": "<unk>",
-                    "bos_token": "<s>",
-                    "eos_token": "</s>",
-                },
-                "datasets": [
-                    {
-                        "path": "mhenrichsen/alpaca_2k_test",
-                        "type": "alpaca",
-                    },
-                ],
-                "num_epochs": 2,
-                "micro_batch_size": 8,
-                "gradient_accumulation_steps": 1,
-                "output_dir": temp_dir,
-                "learning_rate": 0.00001,
-                "optimizer": "adamw_torch",
-                "lr_scheduler": "cosine",
-            }
-        )
-        if is_torch_bf16_gpu_available():
-            cfg.bf16 = True
-        else:
-            cfg.fp16 = True
-
-        normalize_config(cfg)
-        cli_args = TrainerCliArgs()
-        dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
-
-        train(cfg=cfg, cli_args=cli_args, dataset_meta=dataset_meta)
-        assert (Path(temp_dir) / "adapter_model.bin").exists()
-
-    @pytest.mark.skipif(not is_auto_gptq_available(), reason="auto-gptq not available")
-    @with_temp_dir
-    def test_lora_gptq(self, temp_dir):
-        # pylint: disable=duplicate-code
-        cfg = DictDefault(
-            {
-                "base_model": "TheBlokeAI/jackfram_llama-68m-GPTQ",
-                "model_type": "AutoModelForCausalLM",
-                "tokenizer_type": "LlamaTokenizer",
-                "sequence_len": 1024,
-                "sample_packing": True,
-                "flash_attention": True,
-                "load_in_8bit": True,
-                "adapter": "lora",
-                "gptq": True,
-                "gptq_disable_exllama": True,
-                "lora_r": 32,
-                "lora_alpha": 64,
-                "lora_dropout": 0.05,
-                "lora_target_linear": True,
-                "val_set_size": 0.1,
-                "special_tokens": {
-                    "unk_token": "<unk>",
-                    "bos_token": "<s>",
-                    "eos_token": "</s>",
-                },
-                "datasets": [
-                    {
-                        "path": "mhenrichsen/alpaca_2k_test",
-                        "type": "alpaca",
-                    },
-                ],
-                "num_epochs": 2,
-                "save_steps": 0.5,
-                "micro_batch_size": 8,
-                "gradient_accumulation_steps": 1,
-                "output_dir": temp_dir,
-                "learning_rate": 0.00001,
-                "optimizer": "adamw_torch",
-                "lr_scheduler": "cosine",
-            }
-        )
-        normalize_config(cfg)
-        cli_args = TrainerCliArgs()
-        dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
-
-        train(cfg=cfg, cli_args=cli_args, dataset_meta=dataset_meta)
-        assert (Path(temp_dir) / "adapter_model.bin").exists()
diff --git a/tests/e2e/test_lora_llama_multipack.py b/tests/e2e/test_lora_llama_multipack.py
new file mode 100644
index 000000000..04c9fb4f4
--- /dev/null
+++ b/tests/e2e/test_lora_llama_multipack.py
@@ -0,0 +1,126 @@
+"""
+E2E tests for lora llama
+"""
+
+import logging
+import os
+import unittest
+from pathlib import Path
+
+import pytest
+from transformers.utils import is_auto_gptq_available, is_torch_bf16_gpu_available
+
+from axolotl.cli import load_datasets
+from axolotl.common.cli import TrainerCliArgs
+from axolotl.train import train
+from axolotl.utils.config import normalize_config
+from axolotl.utils.dict import DictDefault
+
+from .utils import with_temp_dir
+
+LOG = logging.getLogger("axolotl.tests.e2e")
+os.environ["WANDB_DISABLED"] = "true"
+
+
+class TestLoraLlama(unittest.TestCase):
+    """
+    Test case for Llama models using LoRA w multipack
+    """
+
+    @with_temp_dir
+    def test_lora_packing(self, temp_dir):
+        # pylint: disable=duplicate-code
+        cfg = DictDefault(
+            {
+                "base_model": "JackFram/llama-68m",
+                "tokenizer_type": "LlamaTokenizer",
+                "sequence_len": 1024,
+                "sample_packing": True,
+                "flash_attention": True,
+                "load_in_8bit": True,
+                "adapter": "lora",
+                "lora_r": 32,
+                "lora_alpha": 64,
+                "lora_dropout": 0.05,
+                "lora_target_linear": True,
+                "val_set_size": 0.1,
+                "special_tokens": {
+                    "unk_token": "<unk>",
+                    "bos_token": "<s>",
+                    "eos_token": "</s>",
+                },
+                "datasets": [
+                    {
+                        "path": "mhenrichsen/alpaca_2k_test",
+                        "type": "alpaca",
+                    },
+                ],
+                "num_epochs": 2,
+                "micro_batch_size": 8,
+                "gradient_accumulation_steps": 1,
+                "output_dir": temp_dir,
+                "learning_rate": 0.00001,
+                "optimizer": "adamw_torch",
+                "lr_scheduler": "cosine",
+            }
+        )
+        if is_torch_bf16_gpu_available():
+            cfg.bf16 = True
+        else:
+            cfg.fp16 = True
+
+        normalize_config(cfg)
+        cli_args = TrainerCliArgs()
+        dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
+
+        train(cfg=cfg, cli_args=cli_args, dataset_meta=dataset_meta)
+        assert (Path(temp_dir) / "adapter_model.bin").exists()
+
+    @pytest.mark.skipif(not is_auto_gptq_available(), reason="auto-gptq not available")
+    @with_temp_dir
+    def test_lora_gptq_packed(self, temp_dir):
+        # pylint: disable=duplicate-code
+        cfg = DictDefault(
+            {
+                "base_model": "TheBlokeAI/jackfram_llama-68m-GPTQ",
+                "model_type": "AutoModelForCausalLM",
+                "tokenizer_type": "LlamaTokenizer",
+                "sequence_len": 1024,
+                "sample_packing": True,
+                "flash_attention": True,
+                "load_in_8bit": True,
+                "adapter": "lora",
+                "gptq": True,
+                "gptq_disable_exllama": True,
+                "lora_r": 32,
+                "lora_alpha": 64,
+                "lora_dropout": 0.05,
+                "lora_target_linear": True,
+                "val_set_size": 0.1,
+                "special_tokens": {
+                    "unk_token": "<unk>",
+                    "bos_token": "<s>",
+                    "eos_token": "</s>",
+                },
+                "datasets": [
+                    {
+                        "path": "mhenrichsen/alpaca_2k_test",
+                        "type": "alpaca",
+                    },
+                ],
+                "num_epochs": 2,
+                "save_steps": 0.5,
+                "micro_batch_size": 8,
+                "gradient_accumulation_steps": 1,
+                "output_dir": temp_dir,
+                "learning_rate": 0.00001,
+                "optimizer": "adamw_torch",
+                "lr_scheduler": "cosine",
+            }
+        )
+        normalize_config(cfg)
+        cli_args = TrainerCliArgs()
+        dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
+
+        train(cfg=cfg, cli_args=cli_args, dataset_meta=dataset_meta)
+        assert (Path(temp_dir) / "adapter_model.bin").exists()
diff --git a/tests/e2e/test_mamba.py b/tests/e2e/test_mamba.py
index 65a6d7d3d..8755fa4d5 100644
--- a/tests/e2e/test_mamba.py
+++ b/tests/e2e/test_mamba.py
@@ -7,6 +7,8 @@
 import unittest
 from pathlib import Path
 
+import pytest
+
 from axolotl.cli import load_datasets
 from axolotl.common.cli import TrainerCliArgs
 from axolotl.train import train
@@ -19,6 +21,7 @@
 os.environ["WANDB_DISABLED"] = "true"
 
 
+@pytest.mark.skip(reason="skipping until upstreamed into transformers")
 class TestMamba(unittest.TestCase):
     """
     Test case for Mamba models

From 68bce8916061a6398f3eb2add9c426eeedce4270 Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Tue, 9 Jan 2024 19:40:31 -0500
Subject: [PATCH 7/9] split tests that use monkeypatches

---
 .github/workflows/tests-docker.yml                   | 5 ++++-
 tests/e2e/patched/__init__.py                        | 0
 tests/e2e/{ => patched}/test_lora_llama_multipack.py | 0
 tests/e2e/{ => patched}/test_mistral_samplepack.py   | 0
 tests/e2e/{ => patched}/test_mixtral_samplepack.py   | 0
 tests/e2e/{ => patched}/test_model_patches.py        | 0
 6 files changed, 4 insertions(+), 1 deletion(-)
 create mode 100644 tests/e2e/patched/__init__.py
 rename tests/e2e/{ => patched}/test_lora_llama_multipack.py (100%)
 rename tests/e2e/{ => patched}/test_mistral_samplepack.py (100%)
 rename tests/e2e/{ => patched}/test_mixtral_samplepack.py (100%)
 rename tests/e2e/{ => patched}/test_model_patches.py (100%)

diff --git a/.github/workflows/tests-docker.yml b/.github/workflows/tests-docker.yml
index 935dbf76f..6059946fc 100644
--- a/.github/workflows/tests-docker.yml
+++ b/.github/workflows/tests-docker.yml
@@ -48,4 +48,7 @@ jobs:
           docker run --rm test-axolotl pytest --ignore=tests/e2e/ /workspace/axolotl/tests/
       - name: GPU Unit Tests w docker image
         run: |
-          docker run --privileged --gpus "all" --env WANDB_DISABLED=true --rm test-axolotl pytest /workspace/axolotl/tests/e2e/
+          docker run --privileged --gpus "all" --env WANDB_DISABLED=true --rm test-axolotl pytest --ignore=tests/e2e/patched/ /workspace/axolotl/tests/e2e/
+      - name: GPU Unit Tests monkeypatched w docker image
+        run: |
+          docker run --privileged --gpus "all" --env WANDB_DISABLED=true --rm test-axolotl pytest /workspace/axolotl/tests/e2e/patched/
diff --git a/tests/e2e/patched/__init__.py b/tests/e2e/patched/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/e2e/test_lora_llama_multipack.py b/tests/e2e/patched/test_lora_llama_multipack.py
similarity index 100%
rename from tests/e2e/test_lora_llama_multipack.py
rename to tests/e2e/patched/test_lora_llama_multipack.py
diff --git a/tests/e2e/test_mistral_samplepack.py b/tests/e2e/patched/test_mistral_samplepack.py
similarity index 100%
rename from tests/e2e/test_mistral_samplepack.py
rename to tests/e2e/patched/test_mistral_samplepack.py
diff --git a/tests/e2e/test_mixtral_samplepack.py b/tests/e2e/patched/test_mixtral_samplepack.py
similarity index 100%
rename from tests/e2e/test_mixtral_samplepack.py
rename to tests/e2e/patched/test_mixtral_samplepack.py
diff --git a/tests/e2e/test_model_patches.py b/tests/e2e/patched/test_model_patches.py
similarity index 100%
rename from tests/e2e/test_model_patches.py
rename to tests/e2e/patched/test_model_patches.py

From ad9b7e3d4642838b0ff9c93b9febd672b9450533 Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Tue, 9 Jan 2024 20:18:28 -0500
Subject: [PATCH 8/9] fix relative import for prev commit

---
 tests/e2e/patched/test_lora_llama_multipack.py | 2 +-
 tests/e2e/patched/test_mistral_samplepack.py   | 2 +-
 tests/e2e/patched/test_mixtral_samplepack.py   | 2 +-
 tests/e2e/patched/test_model_patches.py        | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/e2e/patched/test_lora_llama_multipack.py b/tests/e2e/patched/test_lora_llama_multipack.py
index 04c9fb4f4..079a8e924 100644
--- a/tests/e2e/patched/test_lora_llama_multipack.py
+++ b/tests/e2e/patched/test_lora_llama_multipack.py
@@ -16,7 +16,7 @@
 from axolotl.utils.config import normalize_config
 from axolotl.utils.dict import DictDefault
 
-from .utils import with_temp_dir
+from ..utils import with_temp_dir
 
 LOG = logging.getLogger("axolotl.tests.e2e")
 os.environ["WANDB_DISABLED"] = "true"
diff --git a/tests/e2e/patched/test_mistral_samplepack.py b/tests/e2e/patched/test_mistral_samplepack.py
index cefbd7dc0..c0327d7ef 100644
--- a/tests/e2e/patched/test_mistral_samplepack.py
+++ b/tests/e2e/patched/test_mistral_samplepack.py
@@ -15,7 +15,7 @@
 from axolotl.utils.config import normalize_config
 from axolotl.utils.dict import DictDefault
 
-from .utils import with_temp_dir
+from ..utils import with_temp_dir
 
 LOG = logging.getLogger("axolotl.tests.e2e")
 os.environ["WANDB_DISABLED"] = "true"
diff --git a/tests/e2e/patched/test_mixtral_samplepack.py b/tests/e2e/patched/test_mixtral_samplepack.py
index b43702a51..4eff3825a 100644
--- a/tests/e2e/patched/test_mixtral_samplepack.py
+++ b/tests/e2e/patched/test_mixtral_samplepack.py
@@ -15,7 +15,7 @@
 from axolotl.utils.config import normalize_config
 from axolotl.utils.dict import DictDefault
 
-from .utils import with_temp_dir
+from ..utils import with_temp_dir
 
 LOG = logging.getLogger("axolotl.tests.e2e")
 os.environ["WANDB_DISABLED"] = "true"
diff --git a/tests/e2e/patched/test_model_patches.py b/tests/e2e/patched/test_model_patches.py
index eb1124464..65d372c73 100644
--- a/tests/e2e/patched/test_model_patches.py
+++ b/tests/e2e/patched/test_model_patches.py
@@ -9,7 +9,7 @@
 from axolotl.utils.dict import DictDefault
 from axolotl.utils.models import load_model, load_tokenizer
 
-from .utils import with_temp_dir
+from ..utils import with_temp_dir
 
 
 class TestModelPatches(unittest.TestCase):

From 3bf3e08b9f706c9ac9f6b14756a4e03eb155d010 Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Tue, 9 Jan 2024 20:38:35 -0500
Subject: [PATCH 9/9] move other tests using monkeypatches to the correct run

---
 tests/e2e/{ => patched}/test_fused_llama.py | 2 +-
 tests/e2e/{ => patched}/test_resume.py      | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)
 rename tests/e2e/{ => patched}/test_fused_llama.py (98%)
 rename tests/e2e/{ => patched}/test_resume.py (96%)

diff --git a/tests/e2e/test_fused_llama.py b/tests/e2e/patched/test_fused_llama.py
similarity index 98%
rename from tests/e2e/test_fused_llama.py
rename to tests/e2e/patched/test_fused_llama.py
index 513df69f9..96ff5eee8 100644
--- a/tests/e2e/test_fused_llama.py
+++ b/tests/e2e/patched/test_fused_llama.py
@@ -15,7 +15,7 @@
 from axolotl.utils.config import normalize_config
 from axolotl.utils.dict import DictDefault
 
-from .utils import with_temp_dir
+from ..utils import with_temp_dir
 
 LOG = logging.getLogger("axolotl.tests.e2e")
 os.environ["WANDB_DISABLED"] = "true"
diff --git a/tests/e2e/test_resume.py b/tests/e2e/patched/test_resume.py
similarity index 96%
rename from tests/e2e/test_resume.py
rename to tests/e2e/patched/test_resume.py
index 98ec3ac6b..dfe9e8625 100644
--- a/tests/e2e/test_resume.py
+++ b/tests/e2e/patched/test_resume.py
@@ -17,7 +17,7 @@
 from axolotl.utils.config import normalize_config
 from axolotl.utils.dict import DictDefault
 
-from .utils import most_recent_subdir, with_temp_dir
+from ..utils import most_recent_subdir, with_temp_dir
 
 LOG = logging.getLogger("axolotl.tests.e2e")
 os.environ["WANDB_DISABLED"] = "true"
@@ -29,7 +29,7 @@ class TestResumeLlama(unittest.TestCase):
     """
 
     @with_temp_dir
-    def test_resume_qlora(self, temp_dir):
+    def test_resume_qlora_packed(self, temp_dir):
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
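
Note: with the full series applied, the CI steps above amount to the following local run — a sketch only, assuming a checkout of this branch, Docker with the NVIDIA container runtime, and the Dockerfile defaults for the BASE_TAG/CUDA/PYTORCH_VERSION build args:

# Build the test image; Dockerfile-tests checks out the ref named by GITHUB_REF,
# so the workflow builds with --no-cache to avoid stale branch contents.
docker build . \
  --file ./docker/Dockerfile-tests \
  --build-arg GITHUB_REF=main \
  --tag test-axolotl \
  --no-cache

# CPU-only unit tests (e2e excluded).
docker run --rm test-axolotl pytest --ignore=tests/e2e/ /workspace/axolotl/tests/

# GPU e2e tests: the unpatched suite first, then the monkeypatched suite in
# isolation so its model patches cannot leak into other tests.
docker run --privileged --gpus "all" --env WANDB_DISABLED=true --rm test-axolotl \
  pytest --ignore=tests/e2e/patched/ /workspace/axolotl/tests/e2e/
docker run --privileged --gpus "all" --env WANDB_DISABLED=true --rm test-axolotl \
  pytest /workspace/axolotl/tests/e2e/patched/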