diff --git a/.github/workflows/quality-check.yaml b/.github/workflows/quality-check.yaml index 46abfb408..21be8a509 100644 --- a/.github/workflows/quality-check.yaml +++ b/.github/workflows/quality-check.yaml @@ -18,13 +18,7 @@ jobs: - uses: actions/checkout@v2 - uses: actions/checkout@v2 with: - repository: "neuralmagic/sparsezoo" - path: "sparsezoo" ref: ${{needs.test-setup.outputs.branch}} - - name: "⚙️ Install sparsezoo dependencies" - run: pip3 install sparsezoo/ - - name: "Clean sparsezoo directory" - run: rm -r sparsezoo/ - name: "⚙️ Install dependencies" run: pip3 install .[dev] - name: "🧹 Running quality checks" diff --git a/.github/workflows/test-check.yaml b/.github/workflows/test-check.yaml index 0d2e4aa5b..9b793f391 100644 --- a/.github/workflows/test-check.yaml +++ b/.github/workflows/test-check.yaml @@ -30,8 +30,6 @@ jobs: || echo "::set-output name=branch::main" base-tests: runs-on: ubuntu-22.04 - env: - SPARSEZOO_TEST_MODE: "true" needs: test-setup steps: - uses: actions/setup-python@v4 @@ -40,13 +38,7 @@ jobs: - uses: actions/checkout@v2 - uses: actions/checkout@v2 with: - repository: "neuralmagic/sparsezoo" - path: "sparsezoo" ref: ${{needs.test-setup.outputs.branch}} - - name: "⚙️ Install sparsezoo dependencies" - run: pip3 install -U pip && pip3 install setuptools sparsezoo/ - - name: "Clean sparsezoo directory" - run: rm -r sparsezoo/ - name: "⚙️ Install dependencies" run: pip3 install .[dev] - name: "🔬 Running base tests" @@ -54,7 +46,6 @@ jobs: pytorch-tests: runs-on: k8s-eng-gpu-64G-v100-32G env: - SPARSEZOO_TEST_MODE: "true" CLEARML_WEB_HOST: ${{ secrets.CLEARML_WEB_HOST }} CLEARML_API_HOST: ${{ secrets.CLEARML_API_HOST }} CLEARML_API_ACCESS_KEY: ${{ secrets.CLEARML_API_ACCESS_KEY }} @@ -68,13 +59,7 @@ jobs: - uses: actions/checkout@v2 - uses: actions/checkout@v2 with: - repository: "neuralmagic/sparsezoo" - path: "sparsezoo" ref: ${{needs.test-setup.outputs.branch}} - - name: "⚙️ Install sparsezoo dependencies" - run: pip3 install -U pip && pip3 install setuptools sparsezoo/ - - name: "Clean sparsezoo directory" - run: rm -r sparsezoo/ - uses: actions/checkout@v2 - uses: actions/checkout@v2 with: @@ -92,7 +77,6 @@ jobs: compat-pytorch-1_9-pytorch-tests: runs-on: k8s-eng-gpu-64G-v100-32G env: - SPARSEZOO_TEST_MODE: "true" CLEARML_WEB_HOST: ${{ secrets.CLEARML_WEB_HOST }} CLEARML_API_HOST: ${{ secrets.CLEARML_API_HOST }} CLEARML_API_ACCESS_KEY: ${{ secrets.CLEARML_API_ACCESS_KEY }} @@ -106,13 +90,7 @@ jobs: - uses: actions/checkout@v2 - uses: actions/checkout@v2 with: - repository: "neuralmagic/sparsezoo" - path: "sparsezoo" ref: ${{needs.test-setup.outputs.branch}} - - name: "⚙️ Install sparsezoo dependencies" - run: pip3 install -U pip && pip3 install setuptools sparsezoo/ - - name: "Clean sparsezoo directory" - run: rm -r sparsezoo/ - uses: actions/checkout@v2 - uses: actions/checkout@v2 with: @@ -130,7 +108,6 @@ jobs: transformers-tests: runs-on: k8s-eng-gpu-64G-v100-32G env: - SPARSEZOO_TEST_MODE: "true" CLEARML_WEB_HOST: ${{ secrets.CLEARML_WEB_HOST }} CLEARML_API_HOST: ${{ secrets.CLEARML_API_HOST }} CLEARML_API_ACCESS_KEY: ${{ secrets.CLEARML_API_ACCESS_KEY }} @@ -144,13 +121,7 @@ jobs: - uses: actions/checkout@v2 - uses: actions/checkout@v2 with: - repository: "neuralmagic/sparsezoo" - path: "sparsezoo" ref: ${{needs.test-setup.outputs.branch}} - - name: "⚙️ Install sparsezoo dependencies" - run: pip3 install -U pip && pip3 install setuptools sparsezoo/ - - name: "Clean sparsezoo directory" - run: rm -r sparsezoo/ - uses: actions/checkout@v2 with: 
repository: "neuralmagic/compressed-tensors" diff --git a/.github/workflows/test-weekly.yml b/.github/workflows/test-weekly.yml index 7c9951b87..dc511999d 100644 --- a/.github/workflows/test-weekly.yml +++ b/.github/workflows/test-weekly.yml @@ -8,6 +8,7 @@ jobs: runs-on: k8s-mle-gpu-12-vcpu-225GB-ram-2-a6000-48G env: CADENCE: "weekly" + HF_TOKEN: ${{ secrets.NM_HF_TOKEN }} CLEARML_WEB_HOST: ${{ secrets.CLEARML_WEB_HOST }} CLEARML_API_HOST: ${{ secrets.CLEARML_API_HOST }} CLEARML_API_ACCESS_KEY: ${{ secrets.CLEARML_API_ACCESS_KEY }} diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index a538f90d4..b27bb94fa 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -28,7 +28,7 @@ If you’re reading this, hopefully we have piqued your interest to take the nex ## Code of Conduct -Help us keep the software inclusive. Please read and follow our [Code of Conduct](CODE_OF_CONDUCT.md) in order to promote an environment that is friendly, fair, respectful, and safe. We want to inspire collaboration, innovation, and fun! +Help us keep the software inclusive. Please read and follow our [Code of Conduct](https://github.com/neuralmagic/sparseml/blob/main/CODE_OF_CONDUCT.md) in order to promote an environment that is friendly, fair, respectful, and safe. We want to inspire collaboration, innovation, and fun! ## Ways to Contribute @@ -54,7 +54,7 @@ Whether you’re a newbie, dabbler, or expert, we appreciate you jumping in. Please search through existing issues and requests first to avoid duplicates. Neural Magic will work with you further to take next steps. -- Go to: [GitHub Issues](https://github.com/neuralmagic/llmcompressor/issues) +- Go to: [GitHub Issues](https://github.com/vllm-project/llm-compressor/issues) For bugs, include: diff --git a/DEVELOPING.md b/DEVELOPING.md index bd7670327..4e069fffa 100644 --- a/DEVELOPING.md +++ b/DEVELOPING.md @@ -28,8 +28,8 @@ Here are some details to get started. **Development Installation** ```bash -git clone https://github.com/neuralmagic/sparseml.git -cd llmcompressor +git https://github.com/vllm-project/llm-compressor +cd llm-compressor python3 -m pip install -e "./[dev]" ``` @@ -73,7 +73,7 @@ File any error found before changes as an Issue and fix any errors found after m ## GitHub Workflow -1. Fork the `TODO/llmcompressor` repository into your GitHub account: https://github.com/TODO/llmcompressor/fork. +1. Fork the `llmcompressor` repository into your GitHub account: https://github.com/vllm-project/llm-compressor. 2. Clone your fork of the GitHub repository, replacing `` with your GitHub username. 
diff --git a/Makefile b/Makefile index 937db5640..06c748166 100644 --- a/Makefile +++ b/Makefile @@ -31,7 +31,7 @@ style: # run tests for the repo test: @echo "Running python tests"; - SPARSEZOO_TEST_MODE="true" pytest tests $(PYTEST_ARGS) + pytest tests $(PYTEST_ARGS) # creates wheel file build: diff --git a/examples/quantization/llama7b_quantize_sparse_cnn.py b/examples/quantization/llama7b_quantize_sparse_cnn.py index fde0b4dda..32650d8d7 100644 --- a/examples/quantization/llama7b_quantize_sparse_cnn.py +++ b/examples/quantization/llama7b_quantize_sparse_cnn.py @@ -1,11 +1,8 @@ import torch from datasets import load_dataset +from transformers import AutoTokenizer -from llmcompressor.transformers import ( - SparseAutoModelForCausalLM, - SparseAutoTokenizer, - oneshot, -) +from llmcompressor.transformers import SparseAutoModelForCausalLM, oneshot # define a llmcompressor recipe for GPTQ W4A16 quantization recipe = """ @@ -30,7 +27,7 @@ model = SparseAutoModelForCausalLM.from_pretrained( model_stub, torch_dtype=torch.bfloat16, device_map="auto" ) -tokenizer = SparseAutoTokenizer.from_pretrained(model_stub) +tokenizer = AutoTokenizer.from_pretrained(model_stub) # for quantization calibration, we will use a subset of the dataset that was used to # sparsify and finetune the model diff --git a/examples/quantization/llama7b_sparse_quantized/README.md b/examples/quantization/llama7b_sparse_quantized/README.md index 6637f5cae..f45f35af5 100644 --- a/examples/quantization/llama7b_sparse_quantized/README.md +++ b/examples/quantization/llama7b_sparse_quantized/README.md @@ -10,7 +10,7 @@ Follow the steps below, or to run the example as `python examples/llama7b_sparse In this step, we select which model to use as a baseline for sparsification, a dataset to use for calibration and finetuning, and a recipe. -Models can reference a local directory, model in the huggingface hub, or in the sparsezoo. +Models can reference a local directory, or a model in the huggingface hub. Datasets can be from a local compatible directory or the huggingface hub. @@ -23,7 +23,7 @@ and quantize to 4 bits in one shot using GPTQ. 
import torch from llmcompressor.transformers import SparseAutoModelForCausalLM -model_stub = "zoo:llama2-7b-ultrachat200k_llama2_pretrain-base" +model_stub = "neuralmagic/Llama-2-7b-ultrachat200k" model = SparseAutoModelForCausalLM.from_pretrained( model_stub, torch_dtype=torch.bfloat16, device_map="auto" ) diff --git a/examples/quantization/llama7b_sparse_quantized/llama7b_sparse_w4a16.py b/examples/quantization/llama7b_sparse_quantized/llama7b_sparse_w4a16.py index 79ed55352..ac0cbed0e 100644 --- a/examples/quantization/llama7b_sparse_quantized/llama7b_sparse_w4a16.py +++ b/examples/quantization/llama7b_sparse_quantized/llama7b_sparse_w4a16.py @@ -6,7 +6,7 @@ recipe = "2:4_w4a16_recipe.yaml" # load the model in as bfloat16 to save on memory and compute -model_stub = "zoo:llama2-7b-ultrachat200k_llama2_pretrain-base" +model_stub = "neuralmagic/Llama-2-7b-ultrachat200k" model = SparseAutoModelForCausalLM.from_pretrained( model_stub, torch_dtype=torch.bfloat16, device_map="auto" ) diff --git a/examples/quantization/llama7b_w4a16_quantization.ipynb b/examples/quantization/llama7b_w4a16_quantization.ipynb index c06129966..c69cc90c6 100644 --- a/examples/quantization/llama7b_w4a16_quantization.ipynb +++ b/examples/quantization/llama7b_w4a16_quantization.ipynb @@ -71,7 +71,7 @@ "\n", "# by setting the device_map to auto, we can spread the model evenly across all available GPUs\n", "# load the model in as bfloat16 to save on memory and compute\n", - "model_stub = \"zoo:llama2-7b-ultrachat200k_llama2_pretrain-base\"\n", + "model_stub = \"neuralmagic/Llama-2-7b-ultrachat200k\"\n", "model = SparseAutoModelForCausalLM.from_pretrained(model_stub, torch_dtype=torch.bfloat16, device_map=\"auto\")\n", "\n", "# uses SparseML's built-in preprocessing for ultra chat\n", diff --git a/examples/quantization/llama7b_w4a16_quantization.py b/examples/quantization/llama7b_w4a16_quantization.py index 20feaea75..9bd60c2f3 100644 --- a/examples/quantization/llama7b_w4a16_quantization.py +++ b/examples/quantization/llama7b_w4a16_quantization.py @@ -21,7 +21,7 @@ # setting device_map to auto to spread the model evenly across all available GPUs # load the model in as bfloat16 to save on memory and compute -model_stub = "zoo:llama2-7b-ultrachat200k_llama2_pretrain-base" +model_stub = "neuralmagic/Llama-2-7b-ultrachat200k" model = SparseAutoModelForCausalLM.from_pretrained( model_stub, torch_dtype=torch.bfloat16, device_map="auto" ) diff --git a/examples/quantization/llama7b_w8a8_quantization.py b/examples/quantization/llama7b_w8a8_quantization.py index 888f98521..2429e1362 100644 --- a/examples/quantization/llama7b_w8a8_quantization.py +++ b/examples/quantization/llama7b_w8a8_quantization.py @@ -27,7 +27,7 @@ # setting device_map to auto to spread the model evenly across all available GPUs # load the model in as bfloat16 to save on memory and compute -model_stub = "zoo:llama2-7b-ultrachat200k_llama2_pretrain-base" +model_stub = "neuralmagic/Llama-2-7b-ultrachat200k" model = SparseAutoModelForCausalLM.from_pretrained( model_stub, torch_dtype=torch.bfloat16, device_map="auto" ) diff --git a/examples/trl_mixin/ex_trl_constant.py b/examples/trl_mixin/ex_trl_constant.py index 45e6da504..8b67542af 100644 --- a/examples/trl_mixin/ex_trl_constant.py +++ b/examples/trl_mixin/ex_trl_constant.py @@ -13,12 +13,12 @@ # limitations under the License. 
from datasets import load_dataset +from transformers import AutoTokenizer from trl import DataCollatorForCompletionOnlyLM from llmcompressor.transformers import ( SFTTrainer, SparseAutoModelForCausalLM, - SparseAutoTokenizer, TrainingArguments, ) @@ -27,7 +27,7 @@ model = SparseAutoModelForCausalLM.from_pretrained( model_path, torch_dtype="auto", device_map="auto" ) -tokenizer = SparseAutoTokenizer.from_pretrained(model_path) +tokenizer = AutoTokenizer.from_pretrained(model_path) tokenizer.pad_token = tokenizer.eos_token # recipe for maintaining model sparsity during finetuning diff --git a/examples/trl_mixin/ex_trl_distillation.py b/examples/trl_mixin/ex_trl_distillation.py index d599029c4..675f6ebdd 100644 --- a/examples/trl_mixin/ex_trl_distillation.py +++ b/examples/trl_mixin/ex_trl_distillation.py @@ -12,19 +12,18 @@ # See the License for the specific language governing permissions and # limitations under the License. -from transformers import DefaultDataCollator +from transformers import AutoTokenizer, DefaultDataCollator from llmcompressor.transformers import ( DataTrainingArguments, SFTTrainer, SparseAutoModelForCausalLM, - SparseAutoTokenizer, TextGenerationDataset, TrainingArguments, ) model_path = "neuralmagic/Llama-2-7b-pruned50-retrained" -teacher_path = "zoo:llama2-7b-gsm8k_llama2_pretrain-base" +teacher_path = "neuralmagic/Llama-2-7b-gsm8k" output_dir = "./output_trl_sft_test_7b_gsm8k" model = SparseAutoModelForCausalLM.from_pretrained( @@ -34,7 +33,7 @@ teacher_path, torch_dtype="auto", device_map="auto" ) -tokenizer = SparseAutoTokenizer.from_pretrained(model_path) +tokenizer = AutoTokenizer.from_pretrained(model_path) # Load gsm8k using SparseML dataset tools data_args = DataTrainingArguments( diff --git a/setup.py b/setup.py index 64aa1f4d2..9f9716dd1 100644 --- a/setup.py +++ b/setup.py @@ -62,7 +62,6 @@ "compressed-tensors" if version_info.is_release else "compressed-tensors-nightly", - "sparsezoo" if version_info.is_release else "sparsezoo-nightly", ], extras_require={ "dev": [ diff --git a/src/llmcompressor/pytorch/utils/helpers.py b/src/llmcompressor/pytorch/utils/helpers.py index 1fbcee4b9..c4f06613b 100644 --- a/src/llmcompressor/pytorch/utils/helpers.py +++ b/src/llmcompressor/pytorch/utils/helpers.py @@ -45,8 +45,6 @@ QATLinear = None QATConv2d = None -from sparsezoo import Model - from llmcompressor.utils import create_dirs, save_numpy try: @@ -98,7 +96,6 @@ "thin_model_from_checkpoint", "MEMORY_BOUNDED", "memory_aware_threshold", - "download_framework_model_by_recipe_type", "detach", "adjust_quantization_for_onnx_export", "get_dependency_order", @@ -1099,38 +1096,6 @@ def memory_aware_threshold(tensor: torch.Tensor, idx: int) -> Tensor: return torch.kthvalue(tensor.view(-1), idx + 1)[0] -def download_framework_model_by_recipe_type( - zoo_model: Model, recipe_name: Optional[str] = None, model_suffix: str = "pth" -) -> str: - """ - Extract the path of the framework model from the - zoo model, conditioned on the name of the recipe - By default, the function will return path to the final framework model - :params zoo_model: model object from sparsezoo - :params recipe_name: a name of the recipe (e.g. "transfer_learn", "original" etc.) 
- :params model_suffix: model_suffix that models are saved with - :return: path to the framework model - """ - - # default to model query params if available - recipe_name = recipe_name or ( - zoo_model.stub_params.get("recipe_type") or zoo_model.stub_params.get("recipe") - ) - - framework_model = None - if recipe_name and "transfer" in recipe_name.lower(): - # fetching the model for transfer learning - model_name = f"model.ckpt.{model_suffix}" - framework_model = zoo_model.training.default.get_file(model_name) - - if framework_model is None: - # fetching the model for inference or fall back if model.ckpt.pth doesn't exist - model_name = f"model.{model_suffix}" - framework_model = zoo_model.training.default.get_file(model_name) - - return framework_model.path - - def detach(x: Union[torch.Tensor, List, Tuple]): if isinstance(x, torch.Tensor): return x.detach() diff --git a/src/llmcompressor/recipe/recipe.py b/src/llmcompressor/recipe/recipe.py index 62c23eb35..bcf502875 100644 --- a/src/llmcompressor/recipe/recipe.py +++ b/src/llmcompressor/recipe/recipe.py @@ -27,7 +27,7 @@ class Recipe(RecipeBase): (More information on supported modifiers can be found at https://docs.neuralmagic.com/products/sparseml) - Recipes can be created from a file, string, or SparseZoo stub. + Recipes can be created from a file, string, or HuggingFace stub. Acceptable file formats include both json and yaml, however, when serializing a recipe, yaml will be used by default. """ @@ -96,7 +96,7 @@ def create_instance( >>> recipe = Recipe.create_instance(recipe_str) :param path_or_modifiers: The path to the recipe file or - SparseZoo stub or the recipe string (must be a valid + the recipe string (must be a valid json/yaml file or a valid json/yaml string). Can also accept a RecipeModifier instance, or a list of RecipeModifiers diff --git a/src/llmcompressor/transformers/__init__.py b/src/llmcompressor/transformers/__init__.py index d2c1f9479..c1d6e285e 100644 --- a/src/llmcompressor/transformers/__init__.py +++ b/src/llmcompressor/transformers/__init__.py @@ -21,10 +21,5 @@ # isort: skip_file # (import order matters for circular import avoidance) from .utils import * -from .sparsification import ( - SparseAutoModel, - SparseAutoModelForCausalLM, - SparseAutoConfig, - SparseAutoTokenizer, -) +from .sparsification import SparseAutoModel, SparseAutoModelForCausalLM from .finetune import * diff --git a/src/llmcompressor/transformers/finetune/README.md b/src/llmcompressor/transformers/finetune/README.md index 1fbe39749..7384b077b 100644 --- a/src/llmcompressor/transformers/finetune/README.md +++ b/src/llmcompressor/transformers/finetune/README.md @@ -38,7 +38,7 @@ accelerate launch --splits "train" ``` -See [configure_fsdp.md](https://github.com/neuralmagic/llmcompressor/blob/main/integrations/huggingface-transformers/finetuning/configure_fsdp.md) for additional instructions on setting up FSDP configuration +See [configure_fsdp.md](../../../../examples/finetuning/configure_fsdp.md) for additional instructions on setting up FSDP configuration ## Launching from Python @@ -132,7 +132,7 @@ A recipe can be run stage-by-stage by setting `run_stages` to `True` or calling a `run_type` attribute set to either `oneshot` or `train` when running in sequential mode. 
-See [example_alternating_recipe.yaml](../../../../integrations/huggingface-transformers/tutorials/text-generation/example_alternating_recipe.yaml) for an example +See [example_alternating_recipe.yaml](../../../../examples/finetuning/example_alternating_recipe.yaml) for an example of a staged recipe for Llama. ### Python Example diff --git a/src/llmcompressor/transformers/finetune/data/base.py b/src/llmcompressor/transformers/finetune/data/base.py index 313ebe5f6..c289369bf 100644 --- a/src/llmcompressor/transformers/finetune/data/base.py +++ b/src/llmcompressor/transformers/finetune/data/base.py @@ -15,8 +15,8 @@ import logging from typing import Optional, Union +from compressed_tensors.registry import RegistryMixin from datasets import Dataset, IterableDataset -from sparsezoo.utils.registry import RegistryMixin from transformers import AutoTokenizer from llmcompressor.transformers.finetune.data.data_args import DataTrainingArguments diff --git a/src/llmcompressor/transformers/finetune/data/custom.py b/src/llmcompressor/transformers/finetune/data/custom.py index 88e499ff5..e849594e7 100644 --- a/src/llmcompressor/transformers/finetune/data/custom.py +++ b/src/llmcompressor/transformers/finetune/data/custom.py @@ -15,12 +15,12 @@ from typing import Dict, List, Union from datasets.dataset_dict import Dataset, DatasetDict -from sparsezoo.utils.helpers import import_from_path from llmcompressor.transformers.finetune.data import TextGenerationDataset from llmcompressor.transformers.utils.preprocessing_functions import ( PreprocessingFunctionRegistry, ) +from llmcompressor.utils import import_from_path @TextGenerationDataset.register(name="custom", alias=["json", "csv"]) diff --git a/src/llmcompressor/transformers/finetune/model_args.py b/src/llmcompressor/transformers/finetune/model_args.py index e50b46775..6532c78d9 100644 --- a/src/llmcompressor/transformers/finetune/model_args.py +++ b/src/llmcompressor/transformers/finetune/model_args.py @@ -26,7 +26,7 @@ class ModelArguments: metadata={ "help": ( "A pretrained model or a string as a path to pretrained model, " - "sparsezoo stub, or model identifier from huggingface.co/models." + "HF stub, or model identifier from huggingface.co/models." 
) }, ) diff --git a/src/llmcompressor/transformers/finetune/text_generation.py b/src/llmcompressor/transformers/finetune/text_generation.py index 7f0e0dbf8..0082ac807 100644 --- a/src/llmcompressor/transformers/finetune/text_generation.py +++ b/src/llmcompressor/transformers/finetune/text_generation.py @@ -23,7 +23,13 @@ import datasets import transformers -from transformers import AutoConfig, DefaultDataCollator, HfArgumentParser, set_seed +from transformers import ( + AutoConfig, + AutoTokenizer, + DefaultDataCollator, + HfArgumentParser, + set_seed, +) from llmcompressor import pre_initialize_structure, reset_session from llmcompressor.pytorch.model_load.helpers import ( @@ -33,7 +39,6 @@ parse_dtype, ) from llmcompressor.recipe import Recipe, StageRunType -from llmcompressor.transformers import SparseAutoTokenizer from llmcompressor.transformers.finetune.data.data_args import DataTrainingArguments from llmcompressor.transformers.finetune.model_args import ModelArguments from llmcompressor.transformers.finetune.runner import StageRunner @@ -216,7 +221,7 @@ def intialize_model_from_path( def initialize_tokenizer_from_path(model_args, model, teacher): tokenizer_src = model_args.tokenizer tokenizer_src = tokenizer_src or get_shared_tokenizer_src(model, teacher) - tokenizer = SparseAutoTokenizer.from_pretrained( + tokenizer = AutoTokenizer.from_pretrained( tokenizer_src, cache_dir=model_args.cache_dir, use_fast=True, @@ -239,7 +244,7 @@ def main( Lifecycle: - SparseAutoModel.text_generation_from_pretrained if model provided as string for model and teacher - - SparseAutoTokenizer.from_pretrained() if tokenizer provided as + - AutoTokenizer.from_pretrained() if tokenizer provided as string for tokenizer - StageRunner.populate_datasets() - Trainer() diff --git a/src/llmcompressor/transformers/sparsification/__init__.py b/src/llmcompressor/transformers/sparsification/__init__.py index 83082200d..59959393a 100644 --- a/src/llmcompressor/transformers/sparsification/__init__.py +++ b/src/llmcompressor/transformers/sparsification/__init__.py @@ -18,7 +18,4 @@ """ # flake8: noqa - -from .sparse_config import * from .sparse_model import * -from .sparse_tokenizer import * diff --git a/src/llmcompressor/transformers/sparsification/sparse_config.py b/src/llmcompressor/transformers/sparsification/sparse_config.py deleted file mode 100644 index 5a75d4ec5..000000000 --- a/src/llmcompressor/transformers/sparsification/sparse_config.py +++ /dev/null @@ -1,49 +0,0 @@ -# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -from sparsezoo import Model -from transformers import AutoConfig - -__all__ = ["SparseAutoConfig"] - - -class SparseAutoConfig(AutoConfig): - """ - SparseML wrapper for the AutoConfig class - """ - - @classmethod - def from_pretrained(cls, pretrained_model_name_or_path, **kwargs): - """ - A wrapper around the AutoConfig.from_pretrained method that - enables the loading of configs from SparseZoo stubs - - If a SparseZoo stub is passed, the all the available config - file is passed to the AutoTokenizer.from_pretrained method - - :param pretrained_model_name_or_path: the name of or path to the model to load - :return tokenizer: the loaded tokenizer from pretrained - """ - if str(pretrained_model_name_or_path).startswith("zoo:"): - model = Model(pretrained_model_name_or_path) - config = model.training.get_file(file_name="config.json") - if config is None: - raise ValueError( - "Could not find config.json for stub: " - f"{pretrained_model_name_or_path}" - ) - pretrained_model_name_or_path = config.path - - return super().from_pretrained(pretrained_model_name_or_path, **kwargs) diff --git a/src/llmcompressor/transformers/sparsification/sparse_model.py b/src/llmcompressor/transformers/sparsification/sparse_model.py index 968221378..97fbe46ff 100644 --- a/src/llmcompressor/transformers/sparsification/sparse_model.py +++ b/src/llmcompressor/transformers/sparsification/sparse_model.py @@ -41,8 +41,8 @@ class SparseAutoModelForCausalLM(AutoModelForCausalLM): """ SparseML wrapper for the AutoModelForCausalLM class Its lifecycle is defined as follows: - 1. If pretrained_model_name_or_path is a SparseZoo stub - the appropriate SparseZoo model will be downloaded + 1. If pretrained_model_name_or_path is a HuggingFace stub + the appropriate HuggingFace model will be downloaded (if required) and the path to the deployment directory of the model will be retrieved 2. The original model definition will be loaded, without diff --git a/src/llmcompressor/transformers/sparsification/sparse_tokenizer.py b/src/llmcompressor/transformers/sparsification/sparse_tokenizer.py deleted file mode 100644 index 2a9cdd590..000000000 --- a/src/llmcompressor/transformers/sparsification/sparse_tokenizer.py +++ /dev/null @@ -1,55 +0,0 @@ -# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os - -from sparsezoo import Model -from transformers import AutoTokenizer - -from llmcompressor.transformers.utils.helpers import POSSIBLE_TOKENIZER_FILES -from llmcompressor.utils.fsdp.context import main_process_first_context - -__all__ = ["SparseAutoTokenizer"] - - -class SparseAutoTokenizer(AutoTokenizer): - """ - SparseML wrapper for the AutoTokenizer class - """ - - @classmethod - def from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs): - """ - A wrapper around the AutoTokenizer.from_pretrained method that - enables the loading of tokenizer from SparseZoo stubs - - If a SparseZoo stub is passed, the all the available tokenizer - files are downloaded and the path to the directory containing the - files is passed to the AutoTokenizer.from_pretrained method - - :param pretrained_model_name_or_path: the name of or path to the model to load - :return tokenizer: the loaded tokenizer from pretrained - """ - if str(pretrained_model_name_or_path).startswith("zoo:"): - with main_process_first_context(): - model = Model(pretrained_model_name_or_path) - for file_name in POSSIBLE_TOKENIZER_FILES: - # go over all the possible tokenizer files - # and if detected, download them - file = model.deployment.get_file(file_name) - if file is not None: - tokenizer_file = file - tokenizer_file.download() - pretrained_model_name_or_path = os.path.dirname(tokenizer_file.path) - return super().from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs) diff --git a/src/llmcompressor/transformers/utils/helpers.py b/src/llmcompressor/transformers/utils/helpers.py index 19abfa166..cdbaef2d6 100644 --- a/src/llmcompressor/transformers/utils/helpers.py +++ b/src/llmcompressor/transformers/utils/helpers.py @@ -32,12 +32,10 @@ import torch import transformers from huggingface_hub import HUGGINGFACE_CO_URL_HOME, HfFileSystem, hf_hub_download -from sparsezoo import Model from transformers import AutoConfig from transformers.trainer_utils import get_last_checkpoint from transformers.utils import PaddingStrategy -from llmcompressor.utils import download_zoo_training_dir from llmcompressor.utils.fsdp.context import main_process_first_context _LOGGER = logging.getLogger(__name__) @@ -184,7 +182,6 @@ def resolve_recipe( - a path to the model directory - a path to the model file - Hugging face model id - - SparseZoo stub :return: the resolved recipe """ @@ -224,7 +221,6 @@ def infer_recipe_from_model_path(model_path: Union[str, Path]) -> Optional[str]: - a path to the model directory - a path to the model file - Hugging face model id - - SparseZoo stub :return the path to the recipe file if found, None otherwise """ model_path = model_path.as_posix() if isinstance(model_path, Path) else model_path @@ -242,10 +238,6 @@ def infer_recipe_from_model_path(model_path: Union[str, Path]) -> Optional[str]: _LOGGER.debug(f"No recipe found in the model_path: {model_path}") return None - elif model_path.startswith("zoo:"): - # model_path is a sparsezoo stub - return recipe_from_sparsezoo_stub(stub=model_path) - recipe = recipe_from_huggingface_model_id(model_path)[0] if recipe is None: @@ -294,33 +286,6 @@ def recipe_from_huggingface_model_id( return recipe, True -def recipe_from_sparsezoo_stub( - stub: str, recipe_file_name: Optional[str] = None -) -> Optional[str]: - """ - Attempts to find the recipe for the sparsezoo stub. - - :param stub: The sparsezoo stub to find the recipe for - :param recipe_file_name: The name of the recipe file to find. - If None, the default recipe will be returned. 
Default: None - :return: The path to the recipe file if found, None otherwise - """ - if recipe_file_name is None: - recipe = Model(stub).recipes.default.path - _LOGGER.info(f"Found recipe: {recipe}") - return recipe - else: - for recipe in Model(stub).recipes.recipes: - if recipe.name == recipe_file_name: - recipe = recipe.path - _LOGGER.info(f"Found recipe: {recipe}") - return recipe - _LOGGER.warning( - f"Unable to find recipe: {recipe_file_name} " f"for sparsezoo stub: {stub}." - ) - return None - - def resolve_recipe_file( requested_recipe: Union[str, Path], model_path: Union[str, Path] ) -> Union[str, Path, None]: @@ -333,7 +298,6 @@ - a path to the model directory - a path to the model file - Hugging face model id - - SparseZoo stub :return the path to the recipe file if found, None otherwise """ # preprocess arguments so that they are all strings ) if not os.path.isdir(model_path): - # pathway for model_path that is not a directory - if model_path.startswith("zoo:"): - default_recipe = recipe_from_sparsezoo_stub(model_path) - else: - default_recipe, model_exists = recipe_from_huggingface_model_id(model_path) - if not model_exists: - raise ValueError(f"Unrecognized model_path: {model_path}") + default_recipe, model_exists = recipe_from_huggingface_model_id(model_path) + if not model_exists: + raise ValueError(f"Unrecognized model_path: {model_path}") if not default_recipe == requested_recipe and default_recipe is not None: _LOGGER.warning( @@ -441,13 +401,11 @@ def fetch_recipe_path(target: str): Takes care of two scenarios: 1. target is a local path to a model directory (looks for recipe.yaml in the directory) -2. target is a SparseZoo stub (downloads and - returns the path to the default recipe) -3. target is a HuggingFace stub (downloads and +2. target is a HuggingFace stub (downloads and returns the path to the default recipe) :param target: The target to fetch the recipe path for - can be a local path, SparseZoo stub, or HuggingFace stub + can be a local path or HuggingFace stub :return: The path to the recipe for the target """ DEFAULT_RECIPE_NAME = "recipe.yaml" @@ -459,13 +417,6 @@ # Recipe must be downloaded recipe_path = None - if target.startswith("zoo:"): - # target is a SparseZoo stub - sparsezoo_model = Model(source=target) - with suppress(Exception): - # suppress any errors if the recipe is not found on SparseZoo - recipe_path = sparsezoo_model.recipes.default().path - return recipe_path # target is a HuggingFace stub with suppress(Exception): @@ -536,11 +487,10 @@ def download_repo_from_huggingface_hub(repo_id, **kwargs): def download_model_directory(pretrained_model_name_or_path: str, **kwargs): """ - Download the model directory from the HF hub or SparseZoo if the model - is not found locally + Download the model directory from the HF hub if the model is not found locally :param pretrained_model_name_or_path: the name of or path to the model to load - can be a SparseZoo/HuggingFace model stub + can be a HuggingFace model stub :param kwargs: additional keyword arguments to pass to the download function :return: the path to the downloaded model directory """ @@ -553,13 +503,6 @@ return pretrained_model_name_or_path with main_process_first_context(): - if pretrained_model_name_or_path.startswith("zoo:"): - _LOGGER.debug( - "Passed zoo stub to SparseAutoModelForCausalLM object. 
" - "Loading model from SparseZoo training files..." - ) - return download_zoo_training_dir(zoo_stub=pretrained_model_name_or_path) - _LOGGER.debug("Downloading model from HuggingFace Hub.") return download_repo_from_huggingface_hub( repo_id=pretrained_model_name_or_path, **kwargs diff --git a/src/llmcompressor/transformers/utils/preprocessing_functions.py b/src/llmcompressor/transformers/utils/preprocessing_functions.py index 4ca15bd72..8a259f1a3 100644 --- a/src/llmcompressor/transformers/utils/preprocessing_functions.py +++ b/src/llmcompressor/transformers/utils/preprocessing_functions.py @@ -14,7 +14,7 @@ from typing import Dict -from sparsezoo.utils.registry import RegistryMixin +from compressed_tensors.registry import RegistryMixin class PreprocessingFunctionRegistry(RegistryMixin): diff --git a/src/llmcompressor/utils/helpers.py b/src/llmcompressor/utils/helpers.py index 8c295c1ea..555386b06 100644 --- a/src/llmcompressor/utils/helpers.py +++ b/src/llmcompressor/utils/helpers.py @@ -12,6 +12,7 @@ import json import logging import os +import re import sys import tarfile import warnings @@ -22,7 +23,6 @@ from urllib.parse import urlparse import numpy -from sparsezoo import Model __all__ = [ "ALL_TOKEN", @@ -53,12 +53,11 @@ "NumpyArrayBatcher", "tensor_export", "tensors_export", - "parse_optimization_str", "json_to_jsonl", "deprecation_warning", "parse_kwarg_tuples", "is_package_available", - "download_zoo_training_dir", + "import_from_path", ] @@ -835,22 +834,6 @@ def _tensors_export_batch( ) -def parse_optimization_str(optim_full_name: str) -> Tuple[str, str, Any]: - """ - :param optim_full_name: A name of a pretrained model optimization. i.e. - 'pruned-moderate-deepsparse', 'pruned-aggressive', 'base' - :return: A tuple representing the corresponding SparseZoo model sparse_name, - sparse_category, and sparse_target values with appropriate defaults when - not present. 
- """ - optim_defaults = ["base", "none", None] - optim_split_name = optim_full_name.split("-") - while len(optim_split_name) < len(optim_defaults): - optim_split_name.append(optim_defaults[len(optim_split_name)]) - sparse_name, sparse_category, sparse_target = optim_split_name[:3] - return sparse_name, sparse_category, sparse_target - - def json_to_jsonl(json_file_path: str, overwrite: bool = True): """ Converts a json list file to jsonl file format (used for sharding efficienty) @@ -1003,24 +986,27 @@ def is_package_available( return package_exists -def download_zoo_training_dir(zoo_stub: str) -> str: +def import_from_path(path: str) -> str: """ - Helper function to download the training directory from a zoo stub, - takes care of downloading the missing files in the training - directory if any (This can happen if a some subset of files in the - training directory were downloaded before) - - :param zoo_stub: The zoo stub to download the training directory from - :return: The path to the downloaded training directory + Import the module and the name of the function/class separated by : + Examples: + path = "/path/to/file.py:func_or_class_name" + path = "/path/to/file:focn" + path = "path.to.file:focn" + :param path: path including the file path and object name + :return Function or class object """ - sparsezoo_model = Model(zoo_stub) - training_dir_path = sparsezoo_model.training.path - - # download missing files if any this can happen if - # some subset of files in the training directory - # were downloaded before + original_path, class_name = path.split(":") + _path = original_path - for file_name in sparsezoo_model.training.files: - file_name.path + path = original_path.split(".py")[0] + path = re.sub(r"/+", ".", path) + try: + module = importlib.import_module(path) + except ImportError: + raise ImportError(f"Cannot find module with path {_path}") - return training_dir_path + try: + return getattr(module, class_name) + except AttributeError: + raise AttributeError(f"Cannot find {class_name} in {_path}") diff --git a/tests/llmcompressor/transformers/compression/test_quantization.py b/tests/llmcompressor/transformers/compression/test_quantization.py index 1b7a497a9..167cdec4b 100644 --- a/tests/llmcompressor/transformers/compression/test_quantization.py +++ b/tests/llmcompressor/transformers/compression/test_quantization.py @@ -8,14 +8,10 @@ from compressed_tensors.quantization.utils import is_module_quantized from parameterized import parameterized_class from torch.utils.data import DataLoader -from transformers import DefaultDataCollator +from transformers import AutoTokenizer, DefaultDataCollator from llmcompressor.pytorch.utils import tensors_to_device -from llmcompressor.transformers import ( - SparseAutoModelForCausalLM, - SparseAutoTokenizer, - oneshot, -) +from llmcompressor.transformers import SparseAutoModelForCausalLM, oneshot from llmcompressor.transformers.finetune.data import TextGenerationDataset from llmcompressor.transformers.finetune.data.data_args import DataTrainingArguments from tests.testing_utils import parse_params, requires_gpu, requires_torch @@ -147,7 +143,7 @@ def _get_dataloader(self, data_args, tokenizer): @torch.no_grad() def test_perplexity(self): - tokenizer = SparseAutoTokenizer.from_pretrained(self.model_stub) + tokenizer = AutoTokenizer.from_pretrained(self.model_stub) data_args = DataTrainingArguments( dataset="ultrachat-200k", max_seq_length=self.max_seq_length, diff --git a/tests/llmcompressor/transformers/conftest.py 
b/tests/llmcompressor/transformers/conftest.py index a59e64bd2..13a777d6d 100644 --- a/tests/llmcompressor/transformers/conftest.py +++ b/tests/llmcompressor/transformers/conftest.py @@ -7,5 +7,4 @@ def run_before_and_after_tests(tmp_path): os.environ["TRANSFORMERS_CACHE"] = str(tmp_path / "transformers") os.environ["HF_DATASETS_CACHE"] = str(tmp_path / "datasets") - os.environ["SPARSEZOO_MODELS_PATH"] = str(tmp_path / "sparsezoo") yield diff --git a/tests/llmcompressor/transformers/finetune/finetune_custom/gpu/gpu_config.yaml b/tests/llmcompressor/transformers/finetune/finetune_custom/gpu/gpu_config.yaml index a2fdf8239..d223b2dcb 100644 --- a/tests/llmcompressor/transformers/finetune/finetune_custom/gpu/gpu_config.yaml +++ b/tests/llmcompressor/transformers/finetune/finetune_custom/gpu/gpu_config.yaml @@ -1,5 +1,5 @@ cadence: "nightly" test_type: "regression" -model: "zoo:llama2-7b-ultrachat200k_llama2_pretrain-base" +model: "neuralmagic/Llama-2-7b-ultrachat200k" file_extension: json num_train_epochs: 0.5 \ No newline at end of file diff --git a/tests/llmcompressor/transformers/finetune/finetune_oneshot_configs/gpu/gpu_config.yaml b/tests/llmcompressor/transformers/finetune/finetune_oneshot_configs/gpu/gpu_config.yaml index d1eaa0251..ef46ce8a3 100644 --- a/tests/llmcompressor/transformers/finetune/finetune_oneshot_configs/gpu/gpu_config.yaml +++ b/tests/llmcompressor/transformers/finetune/finetune_oneshot_configs/gpu/gpu_config.yaml @@ -1,6 +1,6 @@ cadence: "weekly" test_type: "regression" -model: "zoo:llama2-7b-ultrachat200k_llama2_pretrain-base" +model: "neuralmagic/Llama-2-7b-ultrachat200k" dataset: "ultrachat-200k" recipe: "tests/llmcompressor/transformers/finetune/test_alternate_recipe.yaml" num_train_epochs: 0.05 diff --git a/tests/llmcompressor/transformers/finetune/test_finetune_no_recipe_custom_dataset.py b/tests/llmcompressor/transformers/finetune/test_finetune_no_recipe_custom_dataset.py index e23a78f47..e50ded634 100644 --- a/tests/llmcompressor/transformers/finetune/test_finetune_no_recipe_custom_dataset.py +++ b/tests/llmcompressor/transformers/finetune/test_finetune_no_recipe_custom_dataset.py @@ -146,10 +146,9 @@ def setUp(self): self.device = "auto" self.output = "./oneshot_output" - if "zoo:" in self.model: - self.model = SparseAutoModelForCausalLM.from_pretrained( - self.model, device_map=self.device, torch_dtype=torch.bfloat16 - ) + self.model = SparseAutoModelForCausalLM.from_pretrained( + self.model, device_map=self.device, torch_dtype=torch.bfloat16 + ) def test_oneshot_then_finetune_gpu(self): self._test_finetune_wout_recipe_custom_dataset() diff --git a/tests/llmcompressor/transformers/finetune/test_oneshot_and_finetune.py b/tests/llmcompressor/transformers/finetune/test_oneshot_and_finetune.py index ea0401dda..6cbc58c75 100644 --- a/tests/llmcompressor/transformers/finetune/test_oneshot_and_finetune.py +++ b/tests/llmcompressor/transformers/finetune/test_oneshot_and_finetune.py @@ -82,10 +82,9 @@ def setUp(self): self.device = "auto" self.output = "./finetune_output" - if "zoo:" in self.model: - self.model = SparseAutoModelForCausalLM.from_pretrained( - self.model, device_map=self.device, torch_dtype=torch.bfloat16 - ) + self.model = SparseAutoModelForCausalLM.from_pretrained( + self.model, device_map=self.device, torch_dtype=torch.bfloat16 + ) def test_oneshot_then_finetune_gpu(self): self._test_oneshot_and_finetune() diff --git a/tests/llmcompressor/transformers/finetune/test_oneshot_and_finetune_with_tokenizer.py 
b/tests/llmcompressor/transformers/finetune/test_oneshot_and_finetune_with_tokenizer.py index d17d71ae3..6d85149bd 100644 --- a/tests/llmcompressor/transformers/finetune/test_oneshot_and_finetune_with_tokenizer.py +++ b/tests/llmcompressor/transformers/finetune/test_oneshot_and_finetune_with_tokenizer.py @@ -15,18 +15,15 @@ def setUp(self): def test_oneshot_and_finetune_with_tokenizer(self): import torch from datasets import load_dataset + from transformers import AutoTokenizer - from llmcompressor.transformers import ( - SparseAutoModelForCausalLM, - SparseAutoTokenizer, - compress, - ) + from llmcompressor.transformers import SparseAutoModelForCausalLM, compress recipe_str = ( "tests/llmcompressor/transformers/finetune/test_alternate_recipe.yaml" ) model = SparseAutoModelForCausalLM.from_pretrained("Xenova/llama2.c-stories15M") - tokenizer = SparseAutoTokenizer.from_pretrained( + tokenizer = AutoTokenizer.from_pretrained( "Xenova/llama2.c-stories15M", ) device = "cuda:0" diff --git a/tests/llmcompressor/transformers/obcq/obcq_configs/completion/gpu/llama_7b_quant.yaml b/tests/llmcompressor/transformers/obcq/obcq_configs/completion/gpu/llama_7b_quant.yaml index bd6ce72b4..496edbb88 100644 --- a/tests/llmcompressor/transformers/obcq/obcq_configs/completion/gpu/llama_7b_quant.yaml +++ b/tests/llmcompressor/transformers/obcq/obcq_configs/completion/gpu/llama_7b_quant.yaml @@ -1,6 +1,6 @@ cadence: "weekly" test_type: "regression" -model: "zoo:llama2-7b-llama2_pretrain-base" +model: "meta-llama/Llama-2-7b-hf" dataset: open_platypus recipe: "tests/llmcompressor/transformers/obcq/recipes/quant.yaml" device: "cuda:1" diff --git a/tests/llmcompressor/transformers/obcq/obcq_configs/completion/gpu/llama_7b_quant_and_sparse.yaml b/tests/llmcompressor/transformers/obcq/obcq_configs/completion/gpu/llama_7b_quant_and_sparse.yaml index f5d8197cf..ef80b17ea 100644 --- a/tests/llmcompressor/transformers/obcq/obcq_configs/completion/gpu/llama_7b_quant_and_sparse.yaml +++ b/tests/llmcompressor/transformers/obcq/obcq_configs/completion/gpu/llama_7b_quant_and_sparse.yaml @@ -1,6 +1,6 @@ cadence: "weekly" test_type: "regression" -model: "zoo:llama2-7b-llama2_pretrain-base" +model: "meta-llama/Llama-2-7b-hf" dataset: open_platypus recipe: "tests/llmcompressor/transformers/obcq/recipes/quant_and_sparse.yaml" device: "cuda:0" diff --git a/tests/llmcompressor/transformers/obcq/obcq_configs/completion/gpu/llama_7b_sparse.yml b/tests/llmcompressor/transformers/obcq/obcq_configs/completion/gpu/llama_7b_sparse.yml index 84ac51e3a..5b5d644fd 100644 --- a/tests/llmcompressor/transformers/obcq/obcq_configs/completion/gpu/llama_7b_sparse.yml +++ b/tests/llmcompressor/transformers/obcq/obcq_configs/completion/gpu/llama_7b_sparse.yml @@ -1,6 +1,6 @@ cadence: "weekly" test_type: "regression" -model: "zoo:llama2-7b-llama2_pretrain-base" +model: "meta-llama/Llama-2-7b-hf" dataset: open_platypus recipe: "tests/llmcompressor/transformers/obcq/recipes/sparse.yaml" device: "cuda:1" diff --git a/tests/llmcompressor/transformers/obcq/obcq_configs/consec_runs/gpu/llama_consec_runs.yaml b/tests/llmcompressor/transformers/obcq/obcq_configs/consec_runs/gpu/llama_consec_runs.yaml index 3717e90a7..e245b9b78 100644 --- a/tests/llmcompressor/transformers/obcq/obcq_configs/consec_runs/gpu/llama_consec_runs.yaml +++ b/tests/llmcompressor/transformers/obcq/obcq_configs/consec_runs/gpu/llama_consec_runs.yaml @@ -1,6 +1,6 @@ cadence: "nightly" test_type: "regression" -model: "zoo:llama2-7b-llama2_pretrain-base" +model: "meta-llama/Llama-2-7b-hf" 
dataset: open_platypus first_recipe: "tests/llmcompressor/transformers/obcq/recipes/quant_and_sparse.yaml" second_recipe: "tests/llmcompressor/transformers/obcq/recipes/additional_sparsity.yaml" diff --git a/tests/llmcompressor/transformers/obcq/obcq_configs/sparse/gpu/llama_7b_sparse.yaml b/tests/llmcompressor/transformers/obcq/obcq_configs/sparse/gpu/llama_7b_sparse.yaml index e13a4e040..0135c33b1 100644 --- a/tests/llmcompressor/transformers/obcq/obcq_configs/sparse/gpu/llama_7b_sparse.yaml +++ b/tests/llmcompressor/transformers/obcq/obcq_configs/sparse/gpu/llama_7b_sparse.yaml @@ -1,6 +1,6 @@ cadence: "nightly" test_type: "regression" -model: "zoo:llama2-7b-llama2_pretrain-base" +model: "meta-llama/Llama-2-7b-hf" dataset: open_platypus recipe: "tests/llmcompressor/transformers/obcq/recipes/sparse.yaml" sparsity: 0.3 diff --git a/tests/llmcompressor/transformers/obcq/obcq_configs/sparse/gpu/llama_7b_sparse_auto.yaml b/tests/llmcompressor/transformers/obcq/obcq_configs/sparse/gpu/llama_7b_sparse_auto.yaml index 2c4ece1af..4b0cb1d00 100644 --- a/tests/llmcompressor/transformers/obcq/obcq_configs/sparse/gpu/llama_7b_sparse_auto.yaml +++ b/tests/llmcompressor/transformers/obcq/obcq_configs/sparse/gpu/llama_7b_sparse_auto.yaml @@ -1,6 +1,6 @@ cadence: "nightly" test_type: "regression" -model: "zoo:llama2-7b-llama2_pretrain-base" +model: "meta-llama/Llama-2-7b-hf" dataset: open_platypus recipe: "tests/llmcompressor/transformers/obcq/recipes/sparse.yaml" sparsity: 0.3 diff --git a/tests/llmcompressor/transformers/obcq/test_consecutive_runs.py b/tests/llmcompressor/transformers/obcq/test_consecutive_runs.py index a640fe76a..581139f09 100644 --- a/tests/llmcompressor/transformers/obcq/test_consecutive_runs.py +++ b/tests/llmcompressor/transformers/obcq/test_consecutive_runs.py @@ -119,10 +119,9 @@ class TestConsecutiveRunsGPU(TestConsecutiveRuns): def setUp(self): from llmcompressor.transformers import SparseAutoModelForCausalLM - if "zoo:" in self.model: - self.model = SparseAutoModelForCausalLM.from_pretrained( - self.model, device_map=self.device - ) + self.model = SparseAutoModelForCausalLM.from_pretrained( + self.model, device_map=self.device + ) self.output = "./oneshot_output" self.output_first = Path(self.output) / "test_1" diff --git a/tests/llmcompressor/transformers/obcq/test_obcq_completion.py b/tests/llmcompressor/transformers/obcq/test_obcq_completion.py index ea2635ae3..112ffea69 100644 --- a/tests/llmcompressor/transformers/obcq/test_obcq_completion.py +++ b/tests/llmcompressor/transformers/obcq/test_obcq_completion.py @@ -20,15 +20,14 @@ class TestOBCQCompletion(unittest.TestCase): def labeled_dataloader(self, dataset_name, model_name): from torch.utils.data import DataLoader - from transformers import DefaultDataCollator + from transformers import AutoTokenizer, DefaultDataCollator - from llmcompressor.transformers import SparseAutoTokenizer from llmcompressor.transformers.finetune.data import TextGenerationDataset from llmcompressor.transformers.finetune.data.data_args import ( DataTrainingArguments, ) - tokenizer = SparseAutoTokenizer.from_pretrained(model_name) + tokenizer = AutoTokenizer.from_pretrained(model_name) data_args = DataTrainingArguments( dataset=dataset_name, max_seq_length=512, @@ -141,13 +140,10 @@ def setUp(self): self.model_name = None self.output = "./oneshot_output" - # Temporary fix as oneshot seems to not work with zoo: models - # Need to keep th model name for the perplexity calculation post oneshot - if "zoo:" in self.model: - self.model_name = 
self.model - self.model = SparseAutoModelForCausalLM.from_pretrained( - self.model, device_map=self.device, torch_dtype=torch.bfloat16 - ) + self.model_name = self.model + self.model = SparseAutoModelForCausalLM.from_pretrained( + self.model, device_map=self.device, torch_dtype=torch.bfloat16 + ) def test_oneshot_completion_gpu(self): self._test_oneshot_completion(model_name=self.model_name) diff --git a/tests/llmcompressor/transformers/obcq/test_obcq_sparsity.py b/tests/llmcompressor/transformers/obcq/test_obcq_sparsity.py index 27b189e95..b372df22a 100644 --- a/tests/llmcompressor/transformers/obcq/test_obcq_sparsity.py +++ b/tests/llmcompressor/transformers/obcq/test_obcq_sparsity.py @@ -75,10 +75,9 @@ def setUp(self): self.output = "./oneshot_output" - if "zoo:" in self.model: - self.model = SparseAutoModelForCausalLM.from_pretrained( - self.model, device_map=self.device, torch_dtype=torch.bfloat16 - ) + self.model = SparseAutoModelForCausalLM.from_pretrained( + self.model, device_map=self.device, torch_dtype=torch.bfloat16 + ) def test_sparsities_gpu(self): from llmcompressor.pytorch.model_load.helpers import get_session_model diff --git a/tests/llmcompressor/transformers/oneshot/test_api_inputs.py b/tests/llmcompressor/transformers/oneshot/test_api_inputs.py index 26b5d523a..0a28585b2 100644 --- a/tests/llmcompressor/transformers/oneshot/test_api_inputs.py +++ b/tests/llmcompressor/transformers/oneshot/test_api_inputs.py @@ -25,12 +25,11 @@ class TestOneShotInputs(unittest.TestCase): tokenize = None def setUp(self): - from llmcompressor.transformers import ( - SparseAutoModelForCausalLM, - SparseAutoTokenizer, - ) + from transformers import AutoTokenizer + + from llmcompressor.transformers import SparseAutoModelForCausalLM - self.tokenizer = SparseAutoTokenizer.from_pretrained(self.model) + self.tokenizer = AutoTokenizer.from_pretrained(self.model) self.model = SparseAutoModelForCausalLM.from_pretrained(self.model) self.output = "./oneshot_output" self.kwargs = {"dataset_config_name": self.dataset_config_name} diff --git a/tests/llmcompressor/transformers/sparsification/test_sparse_config.py b/tests/llmcompressor/transformers/sparsification/test_sparse_config.py deleted file mode 100644 index 5f280e320..000000000 --- a/tests/llmcompressor/transformers/sparsification/test_sparse_config.py +++ /dev/null @@ -1,17 +0,0 @@ -import pytest - -from llmcompressor.transformers import SparseAutoConfig - - -@pytest.mark.parametrize( - "pretrained_model_name_or_path", - [ - "roneneldan/TinyStories-1M", - "mgoin/TinyStories-1M-ds", - "zoo:codegen_mono-350m-bigpython_bigquery_thepile-pruned50_quantized", - ], -) -def test_from_pretrained(tmp_path, pretrained_model_name_or_path): - assert SparseAutoConfig.from_pretrained( - pretrained_model_name_or_path, cache_dir=tmp_path - ) diff --git a/tests/llmcompressor/transformers/sparsification/test_sparse_tokenizer.py b/tests/llmcompressor/transformers/sparsification/test_sparse_tokenizer.py deleted file mode 100644 index 7381b995d..000000000 --- a/tests/llmcompressor/transformers/sparsification/test_sparse_tokenizer.py +++ /dev/null @@ -1,17 +0,0 @@ -import pytest - -from llmcompressor.transformers import SparseAutoTokenizer - - -@pytest.mark.parametrize( - "pretrained_model_name_or_path", - [ - "roneneldan/TinyStories-1M", - "mgoin/TinyStories-1M-ds", - "zoo:codegen_mono-350m-bigpython_bigquery_thepile-pruned50_quantized", - ], -) -def test_from_pretrained(tmp_path, pretrained_model_name_or_path): - assert SparseAutoTokenizer.from_pretrained( - 
pretrained_model_name_or_path, cache_dir=tmp_path - )
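With every SparseZoo entry point now removed, the user-facing migration implied by the example and test changes above is small. A minimal sketch, assuming the `neuralmagic/Llama-2-7b-ultrachat200k` checkpoint referenced throughout this diff is available on the Hugging Face Hub:

```python
import torch
from transformers import AutoTokenizer

from llmcompressor.transformers import SparseAutoModelForCausalLM
from llmcompressor.utils import import_from_path

# a Hugging Face Hub stub replaces the old "zoo:..." SparseZoo stub
model_stub = "neuralmagic/Llama-2-7b-ultrachat200k"
model = SparseAutoModelForCausalLM.from_pretrained(
    model_stub, torch_dtype=torch.bfloat16, device_map="auto"
)

# the stock transformers AutoTokenizer replaces the removed SparseAutoTokenizer
tokenizer = AutoTokenizer.from_pretrained(model_stub)

# import_from_path, relocated from sparsezoo into llmcompressor.utils,
# resolves "module:object" strings to the object itself
loads = import_from_path("json:loads")  # same as `from json import loads`
```

Local directories continue to work unchanged, since `from_pretrained` accepts a path, an HF stub, or a model identifier, as the updated `ModelArguments` help text notes.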