Merge pull request #2 from vllm-project/sa/remove_sparsezoo

Remove SparseZoo Usage
vllm-project · Jun 24, 2024 · 06e3131 · 06e3131
2 parents e8c07af + eab8036
commit 06e3131
Show file tree

Hide file tree

Showing 49 changed files with 106 additions and 409 deletions.
diff --git a/.github/workflows/quality-check.yaml b/.github/workflows/quality-check.yaml
@@ -18,13 +18,7 @@ jobs:
       - uses: actions/checkout@v2
       - uses: actions/checkout@v2
         with:
-          repository: "neuralmagic/sparsezoo"
-          path: "sparsezoo"
           ref: ${{needs.test-setup.outputs.branch}}
-      - name: "⚙️ Install sparsezoo dependencies"
-        run: pip3 install sparsezoo/
-      - name: "Clean sparsezoo directory"
-        run: rm -r sparsezoo/
       - name: "⚙️ Install dependencies"
         run: pip3 install .[dev]
       - name: "🧹 Running quality checks"

diff --git a/.github/workflows/test-check.yaml b/.github/workflows/test-check.yaml
@@ -30,8 +30,6 @@ jobs:
           || echo "::set-output name=branch::main"
   base-tests:
     runs-on: ubuntu-22.04
-    env:
-      SPARSEZOO_TEST_MODE: "true"
     needs: test-setup
     steps:
       - uses: actions/setup-python@v4
@@ -40,21 +38,14 @@ jobs:
       - uses: actions/checkout@v2
       - uses: actions/checkout@v2
         with:
-          repository: "neuralmagic/sparsezoo"
-          path: "sparsezoo"
           ref: ${{needs.test-setup.outputs.branch}}
-      - name: "⚙️ Install sparsezoo dependencies"
-        run: pip3 install -U pip && pip3 install setuptools sparsezoo/
-      - name: "Clean sparsezoo directory"
-        run: rm -r sparsezoo/
       - name: "⚙️ Install dependencies"
         run: pip3 install .[dev]
       - name: "🔬 Running base tests"
         run: make test
   pytorch-tests:
     runs-on: k8s-eng-gpu-64G-v100-32G
     env:
-      SPARSEZOO_TEST_MODE: "true"
       CLEARML_WEB_HOST: ${{ secrets.CLEARML_WEB_HOST }}
       CLEARML_API_HOST: ${{ secrets.CLEARML_API_HOST }}
       CLEARML_API_ACCESS_KEY: ${{ secrets.CLEARML_API_ACCESS_KEY }}
@@ -68,13 +59,7 @@ jobs:
       - uses: actions/checkout@v2
       - uses: actions/checkout@v2
         with:
-          repository: "neuralmagic/sparsezoo"
-          path: "sparsezoo"
           ref: ${{needs.test-setup.outputs.branch}}
-      - name: "⚙️ Install sparsezoo dependencies"
-        run: pip3 install -U pip && pip3 install setuptools sparsezoo/
-      - name: "Clean sparsezoo directory"
-        run: rm -r sparsezoo/
       - uses: actions/checkout@v2
       - uses: actions/checkout@v2
         with:
@@ -92,7 +77,6 @@ jobs:
   compat-pytorch-1_9-pytorch-tests:
     runs-on: k8s-eng-gpu-64G-v100-32G
     env:
-      SPARSEZOO_TEST_MODE: "true"
       CLEARML_WEB_HOST: ${{ secrets.CLEARML_WEB_HOST }}
       CLEARML_API_HOST: ${{ secrets.CLEARML_API_HOST }}
       CLEARML_API_ACCESS_KEY: ${{ secrets.CLEARML_API_ACCESS_KEY }}
@@ -106,13 +90,7 @@ jobs:
       - uses: actions/checkout@v2
       - uses: actions/checkout@v2
         with:
-          repository: "neuralmagic/sparsezoo"
-          path: "sparsezoo"
           ref: ${{needs.test-setup.outputs.branch}}
-      - name: "⚙️ Install sparsezoo dependencies"
-        run: pip3 install -U pip && pip3 install setuptools sparsezoo/
-      - name: "Clean sparsezoo directory"
-        run: rm -r sparsezoo/
       - uses: actions/checkout@v2
       - uses: actions/checkout@v2
         with:
@@ -130,7 +108,6 @@ jobs:
   transformers-tests:
     runs-on: k8s-eng-gpu-64G-v100-32G
     env:
-      SPARSEZOO_TEST_MODE: "true"
       CLEARML_WEB_HOST: ${{ secrets.CLEARML_WEB_HOST }}
       CLEARML_API_HOST: ${{ secrets.CLEARML_API_HOST }}
       CLEARML_API_ACCESS_KEY: ${{ secrets.CLEARML_API_ACCESS_KEY }}
@@ -144,13 +121,7 @@ jobs:
       - uses: actions/checkout@v2
       - uses: actions/checkout@v2
         with:
-          repository: "neuralmagic/sparsezoo"
-          path: "sparsezoo"
           ref: ${{needs.test-setup.outputs.branch}}
-      - name: "⚙️ Install sparsezoo dependencies"
-        run: pip3 install -U pip && pip3 install setuptools sparsezoo/
-      - name: "Clean sparsezoo directory"
-        run: rm -r sparsezoo/
       - uses: actions/checkout@v2
         with:
           repository: "neuralmagic/compressed-tensors"

diff --git a/.github/workflows/test-weekly.yml b/.github/workflows/test-weekly.yml
@@ -8,6 +8,7 @@ jobs:
     runs-on: k8s-mle-gpu-12-vcpu-225GB-ram-2-a6000-48G
     env:
       CADENCE: "weekly"
+      HF_TOKEN: ${{ secrets.NM_HF_TOKEN }}
       CLEARML_WEB_HOST: ${{ secrets.CLEARML_WEB_HOST }}
       CLEARML_API_HOST: ${{ secrets.CLEARML_API_HOST }}
       CLEARML_API_ACCESS_KEY: ${{ secrets.CLEARML_API_ACCESS_KEY }}

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -28,7 +28,7 @@ If you’re reading this, hopefully we have piqued your interest to take the nex
 
 ## Code of Conduct
 
-Help us keep the software inclusive. Please read and follow our [Code of Conduct](CODE_OF_CONDUCT.md) in order to promote an environment that is friendly, fair, respectful, and safe. We want to inspire collaboration, innovation, and fun!
+Help us keep the software inclusive. Please read and follow our [Code of Conduct](https://github.com/neuralmagic/sparseml/blob/main/CODE_OF_CONDUCT.md) in order to promote an environment that is friendly, fair, respectful, and safe. We want to inspire collaboration, innovation, and fun!
 
 ## Ways to Contribute
 
@@ -54,7 +54,7 @@ Whether you’re a newbie, dabbler, or expert, we appreciate you jumping in.
 
 Please search through existing issues and requests first to avoid duplicates. Neural Magic will work with you further to take next steps.
 
-- Go to: [GitHub Issues](https://github.com/neuralmagic/llmcompressor/issues)
+- Go to: [GitHub Issues](https://github.com/vllm-project/llm-compressor/issues)
 
 For bugs, include:
 

diff --git a/DEVELOPING.md b/DEVELOPING.md
@@ -28,8 +28,8 @@ Here are some details to get started.
 **Development Installation**
 
 ```bash
-git clone https://github.com/neuralmagic/sparseml.git
-cd llmcompressor
+git clone https://github.com/vllm-project/llm-compressor
+cd llm-compressor
 python3 -m pip install -e "./[dev]"
 ```
 
@@ -73,7 +73,7 @@ File any error found before changes as an Issue and fix any errors found after m
 
 ## GitHub Workflow
 
-1. Fork the `TODO/llmcompressor` repository into your GitHub account: https://github.com/TODO/llmcompressor/fork.
+1. Fork the `llm-compressor` repository into your GitHub account: https://github.com/vllm-project/llm-compressor/fork.
 
 2. Clone your fork of the GitHub repository, replacing `<username>` with your GitHub username.
 

diff --git a/Makefile b/Makefile
@@ -31,7 +31,7 @@ style:
 # run tests for the repo
 test:
 	@echo "Running python tests";
-	SPARSEZOO_TEST_MODE="true" pytest tests $(PYTEST_ARGS)
+	pytest tests $(PYTEST_ARGS)
 
 # creates wheel file
 build:

diff --git a/examples/quantization/llama7b_quantize_sparse_cnn.py b/examples/quantization/llama7b_quantize_sparse_cnn.py
@@ -1,11 +1,8 @@
 import torch
 from datasets import load_dataset
+from transformers import AutoTokenizer
 
-from llmcompressor.transformers import (
-    SparseAutoModelForCausalLM,
-    SparseAutoTokenizer,
-    oneshot,
-)
+from llmcompressor.transformers import SparseAutoModelForCausalLM, oneshot
 
 # define a llmcompressor recipe for GPTQ W4A16 quantization
 recipe = """
@@ -30,7 +27,7 @@
 model = SparseAutoModelForCausalLM.from_pretrained(
     model_stub, torch_dtype=torch.bfloat16, device_map="auto"
 )
-tokenizer = SparseAutoTokenizer.from_pretrained(model_stub)
+tokenizer = AutoTokenizer.from_pretrained(model_stub)
 
 # for quantization calibration, we will use a subset of the dataset that was used to
 # sparsify and finetune the model

diff --git a/examples/quantization/llama7b_sparse_quantized/README.md b/examples/quantization/llama7b_sparse_quantized/README.md
@@ -10,7 +10,7 @@ Follow the steps below, or to run the example as `python examples/llama7b_sparse
 In this step, we select which model to use as a baseline for sparsification, a dataset to
 use for calibration and finetuning, and a recipe.
 
-Models can reference a local directory, model in the huggingface hub, or in the sparsezoo.
+Models can reference a local directory or a model in the huggingface hub.
 
 Datasets can be from a local compatible directory or the huggingface hub.
 
@@ -23,7 +23,7 @@ and quantize to 4 bits in one show using GPTQ.
 import torch
 from llmcompressor.transformers import SparseAutoModelForCausalLM
 
-model_stub = "zoo:llama2-7b-ultrachat200k_llama2_pretrain-base"
+model_stub = "neuralmagic/Llama-2-7b-ultrachat200k"
 model = SparseAutoModelForCausalLM.from_pretrained(
     model_stub, torch_dtype=torch.bfloat16, device_map="auto"
 )

diff --git a/examples/quantization/llama7b_sparse_quantized/llama7b_sparse_w4a16.py b/examples/quantization/llama7b_sparse_quantized/llama7b_sparse_w4a16.py
@@ -6,7 +6,7 @@
 recipe = "2:4_w4a16_recipe.yaml"
 
 # load the model in as bfloat16 to save on memory and compute
-model_stub = "zoo:llama2-7b-ultrachat200k_llama2_pretrain-base"
+model_stub = "neuralmagic/Llama-2-7b-ultrachat200k"
 model = SparseAutoModelForCausalLM.from_pretrained(
     model_stub, torch_dtype=torch.bfloat16, device_map="auto"
 )

diff --git a/examples/quantization/llama7b_w4a16_quantization.ipynb b/examples/quantization/llama7b_w4a16_quantization.ipynb
@@ -71,7 +71,7 @@
     "\n",
     "# by setting the device_map to auto, we can spread the model evenly across all available GPUs\n",
     "# load the model in as bfloat16 to save on memory and compute\n",
-    "model_stub = \"zoo:llama2-7b-ultrachat200k_llama2_pretrain-base\"\n",
+    "model_stub = \"neuralmagic/Llama-2-7b-ultrachat200k\"\n",
     "model = SparseAutoModelForCausalLM.from_pretrained(model_stub, torch_dtype=torch.bfloat16, device_map=\"auto\")\n",
     "\n",
     "# uses SparseML's built-in preprocessing for ultra chat\n",

diff --git a/examples/quantization/llama7b_w4a16_quantization.py b/examples/quantization/llama7b_w4a16_quantization.py
@@ -21,7 +21,7 @@
 
 # setting device_map to auto to spread the model evenly across all available GPUs
 # load the model in as bfloat16 to save on memory and compute
-model_stub = "zoo:llama2-7b-ultrachat200k_llama2_pretrain-base"
+model_stub = "neuralmagic/Llama-2-7b-ultrachat200k"
 model = SparseAutoModelForCausalLM.from_pretrained(
     model_stub, torch_dtype=torch.bfloat16, device_map="auto"
 )

diff --git a/examples/quantization/llama7b_w8a8_quantization.py b/examples/quantization/llama7b_w8a8_quantization.py
@@ -27,7 +27,7 @@
 
 # setting device_map to auto to spread the model evenly across all available GPUs
 # load the model in as bfloat16 to save on memory and compute
-model_stub = "zoo:llama2-7b-ultrachat200k_llama2_pretrain-base"
+model_stub = "neuralmagic/Llama-2-7b-ultrachat200k"
 model = SparseAutoModelForCausalLM.from_pretrained(
     model_stub, torch_dtype=torch.bfloat16, device_map="auto"
 )

diff --git a/examples/trl_mixin/ex_trl_constant.py b/examples/trl_mixin/ex_trl_constant.py
@@ -13,12 +13,12 @@
 # limitations under the License.
 
 from datasets import load_dataset
+from transformers import AutoTokenizer
 from trl import DataCollatorForCompletionOnlyLM
 
 from llmcompressor.transformers import (
     SFTTrainer,
     SparseAutoModelForCausalLM,
-    SparseAutoTokenizer,
     TrainingArguments,
 )
 
@@ -27,7 +27,7 @@
 model = SparseAutoModelForCausalLM.from_pretrained(
     model_path, torch_dtype="auto", device_map="auto"
 )
-tokenizer = SparseAutoTokenizer.from_pretrained(model_path)
+tokenizer = AutoTokenizer.from_pretrained(model_path)
 tokenizer.pad_token = tokenizer.eos_token
 
 # recipe for maintaining model sparsity during finetuning

diff --git a/examples/trl_mixin/ex_trl_distillation.py b/examples/trl_mixin/ex_trl_distillation.py
@@ -12,19 +12,18 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from transformers import DefaultDataCollator
+from transformers import AutoTokenizer, DefaultDataCollator
 
 from llmcompressor.transformers import (
     DataTrainingArguments,
     SFTTrainer,
     SparseAutoModelForCausalLM,
-    SparseAutoTokenizer,
     TextGenerationDataset,
     TrainingArguments,
 )
 
 model_path = "neuralmagic/Llama-2-7b-pruned50-retrained"
-teacher_path = "zoo:llama2-7b-gsm8k_llama2_pretrain-base"
+teacher_path = "neuralmagic/Llama-2-7b-gsm8k"
 output_dir = "./output_trl_sft_test_7b_gsm8k"
 
 model = SparseAutoModelForCausalLM.from_pretrained(
@@ -34,7 +33,7 @@
     teacher_path, torch_dtype="auto", device_map="auto"
 )
 
-tokenizer = SparseAutoTokenizer.from_pretrained(model_path)
+tokenizer = AutoTokenizer.from_pretrained(model_path)
 
 # Load gsm8k using SparseML dataset tools
 data_args = DataTrainingArguments(

diff --git a/setup.py b/setup.py
@@ -62,7 +62,6 @@
         "compressed-tensors"
         if version_info.is_release
         else "compressed-tensors-nightly",
-        "sparsezoo" if version_info.is_release else "sparsezoo-nightly",
     ],
     extras_require={
         "dev": [

diff --git a/src/llmcompressor/pytorch/utils/helpers.py b/src/llmcompressor/pytorch/utils/helpers.py
@@ -45,8 +45,6 @@
     QATLinear = None
     QATConv2d = None
 
-from sparsezoo import Model
-
 from llmcompressor.utils import create_dirs, save_numpy
 
 try:
@@ -98,7 +96,6 @@
     "thin_model_from_checkpoint",
     "MEMORY_BOUNDED",
     "memory_aware_threshold",
-    "download_framework_model_by_recipe_type",
     "detach",
     "adjust_quantization_for_onnx_export",
     "get_dependency_order",
@@ -1099,38 +1096,6 @@ def memory_aware_threshold(tensor: torch.Tensor, idx: int) -> Tensor:
         return torch.kthvalue(tensor.view(-1), idx + 1)[0]
 
 
-def download_framework_model_by_recipe_type(
-    zoo_model: Model, recipe_name: Optional[str] = None, model_suffix: str = "pth"
-) -> str:
-    """
-    Extract the path of the framework model from the
-    zoo model, conditioned on the name of the recipe
-    By default, the function will return path to the final framework model
-    :params zoo_model: model object from sparsezoo
-    :params recipe_name: a name of the recipe (e.g. "transfer_learn", "original" etc.)
-    :params model_suffix: model_suffix that models are saved with
-    :return: path to the framework model
-    """
-
-    # default to model query params if available
-    recipe_name = recipe_name or (
-        zoo_model.stub_params.get("recipe_type") or zoo_model.stub_params.get("recipe")
-    )
-
-    framework_model = None
-    if recipe_name and "transfer" in recipe_name.lower():
-        # fetching the model for transfer learning
-        model_name = f"model.ckpt.{model_suffix}"
-        framework_model = zoo_model.training.default.get_file(model_name)
-
-    if framework_model is None:
-        # fetching the model for inference or fall back if model.ckpt.pth doesn't exist
-        model_name = f"model.{model_suffix}"
-        framework_model = zoo_model.training.default.get_file(model_name)
-
-    return framework_model.path
-
-
 def detach(x: Union[torch.Tensor, List, Tuple]):
     if isinstance(x, torch.Tensor):
         return x.detach()

diff --git a/src/llmcompressor/recipe/recipe.py b/src/llmcompressor/recipe/recipe.py
@@ -27,7 +27,7 @@ class Recipe(RecipeBase):
     (More information on supported modifiers can be found at
     https://docs.neuralmagic.com/products/sparseml)
 
-    Recipes can be created from a file, string, or SparseZoo stub.
+    Recipes can be created from a file, string, or HuggingFace stub.
     Acceptable file formats include both json and yaml, however,
     when serializing a recipe, yaml will be used by default.
     """
@@ -96,7 +96,7 @@ def create_instance(
         >>> recipe = Recipe.create_instance(recipe_str)
 
         :param path_or_modifiers: The path to the recipe file or
-            SparseZoo stub or the recipe string (must be a valid
+            the recipe string (must be a valid
             json/yaml file or a valid json/yaml string). Can also
             accept a RecipeModifier instance, or a list of
             RecipeModifiers

diff --git a/src/llmcompressor/transformers/__init__.py b/src/llmcompressor/transformers/__init__.py
@@ -21,10 +21,5 @@
 # isort: skip_file
 # (import order matters for circular import avoidance)
 from .utils import *
-from .sparsification import (
-    SparseAutoModel,
-    SparseAutoModelForCausalLM,
-    SparseAutoConfig,
-    SparseAutoTokenizer,
-)
+from .sparsification import SparseAutoModel, SparseAutoModelForCausalLM
 from .finetune import *