Skip to content

Commit

Permalink
Tests
Browse files Browse the repository at this point in the history
Signed-off-by: Rahul Tuli <rahul@neuralmagic.com>
  • Loading branch information
rahul-tuli committed Dec 23, 2024
1 parent ccd6c39 commit 4043b65
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 36 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -3,27 +3,35 @@ pruning_stage:
SparseGPTModifier:
sparsity: 0.5
sequential_update: true
mask_structure: "2:4"
mask_structure: "0:0"
targets: ['re:model.layers.\d*$']
quant_stage:
test_stage:
quant_modifiers:
QuantizationModifier:
ignore: ["lm_head"]
config_groups:
group_0:
weights:
num_bits: 8
type: int
strategy: tensor
dynamic: false
type: "int"
symmetric: true
strategy: "tensor"
input_activations:
num_bits: 8
type: int
strategy: tensor
dynamic: true
symmetric: true
type: "int"
symmetric: false
strategy: "tensor"
output_activations: null
targets: ["Linear"]
group_1:
weights:
num_bits: 8
type: "int"
symmetric: true
strategy: "tensor"
input_activations: null
output_activations: null
targets: ["Embedding"]
pruning_modifiers:
ConstantPruningModifier:
targets: [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -370,9 +370,9 @@ def test_model_shared_tensors_gpu(
"model_stub, recipe, sparse_format, quant_format",
[
(
"Xenova/llama2.c-stories110M",
"tests/llmcompressor/transformers/compression/recipes/sparse_24_int8.yaml",
CompressionFormat.sparse_24.value,
"Xenova/llama2.c-stories15M",
"tests/llmcompressor/transformers/compression/recipes/sparse_int8.yaml",
CompressionFormat.sparse_bitmask.value,
CompressionFormat.int_quantized.value,
),
],
Expand Down Expand Up @@ -445,30 +445,7 @@ def test_compressor_stacking(model_stub, recipe, sparse_format, quant_format, tm
if key.endswith("weight") and quant_format != "dense":
# we don't expect an exact match for compressed
diff = torch.abs(dense_tensor - reconstructed_tensor)
assert not torch.any(
diff > 0.01
).item(), f"{key} has a diff greater than 0.01"
assert not torch.any(diff > 0.01), f"Max diff: {torch.max(diff)}"
else:
assert torch.equal(dense_tensor, reconstructed_tensor)
shutil.rmtree(tmp_path)


# This parameterization should be added to the test_compressor_stacking test
# once the lossy nature of FP8 compress-decompress is resolved.
# Until then, this test is marked as xfail.
@pytest.mark.xfail(reason="Known issue with FP8 compress-decompress")
@pytest.mark.parametrize(
"model_stub, recipe, sparse_format, quant_format",
[
(
"Xenova/llama2.c-stories110M",
"tests/llmcompressor/transformers/compression/recipes/sparse_24_fp8.yaml",
CompressionFormat.sparse_24.value,
CompressionFormat.float_quantized.value,
),
],
)
def test_compressor_stacking_fp8(
model_stub, recipe, sparse_format, quant_format, tmp_path
):
test_compressor_stacking(model_stub, recipe, sparse_format, quant_format, tmp_path)

0 comments on commit 4043b65

Please sign in to comment.