From d31b31bb9eb3a948faab6d500bd3649d649f302f Mon Sep 17 00:00:00 2001
From: Dipika
Date: Sat, 21 Dec 2024 01:27:36 +0000
Subject: [PATCH] add info for clarity

---
 .../quantization_2of4_sparse_w4a16/llama7b_sparse_w4a16.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/examples/quantization_2of4_sparse_w4a16/llama7b_sparse_w4a16.py b/examples/quantization_2of4_sparse_w4a16/llama7b_sparse_w4a16.py
index 7c1aee71d..884952f5e 100644
--- a/examples/quantization_2of4_sparse_w4a16/llama7b_sparse_w4a16.py
+++ b/examples/quantization_2of4_sparse_w4a16/llama7b_sparse_w4a16.py
@@ -54,6 +54,8 @@
     warmup_ratio=warmup_ratio,
 )
 logger.info(
-    "Note: vLLM requires the dtype=torch.float16 when running the ",
-    "compressed marlin-24 model",
+    "Note: llmcompressor does not currently support running "
+    "compressed models in the marlin-24 format. The model "
+    "produced from this example can be run on vLLM with "
+    "dtype=torch.float16"
 )
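
Note (not part of the patch): a minimal sketch of what the updated log message
describes, i.e. running the model this example produces in vLLM with
dtype=torch.float16. The output directory name below is an assumption; point
it at whatever path the example script actually saves the compressed model to.

    from vllm import LLM, SamplingParams

    # dtype must be float16 when running the compressed 2:4-sparse W4A16
    # model, as the log message in the patch notes. The model path is
    # hypothetical and should match the example's output directory.
    llm = LLM(model="./llama7b_sparse_w4a16_output", dtype="float16")

    params = SamplingParams(max_tokens=64)
    outputs = llm.generate(["What is 2:4 sparsity?"], params)
    print(outputs[0].outputs[0].text)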