From d31b31bb9eb3a948faab6d500bd3649d649f302f Mon Sep 17 00:00:00 2001
From: Dipika
Date: Sat, 21 Dec 2024 01:27:36 +0000
Subject: [PATCH] add info for clarity

---
 .../quantization_2of4_sparse_w4a16/llama7b_sparse_w4a16.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/examples/quantization_2of4_sparse_w4a16/llama7b_sparse_w4a16.py b/examples/quantization_2of4_sparse_w4a16/llama7b_sparse_w4a16.py
index 7c1aee71d..884952f5e 100644
--- a/examples/quantization_2of4_sparse_w4a16/llama7b_sparse_w4a16.py
+++ b/examples/quantization_2of4_sparse_w4a16/llama7b_sparse_w4a16.py
@@ -54,6 +54,8 @@
     warmup_ratio=warmup_ratio,
 )
 logger.info(
-    "Note: vLLM requires the dtype=torch.float16 when running the ",
-    "compressed marlin-24 model",
+    "Note: llmcompressor does not currently support running "
+    "compressed models in the marlin-24 format. The model "
+    "produced from this example can be run on vLLM with "
+    "dtype=torch.float16"
 )
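
Note (not part of the patch): a minimal sketch of what the updated log message
describes, i.e. running the model this example produces in vLLM with
dtype=torch.float16. The output directory name below is an assumption; point
it at whatever path the example script actually saves the compressed model to.

    from vllm import LLM, SamplingParams

    # dtype must be float16 when running the compressed 2:4-sparse W4A16
    # model, as the log message in the patch notes. The model path is
    # hypothetical and should match the example's output directory.
    llm = LLM(model="./llama7b_sparse_w4a16_output", dtype="float16")

    params = SamplingParams(max_tokens=64)
    outputs = llm.generate(["What is 2:4 sparsity?"], params)
    print(outputs[0].outputs[0].text)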