From b160196814c630c795c45d9983258a9baac9f48e Mon Sep 17 00:00:00 2001
From: Jee Jee Li
Date: Tue, 6 Aug 2024 09:57:25 +0800
Subject: [PATCH] [LoRA] Relax LoRA condition (#7146)

---
 tests/lora/test_layers.py           | 2 +-
 tests/lora/test_punica_variation.py | 2 +-
 vllm/config.py                      | 5 +++--
 vllm/lora/layers.py                 | 6 +++---
 4 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/tests/lora/test_layers.py b/tests/lora/test_layers.py
index d8cc68d5e9599..ad86f7bdf6101 100644
--- a/tests/lora/test_layers.py
+++ b/tests/lora/test_layers.py
@@ -420,7 +420,7 @@ def create_random_embedding_layer():
 @torch.inference_mode()
 @pytest.mark.parametrize("num_loras", [1, 2, 4, 8])
 @pytest.mark.parametrize("device", CUDA_DEVICES)
-@pytest.mark.parametrize("vocab_size", [512, 32000, 64000, 128000])
+@pytest.mark.parametrize("vocab_size", [512, 32000, 64000, 256512])
 @pytest.mark.parametrize("stage", STAGES)
 def test_lm_head_logits_processor(dist_init, num_loras, device, vocab_size,
                                   stage) -> None:
diff --git a/tests/lora/test_punica_variation.py b/tests/lora/test_punica_variation.py
index 7e73ea67ee5f4..5bf3f72e7d97b 100644
--- a/tests/lora/test_punica_variation.py
+++ b/tests/lora/test_punica_variation.py
@@ -25,7 +25,7 @@
 BATCHES = [1, 4, 16, 32]
 NUM_LORA = [1, 4, 8, 16, 32, 64, 128]
 DTYPES = [torch.float16, torch.bfloat16]
-MAX_RANKS = [1, 4, 8, 16, 32, 64, 128]
+MAX_RANKS = [1, 4, 8, 16, 32, 64, 128, 256]
 SCALES = [0.5]
 SEED = [0]
 CUDA_DEVICES = [f"cuda:{0}"]
diff --git a/vllm/config.py b/vllm/config.py
index 4b968f549d902..3cc197f3d655f 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -1311,8 +1311,9 @@ class LoRAConfig:
     long_lora_scaling_factors: Optional[Tuple[float]] = None
 
     def __post_init__(self):
-        # TODO: Increase the range of rank
-        possible_max_ranks = (8, 16, 32, 64)
+        # Setting the maximum rank to 256 should be able to satisfy the vast
+        # majority of applications.
+        possible_max_ranks = (8, 16, 32, 64, 128, 256)
         possible_lora_extra_vocab_size = (0, 256, 512)
         if self.max_lora_rank not in possible_max_ranks:
             raise ValueError(
diff --git a/vllm/lora/layers.py b/vllm/lora/layers.py
index d3978ff6f4ff1..e3316059dc6d1 100644
--- a/vllm/lora/layers.py
+++ b/vllm/lora/layers.py
@@ -1073,10 +1073,10 @@ def create_lora_weights(
         lora_config: LoRAConfig,
        model_config: Optional[PretrainedConfig] = None,
     ) -> None:
-        # TODO: Verify if this condition can be relaxed
-        if 32000 < self.base_layer.vocab_size > 128512:
+        # TODO: Verify if this condition can be further relaxed
+        if 32000 < self.base_layer.vocab_size > 257024:
             raise ValueError("When using LoRA, vocab size must be "
-                             "32000 >= vocab_size <= 128512")
+                             "32000 >= vocab_size <= 257024")
         self.lora_a_stacked = torch.zeros(
             (
                 max_loras,
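
Note (not part of the commit above): the vllm/config.py hunk only widens the accepted value sets; the validation logic itself is unchanged. Below is a minimal, self-contained sketch of that check after the patch. The validate_lora_config helper and the module-level constant names are hypothetical stand-ins for the LoRAConfig.__post_init__ fields shown in the hunk, not vLLM APIs; only the tuple values come from the diff.

# Hypothetical standalone reproduction of the LoRAConfig.__post_init__ check
# after this patch; only the tuple values are taken from the diff above.
POSSIBLE_MAX_RANKS = (8, 16, 32, 64, 128, 256)
POSSIBLE_LORA_EXTRA_VOCAB_SIZE = (0, 256, 512)


def validate_lora_config(max_lora_rank: int, lora_extra_vocab_size: int) -> None:
    """Raise ValueError for settings that the relaxed check still rejects."""
    if max_lora_rank not in POSSIBLE_MAX_RANKS:
        raise ValueError(f"max_lora_rank ({max_lora_rank}) must be one of "
                         f"{POSSIBLE_MAX_RANKS}.")
    if lora_extra_vocab_size not in POSSIBLE_LORA_EXTRA_VOCAB_SIZE:
        raise ValueError(
            f"lora_extra_vocab_size ({lora_extra_vocab_size}) must be one of "
            f"{POSSIBLE_LORA_EXTRA_VOCAB_SIZE}.")


validate_lora_config(max_lora_rank=256, lora_extra_vocab_size=256)  # accepted after this patch
# validate_lora_config(max_lora_rank=512, lora_extra_vocab_size=0)  # still raises ValueError

With the relaxed tuple, rank 256 (the new upper bound) now passes, while values outside the enumerated set still raise, mirroring possible_max_ranks in the hunk.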