hotfix: reduce concurrent inputs for A100 160GB containers to 4

OpenRouterTeam · Dec 31, 2023 · af3f494 · af3f494
1 parent e8d6e89
commit af3f494
Show file tree

Hide file tree

Showing 3 changed files with 13 additions and 8 deletions.
diff --git a/modal/runner/containers/__init__.py b/modal/runner/containers/__init__.py
@@ -11,6 +11,7 @@
 def _to_lower_list(l: list[str]):
     return [x.lower() for x in l]
 
+
 vllm_7b_model_ids = [
     "mistralai/Mistral-7B-Instruct-v0.1",
     "HuggingFaceH4/zephyr-7b-beta",
@@ -47,7 +48,6 @@ def _to_lower_list(l: list[str]):
 _vllm_a100_80gb_32k_models_lower = _to_lower_list(vllm_a100_80gb_32k_model_ids)
 
 vllm_a100_160gb_16k_models = [
-    "ehartford/dolphin-2.5-mixtral-8x7b",
     "cognitivecomputations/dolphin-2.6-mixtral-8x7b",
 ]
 _vllm_a100_160gb_16k_models_lower = _to_lower_list(vllm_a100_160gb_16k_models)
@@ -59,10 +59,11 @@ def _to_lower_list(l: list[str]):
     *vllm_top_model_ids,
     *vllm_a100_80gb_32k_model_ids,
     *vllm_a100_80gb_128k_model_ids,
-    *vllm_a100_160gb_16k_models
+    *vllm_a100_160gb_16k_models,
 ]
 all_models_lower = _to_lower_list(all_models)
 
+
 def get_container(model: str):
     normalized_model_id = model.lower()
     model_path = get_model_path(normalized_model_id)
@@ -88,7 +89,7 @@ def get_container(model: str):
 
         if normalized_model_id in _vllm_top_model_lower:
             return VllmContainerA100_80G(str(model_path))
-        
+
         # if normalized_model_id in _vllm_awq_models_lower:
         #     return VllmAWQ(str(model_path))
 

diff --git a/modal/runner/containers/vllm_unified.py b/modal/runner/containers/vllm_unified.py
@@ -54,11 +54,15 @@ def __init__(
     return wrap(_VllmContainer)
 
 
-VllmContainer_7B = _make_container("VllmContainer_7B", num_gpus=1, concurrent_inputs=100)
-VllmContainerA100_40G = _make_container("VllmContainerA100_40G", num_gpus=1, concurrent_inputs=32)
+VllmContainer_7B = _make_container(
+    "VllmContainer_7B", num_gpus=1, concurrent_inputs=100
+)
+VllmContainerA100_40G = _make_container(
+    "VllmContainerA100_40G", num_gpus=1, concurrent_inputs=32
+)
 VllmContainerA100_80G = _make_container(
     "VllmContainerA100_80G", num_gpus=1, memory=80
 )
 VllmContainerA100_160G = _make_container(
-    "VllmContainerA100_160G", num_gpus=2, memory=80
-)
+    "VllmContainerA100_160G", num_gpus=2, memory=80, concurrent_inputs=4
+)
diff --git a/pyproject.toml b/pyproject.toml
@@ -27,7 +27,7 @@ requires = ["poetry-core"]
 build-backend = "poetry.core.masonry.api"
 
 [tool.ruff]
-line-length = 120
+line-length = 80
 
 [tool.ruff.lint]
 select = [