Skip to content

Commit

Permalink
hotfix: reduce concurrent for a100 160gb to 4
Browse files (browse the repository at this point in the history)
  • Loading branch information
louisgv committed Dec 31, 2023
1 parent e8d6e89 commit af3f494
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 8 deletions.
7 changes: 4 additions & 3 deletions modal/runner/containers/__init__.py
Original file line number Diff line number Diff line change
@@ -11,6 +11,7 @@
def _to_lower_list(l: list[str]):
return [x.lower() for x in l]


vllm_7b_model_ids = [
"mistralai/Mistral-7B-Instruct-v0.1",
"HuggingFaceH4/zephyr-7b-beta",
@@ -47,7 +48,6 @@ def _to_lower_list(l: list[str]):
_vllm_a100_80gb_32k_models_lower = _to_lower_list(vllm_a100_80gb_32k_model_ids)

vllm_a100_160gb_16k_models = [
"ehartford/dolphin-2.5-mixtral-8x7b",
"cognitivecomputations/dolphin-2.6-mixtral-8x7b",
]
_vllm_a100_160gb_16k_models_lower = _to_lower_list(vllm_a100_160gb_16k_models)
@@ -59,10 +59,11 @@ def _to_lower_list(l: list[str]):
*vllm_top_model_ids,
*vllm_a100_80gb_32k_model_ids,
*vllm_a100_80gb_128k_model_ids,
*vllm_a100_160gb_16k_models
*vllm_a100_160gb_16k_models,
]
all_models_lower = _to_lower_list(all_models)


def get_container(model: str):
normalized_model_id = model.lower()
model_path = get_model_path(normalized_model_id)
@@ -88,7 +89,7 @@ def get_container(model: str):

if normalized_model_id in _vllm_top_model_lower:
return VllmContainerA100_80G(str(model_path))

# if normalized_model_id in _vllm_awq_models_lower:
# return VllmAWQ(str(model_path))

Expand Down
12 changes: 8 additions & 4 deletions modal/runner/containers/vllm_unified.py
Original file line number Diff line number Diff line change
@@ -54,11 +54,15 @@ def __init__(
return wrap(_VllmContainer)


VllmContainer_7B = _make_container("VllmContainer_7B", num_gpus=1, concurrent_inputs=100)
VllmContainerA100_40G = _make_container("VllmContainerA100_40G", num_gpus=1, concurrent_inputs=32)
VllmContainer_7B = _make_container(
"VllmContainer_7B", num_gpus=1, concurrent_inputs=100
)
VllmContainerA100_40G = _make_container(
"VllmContainerA100_40G", num_gpus=1, concurrent_inputs=32
)
VllmContainerA100_80G = _make_container(
"VllmContainerA100_80G", num_gpus=1, memory=80
)
VllmContainerA100_160G = _make_container(
"VllmContainerA100_160G", num_gpus=2, memory=80
)
"VllmContainerA100_160G", num_gpus=2, memory=80, concurrent_inputs=4
)
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -27,7 +27,7 @@ requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"

[tool.ruff]
line-length = 120
line-length = 80

[tool.ruff.lint]
select = [
Expand Down

0 comments on commit af3f494

Please sign in to comment.