diff --git a/modal/runner/containers/__init__.py b/modal/runner/containers/__init__.py index 63f2944..b279c3a 100644 --- a/modal/runner/containers/__init__.py +++ b/modal/runner/containers/__init__.py @@ -11,6 +11,7 @@ def _to_lower_list(l: list[str]): return [x.lower() for x in l] + vllm_7b_model_ids = [ "mistralai/Mistral-7B-Instruct-v0.1", "HuggingFaceH4/zephyr-7b-beta", @@ -47,7 +48,6 @@ def _to_lower_list(l: list[str]): _vllm_a100_80gb_32k_models_lower = _to_lower_list(vllm_a100_80gb_32k_model_ids) vllm_a100_160gb_16k_models = [ - "ehartford/dolphin-2.5-mixtral-8x7b", "cognitivecomputations/dolphin-2.6-mixtral-8x7b", ] _vllm_a100_160gb_16k_models_lower = _to_lower_list(vllm_a100_160gb_16k_models) @@ -59,10 +59,11 @@ def _to_lower_list(l: list[str]): *vllm_top_model_ids, *vllm_a100_80gb_32k_model_ids, *vllm_a100_80gb_128k_model_ids, - *vllm_a100_160gb_16k_models + *vllm_a100_160gb_16k_models, ] all_models_lower = _to_lower_list(all_models) + def get_container(model: str): normalized_model_id = model.lower() model_path = get_model_path(normalized_model_id) @@ -88,7 +89,7 @@ def get_container(model: str): if normalized_model_id in _vllm_top_model_lower: return VllmContainerA100_80G(str(model_path)) - + # if normalized_model_id in _vllm_awq_models_lower: # return VllmAWQ(str(model_path)) diff --git a/modal/runner/containers/vllm_unified.py b/modal/runner/containers/vllm_unified.py index d750342..e4fb3f0 100644 --- a/modal/runner/containers/vllm_unified.py +++ b/modal/runner/containers/vllm_unified.py @@ -54,11 +54,15 @@ def __init__( return wrap(_VllmContainer) -VllmContainer_7B = _make_container("VllmContainer_7B", num_gpus=1, concurrent_inputs=100) -VllmContainerA100_40G = _make_container("VllmContainerA100_40G", num_gpus=1, concurrent_inputs=32) +VllmContainer_7B = _make_container( + "VllmContainer_7B", num_gpus=1, concurrent_inputs=100 +) +VllmContainerA100_40G = _make_container( + "VllmContainerA100_40G", num_gpus=1, concurrent_inputs=32 +) VllmContainerA100_80G = _make_container( "VllmContainerA100_80G", num_gpus=1, memory=80 ) VllmContainerA100_160G = _make_container( - "VllmContainerA100_160G", num_gpus=2, memory=80 -) \ No newline at end of file + "VllmContainerA100_160G", num_gpus=2, memory=80, concurrent_inputs=4 +) diff --git a/pyproject.toml b/pyproject.toml index 9bcab03..29f8f85 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,7 +27,7 @@ requires = ["poetry-core"] build-backend = "poetry.core.masonry.api" [tool.ruff] -line-length = 120 +line-length = 80 [tool.ruff.lint] select = [