Skip to content

Commit

Permalink
perf: move VLLM engine initialization from __init__ to an @enter() hook
Browse files Browse the repository at this point in the history
  • Loading branch information
sambarnes committed Jan 11, 2024
1 parent 40bcca8 commit 0c3a991
Showing 1 changed file with 10 additions and 3 deletions.
13 changes: 10 additions & 3 deletions modal/runner/engines/vllm.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from typing import Optional

from modal import method
from modal import enter, method
from pydantic import BaseModel

from shared.protocol import (
Expand Down Expand Up @@ -44,12 +44,17 @@ def __init__(self, params: VllmParams):
from vllm.engine.arg_utils import AsyncEngineArgs
from vllm.engine.async_llm_engine import AsyncLLMEngine

engine_args = AsyncEngineArgs(
self.engine_args = AsyncEngineArgs(
**params.dict(),
disable_log_requests=True,
)
self.engine: AsyncLLMEngine | None = None

self.engine = AsyncLLMEngine.from_engine_args(engine_args)
# Hook registered through Modal's `enter` decorator: builds the vLLM async
# engine from the arguments that __init__ stored on `self.engine_args`.
# `self.engine` is initialised to None in __init__ and is only populated here,
# so callers must not use the engine before this method has run.
@enter()
def start(self):
# Imported inside the method rather than at module top level
# (presumably because vllm is only installed in the container image — confirm).
from vllm.engine.async_llm_engine import AsyncLLMEngine

self.engine = AsyncLLMEngine.from_engine_args(self.engine_args)

# @method()
# async def tokenize_prompt(self, payload: Payload) -> List[int]:
Expand All @@ -62,6 +67,8 @@ def __init__(self, params: VllmParams):

@method()
async def generate(self, payload: CompletionPayload, params):
assert self.engine is not None, "Engine not initialized"

try:
import time

Expand Down

0 comments on commit 0c3a991

Please sign in to comment.