Merge pull request #47 from sambarnes/sentry-generation
fix: report generation errors to sentry w/ model name
montasaurus authored Jan 12, 2024
2 parents 8f98b6b + e822186 commit 8447de1
Showing 1 changed file with 6 additions and 3 deletions.
modal/runner/engines/vllm.py — 9 changes: 6 additions & 3 deletions
@@ -1,3 +1,4 @@
+import logging
 from typing import Optional
 
 from modal import method
@@ -44,12 +45,12 @@ def __init__(self, params: VllmParams):
         from vllm.engine.arg_utils import AsyncEngineArgs
         from vllm.engine.async_llm_engine import AsyncLLMEngine
 
-        engine_args = AsyncEngineArgs(
+        self.engine_args = AsyncEngineArgs(
             **params.dict(),
             disable_log_requests=True,
         )
 
-        self.engine = AsyncLLMEngine.from_engine_args(engine_args)
+        self.engine = AsyncLLMEngine.from_engine_args(self.engine_args)
 
     # @method()
     # async def tokenize_prompt(self, payload: Payload) -> List[int]:
@@ -116,7 +117,9 @@ async def generate(self, payload: CompletionPayload, params):
             print(f"Request completed: {throughput:.4f} tokens/s")
         except Exception as err:
             e = create_error_text(err)
-            print(e)
+            logging.exception(
+                "Failed generation", extra={"model": self.engine_args.model}
+            )
             if payload.stream:
                 yield create_sse_data(e)
             else:
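For context on why a plain logging.exception call is enough to "report to sentry": the Sentry Python SDK's logging integration, enabled by default, turns ERROR-level log records into Sentry events, attaching the traceback from the active exception and any non-standard keys passed via extra (here, the model name). A minimal sketch of that mechanism, assuming sentry_sdk is initialized at app startup elsewhere in this repo; the DSN and model name below are placeholders:

    # Sketch (not part of this repo): how logging.exception reaches Sentry.
    import logging

    import sentry_sdk

    # The default LoggingIntegration captures records at ERROR and above
    # as Sentry events, so no explicit integration config is needed.
    sentry_sdk.init(dsn="https://publicKey@o0.ingest.sentry.io/0")  # placeholder DSN

    try:
        raise RuntimeError("CUDA out of memory")  # stand-in for a vLLM failure
    except Exception:
        # exc_info from the active exception is attached automatically;
        # non-standard keys passed via `extra` (like "model") appear on
        # the Sentry event as extra data, so errors can be grouped and
        # filtered per model.
        logging.exception("Failed generation", extra={"model": "mistral-7b"})

Storing the args as self.engine_args in __init__ is what makes the model name available later in generate's error path.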
