
Commit: Lock on VLLMModel
UranusSeven committed Sep 18, 2023
1 parent e6821b2 commit 39baf42
Showing 1 changed file with 4 additions and 5 deletions.
xinference/core/model.py: 4 additions & 5 deletions
@@ -102,15 +102,12 @@ async def __pre_destroy__(self):
     def __init__(self, model: "LLM"):
         super().__init__()
         from ..model.llm.pytorch.core import PytorchModel
-        from ..model.llm.vllm.core import VLLMModel
 
         self._model = model
 
         self._generators: Dict[str, Union[Iterator, AsyncGenerator]] = {}
         self._lock = (
-            None
-            if isinstance(self._model, (PytorchModel, VLLMModel))
-            else asyncio.locks.Lock()
+            None if isinstance(self._model, PytorchModel) else asyncio.locks.Lock()
         )
 
     def load(self):
@@ -137,7 +134,9 @@ async def _call_wrapper(self, _wrapper: Callable):
         return await asyncio.to_thread(_wrapper)
 
     async def _call_async_wrapper(self, _wrapper: Callable):
-        return await asyncio.create_task(_wrapper())
+        assert self._lock is not None
+        async with self._lock:
+            return await asyncio.create_task(_wrapper())
 
     async def generate(self, prompt: str, *args, **kwargs):
         if not hasattr(self._model, "generate") and not hasattr(
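For context, a minimal runnable sketch of the locking pattern this commit puts in place (the names LockedCaller and job below are hypothetical, not from xinference). After this change, _call_async_wrapper acquires self._lock before awaiting the wrapped coroutine, so concurrent async calls against a VLLMModel, which previously bypassed the lock, are now serialized one at a time; only PytorchModel keeps a None lock.

import asyncio

# Hypothetical stand-in for the _call_async_wrapper pattern above;
# these names are illustrative, not from the xinference codebase.
class LockedCaller:
    def __init__(self) -> None:
        # After this commit, VLLMModel also gets a lock
        # (only PytorchModel is exempt).
        self._lock = asyncio.Lock()

    async def _call_async_wrapper(self, _wrapper):
        # Hold the lock for the full call, so only one wrapped
        # coroutine runs at a time.
        assert self._lock is not None
        async with self._lock:
            return await asyncio.create_task(_wrapper())

async def main() -> None:
    caller = LockedCaller()

    async def job(i: int) -> int:
        await asyncio.sleep(0.1)  # stand-in for model work
        return i

    # Both calls succeed, but the second waits until the first
    # releases the lock before it starts.
    print(await asyncio.gather(
        caller._call_async_wrapper(lambda: job(1)),
        caller._call_async_wrapper(lambda: job(2)),
    ))  # -> [1, 2]

asyncio.run(main())

The await asyncio.create_task(_wrapper()) form is kept from the diff for fidelity; awaiting _wrapper() directly would behave the same in this sketch.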
