From 39baf42fb759f9d93e59d6b34fdd3222929ab5f4 Mon Sep 17 00:00:00 2001
From: UranusSeven <109661872+UranusSeven@users.noreply.github.com>
Date: Mon, 18 Sep 2023 11:36:56 +0800
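Subject: [PATCH] Hold the asyncio lock for VLLMModel calls

VLLMModel is no longer exempt from the model lock: only PytorchModel
still gets `self._lock = None`; every other model, now including
VLLMModel, gets an `asyncio.locks.Lock()`.

`_call_async_wrapper` now acquires that lock around the wrapped
coroutine, so async calls run one at a time. It asserts that the lock
exists, so it must not be used with a PytorchModel, whose lock is None.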

---
 xinference/core/model.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/xinference/core/model.py b/xinference/core/model.py
index 165e916820..7339029fa5 100644
--- a/xinference/core/model.py
+++ b/xinference/core/model.py
@@ -102,15 +102,12 @@ async def __pre_destroy__(self):
     def __init__(self, model: "LLM"):
         super().__init__()
         from ..model.llm.pytorch.core import PytorchModel
-        from ..model.llm.vllm.core import VLLMModel
 
         self._model = model
 
         self._generators: Dict[str, Union[Iterator, AsyncGenerator]] = {}
         self._lock = (
-            None
-            if isinstance(self._model, (PytorchModel, VLLMModel))
-            else asyncio.locks.Lock()
+            None if isinstance(self._model, PytorchModel) else asyncio.locks.Lock()
         )
 
     def load(self):
@@ -137,7 +134,9 @@ async def _call_wrapper(self, _wrapper: Callable):
                 return await asyncio.to_thread(_wrapper)
 
     async def _call_async_wrapper(self, _wrapper: Callable):
-        return await asyncio.create_task(_wrapper())
+        assert self._lock is not None
+        async with self._lock:
+            return await asyncio.create_task(_wrapper())
 
     async def generate(self, prompt: str, *args, **kwargs):
         if not hasattr(self._model, "generate") and not hasattr(