ENH: Add more vllm supported models #756

Merged (4 commits) on Dec 12, 2023

Changes from all commits
1 change: 1 addition & 0 deletions .github/workflows/python.yaml
@@ -39,6 +39,7 @@ jobs:
         with:
           src: "xinference"
           options: "--check"
+          version: "23.12.0"
       - uses: isort/isort-action@master
         with:
           sortPaths: "xinference"
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -1,7 +1,7 @@
 files: xinference
 repos:
   - repo: https://github.com/psf/black
-    rev: 24.1a1
+    rev: 23.12.0
     hooks:
       - id: black
   - repo: https://github.com/pre-commit/pre-commit-hooks
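These first two hunks pin Black to the same release, 23.12.0, in both the CI workflow and the local pre-commit hook: rev 24.1a1 was an alpha pre-release whose output differs from the 23.x stable style, which is what produced the pure-formatting hunks in the Python files below. A minimal local sanity check, assuming only that black is importable in the dev environment:

import black

# Verify the locally installed Black matches the pinned revision, so local
# formatting agrees with CI and the pre-commit hook.
assert black.__version__ == "23.12.0", f"expected 23.12.0, got {black.__version__}"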
6 changes: 4 additions & 2 deletions xinference/api/restful_api.py
@@ -285,11 +285,13 @@ def read_main():
             SPAStaticFiles(directory=ui_location, html=True),
         )
     else:
-        warnings.warn(f"""
+        warnings.warn(
+            f"""
         Xinference ui is not built at expected directory: {ui_location}
         To resolve this warning, navigate to {os.path.join(lib_location, "web/ui/")}
         And build the Xinference ui by running "npm run build"
-        """)
+        """
+        )
 
     config = Config(
         app=self._app, host=self._host, port=self._port, log_config=logging_conf
12 changes: 8 additions & 4 deletions xinference/core/chat_interface.py
@@ -282,10 +282,13 @@ def retry(text, hist, max_tokens, temperature) -> Generator:
         ) as generate_interface:
             history = gr.State([])
 
-            Markdown(f"""
+            Markdown(
+                f"""
     <h1 style='text-align: center; margin-bottom: 1rem'>🚀 Xinference Generate Bot : {self.model_name} 🚀</h1>
-    """)
-            Markdown(f"""
+    """
+            )
+            Markdown(
+                f"""
     <div class="center">
     Model ID: {self.model_uid}
     </div>
@@ -298,7 +301,8 @@ def retry(text, hist, max_tokens, temperature) -> Generator:
     <div class="center">
     Model Quantization: {self.quantization}
     </div>
-    """)
+    """
+            )
 
         with Column(variant="panel"):
             textbox = Textbox(
6 changes: 3 additions & 3 deletions xinference/core/supervisor.py
@@ -63,9 +63,9 @@ def __init__(self):
         super().__init__()
         self._worker_address_to_worker: Dict[str, xo.ActorRefType["WorkerActor"]] = {}
         self._worker_status: Dict[str, WorkerStatus] = {}
-        self._replica_model_uid_to_worker: Dict[str, xo.ActorRefType["WorkerActor"]] = (
-            {}
-        )
+        self._replica_model_uid_to_worker: Dict[
+            str, xo.ActorRefType["WorkerActor"]
+        ] = {}
         self._model_uid_to_replica_info: Dict[str, ReplicaInfo] = {}
         self._uptime = None
         self._lock = asyncio.Lock()
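This hunk and the one in pytorch/core.py below are formatting fallout from the Black pin: the 23.x stable style splits an over-long annotated assignment inside the subscript brackets, while the 24.1a1 pre-release parenthesized the assigned value instead, exactly as the removed lines show. A self-contained sketch of the two layouts (WorkerActorRef is a stand-in for xo.ActorRefType["WorkerActor"]):

from typing import Any, Dict

WorkerActorRef = Any  # stand-in for xo.ActorRefType["WorkerActor"]

# Black 23.12.0 (the pinned stable style) breaks inside the subscript:
replica_model_uid_to_worker: Dict[
    str, WorkerActorRef
] = {}

# The 24.1a1 pre-release wrapped the right-hand side in parentheses instead:
# replica_model_uid_to_worker: Dict[str, WorkerActorRef] = (
#     {}
# )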
6 changes: 3 additions & 3 deletions xinference/model/llm/pytorch/core.py
@@ -442,9 +442,9 @@ def _sanitize_generate_config(
             and self.model_family.prompt_style
             and self.model_family.prompt_style.stop_token_ids
         ):
-            generate_config["stop_token_ids"] = (
-                self.model_family.prompt_style.stop_token_ids.copy()
-            )
+            generate_config[
+                "stop_token_ids"
+            ] = self.model_family.prompt_style.stop_token_ids.copy()
 
         return generate_config
 
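The .copy() on the family-level default matters independently of the reflow: without it, a caller mutating the list stored under "stop_token_ids" would corrupt the shared prompt-style defaults. A hypothetical standalone version of the guard (names and signature are illustrative, not xinference's actual API):

from typing import Any, Dict, List, Optional

def with_default_stop_tokens(
    generate_config: Optional[Dict[str, Any]],
    default_stop_token_ids: Optional[List[int]],
) -> Dict[str, Any]:
    """Fill in stop_token_ids only when the caller did not set one."""
    config = dict(generate_config or {})
    if "stop_token_ids" not in config and default_stop_token_ids:
        # Copy so later mutation of the returned config cannot corrupt the
        # shared default list.
        config["stop_token_ids"] = default_stop_token_ids.copy()
    return config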
4 changes: 4 additions & 0 deletions xinference/model/llm/vllm/core.py
@@ -79,6 +79,10 @@ class VLLMGenerateConfig(TypedDict, total=False):
     "internlm-chat-20b",
     "qwen-chat",
     "Yi",
+    "Yi-chat",
+    "code-llama",
+    "code-llama-python",
+    "code-llama-instruct",
     "mistral-instruct-v0.1",
     "chatglm3",
 ]
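This is the PR's substantive change: four new entries in the vLLM supported-models registry, letting Yi-chat and the Code Llama family be served on the vLLM backend. A minimal sketch of how such a registry is typically consulted (the list name and helper are assumptions for illustration; the real backend-selection logic is more involved):

# Assumed name; the diff shows only the list's entries.
VLLM_SUPPORTED_MODELS = [
    "Yi-chat",
    "code-llama",
    "code-llama-python",
    "code-llama-instruct",
]

def is_vllm_supported(model_name: str) -> bool:
    # Hypothetical helper: True if this model family can use the vLLM backend.
    return model_name in VLLM_SUPPORTED_MODELS

print(is_vllm_supported("code-llama-instruct"))  # True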