diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 7cad1a3583..0d716ae660 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,7 +1,7 @@
files: xinference
repos:
- repo: https://github.com/psf/black
- rev: 23.1.0
+ rev: 24.1a1
hooks:
- id: black
- repo: https://github.com/pre-commit/pre-commit-hooks
diff --git a/xinference/api/restful_api.py b/xinference/api/restful_api.py
index d0a8437ef3..d184ff512d 100644
--- a/xinference/api/restful_api.py
+++ b/xinference/api/restful_api.py
@@ -258,9 +258,6 @@ def serve(self, logging_conf: Optional[dict] = None):
f"{pprint.pformat(invalid_routes)}"
)
- for tp in [CreateChatCompletion, CreateCompletion]:
- logger.debug("Dump request model fields:\n%s", tp.__fields__)
-
class SPAStaticFiles(StaticFiles):
async def get_response(self, path: str, scope):
response = await super().get_response(path, scope)
@@ -288,13 +285,11 @@ def read_main():
SPAStaticFiles(directory=ui_location, html=True),
)
else:
- warnings.warn(
- f"""
+ warnings.warn(f"""
Xinference ui is not built at expected directory: {ui_location}
To resolve this warning, navigate to {os.path.join(lib_location, "web/ui/")}
And build the Xinference ui by running "npm run build"
- """
- )
+ """)
config = Config(
app=self._app, host=self._host, port=self._port, log_config=logging_conf
diff --git a/xinference/core/chat_interface.py b/xinference/core/chat_interface.py
index aa5b284a72..2f4d5f40aa 100644
--- a/xinference/core/chat_interface.py
+++ b/xinference/core/chat_interface.py
@@ -282,13 +282,10 @@ def retry(text, hist, max_tokens, temperature) -> Generator:
) as generate_interface:
history = gr.State([])
- Markdown(
- f"""
+ Markdown(f"""
🚀 Xinference Generate Bot : {self.model_name} 🚀
- """
- )
- Markdown(
- f"""
+ """)
+ Markdown(f"""
Model ID: {self.model_uid}
@@ -301,8 +298,7 @@ def retry(text, hist, max_tokens, temperature) -> Generator:
Model Quantization: {self.quantization}
- """
- )
+ """)
with Column(variant="panel"):
textbox = Textbox(
diff --git a/xinference/core/supervisor.py b/xinference/core/supervisor.py
index 5a887b72e6..2cfb46a5cb 100644
--- a/xinference/core/supervisor.py
+++ b/xinference/core/supervisor.py
@@ -63,9 +63,9 @@ def __init__(self):
super().__init__()
self._worker_address_to_worker: Dict[str, xo.ActorRefType["WorkerActor"]] = {}
self._worker_status: Dict[str, WorkerStatus] = {}
- self._replica_model_uid_to_worker: Dict[
- str, xo.ActorRefType["WorkerActor"]
- ] = {}
+ self._replica_model_uid_to_worker: Dict[str, xo.ActorRefType["WorkerActor"]] = (
+ {}
+ )
self._model_uid_to_replica_info: Dict[str, ReplicaInfo] = {}
self._uptime = None
self._lock = asyncio.Lock()
diff --git a/xinference/deploy/utils.py b/xinference/deploy/utils.py
index 953c3030aa..d3d3343b13 100644
--- a/xinference/deploy/utils.py
+++ b/xinference/deploy/utils.py
@@ -60,7 +60,9 @@ def get_config_dict(
"disable_existing_loggers": False,
"formatters": {
"formatter": {
- "format": "%(asctime)s %(name)-12s %(process)d %(levelname)-8s %(message)s"
+ "format": (
+ "%(asctime)s %(name)-12s %(process)d %(levelname)-8s %(message)s"
+ )
},
},
"filters": {
diff --git a/xinference/model/embedding/__init__.py b/xinference/model/embedding/__init__.py
index fdbf7f90ae..0b538806ab 100644
--- a/xinference/model/embedding/__init__.py
+++ b/xinference/model/embedding/__init__.py
@@ -16,7 +16,7 @@
import json
import os
-from .core import EmbeddingModelSpec, get_cache_status
+from .core import MODEL_NAME_TO_REVISION, EmbeddingModelSpec, get_cache_status
from .custom import CustomEmbeddingModelSpec, register_embedding, unregister_embedding
_model_spec_json = os.path.join(os.path.dirname(__file__), "model_spec.json")
@@ -27,12 +27,16 @@
(spec["model_name"], EmbeddingModelSpec(**spec))
for spec in json.load(codecs.open(_model_spec_json, "r", encoding="utf-8"))
)
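+# Record builtin revisions so cache checks match models from either hub.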
+for model_name, model_spec in BUILTIN_EMBEDDING_MODELS.items():
+ MODEL_NAME_TO_REVISION[model_name].append(model_spec.model_revision)
MODELSCOPE_EMBEDDING_MODELS = dict(
(spec["model_name"], EmbeddingModelSpec(**spec))
for spec in json.load(
codecs.open(_model_spec_modelscope_json, "r", encoding="utf-8")
)
)
+for model_name, model_spec in MODELSCOPE_EMBEDDING_MODELS.items():
+ MODEL_NAME_TO_REVISION[model_name].append(model_spec.model_revision)
from ...constants import XINFERENCE_MODEL_DIR
diff --git a/xinference/model/embedding/core.py b/xinference/model/embedding/core.py
index 2cd16deb31..a97f02960a 100644
--- a/xinference/model/embedding/core.py
+++ b/xinference/model/embedding/core.py
@@ -15,7 +15,8 @@
import logging
import os
import shutil
-from typing import List, Optional, Tuple, Union, no_type_check
+from collections import defaultdict
+from typing import Dict, List, Optional, Tuple, Union, no_type_check
import numpy as np
from pydantic import BaseModel
@@ -23,11 +24,14 @@
from ...constants import XINFERENCE_CACHE_DIR
from ...types import Embedding, EmbeddingData, EmbeddingUsage
from ..core import ModelDescription
-from ..utils import valid_model_revision
+from ..utils import is_model_cached, valid_model_revision
logger = logging.getLogger(__name__)
SUPPORTED_SCHEMES = ["s3"]
+# Used to check whether a model is cached.
+# Initialized when registering all the builtin models.
+MODEL_NAME_TO_REVISION: Dict[str, List[str]] = defaultdict(list)
class EmbeddingModelSpec(BaseModel):
@@ -195,11 +199,7 @@ def cache(model_spec: EmbeddingModelSpec):
def get_cache_status(
model_spec: EmbeddingModelSpec,
) -> bool:
- cache_dir = os.path.realpath(
- os.path.join(XINFERENCE_CACHE_DIR, model_spec.model_name)
- )
- meta_path = os.path.join(cache_dir, "__valid_download")
- return valid_model_revision(meta_path, model_spec.model_revision)
+ return is_model_cached(model_spec, MODEL_NAME_TO_REVISION)
class EmbeddingModel:
diff --git a/xinference/model/llm/ggml/chatglm.py b/xinference/model/llm/ggml/chatglm.py
index 4a2f637dbc..86ef20fe45 100644
--- a/xinference/model/llm/ggml/chatglm.py
+++ b/xinference/model/llm/ggml/chatglm.py
@@ -134,9 +134,9 @@ def _convert_raw_text_chunks_to_chat(
{
"index": 0,
"delta": {
- "content": token
- if isinstance(token, str)
- else token.content,
+ "content": (
+ token if isinstance(token, str) else token.content
+ ),
},
"finish_reason": None,
}
@@ -223,8 +223,10 @@ def _handle_tools(generate_config) -> Optional[ChatCompletionMessage]:
chatglm_tools.append(elem["function"])
return {
"role": "system",
- "content": f"Answer the following questions as best as you can. You have access to the following tools:\n"
- f"{json.dumps(chatglm_tools, indent=4, ensure_ascii=False)}",
+ "content": (
+ f"Answer the following questions as best as you can. You have access to the following tools:\n"
+ f"{json.dumps(chatglm_tools, indent=4, ensure_ascii=False)}"
+ ),
}
def chat(
diff --git a/xinference/model/llm/llm_family.py b/xinference/model/llm/llm_family.py
index 8c019ceeeb..4ec72f4a74 100644
--- a/xinference/model/llm/llm_family.py
+++ b/xinference/model/llm/llm_family.py
@@ -588,31 +588,57 @@ def cache_from_huggingface(
return cache_dir
+def _check_revision(
+ llm_family: LLMFamilyV1,
+ llm_spec: "LLMSpecV1",
+ builtin: list,
+ meta_path: str,
+) -> bool:
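+    # Find the builtin family with the same model name and validate the cached
+    # revision against its pytorch spec of the matching size.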
+ for family in builtin:
+ if llm_family.model_name == family.model_name:
+ specs = family.model_specs
+ for spec in specs:
+ if (
+ spec.model_format == "pytorch"
+ and spec.model_size_in_billions == llm_spec.model_size_in_billions
+ ):
+ return valid_model_revision(meta_path, spec.model_revision)
+ return False
+
+
def get_cache_status(
llm_family: LLMFamilyV1,
llm_spec: "LLMSpecV1",
) -> Union[bool, List[bool]]:
+ """
+    Callers construct `llm_family` only from BUILTIN_LLM_FAMILIES, so we need to
+    check the cache files for both huggingface and modelscope.
+ """
cache_dir = _get_cache_dir(llm_family, llm_spec, create_if_not_exist=False)
+    # Check the cached revision for pytorch models.
if llm_spec.model_format == "pytorch":
- return _skip_download(
- cache_dir,
- llm_spec.model_format,
- llm_spec.model_hub,
- llm_spec.model_revision,
- "none",
- )
+ hf_meta_path = _get_meta_path(cache_dir, "pytorch", "huggingface", "none")
+ ms_meta_path = _get_meta_path(cache_dir, "pytorch", "modelscope", "none")
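+        # The model may have been downloaded from either hub, so the cache counts
+        # as valid if either meta file matches its hub's builtin revision.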
+ revisions = [
+ _check_revision(llm_family, llm_spec, BUILTIN_LLM_FAMILIES, hf_meta_path),
+ _check_revision(
+ llm_family, llm_spec, BUILTIN_MODELSCOPE_LLM_FAMILIES, ms_meta_path
+ ),
+ ]
+ return any(revisions)
+    # For ggml, gguf, and gptq models, just check whether the meta files exist.
elif llm_spec.model_format in ["ggmlv3", "ggufv2", "gptq"]:
ret = []
for q in llm_spec.quantizations:
- ret.append(
- _skip_download(
- cache_dir,
- llm_spec.model_format,
- llm_spec.model_hub,
- llm_spec.model_revision,
- q,
- )
+ assert q is not None
+ hf_meta_path = _get_meta_path(
+ cache_dir, llm_spec.model_format, "huggingface", q
+ )
+ ms_meta_path = _get_meta_path(
+ cache_dir, llm_spec.model_format, "modelscope", q
)
+ results = [os.path.exists(hf_meta_path), os.path.exists(ms_meta_path)]
+ ret.append(any(results))
return ret
else:
raise ValueError(f"Unsupported model format: {llm_spec.model_format}")
diff --git a/xinference/model/llm/pytorch/core.py b/xinference/model/llm/pytorch/core.py
index 95e2bac596..17957fcda2 100644
--- a/xinference/model/llm/pytorch/core.py
+++ b/xinference/model/llm/pytorch/core.py
@@ -442,9 +442,9 @@ def _sanitize_generate_config(
and self.model_family.prompt_style
and self.model_family.prompt_style.stop_token_ids
):
- generate_config[
- "stop_token_ids"
- ] = self.model_family.prompt_style.stop_token_ids.copy()
+ generate_config["stop_token_ids"] = (
+ self.model_family.prompt_style.stop_token_ids.copy()
+ )
return generate_config
diff --git a/xinference/model/llm/tests/test_llm_family.py b/xinference/model/llm/tests/test_llm_family.py
index 7b5036e367..578b5996f1 100644
--- a/xinference/model/llm/tests/test_llm_family.py
+++ b/xinference/model/llm/tests/test_llm_family.py
@@ -289,6 +289,7 @@ def test_meta_file():
cache_dir = cache_from_huggingface(family, spec, quantization=None)
meta_path = _get_meta_path(cache_dir, spec.model_format, spec.model_hub, None)
assert valid_model_revision(meta_path, "3d2b5f275bdf882b8775f902e1bfdb790e2cfc32")
+ shutil.rmtree(cache_dir)
def test_parse_uri():
@@ -878,6 +879,7 @@ def test_get_cache_status_pytorch():
model_size_in_billions=1,
quantizations=["4-bit", "8-bit", "none"],
model_id="facebook/opt-125m",
+ model_revision="3d2b5f275bdf882b8775f902e1bfdb790e2cfc32",
)
family = LLMFamilyV1(
version=1,
diff --git a/xinference/model/rerank/__init__.py b/xinference/model/rerank/__init__.py
index d2ecd4c09c..581e980fe0 100644
--- a/xinference/model/rerank/__init__.py
+++ b/xinference/model/rerank/__init__.py
@@ -16,7 +16,7 @@
import json
import os
-from .core import RerankModelSpec, get_cache_status
+from .core import MODEL_NAME_TO_REVISION, RerankModelSpec, get_cache_status
_model_spec_json = os.path.join(os.path.dirname(__file__), "model_spec.json")
_model_spec_modelscope_json = os.path.join(
@@ -26,11 +26,15 @@
(spec["model_name"], RerankModelSpec(**spec))
for spec in json.load(codecs.open(_model_spec_json, "r", encoding="utf-8"))
)
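+# Record builtin revisions so cache checks match models from either hub.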
+for model_name, model_spec in BUILTIN_RERANK_MODELS.items():
+ MODEL_NAME_TO_REVISION[model_name].append(model_spec.model_revision)
MODELSCOPE_RERANK_MODELS = dict(
(spec["model_name"], RerankModelSpec(**spec))
for spec in json.load(
codecs.open(_model_spec_modelscope_json, "r", encoding="utf-8")
)
)
+for model_name, model_spec in MODELSCOPE_RERANK_MODELS.items():
+ MODEL_NAME_TO_REVISION[model_name].append(model_spec.model_revision)
del _model_spec_json
del _model_spec_modelscope_json
diff --git a/xinference/model/rerank/core.py b/xinference/model/rerank/core.py
index b51cabc34a..7e324d3ef3 100644
--- a/xinference/model/rerank/core.py
+++ b/xinference/model/rerank/core.py
@@ -15,6 +15,7 @@
import logging
import os
import uuid
+from collections import defaultdict
from typing import Dict, List, Optional, Tuple
import numpy as np
@@ -23,10 +24,14 @@
from ...constants import XINFERENCE_CACHE_DIR
from ...types import Document, DocumentObj, Rerank
from ..core import ModelDescription
-from ..utils import valid_model_revision
+from ..utils import is_model_cached, valid_model_revision
logger = logging.getLogger(__name__)
+# Used to check whether a model is cached.
+# Initialized when registering all the builtin models.
+MODEL_NAME_TO_REVISION: Dict[str, List[str]] = defaultdict(list)
+
class RerankModelSpec(BaseModel):
model_name: str
@@ -126,11 +131,7 @@ def rerank(
def get_cache_status(
model_spec: RerankModelSpec,
) -> bool:
- cache_dir = os.path.realpath(
- os.path.join(XINFERENCE_CACHE_DIR, model_spec.model_name)
- )
- meta_path = os.path.join(cache_dir, "__valid_download")
- return valid_model_revision(meta_path, model_spec.model_revision)
+ return is_model_cached(model_spec, MODEL_NAME_TO_REVISION)
def cache(model_spec: RerankModelSpec):
diff --git a/xinference/model/utils.py b/xinference/model/utils.py
index f1da3e75f0..e1ee21d117 100644
--- a/xinference/model/utils.py
+++ b/xinference/model/utils.py
@@ -16,11 +16,11 @@
import os
from json import JSONDecodeError
from pathlib import Path
-from typing import Callable, Dict, Optional, Tuple
+from typing import Any, Callable, Dict, Optional, Tuple
from fsspec import AbstractFileSystem
-from ..constants import XINFERENCE_ENV_MODEL_SRC
+from ..constants import XINFERENCE_CACHE_DIR, XINFERENCE_ENV_MODEL_SRC
logger = logging.getLogger(__name__)
MAX_ATTEMPTS = 3
@@ -132,6 +132,17 @@ def valid_model_revision(
return real_revision == expected_model_revision
+def is_model_cached(model_spec: Any, name_to_revisions_mapping: Dict) -> bool:
+ cache_dir = os.path.realpath(
+ os.path.join(XINFERENCE_CACHE_DIR, model_spec.model_name)
+ )
+ meta_path = os.path.join(cache_dir, "__valid_download")
+ revisions = name_to_revisions_mapping[model_spec.model_name]
+    if model_spec.model_revision not in revisions:  # Usually happens in unit tests
+ revisions.append(model_spec.model_revision)
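+    # The cache is valid if the meta file matches any known revision of this model.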
+    return any(valid_model_revision(meta_path, revision) for revision in revisions)
+
+
def is_valid_model_name(model_name: str) -> bool:
import re
diff --git a/xinference/web/ui/src/scenes/launch_model/embeddingCard.js b/xinference/web/ui/src/scenes/launch_model/embeddingCard.js
index a2e272f118..03db9ac388 100644
--- a/xinference/web/ui/src/scenes/launch_model/embeddingCard.js
+++ b/xinference/web/ui/src/scenes/launch_model/embeddingCard.js
@@ -266,6 +266,18 @@ const EmbeddingCard = ({
return
}
})()}
+ {(() => {
+ if (modelData.is_cached) {
+ return (
+
+ )
+ }
+ })()}
{(() => {
if (is_custom && customDeleted) {
return (
diff --git a/xinference/web/ui/src/scenes/launch_model/index.js b/xinference/web/ui/src/scenes/launch_model/index.js
index 3d50328704..33aceee695 100644
--- a/xinference/web/ui/src/scenes/launch_model/index.js
+++ b/xinference/web/ui/src/scenes/launch_model/index.js
@@ -1,7 +1,8 @@
import { TabContext, TabList, TabPanel } from '@mui/lab'
import { Box, Tab } from '@mui/material'
-import React from 'react'
+import React, { useContext, useEffect, useState } from 'react'
+import { ApiContext } from '../../components/apiContext'
import ErrorMessageSnackBar from '../../components/errorMessageSnackBar'
import Title from '../../components/Title'
import LaunchCustom from './launchCustom'
@@ -10,12 +11,42 @@ import LaunchLLM from './launchLLM'
import LaunchRerank from './launchRerank'
const LaunchModel = () => {
+ let endPoint = useContext(ApiContext).endPoint
const [value, setValue] = React.useState('1')
+ const [gpuAvailable, setGPUAvailable] = useState(-1)
+
+ const { setErrorMsg } = useContext(ApiContext)
const handleTabChange = (event, newValue) => {
setValue(newValue)
}
+ useEffect(() => {
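+    // Fetch the number of available GPUs once on mount; -1 means not fetched yet.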
+ if (gpuAvailable === -1) {
+ fetch(endPoint + '/v1/cluster/devices', {
+ method: 'GET',
+ headers: {
+ 'Content-Type': 'application/json',
+ },
+ }).then((res) => {
+ if (!res.ok) {
+          // Errors here usually indicate that the cluster is unavailable.
+ res.json().then((errorData) => {
+ setErrorMsg(
+ `Server error: ${res.status} - ${
+ errorData.detail || 'Unknown error'
+ }`
+ )
+ })
+ } else {
+ res.json().then((data) => {
+ setGPUAvailable(parseInt(data, 10))
+ })
+ }
+ })
+ }
+ }, [])
+
return (
@@ -30,7 +61,7 @@ const LaunchModel = () => {
-
+
@@ -39,7 +70,7 @@ const LaunchModel = () => {
-
+
diff --git a/xinference/web/ui/src/scenes/launch_model/launchCustom.js b/xinference/web/ui/src/scenes/launch_model/launchCustom.js
index d8f67686b3..deb50776d7 100644
--- a/xinference/web/ui/src/scenes/launch_model/launchCustom.js
+++ b/xinference/web/ui/src/scenes/launch_model/launchCustom.js
@@ -5,7 +5,7 @@ import { ApiContext } from '../../components/apiContext'
import EmbeddingCard from './embeddingCard'
import ModelCard from './modelCard'
-const LaunchCustom = () => {
+const LaunchCustom = ({ gpuAvailable }) => {
let endPoint = useContext(ApiContext).endPoint
const [registrationData, setRegistrationData] = useState([])
const { isCallingApi, setIsCallingApi } = useContext(ApiContext)
@@ -138,7 +138,7 @@ const LaunchCustom = () => {
)
@@ -147,6 +147,7 @@ const LaunchCustom = () => {
)
diff --git a/xinference/web/ui/src/scenes/launch_model/launchEmbedding.js b/xinference/web/ui/src/scenes/launch_model/launchEmbedding.js
index 771b5d1c65..9d54c95ebe 100644
--- a/xinference/web/ui/src/scenes/launch_model/launchEmbedding.js
+++ b/xinference/web/ui/src/scenes/launch_model/launchEmbedding.js
@@ -22,10 +22,7 @@ const LaunchEmbedding = () => {
const modelName = registration.model_name
? registration.model_name.toLowerCase()
: ''
- if (!modelName.includes(searchTerm.toLowerCase())) {
- return false
- }
- return true
+ return modelName.includes(searchTerm.toLowerCase())
}
const update = async () => {
@@ -35,7 +32,7 @@ const LaunchEmbedding = () => {
setIsCallingApi(true)
const response = await fetch(
- `${endPoint}/v1/model_registrations/embedding`,
+ `${endPoint}/v1/model_registrations/embedding?detailed=true`,
{
method: 'GET',
}
@@ -54,6 +51,7 @@ const LaunchEmbedding = () => {
return {
...(await desc.json()),
is_builtin: registration.is_builtin,
+ is_cached: registration.cache_status,
}
})
)
diff --git a/xinference/web/ui/src/scenes/launch_model/launchLLM.js b/xinference/web/ui/src/scenes/launch_model/launchLLM.js
index 78c8fcc70c..467f7d1624 100644
--- a/xinference/web/ui/src/scenes/launch_model/launchLLM.js
+++ b/xinference/web/ui/src/scenes/launch_model/launchLLM.js
@@ -11,18 +11,16 @@ import React, { useContext, useEffect, useState } from 'react'
import { ApiContext } from '../../components/apiContext'
import ModelCard from './modelCard'
-const LaunchLLM = () => {
+const LaunchLLM = ({ gpuAvailable }) => {
let endPoint = useContext(ApiContext).endPoint
const [registrationData, setRegistrationData] = useState([])
const { isCallingApi, setIsCallingApi } = useContext(ApiContext)
const { isUpdatingModel } = useContext(ApiContext)
- const { setErrorMsg } = useContext(ApiContext)
// States used for filtering
const [searchTerm, setSearchTerm] = useState('')
const [modelAbility, setModelAbility] = useState('all')
- const [gpuAvailable, setGPUAvailable] = useState(-1)
const handleChange = (event) => {
setSearchTerm(event.target.value)
@@ -83,32 +81,6 @@ const LaunchLLM = () => {
update()
}, [])
- useEffect(() => {
- if (gpuAvailable === -1) {
- fetch(endPoint + '/v1/cluster/devices', {
- method: 'GET',
- headers: {
- 'Content-Type': 'application/json',
- },
- }).then((res) => {
- if (!res.ok) {
- // Usually, if some errors happen here, check if the cluster is available
- res.json().then((errorData) => {
- setErrorMsg(
- `Server error: ${res.status} - ${
- errorData.detail || 'Unknown error'
- }`
- )
- })
- } else {
- res.json().then((data) => {
- setGPUAvailable(parseInt(data, 10))
- })
- }
- })
- }
- }, [])
-
const style = {
display: 'grid',
gridTemplateColumns: 'repeat(auto-fill, minmax(300px, 1fr))',
diff --git a/xinference/web/ui/src/scenes/launch_model/launchRerank.js b/xinference/web/ui/src/scenes/launch_model/launchRerank.js
index 51c9285b8a..07bed5a8cd 100644
--- a/xinference/web/ui/src/scenes/launch_model/launchRerank.js
+++ b/xinference/web/ui/src/scenes/launch_model/launchRerank.js
@@ -32,7 +32,7 @@ const LaunchRerank = () => {
setIsCallingApi(true)
const response = await fetch(
- `${endPoint}/v1/model_registrations/rerank`,
+ `${endPoint}/v1/model_registrations/rerank?detailed=true`,
{
method: 'GET',
}
@@ -51,6 +51,7 @@ const LaunchRerank = () => {
return {
...(await desc.json()),
is_builtin: registration.is_builtin,
+ is_cached: registration.cache_status,
}
})
)
diff --git a/xinference/web/ui/src/scenes/launch_model/rerankCard.js b/xinference/web/ui/src/scenes/launch_model/rerankCard.js
index 0bfd400db6..92bdecb1bc 100644
--- a/xinference/web/ui/src/scenes/launch_model/rerankCard.js
+++ b/xinference/web/ui/src/scenes/launch_model/rerankCard.js
@@ -214,6 +214,18 @@ const RerankCard = ({ url, modelData }) => {
return
}
})()}
+ {(() => {
+ if (modelData.is_cached) {
+ return (
+
+ )
+ }
+ })()}
{hover ? (
diff --git a/xinference/web/ui/src/scenes/register_model/index.js b/xinference/web/ui/src/scenes/register_model/index.js
index 2deeb7fc04..ab9a6b1161 100644
--- a/xinference/web/ui/src/scenes/register_model/index.js
+++ b/xinference/web/ui/src/scenes/register_model/index.js
@@ -161,6 +161,8 @@ const RegisterModel = () => {
roles: ps.roles,
intra_message_sep: ps.intra_message_sep,
inter_message_sep: ps.inter_message_sep,
+ stop: ps.stop ?? null,
+ stop_token_ids: ps.stop_token_ids ?? null,
}
}
}
diff --git a/xinference/web/ui/src/scenes/running_models/index.js b/xinference/web/ui/src/scenes/running_models/index.js
index 9c2afb4fb5..e87bd57aea 100644
--- a/xinference/web/ui/src/scenes/running_models/index.js
+++ b/xinference/web/ui/src/scenes/running_models/index.js
@@ -405,7 +405,14 @@ const RunningModels = () => {
}
return (
-
+