diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 7cad1a3583..0d716ae660 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,7 +1,7 @@
 files: xinference
 repos:
   - repo: https://github.com/psf/black
-    rev: 23.1.0
+    rev: 24.1a1
     hooks:
       - id: black
   - repo: https://github.com/pre-commit/pre-commit-hooks
diff --git a/xinference/api/restful_api.py b/xinference/api/restful_api.py
index d0a8437ef3..d184ff512d 100644
--- a/xinference/api/restful_api.py
+++ b/xinference/api/restful_api.py
@@ -258,9 +258,6 @@ def serve(self, logging_conf: Optional[dict] = None):
                 f"{pprint.pformat(invalid_routes)}"
             )

-        for tp in [CreateChatCompletion, CreateCompletion]:
-            logger.debug("Dump request model fields:\n%s", tp.__fields__)
-
         class SPAStaticFiles(StaticFiles):
             async def get_response(self, path: str, scope):
                 response = await super().get_response(path, scope)
@@ -288,13 +285,11 @@ def read_main():
                 SPAStaticFiles(directory=ui_location, html=True),
             )
         else:
-            warnings.warn(
-                f"""
+            warnings.warn(f"""
            Xinference ui is not built at expected directory: {ui_location}
            To resolve this warning, navigate to {os.path.join(lib_location, "web/ui/")}
            And build the Xinference ui by running "npm run build"
-            """
-            )
+            """)

         config = Config(
             app=self._app, host=self._host, port=self._port, log_config=logging_conf
diff --git a/xinference/core/chat_interface.py b/xinference/core/chat_interface.py
index aa5b284a72..2f4d5f40aa 100644
--- a/xinference/core/chat_interface.py
+++ b/xinference/core/chat_interface.py
@@ -282,13 +282,10 @@ def retry(text, hist, max_tokens, temperature) -> Generator:
         ) as generate_interface:
             history = gr.State([])

-            Markdown(
-                f"""
+            Markdown(f"""

🚀 Xinference Generate Bot : {self.model_name} 🚀

- """ - ) - Markdown( - f""" + """) + Markdown(f"""
Model ID: {self.model_uid}
@@ -301,8 +298,7 @@ def retry(text, hist, max_tokens, temperature) -> Generator:
Model Quantization: {self.quantization}
- """ - ) + """) with Column(variant="panel"): textbox = Textbox( diff --git a/xinference/core/supervisor.py b/xinference/core/supervisor.py index 5a887b72e6..2cfb46a5cb 100644 --- a/xinference/core/supervisor.py +++ b/xinference/core/supervisor.py @@ -63,9 +63,9 @@ def __init__(self): super().__init__() self._worker_address_to_worker: Dict[str, xo.ActorRefType["WorkerActor"]] = {} self._worker_status: Dict[str, WorkerStatus] = {} - self._replica_model_uid_to_worker: Dict[ - str, xo.ActorRefType["WorkerActor"] - ] = {} + self._replica_model_uid_to_worker: Dict[str, xo.ActorRefType["WorkerActor"]] = ( + {} + ) self._model_uid_to_replica_info: Dict[str, ReplicaInfo] = {} self._uptime = None self._lock = asyncio.Lock() diff --git a/xinference/deploy/utils.py b/xinference/deploy/utils.py index 953c3030aa..d3d3343b13 100644 --- a/xinference/deploy/utils.py +++ b/xinference/deploy/utils.py @@ -60,7 +60,9 @@ def get_config_dict( "disable_existing_loggers": False, "formatters": { "formatter": { - "format": "%(asctime)s %(name)-12s %(process)d %(levelname)-8s %(message)s" + "format": ( + "%(asctime)s %(name)-12s %(process)d %(levelname)-8s %(message)s" + ) }, }, "filters": { diff --git a/xinference/model/embedding/__init__.py b/xinference/model/embedding/__init__.py index fdbf7f90ae..0b538806ab 100644 --- a/xinference/model/embedding/__init__.py +++ b/xinference/model/embedding/__init__.py @@ -16,7 +16,7 @@ import json import os -from .core import EmbeddingModelSpec, get_cache_status +from .core import MODEL_NAME_TO_REVISION, EmbeddingModelSpec, get_cache_status from .custom import CustomEmbeddingModelSpec, register_embedding, unregister_embedding _model_spec_json = os.path.join(os.path.dirname(__file__), "model_spec.json") @@ -27,12 +27,16 @@ (spec["model_name"], EmbeddingModelSpec(**spec)) for spec in json.load(codecs.open(_model_spec_json, "r", encoding="utf-8")) ) +for model_name, model_spec in BUILTIN_EMBEDDING_MODELS.items(): + MODEL_NAME_TO_REVISION[model_name].append(model_spec.model_revision) MODELSCOPE_EMBEDDING_MODELS = dict( (spec["model_name"], EmbeddingModelSpec(**spec)) for spec in json.load( codecs.open(_model_spec_modelscope_json, "r", encoding="utf-8") ) ) +for model_name, model_spec in MODELSCOPE_EMBEDDING_MODELS.items(): + MODEL_NAME_TO_REVISION[model_name].append(model_spec.model_revision) from ...constants import XINFERENCE_MODEL_DIR diff --git a/xinference/model/embedding/core.py b/xinference/model/embedding/core.py index 2cd16deb31..a97f02960a 100644 --- a/xinference/model/embedding/core.py +++ b/xinference/model/embedding/core.py @@ -15,7 +15,8 @@ import logging import os import shutil -from typing import List, Optional, Tuple, Union, no_type_check +from collections import defaultdict +from typing import Dict, List, Optional, Tuple, Union, no_type_check import numpy as np from pydantic import BaseModel @@ -23,11 +24,14 @@ from ...constants import XINFERENCE_CACHE_DIR from ...types import Embedding, EmbeddingData, EmbeddingUsage from ..core import ModelDescription -from ..utils import valid_model_revision +from ..utils import is_model_cached, valid_model_revision logger = logging.getLogger(__name__) SUPPORTED_SCHEMES = ["s3"] +# Used for check whether the model is cached. +# Init when registering all the builtin models. 
+MODEL_NAME_TO_REVISION: Dict[str, List[str]] = defaultdict(list)


 class EmbeddingModelSpec(BaseModel):
@@ -195,11 +199,7 @@ def cache(model_spec: EmbeddingModelSpec):
 def get_cache_status(
     model_spec: EmbeddingModelSpec,
 ) -> bool:
-    cache_dir = os.path.realpath(
-        os.path.join(XINFERENCE_CACHE_DIR, model_spec.model_name)
-    )
-    meta_path = os.path.join(cache_dir, "__valid_download")
-    return valid_model_revision(meta_path, model_spec.model_revision)
+    return is_model_cached(model_spec, MODEL_NAME_TO_REVISION)


 class EmbeddingModel:
diff --git a/xinference/model/llm/ggml/chatglm.py b/xinference/model/llm/ggml/chatglm.py
index 4a2f637dbc..86ef20fe45 100644
--- a/xinference/model/llm/ggml/chatglm.py
+++ b/xinference/model/llm/ggml/chatglm.py
@@ -134,9 +134,9 @@ def _convert_raw_text_chunks_to_chat(
                     {
                         "index": 0,
                         "delta": {
-                            "content": token
-                            if isinstance(token, str)
-                            else token.content,
+                            "content": (
+                                token if isinstance(token, str) else token.content
+                            ),
                         },
                         "finish_reason": None,
                     }
@@ -223,8 +223,10 @@ def _handle_tools(generate_config) -> Optional[ChatCompletionMessage]:
             chatglm_tools.append(elem["function"])
         return {
             "role": "system",
-            "content": f"Answer the following questions as best as you can. You have access to the following tools:\n"
-            f"{json.dumps(chatglm_tools, indent=4, ensure_ascii=False)}",
+            "content": (
+                f"Answer the following questions as best as you can. You have access to the following tools:\n"
+                f"{json.dumps(chatglm_tools, indent=4, ensure_ascii=False)}"
+            ),
         }

     def chat(
diff --git a/xinference/model/llm/llm_family.py b/xinference/model/llm/llm_family.py
index 8c019ceeeb..4ec72f4a74 100644
--- a/xinference/model/llm/llm_family.py
+++ b/xinference/model/llm/llm_family.py
@@ -588,31 +588,57 @@ def cache_from_huggingface(
     return cache_dir


+def _check_revision(
+    llm_family: LLMFamilyV1,
+    llm_spec: "LLMSpecV1",
+    builtin: list,
+    meta_path: str,
+) -> bool:
+    for family in builtin:
+        if llm_family.model_name == family.model_name:
+            specs = family.model_specs
+            for spec in specs:
+                if (
+                    spec.model_format == "pytorch"
+                    and spec.model_size_in_billions == llm_spec.model_size_in_billions
+                ):
+                    return valid_model_revision(meta_path, spec.model_revision)
+    return False
+
+
 def get_cache_status(
     llm_family: LLMFamilyV1,
     llm_spec: "LLMSpecV1",
 ) -> Union[bool, List[bool]]:
+    """
+    When called from the code above, `llm_family` is constructed only from BUILTIN_LLM_FAMILIES,
+    so we should check both the huggingface and modelscope cache files.
+ """ cache_dir = _get_cache_dir(llm_family, llm_spec, create_if_not_exist=False) + # check revision for pytorch model if llm_spec.model_format == "pytorch": - return _skip_download( - cache_dir, - llm_spec.model_format, - llm_spec.model_hub, - llm_spec.model_revision, - "none", - ) + hf_meta_path = _get_meta_path(cache_dir, "pytorch", "huggingface", "none") + ms_meta_path = _get_meta_path(cache_dir, "pytorch", "modelscope", "none") + revisions = [ + _check_revision(llm_family, llm_spec, BUILTIN_LLM_FAMILIES, hf_meta_path), + _check_revision( + llm_family, llm_spec, BUILTIN_MODELSCOPE_LLM_FAMILIES, ms_meta_path + ), + ] + return any(revisions) + # just check meta file for ggml and gptq model elif llm_spec.model_format in ["ggmlv3", "ggufv2", "gptq"]: ret = [] for q in llm_spec.quantizations: - ret.append( - _skip_download( - cache_dir, - llm_spec.model_format, - llm_spec.model_hub, - llm_spec.model_revision, - q, - ) + assert q is not None + hf_meta_path = _get_meta_path( + cache_dir, llm_spec.model_format, "huggingface", q + ) + ms_meta_path = _get_meta_path( + cache_dir, llm_spec.model_format, "modelscope", q ) + results = [os.path.exists(hf_meta_path), os.path.exists(ms_meta_path)] + ret.append(any(results)) return ret else: raise ValueError(f"Unsupported model format: {llm_spec.model_format}") diff --git a/xinference/model/llm/pytorch/core.py b/xinference/model/llm/pytorch/core.py index 95e2bac596..17957fcda2 100644 --- a/xinference/model/llm/pytorch/core.py +++ b/xinference/model/llm/pytorch/core.py @@ -442,9 +442,9 @@ def _sanitize_generate_config( and self.model_family.prompt_style and self.model_family.prompt_style.stop_token_ids ): - generate_config[ - "stop_token_ids" - ] = self.model_family.prompt_style.stop_token_ids.copy() + generate_config["stop_token_ids"] = ( + self.model_family.prompt_style.stop_token_ids.copy() + ) return generate_config diff --git a/xinference/model/llm/tests/test_llm_family.py b/xinference/model/llm/tests/test_llm_family.py index 7b5036e367..578b5996f1 100644 --- a/xinference/model/llm/tests/test_llm_family.py +++ b/xinference/model/llm/tests/test_llm_family.py @@ -289,6 +289,7 @@ def test_meta_file(): cache_dir = cache_from_huggingface(family, spec, quantization=None) meta_path = _get_meta_path(cache_dir, spec.model_format, spec.model_hub, None) assert valid_model_revision(meta_path, "3d2b5f275bdf882b8775f902e1bfdb790e2cfc32") + shutil.rmtree(cache_dir) def test_parse_uri(): @@ -878,6 +879,7 @@ def test_get_cache_status_pytorch(): model_size_in_billions=1, quantizations=["4-bit", "8-bit", "none"], model_id="facebook/opt-125m", + model_revision="3d2b5f275bdf882b8775f902e1bfdb790e2cfc32", ) family = LLMFamilyV1( version=1, diff --git a/xinference/model/rerank/__init__.py b/xinference/model/rerank/__init__.py index d2ecd4c09c..581e980fe0 100644 --- a/xinference/model/rerank/__init__.py +++ b/xinference/model/rerank/__init__.py @@ -16,7 +16,7 @@ import json import os -from .core import RerankModelSpec, get_cache_status +from .core import MODEL_NAME_TO_REVISION, RerankModelSpec, get_cache_status _model_spec_json = os.path.join(os.path.dirname(__file__), "model_spec.json") _model_spec_modelscope_json = os.path.join( @@ -26,11 +26,15 @@ (spec["model_name"], RerankModelSpec(**spec)) for spec in json.load(codecs.open(_model_spec_json, "r", encoding="utf-8")) ) +for model_name, model_spec in BUILTIN_RERANK_MODELS.items(): + MODEL_NAME_TO_REVISION[model_name].append(model_spec.model_revision) MODELSCOPE_RERANK_MODELS = dict( (spec["model_name"], 
     for spec in json.load(
         codecs.open(_model_spec_modelscope_json, "r", encoding="utf-8")
     )
 )
+for model_name, model_spec in MODELSCOPE_RERANK_MODELS.items():
+    MODEL_NAME_TO_REVISION[model_name].append(model_spec.model_revision)

 del _model_spec_json
 del _model_spec_modelscope_json
diff --git a/xinference/model/rerank/core.py b/xinference/model/rerank/core.py
index b51cabc34a..7e324d3ef3 100644
--- a/xinference/model/rerank/core.py
+++ b/xinference/model/rerank/core.py
@@ -15,6 +15,7 @@
 import logging
 import os
 import uuid
+from collections import defaultdict
 from typing import Dict, List, Optional, Tuple

 import numpy as np
@@ -23,10 +24,14 @@
 from ...constants import XINFERENCE_CACHE_DIR
 from ...types import Document, DocumentObj, Rerank
 from ..core import ModelDescription
-from ..utils import valid_model_revision
+from ..utils import is_model_cached, valid_model_revision

 logger = logging.getLogger(__name__)

+# Used to check whether the model is cached.
+# Initialized when registering all the builtin models.
+MODEL_NAME_TO_REVISION: Dict[str, List[str]] = defaultdict(list)
+

 class RerankModelSpec(BaseModel):
     model_name: str
@@ -126,11 +131,7 @@ def rerank(
 def get_cache_status(
     model_spec: RerankModelSpec,
 ) -> bool:
-    cache_dir = os.path.realpath(
-        os.path.join(XINFERENCE_CACHE_DIR, model_spec.model_name)
-    )
-    meta_path = os.path.join(cache_dir, "__valid_download")
-    return valid_model_revision(meta_path, model_spec.model_revision)
+    return is_model_cached(model_spec, MODEL_NAME_TO_REVISION)


 def cache(model_spec: RerankModelSpec):
diff --git a/xinference/model/utils.py b/xinference/model/utils.py
index f1da3e75f0..e1ee21d117 100644
--- a/xinference/model/utils.py
+++ b/xinference/model/utils.py
@@ -16,11 +16,11 @@
 import os
 from json import JSONDecodeError
 from pathlib import Path
-from typing import Callable, Dict, Optional, Tuple
+from typing import Any, Callable, Dict, Optional, Tuple

 from fsspec import AbstractFileSystem

-from ..constants import XINFERENCE_ENV_MODEL_SRC
+from ..constants import XINFERENCE_CACHE_DIR, XINFERENCE_ENV_MODEL_SRC

 logger = logging.getLogger(__name__)
 MAX_ATTEMPTS = 3
@@ -132,6 +132,17 @@ def valid_model_revision(
     return real_revision == expected_model_revision


+def is_model_cached(model_spec: Any, name_to_revisions_mapping: Dict):
+    cache_dir = os.path.realpath(
+        os.path.join(XINFERENCE_CACHE_DIR, model_spec.model_name)
+    )
+    meta_path = os.path.join(cache_dir, "__valid_download")
+    revisions = name_to_revisions_mapping[model_spec.model_name]
+    if model_spec.model_revision not in revisions:  # Usually happens in unit tests
+        revisions.append(model_spec.model_revision)
+    return any([valid_model_revision(meta_path, revision) for revision in revisions])
+
+
 def is_valid_model_name(model_name: str) -> bool:
     import re
diff --git a/xinference/web/ui/src/scenes/launch_model/embeddingCard.js b/xinference/web/ui/src/scenes/launch_model/embeddingCard.js
index a2e272f118..03db9ac388 100644
--- a/xinference/web/ui/src/scenes/launch_model/embeddingCard.js
+++ b/xinference/web/ui/src/scenes/launch_model/embeddingCard.js
@@ -266,6 +266,18 @@ const EmbeddingCard = ({
             return
           }
         })()}
+        {(() => {
+          if (modelData.is_cached) {
+            return (
+              <Chip
+                label="Cached"
+                variant="outlined"
+                size="small"
+                sx={{ marginLeft: '10px' }}
+              />
+            )
+          }
+        })()}
         {(() => {
           if (is_custom && customDeleted) {
             return (
diff --git a/xinference/web/ui/src/scenes/launch_model/index.js b/xinference/web/ui/src/scenes/launch_model/index.js
index 3d50328704..33aceee695 100644
--- a/xinference/web/ui/src/scenes/launch_model/index.js
+++ b/xinference/web/ui/src/scenes/launch_model/index.js
@@ -1,7 +1,8 @@
 import { TabContext, TabList, TabPanel } from '@mui/lab'
 import { Box, Tab } from '@mui/material'
-import React from 'react'
+import React, { useContext, useEffect, useState } from 'react'

+import { ApiContext } from '../../components/apiContext'
 import ErrorMessageSnackBar from '../../components/errorMessageSnackBar'
 import Title from '../../components/Title'
 import LaunchCustom from './launchCustom'
@@ -10,12 +11,42 @@ import LaunchLLM from './launchLLM'
 import LaunchRerank from './launchRerank'

 const LaunchModel = () => {
+  let endPoint = useContext(ApiContext).endPoint
   const [value, setValue] = React.useState('1')
+  const [gpuAvailable, setGPUAvailable] = useState(-1)
+
+  const { setErrorMsg } = useContext(ApiContext)

   const handleTabChange = (event, newValue) => {
     setValue(newValue)
   }

+  useEffect(() => {
+    if (gpuAvailable === -1) {
+      fetch(endPoint + '/v1/cluster/devices', {
+        method: 'GET',
+        headers: {
+          'Content-Type': 'application/json',
+        },
+      }).then((res) => {
+        if (!res.ok) {
+          // Usually, if some errors happen here, check if the cluster is available
+          res.json().then((errorData) => {
+            setErrorMsg(
+              `Server error: ${res.status} - ${
+                errorData.detail || 'Unknown error'
+              }`
+            )
+          })
+        } else {
+          res.json().then((data) => {
+            setGPUAvailable(parseInt(data, 10))
+          })
+        }
+      })
+    }
+  }, [])
+
   return (
@@ -30,7 +61,7 @@ const LaunchModel = () => {
         </TabList>
       </Box>
       <TabPanel value="1" sx={{ padding: 0 }}>
-        <LaunchLLM />
+        <LaunchLLM gpuAvailable={gpuAvailable} />
       </TabPanel>
       <TabPanel value="2" sx={{ padding: 0 }}>
         <LaunchEmbedding />
@@ -39,7 +70,7 @@ const LaunchModel = () => {
         <LaunchRerank />
       </TabPanel>
       <TabPanel value="4" sx={{ padding: 0 }}>
-        <LaunchCustom />
+        <LaunchCustom gpuAvailable={gpuAvailable} />
       </TabPanel>
     </TabContext>
   </Box>
diff --git a/xinference/web/ui/src/scenes/launch_model/launchCustom.js b/xinference/web/ui/src/scenes/launch_model/launchCustom.js
index d8f67686b3..deb50776d7 100644
--- a/xinference/web/ui/src/scenes/launch_model/launchCustom.js
+++ b/xinference/web/ui/src/scenes/launch_model/launchCustom.js
@@ -5,7 +5,7 @@ import { ApiContext } from '../../components/apiContext'
 import EmbeddingCard from './embeddingCard'
 import ModelCard from './modelCard'

-const LaunchCustom = () => {
+const LaunchCustom = ({ gpuAvailable }) => {
   let endPoint = useContext(ApiContext).endPoint
   const [registrationData, setRegistrationData] = useState([])
   const { isCallingApi, setIsCallingApi } = useContext(ApiContext)
@@ -138,7 +138,7 @@ const LaunchCustom = () => {
               <EmbeddingCard
                 url={endPoint}
                 modelData={filteredRegistration}
-                cardHeight={350}
+                cardHeight={380}
                 is_custom={true}
               />
             )
@@ -147,6 +147,7 @@ const LaunchCustom = () => {
               <ModelCard
                 url={endPoint}
                 modelData={filteredRegistration}
+                gpuAvailable={gpuAvailable}
                 is_custom={true}
               />
             )
diff --git a/xinference/web/ui/src/scenes/launch_model/launchEmbedding.js b/xinference/web/ui/src/scenes/launch_model/launchEmbedding.js
index 771b5d1c65..9d54c95ebe 100644
--- a/xinference/web/ui/src/scenes/launch_model/launchEmbedding.js
+++ b/xinference/web/ui/src/scenes/launch_model/launchEmbedding.js
@@ -22,10 +22,7 @@ const LaunchEmbedding = () => {
     const modelName = registration.model_name
       ? registration.model_name.toLowerCase()
       : ''
-    if (!modelName.includes(searchTerm.toLowerCase())) {
-      return false
-    }
-    return true
+    return modelName.includes(searchTerm.toLowerCase())
   }

   const update = async () => {
@@ -35,7 +32,7 @@ const LaunchEmbedding = () => {
     setIsCallingApi(true)

     const response = await fetch(
-      `${endPoint}/v1/model_registrations/embedding`,
+      `${endPoint}/v1/model_registrations/embedding?detailed=true`,
       {
         method: 'GET',
       }
@@ -54,6 +51,7 @@ const LaunchEmbedding = () => {
           return {
             ...(await desc.json()),
             is_builtin: registration.is_builtin,
+            is_cached: registration.cache_status,
           }
         })
       )
diff --git a/xinference/web/ui/src/scenes/launch_model/launchLLM.js b/xinference/web/ui/src/scenes/launch_model/launchLLM.js
index 78c8fcc70c..467f7d1624 100644
--- a/xinference/web/ui/src/scenes/launch_model/launchLLM.js
+++ b/xinference/web/ui/src/scenes/launch_model/launchLLM.js
@@ -11,18 +11,16 @@ import React, { useContext, useEffect, useState } from 'react'
 import { ApiContext } from '../../components/apiContext'
 import ModelCard from './modelCard'

-const LaunchLLM = () => {
+const LaunchLLM = ({ gpuAvailable }) => {
   let endPoint = useContext(ApiContext).endPoint
   const [registrationData, setRegistrationData] = useState([])
   const { isCallingApi, setIsCallingApi } = useContext(ApiContext)
   const { isUpdatingModel } = useContext(ApiContext)
-  const { setErrorMsg } = useContext(ApiContext)

   // States used for filtering
   const [searchTerm, setSearchTerm] = useState('')
   const [modelAbility, setModelAbility] = useState('all')
-  const [gpuAvailable, setGPUAvailable] = useState(-1)

   const handleChange = (event) => {
     setSearchTerm(event.target.value)
@@ -83,32 +81,6 @@ const LaunchLLM = () => {
     update()
   }, [])

-  useEffect(() => {
-    if (gpuAvailable === -1) {
-      fetch(endPoint + '/v1/cluster/devices', {
-        method: 'GET',
-        headers: {
-          'Content-Type': 'application/json',
-        },
-      }).then((res) => {
-        if (!res.ok) {
-          // Usually, if some errors happen here, check if the cluster is available
-          res.json().then((errorData) => {
-            setErrorMsg(
-              `Server error: ${res.status} - ${
-                errorData.detail || 'Unknown error'
-              }`
-            )
-          })
-        } else {
-          res.json().then((data) => {
-            setGPUAvailable(parseInt(data, 10))
-          })
-        }
-      })
-    }
-  }, [])
-
   const style = {
     display: 'grid',
     gridTemplateColumns: 'repeat(auto-fill, minmax(300px, 1fr))',
diff --git a/xinference/web/ui/src/scenes/launch_model/launchRerank.js b/xinference/web/ui/src/scenes/launch_model/launchRerank.js
index 51c9285b8a..07bed5a8cd 100644
--- a/xinference/web/ui/src/scenes/launch_model/launchRerank.js
+++ b/xinference/web/ui/src/scenes/launch_model/launchRerank.js
@@ -32,7 +32,7 @@ const LaunchRerank = () => {
     setIsCallingApi(true)

     const response = await fetch(
-      `${endPoint}/v1/model_registrations/rerank`,
+      `${endPoint}/v1/model_registrations/rerank?detailed=true`,
       {
         method: 'GET',
       }
@@ -51,6 +51,7 @@ const LaunchRerank = () => {
           return {
             ...(await desc.json()),
             is_builtin: registration.is_builtin,
+            is_cached: registration.cache_status,
           }
         })
       )
diff --git a/xinference/web/ui/src/scenes/launch_model/rerankCard.js b/xinference/web/ui/src/scenes/launch_model/rerankCard.js
index 0bfd400db6..92bdecb1bc 100644
--- a/xinference/web/ui/src/scenes/launch_model/rerankCard.js
+++ b/xinference/web/ui/src/scenes/launch_model/rerankCard.js
@@ -214,6 +214,18 @@ const RerankCard = ({ url, modelData }) => {
             return <Chip label="ZH" variant="outlined" size="small" />
           }
         })()}
+        {(() => {
+          if (modelData.is_cached) {
+            return (
+              <Chip
+                label="Cached"
variant="outlined" + size="small" + sx={{ marginLeft: '10px' }} + /> + ) + } + })()} </div> </div> {hover ? ( diff --git a/xinference/web/ui/src/scenes/register_model/index.js b/xinference/web/ui/src/scenes/register_model/index.js index 2deeb7fc04..ab9a6b1161 100644 --- a/xinference/web/ui/src/scenes/register_model/index.js +++ b/xinference/web/ui/src/scenes/register_model/index.js @@ -161,6 +161,8 @@ const RegisterModel = () => { roles: ps.roles, intra_message_sep: ps.intra_message_sep, inter_message_sep: ps.inter_message_sep, + stop: ps.stop ?? null, + stop_token_ids: ps.stop_token_ids ?? null, } } } diff --git a/xinference/web/ui/src/scenes/running_models/index.js b/xinference/web/ui/src/scenes/running_models/index.js index 9c2afb4fb5..e87bd57aea 100644 --- a/xinference/web/ui/src/scenes/running_models/index.js +++ b/xinference/web/ui/src/scenes/running_models/index.js @@ -405,7 +405,14 @@ const RunningModels = () => { } return ( - <Box sx={{ height: '100%', width: '100%' }}> + <Box + sx={{ + height: '100%', + width: '100%', + paddingLeft: '20px', + paddingTop: '20px', + }} + > <Title title="Running Models" /> <TabContext value={tabValue}> <Box sx={{ borderBottom: 1, borderColor: 'divider' }}>