Skip to content

Commit

Permalink
FEAT: add download from openmind_hub (#2504)
Browse files Browse the repository at this point in the history
Co-authored-by: cookieyyds <985663835@qq.com>
  • Loading branch information
cookieyyds and cookieyyds authored Nov 1, 2024
1 parent 07f9325 commit 67e97ab
Show file tree
Hide file tree
Showing 15 changed files with 1,527 additions and 24 deletions.
2 changes: 1 addition & 1 deletion doc/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@
"type": "fontawesome",
}])
html_theme_options["external_links"] = [
{"name": "产品官网", "url": "https://xorbits.cn/inference"},
{"name": "产品官网", "url": "https://xorbits.cn"},
]

html_favicon = "_static/favicon.svg"
1 change: 1 addition & 0 deletions xinference/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ def get_xinference_home() -> str:
# if user has already set `XINFERENCE_HOME` env, change huggingface and modelscope default download path
os.environ["HUGGINGFACE_HUB_CACHE"] = os.path.join(home_path, "huggingface")
os.environ["MODELSCOPE_CACHE"] = os.path.join(home_path, "modelscope")
os.environ["XDG_CACHE_HOME"] = os.path.join(home_path, "openmind_hub")
# In multi-tenant mode,
# gradio's temporary files are stored in their respective home directories,
# to prevent insufficient permissions
Expand Down
4 changes: 3 additions & 1 deletion xinference/core/worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -785,7 +785,9 @@ async def launch_builtin_model(
peft_model_config: Optional[PeftModelConfig] = None,
request_limits: Optional[int] = None,
gpu_idx: Optional[Union[int, List[int]]] = None,
download_hub: Optional[Literal["huggingface", "modelscope", "csghub"]] = None,
download_hub: Optional[
Literal["huggingface", "modelscope", "openmind_hub", "csghub"]
] = None,
model_path: Optional[str] = None,
**kwargs,
):
Expand Down
8 changes: 6 additions & 2 deletions xinference/model/audio/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,9 @@ def generate_audio_description(

def match_audio(
model_name: str,
download_hub: Optional[Literal["huggingface", "modelscope", "csghub"]] = None,
download_hub: Optional[
Literal["huggingface", "modelscope", "openmind_hub", "csghub"]
] = None,
) -> AudioModelFamilyV1:
from ..utils import download_from_modelscope
from . import BUILTIN_AUDIO_MODELS, MODELSCOPE_AUDIO_MODELS
Expand Down Expand Up @@ -152,7 +154,9 @@ def create_audio_model_instance(
devices: List[str],
model_uid: str,
model_name: str,
download_hub: Optional[Literal["huggingface", "modelscope", "csghub"]] = None,
download_hub: Optional[
Literal["huggingface", "modelscope", "openmind_hub", "csghub"]
] = None,
model_path: Optional[str] = None,
**kwargs,
) -> Tuple[
Expand Down
4 changes: 3 additions & 1 deletion xinference/model/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,9 @@ def create_model_instance(
model_size_in_billions: Optional[Union[int, str]] = None,
quantization: Optional[str] = None,
peft_model_config: Optional[PeftModelConfig] = None,
download_hub: Optional[Literal["huggingface", "modelscope", "csghub"]] = None,
download_hub: Optional[
Literal["huggingface", "modelscope", "openmind_hub", "csghub"]
] = None,
model_path: Optional[str] = None,
**kwargs,
) -> Tuple[Any, ModelDescription]:
Expand Down
8 changes: 6 additions & 2 deletions xinference/model/embedding/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -433,7 +433,9 @@ def encode(

def match_embedding(
model_name: str,
download_hub: Optional[Literal["huggingface", "modelscope", "csghub"]] = None,
download_hub: Optional[
Literal["huggingface", "modelscope", "openmind_hub", "csghub"]
] = None,
) -> EmbeddingModelSpec:
from ..utils import download_from_modelscope
from . import BUILTIN_EMBEDDING_MODELS, MODELSCOPE_EMBEDDING_MODELS
Expand Down Expand Up @@ -469,7 +471,9 @@ def create_embedding_model_instance(
devices: List[str],
model_uid: str,
model_name: str,
download_hub: Optional[Literal["huggingface", "modelscope", "csghub"]] = None,
download_hub: Optional[
Literal["huggingface", "modelscope", "openmind_hub", "csghub"]
] = None,
model_path: Optional[str] = None,
**kwargs,
) -> Tuple[EmbeddingModel, EmbeddingModelDescription]:
Expand Down
8 changes: 6 additions & 2 deletions xinference/model/image/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,9 @@ def generate_image_description(

def match_diffusion(
model_name: str,
download_hub: Optional[Literal["huggingface", "modelscope", "csghub"]] = None,
download_hub: Optional[
Literal["huggingface", "modelscope", "openmind_hub", "csghub"]
] = None,
) -> ImageModelFamilyV1:
from ..utils import download_from_modelscope
from . import BUILTIN_IMAGE_MODELS, MODELSCOPE_IMAGE_MODELS
Expand Down Expand Up @@ -213,7 +215,9 @@ def create_image_model_instance(
model_uid: str,
model_name: str,
peft_model_config: Optional[PeftModelConfig] = None,
download_hub: Optional[Literal["huggingface", "modelscope", "csghub"]] = None,
download_hub: Optional[
Literal["huggingface", "modelscope", "openmind_hub", "csghub"]
] = None,
model_path: Optional[str] = None,
**kwargs,
) -> Tuple[
Expand Down
33 changes: 33 additions & 0 deletions xinference/model/llm/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
BUILTIN_LLM_MODEL_TOOL_CALL_FAMILIES,
BUILTIN_LLM_PROMPT_STYLE,
BUILTIN_MODELSCOPE_LLM_FAMILIES,
BUILTIN_OPENMIND_HUB_LLM_FAMILIES,
LLAMA_CLASSES,
LLM_ENGINES,
LMDEPLOY_CLASSES,
Expand Down Expand Up @@ -258,6 +259,36 @@ def _install():
if "tools" in model_spec.model_ability:
BUILTIN_LLM_MODEL_TOOL_CALL_FAMILIES.add(model_spec.model_name)

openmind_hub_json_path = os.path.join(
os.path.dirname(os.path.abspath(__file__)), "llm_family_openmind_hub.json"
)
for json_obj in json.load(
codecs.open(openmind_hub_json_path, "r", encoding="utf-8")
):
model_spec = LLMFamilyV1.parse_obj(json_obj)
BUILTIN_OPENMIND_HUB_LLM_FAMILIES.append(model_spec)

# register prompt style, in case that we have something missed
# if duplicated with huggingface json, keep it as the huggingface style

if (
"chat" in model_spec.model_ability
and isinstance(model_spec.chat_template, str)
and model_spec.model_name not in BUILTIN_LLM_PROMPT_STYLE
):
BUILTIN_LLM_PROMPT_STYLE[model_spec.model_name] = {
"chat_template": model_spec.chat_template,
"stop_token_ids": model_spec.stop_token_ids,
"stop": model_spec.stop,
}
# register model family
if "chat" in model_spec.model_ability:
BUILTIN_LLM_MODEL_CHAT_FAMILIES.add(model_spec.model_name)
else:
BUILTIN_LLM_MODEL_GENERATE_FAMILIES.add(model_spec.model_name)
if "tools" in model_spec.model_ability:
BUILTIN_LLM_MODEL_TOOL_CALL_FAMILIES.add(model_spec.model_name)

csghub_json_path = os.path.join(
os.path.dirname(os.path.abspath(__file__)), "llm_family_csghub.json"
)
Expand Down Expand Up @@ -288,6 +319,7 @@ def _install():
for llm_specs in [
BUILTIN_LLM_FAMILIES,
BUILTIN_MODELSCOPE_LLM_FAMILIES,
BUILTIN_OPENMIND_HUB_LLM_FAMILIES,
BUILTIN_CSGHUB_LLM_FAMILIES,
]:
for llm_spec in llm_specs:
Expand All @@ -298,6 +330,7 @@ def _install():
for families in [
BUILTIN_LLM_FAMILIES,
BUILTIN_MODELSCOPE_LLM_FAMILIES,
BUILTIN_OPENMIND_HUB_LLM_FAMILIES,
BUILTIN_CSGHUB_LLM_FAMILIES,
]:
for family in families:
Expand Down
4 changes: 3 additions & 1 deletion xinference/model/llm/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,9 @@ def create_llm_model_instance(
model_size_in_billions: Optional[Union[int, str]] = None,
quantization: Optional[str] = None,
peft_model_config: Optional[PeftModelConfig] = None,
download_hub: Optional[Literal["huggingface", "modelscope", "csghub"]] = None,
download_hub: Optional[
Literal["huggingface", "modelscope", "openmind_hub", "csghub"]
] = None,
model_path: Optional[str] = None,
**kwargs,
) -> Tuple[LLM, LLMDescription]:
Expand Down
70 changes: 68 additions & 2 deletions xinference/model/llm/llm_family.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
create_symlink,
download_from_csghub,
download_from_modelscope,
download_from_openmind_hub,
is_valid_model_uri,
parse_uri,
retry_download,
Expand Down Expand Up @@ -239,6 +240,7 @@ def parse_raw(

BUILTIN_LLM_FAMILIES: List["LLMFamilyV1"] = []
BUILTIN_MODELSCOPE_LLM_FAMILIES: List["LLMFamilyV1"] = []
BUILTIN_OPENMIND_HUB_LLM_FAMILIES: List["LLMFamilyV1"] = []
BUILTIN_CSGHUB_LLM_FAMILIES: List["LLMFamilyV1"] = []

SGLANG_CLASSES: List[Type[LLM]] = []
Expand Down Expand Up @@ -301,6 +303,9 @@ def cache(
elif llm_spec.model_hub == "modelscope":
logger.info(f"Caching from Modelscope: {llm_spec.model_id}")
return cache_from_modelscope(llm_family, llm_spec, quantization)
elif llm_spec.model_hub == "openmind_hub":
logger.info(f"Caching from openmind_hub: {llm_spec.model_id}")
return cache_from_openmind_hub(llm_family, llm_spec, quantization)
elif llm_spec.model_hub == "csghub":
logger.info(f"Caching from CSGHub: {llm_spec.model_id}")
return cache_from_csghub(llm_family, llm_spec, quantization)
Expand Down Expand Up @@ -474,14 +479,17 @@ def _skip_download(
model_revision: Optional[str],
quantization: Optional[str] = None,
) -> bool:
if model_format == "pytorch":
if model_format in ["pytorch", "mindspore"]:
model_hub_to_meta_path = {
"huggingface": _get_meta_path(
cache_dir, model_format, "huggingface", quantization
),
"modelscope": _get_meta_path(
cache_dir, model_format, "modelscope", quantization
),
"openmind_hub": _get_meta_path(
cache_dir, model_format, "openmind_hub", quantization
),
"csghub": _get_meta_path(cache_dir, model_format, "csghub", quantization),
}
if valid_model_revision(model_hub_to_meta_path[model_hub], model_revision):
Expand Down Expand Up @@ -702,6 +710,50 @@ def cache_from_modelscope(
return cache_dir


def cache_from_openmind_hub(
    llm_family: LLMFamilyV1,
    llm_spec: "LLMSpecV1",
    quantization: Optional[str] = None,
) -> str:
    """
    Ensure the given model is cached locally from openmind_hub.

    Returns the cache directory. If a valid cached copy already exists
    (as judged by ``_skip_download``), no download is performed.

    Raises
    ------
    ValueError
        If ``llm_spec.model_format`` is neither ``pytorch`` nor ``mindspore``.
    """
    # Imported lazily so openmind_hub is only required when this hub is used.
    from openmind_hub import snapshot_download

    cache_dir = _get_cache_dir(llm_family, llm_spec)

    # Reuse an existing, up-to-date cache instead of re-downloading.
    if _skip_download(
        cache_dir,
        llm_spec.model_format,
        llm_spec.model_hub,
        llm_spec.model_revision,
        quantization,
    ):
        return cache_dir

    # openmind_hub only serves full checkpoints, not quantized single-file formats.
    if llm_spec.model_format not in ("pytorch", "mindspore"):
        raise ValueError(f"Unsupported format: {llm_spec.model_format}")

    downloaded = retry_download(
        snapshot_download,
        llm_family.model_name,
        {
            "model_size": llm_spec.model_size_in_billions,
            "model_format": llm_spec.model_format,
        },
        llm_spec.model_id,
        revision=llm_spec.model_revision,
    )
    create_symlink(downloaded, cache_dir)

    # Record hub/format/quantization metadata so later calls can skip the download.
    meta_path = _get_meta_path(
        cache_dir, llm_spec.model_format, llm_spec.model_hub, quantization
    )
    _generate_meta_file(meta_path, llm_family, llm_spec, quantization)

    return cache_dir


def cache_from_huggingface(
llm_family: LLMFamilyV1,
llm_spec: "LLMSpecV1",
Expand Down Expand Up @@ -893,7 +945,9 @@ def match_llm(
model_format: Optional[str] = None,
model_size_in_billions: Optional[Union[int, str]] = None,
quantization: Optional[str] = None,
download_hub: Optional[Literal["huggingface", "modelscope", "csghub"]] = None,
download_hub: Optional[
Literal["huggingface", "modelscope", "openmind_hub", "csghub"]
] = None,
) -> Optional[Tuple[LLMFamilyV1, LLMSpecV1, str]]:
"""
Find an LLM family, spec, and quantization that satisfy given criteria.
Expand Down Expand Up @@ -924,6 +978,12 @@ def _apply_format_to_model_id(spec: LLMSpecV1, q: str) -> LLMSpecV1:
+ BUILTIN_LLM_FAMILIES
+ user_defined_llm_families
)
elif download_hub == "openmind_hub":
all_families = (
BUILTIN_OPENMIND_HUB_LLM_FAMILIES
+ BUILTIN_LLM_FAMILIES
+ user_defined_llm_families
)
elif download_hub == "csghub":
all_families = (
BUILTIN_CSGHUB_LLM_FAMILIES
Expand All @@ -938,6 +998,12 @@ def _apply_format_to_model_id(spec: LLMSpecV1, q: str) -> LLMSpecV1:
+ BUILTIN_LLM_FAMILIES
+ user_defined_llm_families
)
elif download_from_openmind_hub():
all_families = (
BUILTIN_OPENMIND_HUB_LLM_FAMILIES
+ BUILTIN_LLM_FAMILIES
+ user_defined_llm_families
)
elif download_from_csghub():
all_families = (
BUILTIN_CSGHUB_LLM_FAMILIES
Expand Down
Loading

0 comments on commit 67e97ab

Please sign in to comment.