Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

FEAT: support minicpm-reranker model #2383

Merged
merged 2 commits into from
Sep 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions xinference/model/rerank/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
# limitations under the License.

import gc
import importlib
import logging
import os
import threading
Expand Down Expand Up @@ -178,6 +179,16 @@ def _auto_detect_type(model_path):
return rerank_type

def load(self):
flash_attn_installed = importlib.util.find_spec("flash_attn") is not None
if (
self._auto_detect_type(self._model_path) != "normal"
and flash_attn_installed
):
logger.warning(
"flash_attn can only support fp16 and bf16, "
"will force set `use_fp16` to True"
)
self._use_fp16 = True
if self._model_spec.type == "normal":
try:
import sentence_transformers
Expand Down
8 changes: 8 additions & 0 deletions xinference/model/rerank/model_spec.json
Original file line number Diff line number Diff line change
Expand Up @@ -54,5 +54,13 @@
"max_tokens": 1024,
"model_id": "jinaai/jina-reranker-v2-base-multilingual",
"model_revision": "298e48cada4a9318650d7fbd795f63827f884087"
},
{
"model_name": "minicpm-reranker",
"type": "normal",
"language": ["en", "zh"],
"max_tokens": 1024,
"model_id": "openbmb/MiniCPM-Reranker",
"model_revision": "5d2fd7345b6444c89d4c0fa59c92272888f3f2d0"
}
]
8 changes: 8 additions & 0 deletions xinference/model/rerank/model_spec_modelscope.json
Original file line number Diff line number Diff line change
Expand Up @@ -49,5 +49,13 @@
"max_tokens": 2048,
"model_id": "mirror013/bge-reranker-v2-minicpm-layerwise",
"model_hub": "modelscope"
},
{
"model_name": "minicpm-reranker",
"type": "normal",
"language": ["en", "zh"],
"max_tokens": 1024,
"model_id": "OpenBMB/MiniCPM-Reranker",
"model_hub": "modelscope"
}
]
3 changes: 3 additions & 0 deletions xinference/model/rerank/tests/test_rerank.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,9 @@ def test_auto_detect_type():
with open(rerank_model_json, "r") as f:
rerank_models = json.load(f)
for m in rerank_models:
if m["model_name"] == "minicpm-reranker":
# TODO: fix `_auto_detect_type` for minicpm-reranker; until then this model
# is skipped so the type assertion below is not run for it.
continue
try:
assert m["type"] == RerankModel._auto_detect_type(m["model_id"])
except EnvironmentError:
Expand Down
Loading