Skip to content

feat: 支持百度千帆向量模型 #1394

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Oct 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# coding=utf-8
"""
@project: MaxKB
@Author:虎
@file: embedding.py
@date:2024/10/17 15:40
@desc: Credential form and validation for Baidu Qianfan embedding models
"""
from typing import Dict

from common import forms
from common.exception.app_exception import AppApiException
from common.forms import BaseForm
from setting.models_provider.base_model_provider import BaseModelCredential, ValidCode


class QianfanEmbeddingCredential(BaseForm, BaseModelCredential):
    """Credential form for Baidu Qianfan embedding models.

    Declares the ``qianfan_ak`` / ``qianfan_sk`` form fields and validates a
    submitted credential by sending a trial embedding request through the
    model returned by the provider.
    """

    def is_valid(self, model_type: str, model_name, model_credential: Dict[str, object], provider,
                 raise_exception=False):
        """Check that ``model_credential`` can be used to call the embedding model.

        :param model_type: model type value; it must equal the ``'value'`` entry of
            one of the items returned by ``provider.get_model_type_list()``.
        :param model_name: model name forwarded to ``provider.get_model``.
        :param model_credential: credential values submitted for the model.
        :param provider: object exposing ``get_model_type_list()`` and ``get_model()``.
        :param raise_exception: when truthy, a failed trial request raises instead
            of returning ``False``.
        :return: ``True`` when the trial request succeeds; ``False`` when it fails
            with a non-``AppApiException`` error and ``raise_exception`` is falsy.
        :raises AppApiException: when the model type is not supported (regardless of
            ``raise_exception``), when the trial request itself raises an
            ``AppApiException``, or when any other failure occurs and
            ``raise_exception`` is truthy.
        """
        model_type_list = provider.get_model_type_list()
        if not any(mt.get('value') == model_type for mt in model_type_list):
            raise AppApiException(ValidCode.valid_error.value, f'{model_type} 模型类型不支持')
        # NOTE(review): valid_form is inherited; presumably it rejects missing
        # required fields before any network call is made - confirm in BaseForm.
        self.valid_form(model_credential)
        try:
            model = provider.get_model(model_type, model_name, model_credential)
            # A short trial request: any failure here means the credential
            # (or the model name) cannot be used.
            model.embed_query('你好')
        except AppApiException:
            # Application-level errors already carry a user-facing message.
            raise
        except Exception as e:
            if raise_exception:
                # Chain the original error so the root cause stays in the traceback.
                raise AppApiException(ValidCode.valid_error.value, f'校验失败,请检查参数是否正确: {str(e)}') from e
            return False
        return True

    def encryption_dict(self, model: Dict[str, object]):
        """Return a copy of ``model`` whose ``qianfan_sk`` value went through ``encryption``.

        All other keys (including ``qianfan_ak``) are copied unchanged. A missing
        ``qianfan_sk`` is treated as an empty string.

        :param model: credential dictionary.
        :return: new dictionary; ``model`` itself is not modified.
        """
        return {**model, 'qianfan_sk': super().encryption(model.get('qianfan_sk', ''))}

    # Form field for the Qianfan API Key (required).
    qianfan_ak = forms.PasswordInputField('API Key', required=True)

    # Form field for the Qianfan Secret Key (required).
    qianfan_sk = forms.PasswordInputField("Secret Key", required=True)
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# coding=utf-8
"""
@project: MaxKB
@Author:虎
@file: embedding.py
@date:2024/10/17 16:48
@desc: Baidu Qianfan embedding model wrapper
"""
from typing import Dict

from langchain_community.embeddings import QianfanEmbeddingsEndpoint

from setting.models_provider.base_model_provider import MaxKBBaseModel


class QianfanEmbeddings(MaxKBBaseModel, QianfanEmbeddingsEndpoint):
    """Embedding model class combining MaxKBBaseModel with QianfanEmbeddingsEndpoint."""

    @staticmethod
    def new_instance(model_type, model_name, model_credential: Dict[str, object], **model_kwargs):
        """Create a ``QianfanEmbeddings`` instance from a stored credential.

        :param model_type: accepted for interface compatibility; not used here.
        :param model_name: passed to the constructor as ``model``.
        :param model_credential: dictionary read for ``qianfan_ak`` and ``qianfan_sk``;
            a missing key yields ``None`` for that argument.
        :param model_kwargs: accepted for interface compatibility; not used here.
        :return: the constructed ``QianfanEmbeddings`` instance.
        """
        access_key = model_credential.get('qianfan_ak')
        secret_key = model_credential.get('qianfan_sk')
        return QianfanEmbeddings(model=model_name, qianfan_ak=access_key, qianfan_sk=secret_key)
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,14 @@
from common.util.file_util import get_file_content
from setting.models_provider.base_model_provider import ModelProvideInfo, ModelTypeConst, ModelInfo, IModelProvider, \
ModelInfoManage
from setting.models_provider.impl.wenxin_model_provider.credential.embedding import QianfanEmbeddingCredential
from setting.models_provider.impl.wenxin_model_provider.credential.llm import WenxinLLMModelCredential
from setting.models_provider.impl.wenxin_model_provider.model.embedding import QianfanEmbeddings
from setting.models_provider.impl.wenxin_model_provider.model.llm import QianfanChatModel
from smartdoc.conf import PROJECT_DIR

# Credential form instance passed to the LLM ModelInfo entries in this module.
win_xin_llm_model_credential = WenxinLLMModelCredential()
# Credential form instance passed to the embedding ModelInfo entry in this module.
qianfan_embedding_credential = QianfanEmbeddingCredential()
model_info_list = [ModelInfo('ERNIE-Bot-4',
'ERNIE-Bot-4是百度自行研发的大语言模型,覆盖海量中文数据,具有更强的对话问答、内容创作生成等能力。',
ModelTypeConst.LLM, win_xin_llm_model_credential, QianfanChatModel),
Expand All @@ -41,13 +44,16 @@
'千帆团队在Llama-2-7b基础上的中文增强版本,在CMMLU、C-EVAL等中文知识库上表现优异。',
ModelTypeConst.LLM, win_xin_llm_model_credential, QianfanChatModel)
]

# ModelInfo entry for the 'Embedding-V1' model: typed as ModelTypeConst.EMBEDDING and
# built with the embedding credential form and the QianfanEmbeddings model class.
embedding_model_info = ModelInfo('Embedding-V1',
'Embedding-V1是一个基于百度文心大模型技术的文本表示模型,可以将文本转化为用数值表示的向量形式,用于文本检索、信息推荐、知识挖掘等场景。 Embedding-V1提供了Embeddings接口,可以根据输入内容生成对应的向量表示。您可以通过调用该接口,将文本输入到模型中,获取到对应的向量表示,从而进行后续的文本处理和分析。',
ModelTypeConst.EMBEDDING, qianfan_embedding_credential, QianfanEmbeddings)
model_info_manage = ModelInfoManage.builder().append_model_info_list(model_info_list).append_default_model_info(
ModelInfo('ERNIE-Bot-4',
'ERNIE-Bot-4是百度自行研发的大语言模型,覆盖海量中文数据,具有更强的对话问答、内容创作生成等能力。',
ModelTypeConst.LLM,
win_xin_llm_model_credential,
QianfanChatModel)).build()
QianfanChatModel)).append_model_info(embedding_model_info).append_default_model_info(
embedding_model_info).build()


class WenxinModelProvider(IModelProvider):
Expand Down
Loading