From a7472d37b08a80f56b1956630a6a5eb7b8b61f5d Mon Sep 17 00:00:00 2001
From: liwenshi
Date: Fri, 24 May 2024 12:33:34 +0800
Subject: [PATCH 1/4] feat: support huggingface/text-embeddings-inference for faster embedding inference

---
 modelcache/embedding/__init__.py                |  4 +++
 .../embedding/text_embeddings_inference.py      | 31 +++++++++++++++++++
 2 files changed, 35 insertions(+)
 create mode 100644 modelcache/embedding/text_embeddings_inference.py

diff --git a/modelcache/embedding/__init__.py b/modelcache/embedding/__init__.py
index eb6ca80..410c92a 100644
--- a/modelcache/embedding/__init__.py
+++ b/modelcache/embedding/__init__.py
@@ -6,6 +6,7 @@
 fasttext = LazyImport("fasttext", globals(), "modelcache.embedding.fasttext")
 paddlenlp = LazyImport("paddlenlp", globals(), "modelcache.embedding.paddlenlp")
 timm = LazyImport("timm", globals(), "modelcache.embedding.timm")
+text_embeddings_inference = LazyImport("text_embeddings_inference", globals(), "modelcache.embedding.text_embeddings_inference")
 
 
 def Huggingface(model="sentence-transformers/all-mpnet-base-v2"):
@@ -30,3 +31,6 @@ def PaddleNLP(model="ernie-3.0-medium-zh"):
 
 def Timm(model="resnet50", device="default"):
     return timm.Timm(model, device)
+
+def TextEmbeddingsInference(base_url, model):
+    return text_embeddings_inference.TextEmbeddingsInference(base_url, model)
\ No newline at end of file
diff --git a/modelcache/embedding/text_embeddings_inference.py b/modelcache/embedding/text_embeddings_inference.py
new file mode 100644
index 0000000..87b34aa
--- /dev/null
+++ b/modelcache/embedding/text_embeddings_inference.py
@@ -0,0 +1,31 @@
+# -*- coding: utf-8 -*-
+import requests
+import numpy as np
+from modelcache.embedding.base import BaseEmbedding
+
+class TextEmbeddingsInference(BaseEmbedding):
+    def __init__(self, base_url: str, model: str):
+        self.base_url = base_url
+        self.model = model
+        self.headers = {
+            'accept': 'application/json',
+            'Content-Type': 'application/json',
+        }
+        self.__dimension = self.to_embeddings('test').shape[0]
+    def to_embeddings(self, data, **_):
+        json_data = {
+            'input': data,
+            'model': self.model,
+        }
+
+        response = requests.post(self.base_url, headers=self.headers, json=json_data)
+        embedding = response.json()['data'][0]['embedding']
+        return np.array(embedding)
+
+    @property
+    def dimension(self):
+        """Embedding dimension.
+
+        :return: embedding dimension
+        """
+        return self.__dimension
From 01acdcdf0948717ac8d9de4c5be3f515b1148686 Mon Sep 17 00:00:00 2001
From: liwenshipro
Date: Sat, 25 May 2024 09:37:27 +0800
Subject: [PATCH 2/4] fix: rename huggingface TEI class

---
 modelcache/embedding/__init__.py                          | 4 ++--
 .../{text_embeddings_inference.py => huggingface_tei.py}  | 3 ++-
 2 files changed, 4 insertions(+), 3 deletions(-)
 rename modelcache/embedding/{text_embeddings_inference.py => huggingface_tei.py} (95%)

diff --git a/modelcache/embedding/__init__.py b/modelcache/embedding/__init__.py
index 410c92a..b61372c 100644
--- a/modelcache/embedding/__init__.py
+++ b/modelcache/embedding/__init__.py
@@ -6,7 +6,7 @@
 fasttext = LazyImport("fasttext", globals(), "modelcache.embedding.fasttext")
 paddlenlp = LazyImport("paddlenlp", globals(), "modelcache.embedding.paddlenlp")
 timm = LazyImport("timm", globals(), "modelcache.embedding.timm")
-text_embeddings_inference = LazyImport("text_embeddings_inference", globals(), "modelcache.embedding.text_embeddings_inference")
+huggingface_tei = LazyImport("huggingface_tei", globals(), "modelcache.embedding.huggingface_tei")
 
 
 def Huggingface(model="sentence-transformers/all-mpnet-base-v2"):
@@ -33,4 +33,4 @@ def Timm(model="resnet50", device="default"):
     return timm.Timm(model, device)
 
 def TextEmbeddingsInference(base_url, model):
-    return text_embeddings_inference.TextEmbeddingsInference(base_url, model)
\ No newline at end of file
+    return huggingface_tei.HuggingfaceTEI(base_url, model)
\ No newline at end of file
diff --git a/modelcache/embedding/text_embeddings_inference.py b/modelcache/embedding/huggingface_tei.py
similarity index 95%
rename from modelcache/embedding/text_embeddings_inference.py
rename to modelcache/embedding/huggingface_tei.py
index 87b34aa..94075fe 100644
--- a/modelcache/embedding/text_embeddings_inference.py
+++ b/modelcache/embedding/huggingface_tei.py
@@ -3,7 +3,7 @@
 import numpy as np
 from modelcache.embedding.base import BaseEmbedding
 
-class TextEmbeddingsInference(BaseEmbedding):
+class HuggingfaceTEI(BaseEmbedding):
     def __init__(self, base_url: str, model: str):
         self.base_url = base_url
         self.model = model
@@ -12,6 +12,7 @@ def __init__(self, base_url: str, model: str):
             'Content-Type': 'application/json',
         }
         self.__dimension = self.to_embeddings('test').shape[0]
+
     def to_embeddings(self, data, **_):
         json_data = {
             'input': data,

From 18d70dfee2a4f33e760a78581a57d0688cad0073 Mon Sep 17 00:00:00 2001
From: liwenshipro
Date: Sat, 25 May 2024 10:28:12 +0800
Subject: [PATCH 3/4] add huggingface tei example

---
 examples/embedding/huggingface_tei_example.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)
 create mode 100644 examples/embedding/huggingface_tei_example.py

diff --git a/examples/embedding/huggingface_tei_example.py b/examples/embedding/huggingface_tei_example.py
new file mode 100644
index 0000000..152834f
--- /dev/null
+++ b/examples/embedding/huggingface_tei_example.py
@@ -0,0 +1,17 @@
+# -*- coding: utf-8 -*-
+import sys
+sys.path.append(".")
+from modelcache.embedding.huggingface_tei import HuggingfaceTEI
+
+'''
+run tei server:
+text-embeddings-router --model-id BAAI/bge-large-zh-v1.5 --port 8080
+'''
+
+def run():
+    tei_instance = HuggingfaceTEI('http://127.0.0.1:8080/v1/embeddings', 'BAAI/bge-large-zh-v1.5')
+    print('dimension', tei_instance.dimension)
+    print('embedding', tei_instance.to_embeddings('hello'))
+
+if __name__ == '__main__':
+    run()
\ No newline at end of file
From 7605a824251a4845a2f892891a8cffcea71c7db5 Mon Sep 17 00:00:00 2001
From: liwenshipro
Date: Sat, 25 May 2024 10:50:23 +0800
Subject: [PATCH 4/4] fix: rename huggingface tei

---
 modelcache/embedding/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modelcache/embedding/__init__.py b/modelcache/embedding/__init__.py
index b61372c..5684a2d 100644
--- a/modelcache/embedding/__init__.py
+++ b/modelcache/embedding/__init__.py
@@ -32,5 +32,5 @@ def PaddleNLP(model="ernie-3.0-medium-zh"):
 def Timm(model="resnet50", device="default"):
     return timm.Timm(model, device)
 
-def TextEmbeddingsInference(base_url, model):
+def HuggingfaceTEI(base_url, model):
     return huggingface_tei.HuggingfaceTEI(base_url, model)
\ No newline at end of file
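
Usage note: with all four patches applied, the TEI-backed embedding is reachable through the HuggingfaceTEI factory exported from modelcache/embedding/__init__.py. Below is a minimal usage sketch, assuming a TEI server is already running on the /v1/embeddings endpoint exactly as in the example patch above; the response shape mentioned in the comments is inferred from what to_embeddings() parses, not from TEI documentation.

# -*- coding: utf-8 -*-
# Minimal usage sketch. Assumes a TEI server started as in the example patch:
#   text-embeddings-router --model-id BAAI/bge-large-zh-v1.5 --port 8080
from modelcache.embedding import HuggingfaceTEI  # factory renamed in PATCH 4/4

# The constructor immediately embeds the string 'test' to probe the vector size,
# so the server must be reachable at construction time.
tei = HuggingfaceTEI('http://127.0.0.1:8080/v1/embeddings', 'BAAI/bge-large-zh-v1.5')

# to_embeddings() POSTs {"input": <text>, "model": <model>} and reads
# response.json()['data'][0]['embedding'], i.e. it expects an OpenAI-style
# payload roughly of the form {"data": [{"embedding": [...]}], ...}.
vector = tei.to_embeddings('hello')
print('dimension:', tei.dimension)    # cached from the probe in __init__
print('vector shape:', vector.shape)  # numpy array of length `dimension`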