Skip to content

Commit b664edd

Browse files
authored
feat(rerankers): Add new backend, support jina rerankers API (#2121)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
1 parent e16658b commit b664edd

File tree

25 files changed

+628
-5
lines changed

25 files changed

+628
-5
lines changed

Diff for: .github/workflows/test-extra.yml

+31
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,37 @@ jobs:
7474
make --jobs=5 --output-sync=target -C backend/python/sentencetransformers
7575
make --jobs=5 --output-sync=target -C backend/python/sentencetransformers test
7676
77+
78+
tests-rerankers:
79+
runs-on: ubuntu-latest
80+
steps:
81+
- name: Clone
82+
uses: actions/checkout@v4
83+
with:
84+
submodules: true
85+
- name: Dependencies
86+
run: |
87+
sudo apt-get update
88+
sudo apt-get install build-essential ffmpeg
89+
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
90+
sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
91+
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
92+
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
93+
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
94+
sudo apt-get update && \
95+
sudo apt-get install -y conda
96+
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
97+
sudo apt-get install -y libopencv-dev
98+
pip install --user grpcio-tools
99+
100+
sudo rm -rfv /usr/bin/conda || true
101+
102+
- name: Test rerankers
103+
run: |
104+
export PATH=$PATH:/opt/conda/bin
105+
make --jobs=5 --output-sync=target -C backend/python/rerankers
106+
make --jobs=5 --output-sync=target -C backend/python/rerankers test
107+
77108
tests-diffusers:
78109
runs-on: ubuntu-latest
79110
steps:

Diff for: Dockerfile

+4-1
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ ARG TARGETVARIANT
1616

1717
ENV BUILD_TYPE=${BUILD_TYPE}
1818
ENV DEBIAN_FRONTEND=noninteractive
19-
ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,petals:/build/backend/python/petals/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,exllama:/build/backend/python/exllama/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh"
19+
ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,petals:/build/backend/python/petals/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,exllama:/build/backend/python/exllama/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh"
2020

2121
ARG GO_TAGS="stablediffusion tinydream tts"
2222

@@ -259,6 +259,9 @@ RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
259259
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
260260
make -C backend/python/sentencetransformers \
261261
; fi
262+
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
263+
make -C backend/python/rerankers \
264+
; fi
262265
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
263266
make -C backend/python/transformers \
264267
; fi

Diff for: Makefile

+11-2
Original file line numberDiff line numberDiff line change
@@ -437,10 +437,10 @@ protogen-go-clean:
437437
$(RM) bin/*
438438

439439
.PHONY: protogen-python
440-
protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama-protogen exllama2-protogen mamba-protogen petals-protogen sentencetransformers-protogen transformers-protogen parler-tts-protogen transformers-musicgen-protogen vall-e-x-protogen vllm-protogen
440+
protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama-protogen exllama2-protogen mamba-protogen petals-protogen rerankers-protogen sentencetransformers-protogen transformers-protogen parler-tts-protogen transformers-musicgen-protogen vall-e-x-protogen vllm-protogen
441441

442442
.PHONY: protogen-python-clean
443-
protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama-protogen-clean exllama2-protogen-clean mamba-protogen-clean petals-protogen-clean sentencetransformers-protogen-clean transformers-protogen-clean transformers-musicgen-protogen-clean parler-tts-protogen-clean vall-e-x-protogen-clean vllm-protogen-clean
443+
protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama-protogen-clean exllama2-protogen-clean mamba-protogen-clean petals-protogen-clean sentencetransformers-protogen-clean rerankers-protogen-clean transformers-protogen-clean transformers-musicgen-protogen-clean parler-tts-protogen-clean vall-e-x-protogen-clean vllm-protogen-clean
444444

445445
.PHONY: autogptq-protogen
446446
autogptq-protogen:
@@ -506,6 +506,14 @@ petals-protogen:
506506
petals-protogen-clean:
507507
$(MAKE) -C backend/python/petals protogen-clean
508508

509+
.PHONY: rerankers-protogen
510+
rerankers-protogen:
511+
$(MAKE) -C backend/python/rerankers protogen
512+
513+
.PHONY: rerankers-protogen-clean
514+
rerankers-protogen-clean:
515+
$(MAKE) -C backend/python/rerankers protogen-clean
516+
509517
.PHONY: sentencetransformers-protogen
510518
sentencetransformers-protogen:
511519
$(MAKE) -C backend/python/sentencetransformers protogen
@@ -564,6 +572,7 @@ prepare-extra-conda-environments: protogen-python
564572
$(MAKE) -C backend/python/vllm
565573
$(MAKE) -C backend/python/mamba
566574
$(MAKE) -C backend/python/sentencetransformers
575+
$(MAKE) -C backend/python/rerankers
567576
$(MAKE) -C backend/python/transformers
568577
$(MAKE) -C backend/python/transformers-musicgen
569578
$(MAKE) -C backend/python/parler-tts

Diff for: aio/cpu/rerank.yaml

+27
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
name: jina-reranker-v1-base-en
2+
backend: rerankers
3+
parameters:
4+
model: cross-encoder
5+
6+
usage: |
7+
You can test this model with curl like this:
8+
9+
curl http://localhost:8080/v1/rerank \
10+
-H "Content-Type: application/json" \
11+
-d '{
12+
"model": "jina-reranker-v1-base-en",
13+
"query": "Organic skincare products for sensitive skin",
14+
"documents": [
15+
"Eco-friendly kitchenware for modern homes",
16+
"Biodegradable cleaning supplies for eco-conscious consumers",
17+
"Organic cotton baby clothes for sensitive skin",
18+
"Natural organic skincare range for sensitive skin",
19+
"Tech gadgets for smart homes: 2024 edition",
20+
"Sustainable gardening tools and compost solutions",
21+
"Sensitive skin-friendly facial cleansers and toners",
22+
"Organic food wraps and storage solutions",
23+
"All-natural pet food for dogs with allergies",
24+
"Yoga mats made from recycled materials"
25+
],
26+
"top_n": 3
27+
}'

Diff for: aio/entrypoint.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,7 @@ detect_gpu
129129
detect_gpu_size
130130

131131
PROFILE="${PROFILE:-$GPU_SIZE}" # default to cpu
132-
export MODELS="${MODELS:-/aio/${PROFILE}/embeddings.yaml,/aio/${PROFILE}/text-to-speech.yaml,/aio/${PROFILE}/image-gen.yaml,/aio/${PROFILE}/text-to-text.yaml,/aio/${PROFILE}/speech-to-text.yaml,/aio/${PROFILE}/vision.yaml}"
132+
export MODELS="${MODELS:-/aio/${PROFILE}/embeddings.yaml,/aio/${PROFILE}/rerank.yaml,/aio/${PROFILE}/text-to-speech.yaml,/aio/${PROFILE}/image-gen.yaml,/aio/${PROFILE}/text-to-text.yaml,/aio/${PROFILE}/speech-to-text.yaml,/aio/${PROFILE}/vision.yaml}"
133133

134134
check_vars
135135

Diff for: aio/gpu-8g/rerank.yaml

+27
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
name: jina-reranker-v1-base-en
2+
backend: rerankers
3+
parameters:
4+
model: cross-encoder
5+
6+
usage: |
7+
You can test this model with curl like this:
8+
9+
curl http://localhost:8080/v1/rerank \
10+
-H "Content-Type: application/json" \
11+
-d '{
12+
"model": "jina-reranker-v1-base-en",
13+
"query": "Organic skincare products for sensitive skin",
14+
"documents": [
15+
"Eco-friendly kitchenware for modern homes",
16+
"Biodegradable cleaning supplies for eco-conscious consumers",
17+
"Organic cotton baby clothes for sensitive skin",
18+
"Natural organic skincare range for sensitive skin",
19+
"Tech gadgets for smart homes: 2024 edition",
20+
"Sustainable gardening tools and compost solutions",
21+
"Sensitive skin-friendly facial cleansers and toners",
22+
"Organic food wraps and storage solutions",
23+
"All-natural pet food for dogs with allergies",
24+
"Yoga mats made from recycled materials"
25+
],
26+
"top_n": 3
27+
}'

Diff for: aio/intel/rerank.yaml

+27
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
name: jina-reranker-v1-base-en
2+
backend: rerankers
3+
parameters:
4+
model: cross-encoder
5+
6+
usage: |
7+
You can test this model with curl like this:
8+
9+
curl http://localhost:8080/v1/rerank \
10+
-H "Content-Type: application/json" \
11+
-d '{
12+
"model": "jina-reranker-v1-base-en",
13+
"query": "Organic skincare products for sensitive skin",
14+
"documents": [
15+
"Eco-friendly kitchenware for modern homes",
16+
"Biodegradable cleaning supplies for eco-conscious consumers",
17+
"Organic cotton baby clothes for sensitive skin",
18+
"Natural organic skincare range for sensitive skin",
19+
"Tech gadgets for smart homes: 2024 edition",
20+
"Sustainable gardening tools and compost solutions",
21+
"Sensitive skin-friendly facial cleansers and toners",
22+
"Organic food wraps and storage solutions",
23+
"All-natural pet food for dogs with allergies",
24+
"Yoga mats made from recycled materials"
25+
],
26+
"top_n": 3
27+
}'

Diff for: backend/backend.proto

+24
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,30 @@ service Backend {
2323
rpc StoresDelete(StoresDeleteOptions) returns (Result) {}
2424
rpc StoresGet(StoresGetOptions) returns (StoresGetResult) {}
2525
rpc StoresFind(StoresFindOptions) returns (StoresFindResult) {}
26+
27+
rpc Rerank(RerankRequest) returns (RerankResult) {}
28+
}
29+
30+
message RerankRequest {
31+
string query = 1;
32+
repeated string documents = 2;
33+
int32 top_n = 3;
34+
}
35+
36+
message RerankResult {
37+
Usage usage = 1;
38+
repeated DocumentResult results = 2;
39+
}
40+
41+
message Usage {
42+
int32 total_tokens = 1;
43+
int32 prompt_tokens = 2;
44+
}
45+
46+
message DocumentResult {
47+
int32 index = 1;
48+
string text = 2;
49+
float relevance_score = 3;
2650
}
2751

2852
message StoresKey {

Diff for: backend/python/common-env/transformers/transformers-nvidia.yml

+2
Original file line numberDiff line numberDiff line change
@@ -120,4 +120,6 @@ dependencies:
120120
- transformers>=4.38.2 # Updated Version
121121
- transformers_stream_generator==0.0.5
122122
- xformers==0.0.23.post1
123+
- rerankers[transformers]
124+
- pydantic
123125
prefix: /opt/conda/envs/transformers

Diff for: backend/python/common-env/transformers/transformers-rocm.yml

+2
Original file line numberDiff line numberDiff line change
@@ -108,4 +108,6 @@ dependencies:
108108
- transformers>=4.38.2 # Updated Version
109109
- transformers_stream_generator==0.0.5
110110
- xformers==0.0.23.post1
111+
- rerankers[transformers]
112+
- pydantic
111113
prefix: /opt/conda/envs/transformers

Diff for: backend/python/common-env/transformers/transformers.yml

+3-1
Original file line numberDiff line numberDiff line change
@@ -111,5 +111,7 @@ dependencies:
111111
- vllm>=0.4.0
112112
- transformers>=4.38.2 # Updated Version
113113
- transformers_stream_generator==0.0.5
114-
- xformers==0.0.23.post1
114+
- xformers==0.0.23.post1
115+
- rerankers[transformers]
116+
- pydantic
115117
prefix: /opt/conda/envs/transformers

Diff for: backend/python/rerankers/Makefile

+27
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
.PHONY: rerankers
rerankers: protogen
	$(MAKE) -C ../common-env/transformers


.PHONY: run
run: protogen
	@echo "Running rerankers..."
	bash run.sh
	@echo "rerankers run."

# NOTE: running this target from the command line is unreliable; it only
# works well from an IDE such as VSCode.
.PHONY: test
test: protogen
	@echo "Testing rerankers..."
	bash test.sh
	@echo "rerankers tested."

.PHONY: protogen
protogen: backend_pb2_grpc.py backend_pb2.py

.PHONY: protogen-clean
protogen-clean:
	$(RM) backend_pb2_grpc.py backend_pb2.py

backend_pb2_grpc.py backend_pb2.py:
	python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto

Diff for: backend/python/rerankers/README.md

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# Creating a separate environment for the reranker project
2+
3+
```
4+
make rerankers
5+
```

Diff for: backend/python/rerankers/reranker.py

+123
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Extra gRPC server for Rerankers models.
4+
"""
5+
from concurrent import futures
6+
7+
import argparse
8+
import signal
9+
import sys
10+
import os
11+
12+
import time
13+
import backend_pb2
14+
import backend_pb2_grpc
15+
16+
import grpc
17+
18+
from rerankers import Reranker
19+
20+
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
21+
22+
# If MAX_WORKERS are specified in the environment use it, otherwise default to 1
23+
MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
24+
25+
# Implement the BackendServicer class with the service methods
class BackendServicer(backend_pb2_grpc.BackendServicer):
    """
    A gRPC servicer for the rerankers backend.

    Implements the Health, LoadModel, and Rerank RPCs, delegating the
    actual reranking to a model loaded through the `rerankers` library.
    """

    def Health(self, request, context):
        """
        A gRPC method that returns the health status of the backend service.

        Args:
            request: A HealthRequest object that contains the request parameters.
            context: A grpc.ServicerContext object that provides information about the RPC.

        Returns:
            A Reply object that contains the health status of the backend service.
        """
        return backend_pb2.Reply(message=bytes("OK", 'utf-8'))

    def LoadModel(self, request, context):
        """
        A gRPC method that loads a reranker model into memory.

        Args:
            request: A LoadModelRequest object that contains the request parameters.
            context: A grpc.ServicerContext object that provides information about the RPC.

        Returns:
            A Result object with success=True on load, or success=False and the
            error message if construction of the Reranker raised.
        """
        model_name = request.Model
        try:
            kwargs = {}
            if request.Type != "":
                kwargs['model_type'] = request.Type
            if request.PipelineType != "":  # Reuse the PipelineType field for language
                kwargs['lang'] = request.PipelineType
            self.model_name = model_name
            self.model = Reranker(model_name, **kwargs)
        except Exception as err:
            return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")

        return backend_pb2.Result(message="Model loaded successfully", success=True)

    def Rerank(self, request, context):
        """
        A gRPC method that ranks the request's documents against its query.

        Args:
            request: A RerankRequest with `query` and repeated `documents`.
            context: A grpc.ServicerContext object that provides information about the RPC.

        Returns:
            A RerankResult with one DocumentResult per input document and an
            estimated Usage block.
        """
        # Materialize the protobuf repeated field as a plain list; doc_ids
        # preserve the original request order so `index` maps back to input.
        documents = list(request.documents)
        ranked_results = self.model.rank(
            query=request.query,
            docs=documents,
            doc_ids=list(range(len(documents))),
        )
        # Prepare results to return
        results = [
            backend_pb2.DocumentResult(
                index=res.doc_id,
                text=res.text,
                relevance_score=res.score,
            )
            for res in ranked_results.results
        ]

        # Rough whitespace-token estimate of usage.
        # TODO: Implement the usage calculation with reranker
        total_tokens = sum(len(doc.split()) for doc in documents) + len(request.query.split())
        prompt_tokens = len(request.query.split())
        usage = backend_pb2.Usage(total_tokens=total_tokens, prompt_tokens=prompt_tokens)
        return backend_pb2.RerankResult(usage=usage, results=results)
93+
def serve(address):
    """Start the gRPC backend server on *address* and block until shutdown."""
    server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
    backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
    server.add_insecure_port(address)
    server.start()
    print(f"Server started. Listening on: {address}", file=sys.stderr)

    # Stop the server and exit cleanly on SIGINT/SIGTERM.
    def signal_handler(sig, frame):
        print("Received termination signal. Shutting down...")
        server.stop(0)
        sys.exit(0)

    for signum in (signal.SIGINT, signal.SIGTERM):
        signal.signal(signum, signal_handler)

    # Park the main thread; gRPC serves requests on its own worker threads.
    try:
        while True:
            time.sleep(_ONE_DAY_IN_SECONDS)
    except KeyboardInterrupt:
        server.stop(0)
116+
if __name__ == "__main__":
    # Parse the bind address from the CLI and launch the blocking server.
    cli = argparse.ArgumentParser(description="Run the gRPC server.")
    cli.add_argument(
        "--addr", default="localhost:50051", help="The address to bind the server to."
    )
    options = cli.parse_args()

    serve(options.addr)

0 commit comments

Comments
 (0)