enable ragas (#129)
Signed-off-by: XuhuiRen <xuhui.ren@intel.com>
XuhuiRen authored Jun 8, 2024
1 parent 38f6461 commit 8a670ee
Showing 9 changed files with 187 additions and 0 deletions.
2 changes: 2 additions & 0 deletions comps/__init__.py
@@ -14,6 +14,8 @@
    LLMParamsDoc,
    SearchedDoc,
    TextDoc,
    RAGASParams,
    RAGASScores,
)

# Constants
3 changes: 3 additions & 0 deletions comps/cores/mega/constants.py
@@ -25,6 +25,7 @@ class ServiceType(Enum):
    VECTORSTORE = 8
    DATAPREP = 9
    UNDEFINED = 10
    RAGAS = 11


class MegaServiceEndpoint(Enum):
@@ -47,6 +48,7 @@ class MegaServiceEndpoint(Enum):
    RETRIEVAL = "/v1/retrieval"
    RERANKING = "/v1/reranking"
    GUARDRAILS = "/v1/guardrails"
    RAGAS = "/v1/ragas"
    # COMMON
    LIST_SERVICE = "/v1/list_service"
    LIST_PARAMETERS = "/v1/list_parameters"
@@ -65,6 +67,7 @@ class MicroServiceEndpoint(Enum):
    RETRIEVAL = "/v1/microservice/retrieval"
    RERANKING = "/v1/microservice/reranking"
    GUARDRAILS = "/v1/microservice/guardrails"
    RAGAS = "/v1/microservice/ragas"

    def __str__(self):
        return self.value
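
Since __str__ is overridden to return the enum's value, the new members can be used directly wherever a raw route string is expected. A minimal sketch, assuming the module path mirrors the file path above:

from comps.cores.mega.constants import MegaServiceEndpoint, ServiceType

# __str__ returns the underlying route string, per the override above
assert str(MegaServiceEndpoint.RAGAS) == "/v1/ragas"
assert ServiceType.RAGAS.value == 11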
14 changes: 14 additions & 0 deletions comps/cores/proto/docarray.py
@@ -79,3 +79,17 @@ class LLMParams(BaseDoc):
    temperature: float = 0.01
    repetition_penalty: float = 1.03
    streaming: bool = True


class RAGASParams(BaseDoc):
    questions: DocList[TextDoc]
    answers: DocList[TextDoc]
    docs: DocList[TextDoc]
    ground_truths: DocList[TextDoc]


class RAGASScores(BaseDoc):
    answer_relevancy: float
    faithfulness: float
    context_recall: float
    context_precision: float
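
With these schemas exported from comps, a request payload can be built from plain strings. A minimal sketch, assuming TextDoc wraps a single text: str field:

from docarray import DocList

from comps import RAGASParams, TextDoc

# one entry per evaluation sample; all four lists must be the same length
params = RAGASParams(
    questions=DocList[TextDoc]([TextDoc(text="What is TGI?")]),
    answers=DocList[TextDoc]([TextDoc(text="A server for LLM text generation.")]),
    docs=DocList[TextDoc]([TextDoc(text="TGI is Hugging Face's text-generation-inference server.")]),
    ground_truths=DocList[TextDoc]([TextDoc(text="An inference server for text generation.")]),
)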
26 changes: 26 additions & 0 deletions comps/ragas/tgi/Dockerfile
@@ -0,0 +1,26 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

FROM langchain/langchain:latest

RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
    libgl1-mesa-glx \
    libjemalloc-dev \
    vim

RUN useradd -m -s /bin/bash user && \
    mkdir -p /home/user && \
    chown -R user /home/user/

USER user

COPY comps /home/user/comps

RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r /home/user/comps/ragas/tgi/requirements.txt

ENV PYTHONPATH=$PYTHONPATH:/home/user

WORKDIR /home/user/comps/ragas/tgi/

ENTRYPOINT ["python", "llm.py"]
2 changes: 2 additions & 0 deletions comps/ragas/tgi/__init__.py
@@ -0,0 +1,2 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
8 changes: 8 additions & 0 deletions comps/ragas/tgi/build_docker.sh
@@ -0,0 +1,8 @@
#!/bin/bash

# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

git clone https://github.com/huggingface/tgi-gaudi.git
cd ./tgi-gaudi/
docker build -t ghcr.io/huggingface/tgi-gaudi:1.2.1 . --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy
33 changes: 33 additions & 0 deletions comps/ragas/tgi/docker_compose_llm.yaml
@@ -0,0 +1,33 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

version: "3.8"

services:
  tgi_service:
    image: ghcr.io/huggingface/text-generation-inference:1.4
    container_name: tgi-service
    ports:
      - "8008:80"
    volumes:
      - "./data:/data"
    shm_size: 1g
    command: --model-id ${LLM_MODEL_ID}
  llm:
    image: opea/gen-ai-comps:llm-tgi-server
    container_name: llm-tgi-server
    ports:
      - "9000:9000"
    ipc: host
    environment:
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
      TEI_ENDPOINT: ${TEI_ENDPOINT}
      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
    restart: unless-stopped

networks:
  default:
    driver: bridge
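
Once the stack is up, the TGI endpoint published on host port 8008 can be smoke-tested before the evaluator is pointed at it. A hedged sketch against TGI's /generate route, with the URL assumed from the port mapping above:

import requests

TGI_URL = "http://localhost:8008"  # host port mapped to the tgi_service container

resp = requests.post(
    f"{TGI_URL}/generate",
    json={"inputs": "Hello", "parameters": {"max_new_tokens": 16}},
    timeout=60,
)
resp.raise_for_status()
print(resp.json())  # {"generated_text": "..."}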
86 changes: 86 additions & 0 deletions comps/ragas/tgi/llm.py
@@ -0,0 +1,86 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

import os

from datasets import Dataset
from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceHubEmbeddings
from langchain_community.llms import HuggingFaceEndpoint
from langsmith import traceable
from ragas import evaluate
from ragas.metrics import answer_relevancy, context_precision, context_recall, faithfulness

from comps import RAGASParams, RAGASScores, ServiceType, opea_microservices, register_microservice

tei_embedding_endpoint = os.getenv("TEI_ENDPOINT")
EMBED_MODEL = os.getenv("EMBED_MODEL", "BAAI/bge-base-en-v1.5")


@register_microservice(
    name="opea_service@ragas_tgi_llm",
    service_type=ServiceType.RAGAS,
    endpoint="/v1/ragas",
    host="0.0.0.0",
    port=9050,
    input_datatype=RAGASParams,
    output_datatype=RAGASScores,
)
@traceable(run_type="llm")
def ragas_evaluate(input: RAGASParams):
    llm_endpoint = os.getenv("TGI_LLM_ENDPOINT", "http://localhost:8080")

    # Create the embedder used by the context metrics
    if tei_embedding_endpoint:
        # create embeddings using the TEI endpoint service
        embedder = HuggingFaceHubEmbeddings(model=tei_embedding_endpoint)
    else:
        # create embeddings using a local embedding model
        embedder = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL)

    # RAGASParams carries no generation settings (see docarray.py above), so
    # fixed defaults are used here; values other than temperature and
    # repetition_penalty (which mirror LLMParams) are assumptions
    llm = HuggingFaceEndpoint(
        endpoint_url=llm_endpoint,
        max_new_tokens=1024,
        top_k=10,
        top_p=0.95,
        typical_p=0.95,
        temperature=0.01,
        repetition_penalty=1.03,
        streaming=False,  # evaluation consumes complete generations, not a stream
        timeout=600,
    )

    # ragas expects plain strings per sample, with a list of context strings
    # for each entry under the "contexts" column
    data_collections = {
        "question": [doc.text for doc in input.questions],
        "answer": [doc.text for doc in input.answers],
        "contexts": [[doc.text] for doc in input.docs],
        "ground_truth": [doc.text for doc in input.ground_truths],
    }
    dataset = Dataset.from_dict(data_collections)

    score = evaluate(
        dataset,
        metrics=[answer_relevancy, faithfulness, context_recall, context_precision],
        llm=llm,
        embeddings=embedder,
    )
    df = score.to_pandas()
    answer_relevancy_average = df["answer_relevancy"].mean()
    faithfulness_average = df["faithfulness"].mean()
    context_recall_average = df["context_recall"].mean()
    context_precision_average = df["context_precision"].mean()

    return RAGASScores(
        answer_relevancy=answer_relevancy_average,
        faithfulness=faithfulness_average,
        context_recall=context_recall_average,
        context_precision=context_precision_average,
    )


if __name__ == "__main__":
    opea_microservices["opea_service@ragas_tgi_llm"].start()
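
With the microservice running, scores can be requested over HTTP. A hedged client sketch: the field names follow RAGASParams, and the wire format of each DocList entry is assumed to be a {"text": ...} object:

import requests

payload = {
    "questions": [{"text": "What does TGI serve?"}],
    "answers": [{"text": "Large language models."}],
    "docs": [{"text": "TGI serves LLMs for text generation."}],
    "ground_truths": [{"text": "It serves large language models."}],
}

# port 9050 and route /v1/ragas come from the @register_microservice decorator
resp = requests.post("http://localhost:9050/v1/ragas", json=payload, timeout=1200)
resp.raise_for_status()
print(resp.json())  # answer_relevancy, faithfulness, context_recall, context_precision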
13 changes: 13 additions & 0 deletions comps/ragas/tgi/requirements.txt
@@ -0,0 +1,13 @@
datasets
docarray[full]
fastapi
huggingface_hub
langchain==0.1.16
langserve
langsmith
opentelemetry-api
opentelemetry-exporter-otlp
opentelemetry-sdk
ragas
shortuuid
transformers
