diff --git a/comps/__init__.py b/comps/__init__.py
index 46f21b29b..624260e99 100644
--- a/comps/__init__.py
+++ b/comps/__init__.py
@@ -14,6 +14,8 @@
     LLMParamsDoc,
     SearchedDoc,
     TextDoc,
+    RAGASParams,
+    RAGASScores,
 )
 
 # Constants
diff --git a/comps/cores/mega/constants.py b/comps/cores/mega/constants.py
index bc0a97d48..35d36f215 100644
--- a/comps/cores/mega/constants.py
+++ b/comps/cores/mega/constants.py
@@ -25,6 +25,7 @@ class ServiceType(Enum):
     VECTORSTORE = 8
     DATAPREP = 9
     UNDEFINED = 10
+    RAGAS = 11
 
 
 class MegaServiceEndpoint(Enum):
@@ -47,6 +48,7 @@ class MegaServiceEndpoint(Enum):
     RETRIEVAL = "/v1/retrieval"
     RERANKING = "/v1/reranking"
     GUARDRAILS = "/v1/guardrails"
+    RAGAS = "/v1/ragas"
     # COMMON
     LIST_SERVICE = "/v1/list_service"
     LIST_PARAMETERS = "/v1/list_parameters"
@@ -65,6 +67,7 @@ class MicroServiceEndpoint(Enum):
     RETRIEVAL = "/v1/microservice/retrieval"
     RERANKING = "/v1/microservice/reranking"
    GUARDRAILS = "/v1/microservice/guardrails"
+    RAGAS = "/v1/microservice/ragas"
 
     def __str__(self):
         return self.value
diff --git a/comps/cores/proto/docarray.py b/comps/cores/proto/docarray.py
index 710c4add8..c657d0c2e 100644
--- a/comps/cores/proto/docarray.py
+++ b/comps/cores/proto/docarray.py
@@ -79,3 +79,25 @@ class LLMParams(BaseDoc):
     temperature: float = 0.01
     repetition_penalty: float = 1.03
     streaming: bool = True
+
+
+class RAGASParams(BaseDoc):
+    questions: DocList[TextDoc]
+    answers: DocList[TextDoc]
+    docs: DocList[TextDoc]
+    ground_truths: DocList[TextDoc]
+    # Generation settings forwarded to the judge LLM; defaults mirror LLMParams.
+    max_new_tokens: int = 1024
+    top_k: int = 10
+    top_p: float = 0.95
+    typical_p: float = 0.95
+    temperature: float = 0.01
+    repetition_penalty: float = 1.03
+    streaming: bool = True
+
+
+class RAGASScores(BaseDoc):
+    answer_relevancy: float
+    faithfulness: float
+    context_recall: float
+    context_precision: float
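For reference, a minimal sketch (not part of the diff) of how the new protos might be constructed on the client side; it assumes `TextDoc` carries a `text: str` field, as the existing protos in this file do:

```python
# Hypothetical usage sketch for the new protos; assumes `comps` is importable
# and that TextDoc(text=...) is the existing text proto.
from docarray import DocList

from comps import RAGASParams, RAGASScores, TextDoc

params = RAGASParams(
    questions=DocList[TextDoc]([TextDoc(text="What is OPEA?")]),
    answers=DocList[TextDoc]([TextDoc(text="OPEA is an open platform for enterprise AI.")]),
    docs=DocList[TextDoc]([TextDoc(text="OPEA provides GenAI microservices.")]),
    ground_truths=DocList[TextDoc]([TextDoc(text="An open enterprise AI platform.")]),
)

# The service replies with one averaged float per metric.
scores = RAGASScores(
    answer_relevancy=0.9, faithfulness=0.8, context_recall=0.7, context_precision=0.75
)
```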
diff --git a/comps/ragas/tgi/Dockerfile b/comps/ragas/tgi/Dockerfile
new file mode 100644
index 000000000..f38bc7849
--- /dev/null
+++ b/comps/ragas/tgi/Dockerfile
@@ -0,0 +1,26 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+FROM langchain/langchain:latest
+
+RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
+    libgl1-mesa-glx \
+    libjemalloc-dev \
+    vim
+
+RUN useradd -m -s /bin/bash user && \
+    mkdir -p /home/user && \
+    chown -R user /home/user/
+
+USER user
+
+COPY comps /home/user/comps
+
+RUN pip install --no-cache-dir --upgrade pip && \
+    pip install --no-cache-dir -r /home/user/comps/ragas/tgi/requirements.txt
+
+ENV PYTHONPATH=$PYTHONPATH:/home/user
+
+WORKDIR /home/user/comps/ragas/tgi/
+
+ENTRYPOINT ["python", "llm.py"]
diff --git a/comps/ragas/tgi/__init__.py b/comps/ragas/tgi/__init__.py
new file mode 100644
index 000000000..916f3a44b
--- /dev/null
+++ b/comps/ragas/tgi/__init__.py
@@ -0,0 +1,2 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
diff --git a/comps/ragas/tgi/build_docker.sh b/comps/ragas/tgi/build_docker.sh
new file mode 100644
index 000000000..acab3fae4
--- /dev/null
+++ b/comps/ragas/tgi/build_docker.sh
@@ -0,0 +1,6 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+git clone https://github.com/huggingface/tgi-gaudi.git
+cd ./tgi-gaudi/
+docker build -t ghcr.io/huggingface/tgi-gaudi:1.2.1 . --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy
diff --git a/comps/ragas/tgi/docker_compose_llm.yaml b/comps/ragas/tgi/docker_compose_llm.yaml
new file mode 100644
index 000000000..ebc6291af
--- /dev/null
+++ b/comps/ragas/tgi/docker_compose_llm.yaml
@@ -0,0 +1,33 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+version: "3.8"
+
+services:
+  tgi_service:
+    image: ghcr.io/huggingface/text-generation-inference:1.4
+    container_name: tgi-service
+    ports:
+      - "8008:80"
+    volumes:
+      - "./data:/data"
+    shm_size: 1g
+    command: --model-id ${LLM_MODEL_ID}
+  llm:
+    image: opea/gen-ai-comps:llm-tgi-server
+    container_name: llm-tgi-server
+    ports:
+      - "9050:9050"
+    ipc: host
+    environment:
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
+      TEI_ENDPOINT: ${TEI_ENDPOINT}
+      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
+    restart: unless-stopped
+
+networks:
+  default:
+    driver: bridge
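Once the containers are up, a call against the microservice might look roughly like the following untested sketch; the payload shape follows the RAGASParams proto and the port matches the registration in llm.py below:

```python
# Hypothetical client call: POST RAGASParams-shaped JSON to the ragas
# microservice and read back the four averaged metric scores.
import requests

payload = {
    "questions": [{"text": "What is OPEA?"}],
    "answers": [{"text": "OPEA is an open platform for enterprise AI."}],
    "docs": [{"text": "OPEA provides GenAI microservices."}],
    "ground_truths": [{"text": "An open enterprise AI platform."}],
}
response = requests.post("http://localhost:9050/v1/ragas", json=payload, timeout=600)
# Expected keys: answer_relevancy, faithfulness, context_recall, context_precision
print(response.json())
```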
diff --git a/comps/ragas/tgi/llm.py b/comps/ragas/tgi/llm.py
new file mode 100644
index 000000000..f31c66657
--- /dev/null
+++ b/comps/ragas/tgi/llm.py
@@ -0,0 +1,82 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+import os
+
+from datasets import Dataset
+from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceHubEmbeddings
+from langchain_community.llms import HuggingFaceEndpoint
+from langsmith import traceable
+from ragas import evaluate
+from ragas.metrics import answer_relevancy, context_precision, context_recall, faithfulness
+
+from comps import RAGASParams, RAGASScores, ServiceType, opea_microservices, register_microservice
+
+tei_embedding_endpoint = os.getenv("TEI_ENDPOINT")
+EMBED_MODEL = os.getenv("EMBED_MODEL", "BAAI/bge-base-en-v1.5")
+
+
+@register_microservice(
+    name="opea_service@ragas_tgi_llm",
+    service_type=ServiceType.RAGAS,
+    endpoint="/v1/ragas",
+    host="0.0.0.0",
+    port=9050,
+    input_datatype=RAGASParams,
+    output_datatype=RAGASScores,
+)
+@traceable(run_type="llm")
+def llm_generate(input: RAGASParams):
+    llm_endpoint = os.getenv("TGI_LLM_ENDPOINT", "http://localhost:8080")
+
+    # Create embeddings: use the TEI endpoint service if configured,
+    # otherwise fall back to a local embedding model.
+    if tei_embedding_endpoint:
+        embedder = HuggingFaceHubEmbeddings(model=tei_embedding_endpoint)
+    else:
+        embedder = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL)
+
+    llm = HuggingFaceEndpoint(
+        endpoint_url=llm_endpoint,
+        max_new_tokens=input.max_new_tokens,
+        top_k=input.top_k,
+        top_p=input.top_p,
+        typical_p=input.typical_p,
+        temperature=input.temperature,
+        repetition_penalty=input.repetition_penalty,
+        streaming=input.streaming,
+        timeout=600,
+    )
+
+    # ragas expects plain strings under the columns "question", "answer",
+    # "contexts" (a list of passages per sample), and "ground_truth".
+    data_collections = {
+        "question": [doc.text for doc in input.questions],
+        "answer": [doc.text for doc in input.answers],
+        "contexts": [[doc.text] for doc in input.docs],
+        "ground_truth": [doc.text for doc in input.ground_truths],
+    }
+    dataset = Dataset.from_dict(data_collections)
+
+    score = evaluate(
+        dataset,
+        metrics=[answer_relevancy, faithfulness, context_recall, context_precision],
+        llm=llm,
+        embeddings=embedder,
+    )
+    df = score.to_pandas()
+    answer_relevancy_average = df["answer_relevancy"].mean()
+    faithfulness_average = df["faithfulness"].mean()
+    context_recall_average = df["context_recall"].mean()
+    context_precision_average = df["context_precision"].mean()
+
+    return RAGASScores(
+        answer_relevancy=answer_relevancy_average,
+        faithfulness=faithfulness_average,
+        context_recall=context_recall_average,
+        context_precision=context_precision_average,
+    )
+
+
+if __name__ == "__main__":
+    opea_microservices["opea_service@ragas_tgi_llm"].start()
diff --git a/comps/ragas/tgi/requirements.txt b/comps/ragas/tgi/requirements.txt
new file mode 100644
index 000000000..d0a85eb44
--- /dev/null
+++ b/comps/ragas/tgi/requirements.txt
@@ -0,0 +1,13 @@
+datasets
+docarray[full]
+fastapi
+huggingface_hub
+langchain==0.1.16
+langserve
+langsmith
+opentelemetry-api
+opentelemetry-exporter-otlp
+opentelemetry-sdk
+ragas
+shortuuid
+transformers
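To clarify the aggregation step in llm.py above: ragas returns one row of metric values per evaluated sample, and the service reduces each metric column to its mean before packing the result into RAGASScores. A self-contained toy illustration (hypothetical values, not real evaluation output):

```python
# Toy illustration of the column-wise averaging performed in llm.py:
# per-sample metric rows collapse into a single RAGASScores-style summary.
import pandas as pd

df = pd.DataFrame(
    {
        "answer_relevancy": [0.91, 0.85],
        "faithfulness": [0.88, 0.90],
        "context_recall": [0.70, 0.80],
        "context_precision": [0.75, 0.65],
    }
)
summary = {metric: df[metric].mean() for metric in df.columns}
print(summary)  # {'answer_relevancy': 0.88, 'faithfulness': 0.89, ...}
```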