Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adds NUA REMi endpoint to Nuclia SDK #134

Merged
merged 6 commits into from
Dec 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
## 4.3.12 (unreleased)


- Nothing changed yet.
- Feature: Add NUA REMi endpoints


## 4.3.11 (2024-12-18)
Expand Down
28 changes: 28 additions & 0 deletions docs/07-nua.md
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,34 @@ res = predict.rephrase(
)
```

It can also evaluate a RAG Experience using our REMi model, which computes an Answer Relevance score for the generated answer, plus Context Relevance and Groundedness scores for each context. More information about the REMi model can be found [here](https://nuclia.com/developers/remi-open-source-rag-evaluation-model/).

- CLI:

```bash
nuclia nua predict remi --user_id="Nuclia PY CLI" --question="What is the capital of France?" --answer="Paris is the capital of France" --contexts='["Paris is the capital of France.", "Berlin is the capital of Germany."]'
> time=1.0570876598358154 answer_relevance=AnswerRelevance(score=5, reason='The response is accurate and directly answers the query completely.') context_relevance=[5, 0] groundedness=[5, 0]
```

- SDK:

```python
from nuclia import sdk
from nuclia_models.predict.remi import RemiRequest
predict = sdk.NucliaPredict()
predict.remi(
RemiRequest(
user_id="Nuclia PY CLI",
question="What is the capital of France?",
answer="Paris is the capital of France!",
contexts=[
"Paris is the capital of France.",
"Berlin is the capital of Germany.",
],
)
)
```

### Agent

`agent` allows to generate LLM agents from an initial prompt:
Expand Down
23 changes: 23 additions & 0 deletions nuclia/lib/nua.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,13 +49,15 @@
TextGenerativeResponse,
Tokens,
)
from nuclia_models.predict.remi import RemiRequest, RemiResponse

SENTENCE_PREDICT = "/api/v1/predict/sentence"
CHAT_PREDICT = "/api/v1/predict/chat"
SUMMARIZE_PREDICT = "/api/v1/predict/summarize"
REPHRASE_PREDICT = "/api/v1/predict/rephrase"
TOKENS_PREDICT = "/api/v1/predict/tokens"
QUERY_PREDICT = "/api/v1/predict/query"
REMI_PREDICT = "/api/v1/predict/remi"
UPLOAD_PROCESS = "/api/v1/processing/upload"
STATUS_PROCESS = "/api/v2/processing/status"
PUSH_PROCESS = "/api/v2/processing/push"
Expand Down Expand Up @@ -286,6 +288,18 @@ def rephrase(
output=RephraseModel,
)

def remi(self, request: RemiRequest) -> RemiResponse:
    """Evaluate a RAG experience with the REMi model via the NUA predict API.

    :param request: RemiRequest carrying question, answer and contexts.
    :return: RemiResponse with answer relevance, context relevance and
        groundedness scores.
    """
    return self._request(
        "POST",
        f"{self.url}{REMI_PREDICT}",
        payload=request.model_dump(),
        output=RemiResponse,
    )

def process_file(self, path: str, kbid: str = "default") -> PushResponseV2:
filename = path.split("/")[-1]
upload_endpoint = f"{self.url}{UPLOAD_PROCESS}"
Expand Down Expand Up @@ -593,6 +607,15 @@ async def rephrase(
output=RephraseModel,
)

async def remi(self, request: RemiRequest) -> RemiResponse:
    """Asynchronously evaluate a RAG experience with the REMi model.

    :param request: RemiRequest carrying question, answer and contexts.
    :return: RemiResponse with answer relevance, context relevance and
        groundedness scores.
    """
    url = f"{self.url}{REMI_PREDICT}"
    return await self._request(
        "POST",
        url,
        payload=request.model_dump(),
        output=RemiResponse,
    )

async def generate_retrieval(
self,
question: str,
Expand Down
35 changes: 35 additions & 0 deletions nuclia/sdk/predict.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
UserPrompt,
)
from nuclia.sdk.auth import NucliaAuth
from nuclia_models.predict.remi import RemiRequest, RemiResponse


class NucliaPredict:
Expand Down Expand Up @@ -142,6 +143,23 @@ def rag(

return nc.generate(body, model)

@nua
def remi(self, request: Optional[RemiRequest] = None, **kwargs) -> RemiResponse:
    """
    Perform a REMi evaluation over a RAG experience.

    **CLI Usage:**
    nuclia nua predict remi --user_id="user" --question="question" --answer="answer" --contexts='["context1", "context2"]'

    :param request: RemiRequest; when omitted, one is built from the remaining
        keyword arguments (CLI compatibility).
    :return: RemiResponse
    """
    # The @nua decorator injects the client under the "nc" key. Pop it out
    # first so it is never forwarded into RemiRequest(**kwargs) below.
    nc: NuaClient = kwargs.pop("nc")
    # If we didn't get a request model, build it from the kwargs for CLI compatibility.
    if request is None:
        request = RemiRequest(**kwargs)
    return nc.remi(request)


class AsyncNucliaPredict:
@property
Expand Down Expand Up @@ -262,3 +280,20 @@ async def rag(
) -> ChatResponse:
nc: AsyncNuaClient = kwargs["nc"]
return await nc.generate_retrieval(question, context, model)

@nua
async def remi(
    self, request: Optional[RemiRequest] = None, **kwargs
) -> RemiResponse:
    """
    Perform a REMi evaluation over a RAG experience.

    :param request: RemiRequest; when omitted, one is built from the remaining
        keyword arguments (CLI compatibility).
    :return: RemiResponse
    """
    # The @nua decorator injects the client under the "nc" key. Pop it out
    # first so it is never forwarded into RemiRequest(**kwargs) below.
    nc: AsyncNuaClient = kwargs.pop("nc")
    # If we didn't get a request model, build it from the kwargs for CLI compatibility.
    if request is None:
        request = RemiRequest(**kwargs)
    return await nc.remi(request)
47 changes: 47 additions & 0 deletions nuclia/tests/test_nua/test_predict.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from nuclia.lib.nua_responses import ChatModel, TextGenerativeResponse, UserPrompt
from nuclia.sdk.predict import AsyncNucliaPredict, NucliaPredict
import pytest
from nuclia_models.predict.remi import RemiRequest


def test_predict(testing_config):
Expand Down Expand Up @@ -119,3 +121,48 @@ async def test_nua_parse(testing_config):
)
)
assert "SPORTS" in results.object["document_type"]


def test_nua_remi(testing_config):
    """REMi should rank the supporting context high and the unrelated one low."""
    predict = NucliaPredict()
    response = predict.remi(
        RemiRequest(
            user_id="Nuclia PY CLI",
            question="What is the capital of France?",
            answer="Paris is the capital of france!",
            contexts=[
                "Paris is the capital of France.",
                "Berlin is the capital of Germany.",
            ],
        )
    )
    # The answer directly addresses the question.
    assert response.answer_relevance.score >= 4
    # First context supports the answer; second is unrelated.
    assert response.context_relevance[0] >= 4
    assert response.groundedness[0] >= 4
    assert response.context_relevance[1] < 2
    assert response.groundedness[1] < 2


@pytest.mark.asyncio
async def test_nua_async_remi(testing_config):
    """Async REMi should rank the supporting context high and the unrelated one low."""
    predict = AsyncNucliaPredict()
    response = await predict.remi(
        RemiRequest(
            user_id="Nuclia PY CLI",
            question="What is the capital of France?",
            answer="Paris is the capital of france!",
            contexts=[
                "Paris is the capital of France.",
                "Berlin is the capital of Germany.",
            ],
        )
    )
    # The answer directly addresses the question.
    assert response.answer_relevance.score >= 4
    # First context supports the answer; second is unrelated.
    assert response.context_relevance[0] >= 4
    assert response.groundedness[0] >= 4
    assert response.context_relevance[1] < 2
    assert response.groundedness[1] < 2
Loading