Update Response Matching Operator (#684)
* Change score_response_matching to score_response_match

* Update exception

* v0.6.13
Dominastorm authored Apr 12, 2024
1 parent 77fdcec commit 8b42c04
Showing 6 changed files with 13 additions and 13 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -3,7 +3,7 @@ requires = ["setuptools", "wheel"]
 
 [project]
 name = "uptrain"
-version = "0.6.12"
+version = "0.6.13"
 description = "UpTrain - tool to evaluate LLM applications on aspects like factual accuracy, response quality, retrieval quality, tonality, etc."
 readme = "README.md"
 maintainers = [{ name = "UpTrain AI Team", email = "oss@uptrain.ai" }]
12 changes: 6 additions & 6 deletions tests/test_builtins.py
@@ -199,13 +199,13 @@ def test_check_response_matching():
     output = check.setup(settings).run(response_matching_dataset)
     assert isinstance(output, pl.DataFrame)
     assert (
-        "score_response_matching" in output.columns
+        "score_response_match" in output.columns
         and "explanation_response_matching" in output.columns
     )
     assert (
-        output["score_response_matching"].dtype == pl.Float64
-        and len(output["score_response_matching"])
-        - output["score_response_matching"].null_count()
+        output["score_response_match"].dtype == pl.Float64
+        and len(output["score_response_match"])
+        - output["score_response_match"].null_count()
         > 0
     )
     assert (
@@ -563,8 +563,8 @@ def test_check_guideline_adherence():
 # check = CheckResponseMatching()
 # output = check.setup(settings).run(dataset)
 # assert isinstance(output, pl.DataFrame)
-# assert "score_response_matching" in output.columns and "explanation_response_matching" in output.columns
-# assert output["score_response_matching"].dtype == pl.Float64 and len(output["score_response_matching"]) - output["score_response_matching"].null_count() > 0
+# assert "score_response_match" in output.columns and "explanation_response_matching" in output.columns
+# assert output["score_response_match"].dtype == pl.Float64 and len(output["score_response_match"]) - output["score_response_match"].null_count() > 0
 # assert output["explanation_response_matching"].dtype == pl.Utf8 and len(output["explanation_response_matching"]) - output["explanation_response_matching"].null_count() > 0


2 changes: 1 addition & 1 deletion uptrain/framework/builtins.py
@@ -76,7 +76,7 @@ def CheckResponseMatching(method="llm"):
     return Check(
         name="response_matching_score",
         operators=[ResponseMatchingScore(method=method)],
-        plots=[Histogram(x="score_response_matching")],
+        plots=[Histogram(x="score_response_match")],
     )
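For context, a minimal usage sketch of this builtin after the rename. The setup/run pattern comes from tests/test_builtins.py above; the Settings import path and the dataset column names are illustrative assumptions, not taken from this diff:

import polars as pl
from uptrain.framework import Settings  # import path assumed
from uptrain.framework.builtins import CheckResponseMatching

settings = Settings(openai_api_key="sk-...")  # placeholder key
dataset = pl.DataFrame({
    "question": ["What is the capital of France?"],       # assumed column
    "response": ["Paris is the capital of France."],      # assumed column
    "ground_truth": ["Paris is the capital of France."],  # assumed column
})

check = CheckResponseMatching(method="llm")
output = check.setup(settings).run(dataset)  # pattern from tests/test_builtins.py
print(output["score_response_match"])  # the column renamed in this commit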


2 changes: 1 addition & 1 deletion uptrain/framework/evalllm.py
@@ -98,7 +98,7 @@ def __init__(self, settings: Settings = None, openai_api_key: str = None) -> None:
         if self.settings.openai_api_key is not None and len(self.settings.openai_api_key):
             response = check_openai_api_key(self.settings.openai_api_key)
             if not response:
-                raise Exception("OpenAI API Key is invalid")
+                raise ValueError("OpenAI API Key is invalid")
 
         self.executor = APIClientWithoutAuth(self.settings)
2 changes: 1 addition & 1 deletion uptrain/operators/language/prompts/few_shots.py
@@ -438,7 +438,7 @@
 """
 
 
-LANGUAGE_CRITIQUE_COHERENCE_FEW_SHOT__CLASSIFY = """
+LANGUAGE_CRITIQUE_COHERENCE_FEW_SHOT__COT = """
 [Response]: Exercise is beneficial for both physical and mental health. It strengthens the body and uplifts the mind.
 [Output]:
 {
6 changes: 3 additions & 3 deletions uptrain/operators/language/response_quality.py
@@ -903,7 +903,7 @@ def evaluate_local(self, data):
         precision = combined_row[0]["score_factual_accuracy"]
         recall = combined_row[1]["score_factual_accuracy"]
         output = {
-            "score_response_matching": None,
+            "score_response_match": None,
             "explanation_response_matching": None,
             "score_response_match_recall": None,
             "score_response_match_precision": None,
@@ -921,11 +921,11 @@ def evaluate_local(self, data):
             output["explanation_response_matching"] = explanation
 
         if precision != 0 and recall != 0:
-            output["score_response_matching"] = 4 * (
+            output["score_response_match"] = 4 * (
                 (precision * recall) / (precision * 3 + recall)
             )
         else:
-            output["score_response_matching"] = 0.0
+            output["score_response_match"] = 0.0
         output["score_response_match_recall"] = recall
         output["score_response_match_precision"] = precision
         results.append(output)
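Note on the formula: 4 * (precision * recall) / (precision * 3 + recall) is the weighted F-measure F_beta = (1 + beta^2) * P * R / (beta^2 * P + R) with beta^2 = 3, so recall is weighted more heavily than precision; here precision and recall are the two directional factual-accuracy scores computed above. A standalone sketch with illustrative inputs:

def response_match_score(precision: float, recall: float) -> float:
    # F-beta with beta^2 = 3 (recall-weighted); 0.0 if either input is 0,
    # mirroring the if/else in evaluate_local above.
    if precision != 0 and recall != 0:
        return 4 * (precision * recall) / (precision * 3 + recall)
    return 0.0

print(response_match_score(precision=0.5, recall=1.0))  # 0.8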
