Update Response Matching Operator (#684)
* Change score_response_matching to score_response_match

* Update exception

* v0.6.13
Dominastorm authored Apr 12, 2024
1 parent 77fdcec commit 8b42c04
Showing 6 changed files with 13 additions and 13 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -3,7 +3,7 @@ requires = ["setuptools", "wheel"]
 
 [project]
 name = "uptrain"
-version = "0.6.12"
+version = "0.6.13"
 description = "UpTrain - tool to evaluate LLM applications on aspects like factual accuracy, response quality, retrieval quality, tonality, etc."
 readme = "README.md"
 maintainers = [{ name = "UpTrain AI Team", email = "oss@uptrain.ai" }]
12 changes: 6 additions & 6 deletions tests/test_builtins.py
@@ -199,13 +199,13 @@ def test_check_response_matching():
     output = check.setup(settings).run(response_matching_dataset)
     assert isinstance(output, pl.DataFrame)
     assert (
-        "score_response_matching" in output.columns
+        "score_response_match" in output.columns
         and "explanation_response_matching" in output.columns
     )
     assert (
-        output["score_response_matching"].dtype == pl.Float64
-        and len(output["score_response_matching"])
-        - output["score_response_matching"].null_count()
+        output["score_response_match"].dtype == pl.Float64
+        and len(output["score_response_match"])
+        - output["score_response_match"].null_count()
         > 0
     )
     assert (
@@ -563,8 +563,8 @@ def test_check_guideline_adherence():
 # check = CheckResponseMatching()
 # output = check.setup(settings).run(dataset)
 # assert isinstance(output, pl.DataFrame)
-# assert "score_response_matching" in output.columns and "explanation_response_matching" in output.columns
-# assert output["score_response_matching"].dtype == pl.Float64 and len(output["score_response_matching"]) - output["score_response_matching"].null_count() > 0
+# assert "score_response_match" in output.columns and "explanation_response_matching" in output.columns
+# assert output["score_response_match"].dtype == pl.Float64 and len(output["score_response_match"]) - output["score_response_match"].null_count() > 0
 # assert output["explanation_response_matching"].dtype == pl.Utf8 and len(output["explanation_response_matching"]) - output["explanation_response_matching"].null_count() > 0


2 changes: 1 addition & 1 deletion uptrain/framework/builtins.py
@@ -76,7 +76,7 @@ def CheckResponseMatching(method="llm"):
     return Check(
         name="response_matching_score",
         operators=[ResponseMatchingScore(method=method)],
-        plots=[Histogram(x="score_response_matching")],
+        plots=[Histogram(x="score_response_match")],
     )
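For context, a minimal usage sketch of this builtin after the rename. The setup/run pattern comes from tests/test_builtins.py above; the Settings import path and the dataset column names are illustrative assumptions, not taken from this diff:

import polars as pl
from uptrain.framework import Settings  # import path assumed
from uptrain.framework.builtins import CheckResponseMatching

settings = Settings(openai_api_key="sk-...")  # placeholder key
dataset = pl.DataFrame({
    "question": ["What is the capital of France?"],       # assumed column
    "response": ["Paris is the capital of France."],      # assumed column
    "ground_truth": ["Paris is the capital of France."],  # assumed column
})

check = CheckResponseMatching(method="llm")
output = check.setup(settings).run(dataset)  # pattern from tests/test_builtins.py
print(output["score_response_match"])  # the column renamed in this commit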


2 changes: 1 addition & 1 deletion uptrain/framework/evalllm.py
@@ -98,7 +98,7 @@ def __init__(self, settings: Settings = None, openai_api_key: str = None) -> None:
         if self.settings.openai_api_key is not None and len(self.settings.openai_api_key):
             response = check_openai_api_key(self.settings.openai_api_key)
             if not response:
-                raise Exception("OpenAI API Key is invalid")
+                raise ValueError("OpenAI API Key is invalid")
 
         self.executor = APIClientWithoutAuth(self.settings)
2 changes: 1 addition & 1 deletion uptrain/operators/language/prompts/few_shots.py
@@ -438,7 +438,7 @@
 """
 
 
-LANGUAGE_CRITIQUE_COHERENCE_FEW_SHOT__CLASSIFY = """
+LANGUAGE_CRITIQUE_COHERENCE_FEW_SHOT__COT = """
 [Response]: Exercise is beneficial for both physical and mental health. It strengthens the body and uplifts the mind.
 [Output]:
 {
6 changes: 3 additions & 3 deletions uptrain/operators/language/response_quality.py
@@ -903,7 +903,7 @@ def evaluate_local(self, data):
         precision = combined_row[0]["score_factual_accuracy"]
         recall = combined_row[1]["score_factual_accuracy"]
         output = {
-            "score_response_matching": None,
+            "score_response_match": None,
             "explanation_response_matching": None,
             "score_response_match_recall": None,
             "score_response_match_precision": None,
@@ -921,11 +921,11 @@ def evaluate_local(self, data):
             output["explanation_response_matching"] = explanation
 
         if precision != 0 and recall != 0:
-            output["score_response_matching"] = 4 * (
+            output["score_response_match"] = 4 * (
                 (precision * recall) / (precision * 3 + recall)
             )
         else:
-            output["score_response_matching"] = 0.0
+            output["score_response_match"] = 0.0
         output["score_response_match_recall"] = recall
         output["score_response_match_precision"] = precision
         results.append(output)
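Note on the formula: 4 * (precision * recall) / (precision * 3 + recall) is the weighted F-measure F_beta = (1 + beta^2) * P * R / (beta^2 * P + R) with beta^2 = 3, so recall is weighted more heavily than precision; here precision and recall are the two directional factual-accuracy scores computed above. A standalone sketch with illustrative inputs:

def response_match_score(precision: float, recall: float) -> float:
    # F-beta with beta^2 = 3 (recall-weighted); 0.0 if either input is 0,
    # mirroring the if/else in evaluate_local above.
    if precision != 0 and recall != 0:
        return 4 * (precision * recall) / (precision * 3 + recall)
    return 0.0

print(response_match_score(precision=0.5, recall=1.0))  # 0.8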
