Commit

Permalink
PR comments
CG80499 committed Oct 2, 2023
1 parent 8374eb6 commit b33e32e
Showing 2 changed files with 7 additions and 7 deletions.
4 changes: 2 additions & 2 deletions docs/extras/guides/evaluation/string/scoring_eval_chain.ipynb
@@ -7,7 +7,7 @@
"source": [
"# Overall quality evaluation\n",
"\n",
"In scenarios where you wish to score a model's output based on a criteria set and/or reference answer, the `Score` evaluator can be helpful. This is most useful for comparing the performance of different models on a given task.\n",
"In scenarios where you wish to score a model's output from 1-10 based on a criteria set and/or reference answer, the `Score` evaluator can be helpful. This is most useful for comparing the performance of different models on a given task.\n",
"\n",
"Refer to the documentation of the [ScoreStringEvalChain](https://api.python.langchain.com/en/latest/evaluation/langchain.evaluation.scoring.eval_chain.ScoreStringEvalChain.html#langchain.evaluation.scoring.eval_chain.ScoreStringEvalChain) class for full details.\n",
"\n",
@@ -113,7 +113,7 @@
"- reasoning: String \"chain of thought reasoning\" from the LLM generated prior to creating the score\n",
"\n",
"\n",
"Similar to [CriteriaEvalChain](https://api.python.langchain.com/en/latest/evaluation/langchain.evaluation.criteria.eval_chain.CriteriaEvalChain.html#langchain.evaluation.criteria.eval_chain.CriteriaEvalChain) you also load the \"labeled_score_string\" evaluator for scoring labeled outputs."
"Similar to [CriteriaEvalChain](https://api.python.langchain.com/en/latest/evaluation/langchain.evaluation.criteria.eval_chain.CriteriaEvalChain.html#langchain.evaluation.criteria.eval_chain.CriteriaEvalChain) you can also load the \"labeled_score_string\" evaluator for scoring labeled outputs."
]
}
],
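
As a hedged companion to the cell above, a sketch of loading the "labeled_score_string" variant, which additionally takes a ground-truth reference; the reference answer and model name are invented for illustration:

from langchain.chat_models import ChatOpenAI
from langchain.evaluation import load_evaluator

# The labeled variant compares the prediction against a reference answer.
labeled_evaluator = load_evaluator("labeled_score_string", llm=ChatOpenAI(model="gpt-4"))

result = labeled_evaluator.evaluate_strings(
    prediction="The capital of France is Paris.",
    reference="Paris",
    input="What is the capital of France?",
)
# As the cell above notes, `result` contains "score" (1-10) and "reasoning".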
10 changes: 5 additions & 5 deletions libs/langchain/langchain/evaluation/scoring/eval_chain.py
@@ -1,4 +1,4 @@
"""Base classes for comparing the output of two models."""
"""Base classes for scoring the output of a model on a scale of 1-10."""
from __future__ import annotations

import logging
@@ -48,7 +48,7 @@
}


- def resolve_pairwise_criteria(
+ def resolve_criteria(
criteria: Optional[Union[CRITERIA_TYPE, str, List[CRITERIA_TYPE]]]
) -> dict:
"""Resolve the criteria for the pairwise evaluator.
@@ -81,7 +81,7 @@ def resolve_pairwise_criteria(
criteria_ = {
k: v
for criterion in criteria
- for k, v in resolve_pairwise_criteria(criterion).items()
+ for k, v in resolve_criteria(criterion).items()
}
else:
if not criteria:
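
The comprehension in this hunk merges the per-criterion dicts returned by the renamed helper into one flat mapping. A hedged usage sketch, assuming resolve_criteria keeps the same string-and-list handling as its pairwise counterpart; the criterion names and abbreviated output are illustrative:

from langchain.evaluation.scoring.eval_chain import resolve_criteria

# A single built-in criterion name resolves to a one-entry dict ...
print(resolve_criteria("helpfulness"))
# e.g. {'helpfulness': 'Is the submission helpful, insightful, and appropriate? ...'}

# ... and a list of criteria is flattened into a single dict by the comprehension above.
print(resolve_criteria(["helpfulness", "relevance"]))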
@@ -252,7 +252,7 @@ def from_llm(
f"Input variables should be {expected_input_vars}, "
f"but got {prompt_.input_variables}"
)
- criteria_ = resolve_pairwise_criteria(criteria)
+ criteria_ = resolve_criteria(criteria)
criteria_str = "\n".join(f"{k}: {v}" if v else k for k, v in criteria_.items())
criteria_str = (
CRITERIA_INSTRUCTIONS + criteria_str if criteria_str else DEFAULT_CRITERIA
@@ -421,7 +421,7 @@ def from_llm(
f"Input variables should be {expected_input_vars}, "
f"but got {prompt_.input_variables}"
)
- criteria_ = resolve_pairwise_criteria(criteria)
+ criteria_ = resolve_criteria(criteria)
criteria_str = "\n".join(f"{k}: {v}" for k, v in criteria_.items())
criteria_str = CRITERIA_INSTRUCTIONS + criteria_str if criteria_str else ""
return cls(llm=llm, prompt=prompt_.partial(criteria=criteria_str), **kwargs)
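
Both from_llm hunks above resolve the criteria and bake the resulting string into the prompt via prompt_.partial(criteria=criteria_str). A hedged sketch of how these constructors are typically invoked; the model and criterion choices are placeholders, not taken from this commit:

from langchain.chat_models import ChatOpenAI
from langchain.evaluation.scoring.eval_chain import (
    LabeledScoreStringEvalChain,
    ScoreStringEvalChain,
)

llm = ChatOpenAI(model="gpt-4")

# Criteria are resolved by resolve_criteria and embedded in the evaluation prompt.
chain = ScoreStringEvalChain.from_llm(llm=llm, criteria="conciseness")

# The labeled variant expects a reference answer at evaluation time.
labeled_chain = LabeledScoreStringEvalChain.from_llm(llm=llm, criteria="correctness")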
