
Commit

fix: variable naming in multi-turn example
saikatmitra91 committed May 8, 2024
1 parent 3a1d20c commit c48c9de
Showing 2 changed files with 13 additions and 11 deletions.
examples/multi-turn-chat/chat.py (4 changes: 3 additions & 1 deletion)
@@ -20,6 +20,8 @@ async def execute(inputs, parameters):
     openai.chat.completions.create
     thread_length = len(messages)
     return {
+        # setting the last response as the final output of the conversation
         "value": messages[thread_length - 1].get("content", ""),
-        "metadata": {"thread": messages},
+        # saving the thread in metadata for eyeball and scoring output
+        "metadata": {"messages": messages},
     }
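For context, after this change execute() returns the last assistant reply as the run's value and keeps the whole conversation under metadata["messages"] instead of metadata["thread"]. A minimal sketch of that return shape, with made-up example messages (illustrative only, not from the repository):

# Illustrative sketch of the shape execute() returns after this commit.
messages = [
    {"role": "user", "content": "What is the capital of France?"},
    {"role": "assistant", "content": "The capital of France is Paris."},
]
thread_length = len(messages)

output = {
    # the last assistant response is the final value of the run
    "value": messages[thread_length - 1].get("content", ""),
    # the full thread is kept in metadata so the scorer can inspect every turn
    "metadata": {"messages": messages},
}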
examples/multi-turn-chat/score.py (20 changes: 10 additions & 10 deletions)
@@ -4,17 +4,17 @@
 from enum import Enum


-class EvalResultEnum(Enum):
+class ScorerResultEnum(Enum):
     YES = "Yes"
     NO = "No"


-class EvaluationResult(TypedDict):
-    result: EvalResultEnum
+class ScorerResult(TypedDict):
+    result: ScorerResultEnum
     reason: str


-async def llm_evaluation(output, criteria) -> EvaluationResult:
+async def llm_score(output, criteria) -> ScorerResult:
     system_prompt = "You are an expert evaluator who grades an output string based on a criteria. The output must fulfil the criteria to pass the evaluation."
     openai = AsyncOpenAI()
     completion = await openai.chat.completions.create(
@@ -40,8 +40,8 @@ async def llm_evaluation(output, criteria) -> EvaluationResult:
             "result": {
                 "type": "string",
                 "enum": [
-                    EvalResultEnum.YES.value,
-                    EvalResultEnum.NO.value,
+                    ScorerResultEnum.YES.value,
+                    ScorerResultEnum.NO.value,
                 ],
             },
         },
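The schema fragment above sits inside the tool definition passed to openai.chat.completions.create; the rest of that definition is truncated in this hunk. As a rough sketch only (the tool name report_score and the surrounding structure are assumptions, not the repository's exact code), the enum constrains the model to answer Yes or No like this:

from enum import Enum


class ScorerResultEnum(Enum):
    YES = "Yes"
    NO = "No"


# Hypothetical tool definition showing how the enum restricts the model's answer;
# the repository's actual schema is truncated in the diff above.
scoring_tool = {
    "type": "function",
    "function": {
        "name": "report_score",  # name is illustrative
        "description": "Report whether the output meets the criteria.",
        "parameters": {
            "type": "object",
            "properties": {
                "result": {
                    "type": "string",
                    "enum": [ScorerResultEnum.YES.value, ScorerResultEnum.NO.value],
                },
                "reason": {"type": "string"},
            },
            "required": ["result", "reason"],
        },
    },
}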
@@ -56,23 +56,23 @@ async def llm_evaluation(output, criteria) -> EvaluationResult:
         },
     )
     response = completion.choices[0].message.tool_calls[0]
-    return EvaluationResult(**json.loads(response.function.arguments))
+    return ScorerResult(**json.loads(response.function.arguments))


 async def evaluate(output, inputs):
-    thread: list[object] = output.get("metadata", {}).get("thread", [])
+    thread: list[object] = output.get("metadata", {}).get("messages", [])
     assistant_responses: list[object] = [
         obj for obj in thread if obj["role"] == "assistant"
     ]
     success = 0
     message = ""
     total = len(inputs)
     for idx, assistant_response in enumerate(assistant_responses):
-        score = await llm_evaluation(
+        score = await llm_score(
             assistant_response.get("content"),
             inputs[idx].get("acceptable_response", ""),
         )
-        if score["result"] == EvalResultEnum.YES.value:
+        if score["result"] == ScorerResultEnum.YES.value:
            success = success + 1

     message = (
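End to end, the scorer reads the thread from the metadata["messages"] key that chat.py now writes, asks llm_score about each assistant turn against the matching acceptable_response, and tallies successes. A runnable sketch with llm_score stubbed out (the stub and the returned summary dict are illustrative; the real scorer calls OpenAI, and the repository's final message string is truncated above):

import asyncio


async def llm_score(output, criteria):
    # Stub standing in for the LLM-backed scorer above; a simple substring
    # check lets the sketch run without an API key.
    hit = criteria.lower() in (output or "").lower()
    return {"result": "Yes" if hit else "No", "reason": "substring check (stub)"}


async def evaluate(output, inputs):
    # Read the thread from the key written by chat.py after this commit.
    thread = output.get("metadata", {}).get("messages", [])
    assistant_responses = [m for m in thread if m["role"] == "assistant"]
    success = 0
    total = len(inputs)
    for idx, assistant_response in enumerate(assistant_responses):
        score = await llm_score(
            assistant_response.get("content"),
            inputs[idx].get("acceptable_response", ""),
        )
        if score["result"] == "Yes":
            success += 1
    # The repository's exact result message is truncated above; this summary is a stand-in.
    return {"score": success / total if total else 0.0, "passed": success, "total": total}


# Example run with illustrative data.
example_output = {
    "metadata": {
        "messages": [
            {"role": "user", "content": "What is the capital of France?"},
            {"role": "assistant", "content": "The capital of France is Paris."},
        ]
    }
}
example_inputs = [{"acceptable_response": "Paris"}]
print(asyncio.run(evaluate(example_output, example_inputs)))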
