feat(evals): extend llm_generate to support an output_parser
mikeldking committed Nov 13, 2023
1 parent abddded · commit 1cd691e
Showing 2 changed files with 92 additions and 5 deletions.
28 changes: 23 additions & 5 deletions src/phoenix/experimental/evals/functions/generate.py
@@ -1,5 +1,5 @@
 import logging
-from typing import Optional, Union
+from typing import Callable, Dict, Optional, Union
 
 import pandas as pd
@@ -9,12 +9,17 @@
 logger = logging.getLogger(__name__)
 
 
+def _no_op_parser(response: str) -> Dict[str, str]:
+    return {"output": response}
+
+
 def llm_generate(
     dataframe: pd.DataFrame,
     template: Union[PromptTemplate, str],
     model: BaseEvalModel,
     system_instruction: Optional[str] = None,
     verbose: bool = False,
+    output_parser: Optional[Callable[[str], Dict[str, str]]] = None,
 ) -> pd.DataFrame:
     """
     Generates text from a template using an LLM. This function is useful
@@ -38,16 +43,29 @@ def llm_generate(
         verbose (bool, optional): If True, prints detailed information to stdout such as model
             invocation parameters and retry info. Default False.
+        output_parser (Optional[Callable[[str], Dict[str, str]]], optional): An optional function
+            that takes each generated response and parses it into a dictionary. The keys of the
+            dictionary should correspond to the column names of the output dataframe. If None, the
+            output dataframe will have a single column named "output". Default None.
 
     Returns:
-        pandas.DataFrame: A dataframe where the `output` column contains the
-            LLM output.
+        pandas.DataFrame: A dataframe where each row represents the generated output.
     """
 
     with set_verbosity(model, verbose) as verbose_model:
         template = normalize_template(template)
         logger.info(f"Template: \n{template.text}\n")
         logger.info(f"Template variables: {template.variables}")
         prompts = map_template(dataframe, template)
-        responses = verbose_model.generate(prompts.to_list(), system_instruction)
-        return pd.DataFrame({"output": responses})
+
+        output_parser = output_parser or _no_op_parser
+        # For each prompt, generate and parse the response
+        output = []
+        for prompt in prompts:
+            logger.info(f"Prompt: {prompt}")
+            response = verbose_model(prompt, instruction=system_instruction)
+            parsed_response = output_parser(response)
+            output.append(parsed_response)
+
+        # Return the data as a dataframe
+        return pd.DataFrame(output)
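
For orientation, a minimal usage sketch of the new parameter follows. The import path is inferred from the file layout in this diff, and the template text, column name, and parse_json helper are illustrative assumptions; only llm_generate, OpenAIModel, and the output_parser keyword come from this commit.

    import json
    from typing import Dict

    import pandas as pd

    from phoenix.experimental.evals import OpenAIModel, llm_generate  # assumed import path

    # Hypothetical input: the column name must match the template variable.
    dataframe = pd.DataFrame({"query": ["What is Python?", "What is C++?"]})

    def parse_json(response: str) -> Dict[str, str]:
        # Illustrative parser: each returned key becomes a column in the
        # output dataframe; unparsable responses are recorded, not raised.
        try:
            return json.loads(response)
        except json.JSONDecodeError as e:
            return {"__error__": str(e)}

    generated = llm_generate(
        dataframe=dataframe,
        template='Reply to {query} as JSON with "category" and "language" keys.',
        model=OpenAIModel(),
        output_parser=parse_json,
    )
    # Rows that parse contribute their keys as columns; missing keys become NaN.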
69 changes: 69 additions & 0 deletions tests/experimental/evals/functions/test_generate.py
@@ -1,6 +1,9 @@
+import json
+from typing import Dict
 from unittest.mock import patch
 
 import httpx
+import numpy as np
 import pandas as pd
 import pytest
 import respx
@@ -107,3 +110,69 @@ def test_llm_generate_prints_info_with_verbose_flag(
 
     out, _ = capfd.readouterr()
     assert "Generating responses for 4 prompts..." in out, "Response generation should be printed"
+
+
+@pytest.mark.respx(base_url="https://api.openai.com/v1/chat/completions")
+def test_llm_generate_with_output_parser(monkeypatch: pytest.MonkeyPatch, respx_mock: respx.mock):
+    monkeypatch.setenv(OPENAI_API_KEY_ENVVAR_NAME, "sk-0123456789")
+    dataframe = pd.DataFrame(
+        [
+            {
+                "query": "What is Python?",
+            },
+            {
+                "query": "What is Python?",
+            },
+            {
+                "query": "What is C++?",
+            },
+            {
+                "query": "What is C++?",
+            },
+            {
+                "query": "gobbledygook",
+            },
+        ]
+    )
+    responses = [
+        '{ "category": "programming", "language": "Python" }',
+        '{ "category": "programming", "language": "Python" }',
+        '{ "category": "programming", "language": "C++" }',
+        '{ "category": "programming", "language": "C++" }',
+        "unparsable response",
+    ]
+    queries = dataframe["query"].tolist()
+
+    for query, response in zip(queries, responses):
+        matcher = M(content__contains=query)
+        respx_mock.route(matcher).mock(
+            return_value=httpx.Response(200, json={"choices": [{"message": {"content": response}}]})
+        )
+
+    template = "Given {query}, generate output"
+
+    with patch.object(OpenAIModel, "_init_tiktoken", return_value=None):
+        model = OpenAIModel()
+
+    def output_parser(response: str) -> Dict[str, str]:
+        try:
+            return json.loads(response)
+        except json.JSONDecodeError as e:
+            return {"__error__": str(e)}
+
+    generated = llm_generate(
+        dataframe=dataframe, template=template, model=model, output_parser=output_parser
+    )
+    # check the output is parsed correctly
+    assert generated["category"].tolist() == [
+        "programming",
+        "programming",
+        "programming",
+        "programming",
+        np.nan,
+    ]
+
+    # check the unparsable response captures the error
+    assert generated["__error__"].tolist() == [np.nan] * 4 + [
+        "Expecting value: line 1 column 1 (char 0)"
+    ]
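
A note on the np.nan expectations above: they fall out of ordinary pandas behavior rather than anything llm_generate does. When pd.DataFrame is built from a list of dicts, any key missing from a row becomes a NaN cell, so the unparsable row is NaN under "category" and the parsed rows are NaN under "__error__". A minimal sketch:

    import pandas as pd

    # Keys missing from a row become NaN cells when the frame is built.
    rows = [{"category": "programming"}, {"__error__": "Expecting value"}]
    df = pd.DataFrame(rows)
    print(df["category"].tolist())   # ['programming', nan]
    print(df["__error__"].tolist())  # [nan, 'Expecting value']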
