feat: add long-context evaluators, including map reduce and refine patterns (#1710)
axiomofjoy authored Nov 7, 2023
1 parent 831c041 commit 0c3b105
Showing 2 changed files with 467 additions and 0 deletions.
139 changes: 139 additions & 0 deletions src/phoenix/experimental/evals/evaluators.py
@@ -0,0 +1,139 @@
from typing import List, Optional

from phoenix.experimental.evals import PromptTemplate
from phoenix.experimental.evals.models import BaseEvalModel


class MapReducer:
    """
    Evaluates data that is too large to fit into a single context window using a
    map-reduce strategy. The data must first be divided into "chunks" that
    individually fit into an LLM's context window. Each chunk of data is
    individually evaluated (the "map" step), producing intermediate outputs that
    are combined into a single result (the "reduce" step).

    This is the simplest strategy for evaluating long-context data.
    """

    def __init__(
        self,
        model: BaseEvalModel,
        map_prompt_template: PromptTemplate,
        reduce_prompt_template: PromptTemplate,
    ) -> None:
        """Initializes an instance.

        Args:
            model (BaseEvalModel): The LLM model to use for evaluation.
            map_prompt_template (PromptTemplate): The template that is mapped
                over each chunk to produce intermediate outputs. Must contain
                the {chunk} placeholder.
            reduce_prompt_template (PromptTemplate): The template that combines
                the intermediate outputs into a single result. Must contain the
                {mapped} placeholder, which will be formatted as a list of the
                intermediate outputs produced by the map step.
        """
        self._model = model
        self._map_prompt_template = map_prompt_template
        self._reduce_prompt_template = reduce_prompt_template

    def evaluate(self, chunks: List[str]) -> str:
        """Evaluates a list of two or more chunks.

        Args:
            chunks (List[str]): A list of chunks to be evaluated. Each chunk is
                inserted into the map_prompt_template and must therefore fit
                within the LLM's context window and still leave room for the
                rest of the prompt.

        Returns:
            str: The output of the map-reduce process.
        """
        if len(chunks) < 2:
            raise ValueError(
                "The map-reduce strategy is not needed to evaluate data "
                "that fits within a single context window. "
                "Consider using llm_classify instead."
            )
        model = self._model
        mapped_records = []
        for chunk in chunks:
            map_prompt = self._map_prompt_template.format({"chunk": chunk})
            intermediate_output = model(map_prompt)
            mapped_records.append(intermediate_output)
        reduce_prompt = self._reduce_prompt_template.format({"mapped": repr(mapped_records)})
        return model(reduce_prompt)
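
# A minimal usage sketch for MapReducer, assuming OpenAIModel from
# phoenix.experimental.evals.models and illustrative template strings (the
# templates below are hypothetical, not shipped with this commit). Any callable
# BaseEvalModel works; the map template must expose the {chunk} placeholder and
# the reduce template must expose {mapped}.
#
#     from phoenix.experimental.evals import PromptTemplate
#     from phoenix.experimental.evals.models import OpenAIModel
#
#     mapper = MapReducer(
#         model=OpenAIModel(),
#         map_prompt_template=PromptTemplate(
#             "Flag any toxic language in the passage below.\n\n{chunk}"
#         ),
#         reduce_prompt_template=PromptTemplate(
#             "Here are per-chunk findings: {mapped}\n"
#             "Respond 'toxic' or 'non-toxic' for the document as a whole."
#         ),
#     )
#     verdict = mapper.evaluate(["chunk one...", "chunk two..."])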


class Refiner:
    """
    Evaluates data that is too large to fit into a single context window using a
    refine strategy. The data must first be divided into "chunks" that
    individually fit into an LLM's context window. An initial "accumulator" is
    generated from the first chunk of data. The accumulator is subsequently
    refined by iteratively updating and incorporating new information from each
    subsequent chunk. An optional synthesis step can be used to synthesize the
    final accumulator into a desired format.
    """

    def __init__(
        self,
        model: BaseEvalModel,
        initial_prompt_template: PromptTemplate,
        refine_prompt_template: PromptTemplate,
        synthesize_prompt_template: Optional[PromptTemplate] = None,
    ) -> None:
        """Initializes an instance.

        Args:
            model (BaseEvalModel): The LLM model to use for evaluation.
            initial_prompt_template (PromptTemplate): The template for the
                initial invocation of the model that will generate the initial
                accumulator. Should contain the {chunk} placeholder.
            refine_prompt_template (PromptTemplate): The template for refining
                the accumulator across all subsequent chunks. Must contain the
                {chunk} and {accumulator} placeholders.
            synthesize_prompt_template (Optional[PromptTemplate], optional): An
                optional template to synthesize the final version of the
                accumulator. Must contain the {accumulator} placeholder.
        """
        self._model = model
        self._initial_prompt_template = initial_prompt_template
        self._refine_prompt_template = refine_prompt_template
        self._synthesize_prompt_template = synthesize_prompt_template

    def evaluate(self, chunks: List[str]) -> str:
        """Evaluates a list of two or more chunks.

        Args:
            chunks (List[str]): A list of chunks to be evaluated. Each chunk is
                inserted into the initial_prompt_template and
                refine_prompt_template and must therefore fit within the LLM's
                context window and still leave room for the rest of the prompt.

        Returns:
            str: The output of the refine process.
        """
        if len(chunks) < 2:
            raise ValueError(
                "The refine strategy is not needed to evaluate data "
                "that fits within a single context window. "
                "Consider using llm_classify instead."
            )
        model = self._model
        initial_prompt = self._initial_prompt_template.format({"chunk": chunks[0]})
        accumulator = model(initial_prompt)
        for chunk in chunks[1:]:
            refine_prompt = self._refine_prompt_template.format(
                {"accumulator": accumulator, "chunk": chunk}
            )
            accumulator = model(refine_prompt)
        if not self._synthesize_prompt_template:
            return accumulator
        synthesize_prompt = self._synthesize_prompt_template.format({"accumulator": accumulator})
        return model(synthesize_prompt)
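
# A minimal usage sketch for Refiner, again assuming OpenAIModel and
# hypothetical template strings. The accumulator produced from chunks[0] is
# refined once per remaining chunk, then the optional synthesis step converts
# it into the final verdict.
#
#     from phoenix.experimental.evals import PromptTemplate
#     from phoenix.experimental.evals.models import OpenAIModel
#
#     refiner = Refiner(
#         model=OpenAIModel(),
#         initial_prompt_template=PromptTemplate(
#             "List the factual claims in the passage below.\n\n{chunk}"
#         ),
#         refine_prompt_template=PromptTemplate(
#             "Claims so far: {accumulator}\n"
#             "Update the list with claims from this passage:\n\n{chunk}"
#         ),
#         synthesize_prompt_template=PromptTemplate(
#             "Given these claims: {accumulator}\n"
#             "Respond 'consistent' or 'inconsistent' for the document."
#         ),
#     )
#     verdict = refiner.evaluate(["chunk one...", "chunk two..."])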