Commit
feat: add long-context evaluators, including map reduce and refine patterns (#1710)
1 parent 831c041 · commit 0c3b105
Showing 2 changed files with 467 additions and 0 deletions.
@@ -0,0 +1,139 @@
from typing import List, Optional

from phoenix.experimental.evals import PromptTemplate
from phoenix.experimental.evals.models import BaseEvalModel


class MapReducer:
    """
    Evaluates data that is too large to fit into a single context window using a
    map-reduce strategy. The data must first be divided into "chunks" that
    individually fit into an LLM's context window. Each chunk of data is
    individually evaluated (the "map" step), producing intermediate outputs that
    are combined into a single result (the "reduce" step).

    This is the simplest strategy for evaluating long-context data.
    """

    def __init__(
        self,
        model: BaseEvalModel,
        map_prompt_template: PromptTemplate,
        reduce_prompt_template: PromptTemplate,
    ) -> None:
        """Initializes an instance.

        Args:
            model (BaseEvalModel): The LLM model to use for evaluation.

            map_prompt_template (PromptTemplate): The template that is mapped
            over each chunk to produce intermediate outputs. Must contain the
            {chunk} placeholder.

            reduce_prompt_template (PromptTemplate): The template that combines
            the intermediate outputs into a single result. Must contain the
            {mapped} placeholder, which will be formatted as a list of the
            intermediate outputs produced by the map step.
        """
        self._model = model
        self._map_prompt_template = map_prompt_template
        self._reduce_prompt_template = reduce_prompt_template

    def evaluate(self, chunks: List[str]) -> str:
        """Evaluates a list of two or more chunks.

        Args:
            chunks (List[str]): A list of chunks to be evaluated. Each chunk is
            inserted into the map_prompt_template and must therefore fit within
            the LLM's context window and still leave room for the rest of the
            prompt.

        Returns:
            str: The output of the map-reduce process.
        """
        if len(chunks) < 2:
            raise ValueError(
                "The map-reduce strategy is not needed to evaluate data "
                "that fits within a single context window. "
                "Consider using llm_classify instead."
            )
        model = self._model
        mapped_records = []
        for chunk in chunks:
            map_prompt = self._map_prompt_template.format({"chunk": chunk})
            intermediate_output = model(map_prompt)
            mapped_records.append(intermediate_output)
        reduce_prompt = self._reduce_prompt_template.format({"mapped": repr(mapped_records)})
        return model(reduce_prompt)


class Refiner:
    """
    Evaluates data that is too large to fit into a single context window using a
    refine strategy. The data must first be divided into "chunks" that
    individually fit into an LLM's context window. An initial "accumulator" is
    generated from the first chunk of data. The accumulator is subsequently
    refined by iteratively updating it with new information from each
    subsequent chunk. An optional synthesis step can be used to synthesize the
    final accumulator into a desired format.
    """

    def __init__(
        self,
        model: BaseEvalModel,
        initial_prompt_template: PromptTemplate,
        refine_prompt_template: PromptTemplate,
        synthesize_prompt_template: Optional[PromptTemplate] = None,
    ) -> None:
        """Initializes an instance.

        Args:
            model (BaseEvalModel): The LLM model to use for evaluation.

            initial_prompt_template (PromptTemplate): The template for the
            initial invocation of the model that will generate the initial
            accumulator. Should contain the {chunk} placeholder.

            refine_prompt_template (PromptTemplate): The template for refining
            the accumulator across all subsequent chunks. Must contain the
            {chunk} and {accumulator} placeholders.

            synthesize_prompt_template (Optional[PromptTemplate], optional): An
            optional template to synthesize the final version of the
            accumulator. Must contain the {accumulator} placeholder.
        """
        self._model = model
        self._initial_prompt_template = initial_prompt_template
        self._refine_prompt_template = refine_prompt_template
        self._synthesize_prompt_template = synthesize_prompt_template

    def evaluate(self, chunks: List[str]) -> str:
        """Evaluates a list of two or more chunks.

        Args:
            chunks (List[str]): A list of chunks to be evaluated. Each chunk is
            inserted into the initial_prompt_template and refine_prompt_template
            and must therefore fit within the LLM's context window and still
            leave room for the rest of the prompt.

        Returns:
            str: The output of the refine process.
        """
        if len(chunks) < 2:
            raise ValueError(
                "The refine strategy is not needed to evaluate data "
                "that fits within a single context window. "
                "Consider using llm_classify instead."
            )
        model = self._model
        initial_prompt = self._initial_prompt_template.format({"chunk": chunks[0]})
        accumulator = model(initial_prompt)
        for chunk in chunks[1:]:
            refine_prompt = self._refine_prompt_template.format(
                {"accumulator": accumulator, "chunk": chunk}
            )
            accumulator = model(refine_prompt)
        if not self._synthesize_prompt_template:
            return accumulator
        reduce_prompt = self._synthesize_prompt_template.format({"accumulator": accumulator})
        return model(reduce_prompt)
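
For context, here is a minimal usage sketch of MapReducer. It is not part of the commit: the prompt wording is invented, OpenAIModel is assumed to be an available BaseEvalModel implementation, PromptTemplate is assumed to accept the template text as its first constructor argument, and the import path for the new module is a placeholder because the diff header does not show the file name.

# Usage sketch only; assumptions noted above apply to every name that is not in the diff.
from phoenix.experimental.evals import OpenAIModel, PromptTemplate
from phoenix.experimental.evals.long_context import MapReducer  # hypothetical module path

map_reducer = MapReducer(
    model=OpenAIModel(),  # requires OPENAI_API_KEY in the environment
    map_prompt_template=PromptTemplate(
        "Summarize any expressions of user frustration in this conversation segment:\n\n{chunk}"
    ),
    reduce_prompt_template=PromptTemplate(
        "Given this list of per-segment summaries, answer whether the user was "
        "frustrated overall:\n\n{mapped}"
    ),
)
chunks = ["<segment 1>", "<segment 2>", "<segment 3>"]  # each must fit in the context window
result = map_reducer.evaluate(chunks)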
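
A corresponding sketch for Refiner, including the optional synthesis step, under the same assumptions and reusing the model and chunks from the sketch above:

refiner = Refiner(  # Refiner assumed importable from the same (hypothetical) module as MapReducer
    model=OpenAIModel(),
    initial_prompt_template=PromptTemplate(
        "List the factual claims made in this document segment:\n\n{chunk}"
    ),
    refine_prompt_template=PromptTemplate(
        "Claims collected so far:\n\n{accumulator}\n\n"
        "Update the list with any new claims from this segment:\n\n{chunk}"
    ),
    synthesize_prompt_template=PromptTemplate(
        "Based on the accumulated claims below, respond with 'factual' or "
        "'hallucinated' for the document as a whole:\n\n{accumulator}"
    ),
)
result = refiner.evaluate(chunks)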