
[ENH] add pipeline meta parser #103

Merged · 3 commits · Jul 28, 2023
70 changes: 69 additions & 1 deletion src/alpaca_eval/completion_parsers.py
@@ -9,7 +9,7 @@

from . import utils

-__all__ = ["regex_parser", "lmsys_parser", "ranking_parser", "json_parser", "eval_parser"]
+__all__ = ["regex_parser", "lmsys_parser", "ranking_parser", "json_parser", "eval_parser", "pipeline_meta_parser"]


def regex_parser(completion: str, outputs_to_match: dict[str, Any]) -> list[Any]:
@@ -165,3 +165,71 @@ def eval_parser(completion: str) -> list[Any]:
    if not isinstance(evaluated_completion, list):
        evaluated_completion = [evaluated_completion]
    return evaluated_completion


def replace_parser(completion: str, replacer: dict, default_replacer: Any = "auto") -> list[str]:
    """Parser that replaces the completion using a dictionary. This is useful when it is more natural for the
    prompt to ask for a completion that differs from the value you want to store.

    Parameters
    ----------
    completion : str
        Output from the model to parse.

    replacer : dict
        Dictionary whose keys are the completions you want to replace and whose values are the replacements.

    default_replacer : any, optional
        If the completion is not found in `replacer`, use this value instead. If "auto", keep the completion itself.

    Examples
    --------
    >>> replace_parser("True", replacer={"True": 1})
    [1]
    """
    return [replacer.get(completion, completion if default_replacer == "auto" else default_replacer)]


def pipeline_meta_parser(
    completion: str, parsers_to_kwargs: dict[str, dict], is_squeeze: bool = True, _depth=0
) -> list[Any]:
    r"""Applies a sequence of parsers to a completion, feeding each parser's outputs into the next.

    Parameters
    ----------
    completion : str
        The completion to parse.

    parsers_to_kwargs : dictionary of str to dict
        A dictionary mapping parser function names (as defined in this module) to the kwargs to pass to them. The
        parsing functions will be applied in the order they are given.

    is_squeeze : bool, optional
        If True, will squeeze the output of each intermediate parser if it is a singleton.

    Examples
    --------
    >>> completion = '{"ex": "...", "rank": [{"model": "model_1", "rank": 1}, {"model": "model_2", "rank": 2}]}'
    >>> parsers_to_kwargs = {"json_parser": {"annotation_key": "rank"}, "ranking_parser": {}}
    >>> pipeline_meta_parser(completion, parsers_to_kwargs)
    [1]
    """
    all_parsers = list(parsers_to_kwargs.keys())
    all_kwargs = list(parsers_to_kwargs.values())

    # apply the first parser, then recursively apply the remaining parsers to each of its outputs
    out = globals()[all_parsers[0]](completion, **all_kwargs[0])
    rest_of_parsers_to_kwargs = dict(zip(all_parsers[1:], all_kwargs[1:]))
    if len(rest_of_parsers_to_kwargs) > 0:
        out = [
            pipeline_meta_parser(
                o, parsers_to_kwargs=rest_of_parsers_to_kwargs, is_squeeze=is_squeeze, _depth=_depth + 1
            )
            for o in out
        ]

    if is_squeeze and len(out) == 1 and _depth != 0:
        assert isinstance(out, list)
        out = out[0]

    return out
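
A quick usage sketch of the two new parsers, assuming `alpaca_eval` is installed so they can be imported; the inputs mirror the doctests above:

from alpaca_eval.completion_parsers import pipeline_meta_parser, replace_parser

# replace_parser maps an exact completion onto the value you actually want to store.
print(replace_parser("True", replacer={"True": 1}))  # [1]

# pipeline_meta_parser chains parsers by name: extract the "rank" field with json_parser,
# then turn the ranking into a preference with ranking_parser.
completion = '{"ex": "...", "rank": [{"model": "model_1", "rank": 1}, {"model": "model_2", "rank": 2}]}'
parsers_to_kwargs = {"json_parser": {"annotation_key": "rank"}, "ranking_parser": {}}
print(pipeline_meta_parser(completion, parsers_to_kwargs))  # [1], as in the doctest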
8 changes: 2 additions & 6 deletions src/alpaca_eval/decoders/openai.py
@@ -199,12 +199,8 @@ def _openai_completion_helper(
            choice["text"] = choice.message.content

            if choice.message.get("function_call"):
-               # currently we only use function calls to get a JSON object
-               # => overwrite text with the JSON object. In the future, we could
-               # allow actual function calls
-               all_args = json.loads(choice.message.function_call.arguments)
-               assert len(all_args) == 1
-               choice["text"] = all_args[list(all_args.keys())[0]]
+               # currently we only use function calls to get a JSON object => return raw text of json
+               choice["text"] = choice.message.function_call.arguments

    else:
        completion_batch = openai.Completion.create(prompt=prompt_batch, **curr_kwargs)
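
With this change the helper no longer asserts a single function-call argument and unpacks it; it stores the raw JSON arguments string in choice["text"] and leaves key extraction to the completion parser. A rough sketch of that downstream step, using a hypothetical arguments payload:

import json

# Hypothetical raw function-call arguments, as now stored verbatim in choice["text"].
raw_arguments = '{"ordered_models": [{"model": "model_1", "rank": 1}, {"model": "model_2", "rank": 2}]}'

# Downstream, json_parser (chained via pipeline_meta_parser) picks out whichever key the
# annotator config requests, e.g. "ordered_models" in the alpaca_eval_gpt4_fn config below.
parsed = json.loads(raw_arguments)
print(parsed["ordered_models"][0]["model"])  # -> model_1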
@@ -26,5 +26,10 @@ alpaca_eval_gpt4_fn:
type: "number"
description: "Order of preference of the model, 1 has the best output"
"required": [ "ordered_models" ]
fn_completion_parser: "ranking_parser"
fn_completion_parser: "pipeline_meta_parser"
completion_parser_kwargs:
parsers_to_kwargs:
json_parser:
annotation_key: "ordered_models"
ranking_parser: {}
batch_size: 1
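
In Python terms, the updated config corresponds roughly to the call below; the completion shown is an illustrative stand-in for the raw function-call JSON returned by the OpenAI helper above:

from alpaca_eval.completion_parsers import pipeline_meta_parser

# Illustrative raw function-call arguments produced by the annotator model.
completion = '{"ordered_models": [{"model": "model_1", "rank": 1}, {"model": "model_2", "rank": 2}]}'

# Equivalent of completion_parser_kwargs in the YAML above.
preference = pipeline_meta_parser(
    completion,
    parsers_to_kwargs={"json_parser": {"annotation_key": "ordered_models"}, "ranking_parser": {}},
)
print(preference)  # expected: [1], i.e. model_1 is ranked first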
27 changes: 25 additions & 2 deletions tests/integration_tests/test_example_integration.py
@@ -2,8 +2,6 @@

import pytest

-from alpaca_eval import main


@pytest.mark.slow
def test_cli_evaluate_example():
@@ -16,6 +14,8 @@ def test_cli_evaluate_example():
            "3",
            "--annotators_config",
            "claude",
            "--is_avoid_reannotations",
            "False",
        ],
        capture_output=True,
        text=True,
@@ -24,3 +24,26 @@
    expected_output = " ".join("example 33.33 33.33 3".split())

    assert expected_output in normalized_output


@pytest.mark.slow
def test_openai_fn_evaluate_example():
    result = subprocess.run(
        [
            "alpaca_eval",
            "--model_outputs",
            "example/outputs.json",
            "--max_instances",
            "1",
            "--annotators_config",
            "alpaca_eval_gpt4_fn",
            "--is_avoid_reannotations",
            "False",
        ],
        capture_output=True,
        text=True,
    )
    normalized_output = " ".join(result.stdout.split())
    expected_output = " ".join("example 0.00 0.00 2".split())

    assert expected_output in normalized_output