Added the adapter specs for MC joint CoT and for the GPQA run specs
liamjxu committed Oct 24, 2024
1 parent 5b8ecfd commit f0bf5c9
Showing 3 changed files with 120 additions and 13 deletions.
7 changes: 7 additions & 0 deletions src/helm/benchmark/adaptation/adapter_spec.py
@@ -8,6 +8,7 @@
ADAPT_GENERATION: str = "generation"
ADAPT_LANGUAGE_MODELING: str = "language_modeling"
ADAPT_MULTIPLE_CHOICE_JOINT: str = "multiple_choice_joint"
ADAPT_MULTIPLE_CHOICE_JOINT_CHAIN_OF_THOUGHT: str = "multiple_choice_joint_chain_of_thought"
ADAPT_MULTIPLE_CHOICE_SEPARATE_ORIGINAL: str = "multiple_choice_separate_original"
ADAPT_MULTIPLE_CHOICE_SEPARATE_CALIBRATED: str = "multiple_choice_separate_calibrated"
ADAPT_RANKING_BINARY: str = "ranking_binary"
@@ -63,6 +64,12 @@ class AdapterSpec:
reference_suffix: str = "\n"
"""The string that is included after each reference (for multiple-choice questions)."""

chain_of_thought_prefix: str = ""
"""The string that is included before each chain of thought. (e.g., 'Let\'s think step by step')"""

chain_of_thought_suffix: str = "\n"
"""The string that is included after each chain of thought. (e.g., 'The correct answer is')"""

output_prefix: str = "Output: "
"""The string that is included before the correct answer/predicted output (e.g., 'Answer:')."""

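The two new fields follow the same prefix/suffix pattern as the existing reference and output affixes. A minimal sketch of how they might be set on an AdapterSpec (the values below are illustrative assumptions, not taken from this commit):

from helm.benchmark.adaptation.adapter_spec import (
    ADAPT_MULTIPLE_CHOICE_JOINT_CHAIN_OF_THOUGHT,
    AdapterSpec,
)

# Hypothetical spec for illustration; every value here is an assumption.
example_spec = AdapterSpec(
    method=ADAPT_MULTIPLE_CHOICE_JOINT_CHAIN_OF_THOUGHT,
    chain_of_thought_prefix="Let's think step by step: ",  # emitted before each worked rationale
    chain_of_thought_suffix="The correct answer is ",  # emitted after the rationale, before the answer
)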
73 changes: 69 additions & 4 deletions src/helm/benchmark/adaptation/common_adapter_specs.py
@@ -4,6 +4,7 @@
ADAPT_GENERATION,
ADAPT_LANGUAGE_MODELING,
ADAPT_MULTIPLE_CHOICE_JOINT,
ADAPT_MULTIPLE_CHOICE_JOINT_CHAIN_OF_THOUGHT,
ADAPT_MULTIPLE_CHOICE_SEPARATE_CALIBRATED,
ADAPT_MULTIPLE_CHOICE_SEPARATE_ORIGINAL,
ADAPT_RANKING_BINARY,
@@ -43,13 +44,66 @@ def get_multiple_choice_joint_adapter_spec(
[output_noun]:
"""

input_prefix = kwargs.pop("input_prefix", f"{input_noun}: " if input_noun is not None else "")
input_suffix = kwargs.pop("input_suffix", "\n" if input_noun is not None else "")
output_prefix = kwargs.pop("output_prefix", f"{output_noun}: ")
output_suffix = kwargs.pop("output_suffix", "\n")

return AdapterSpec(
method=ADAPT_MULTIPLE_CHOICE_JOINT,
instructions=format_instructions(instructions),
input_prefix=f"{input_noun}: " if input_noun is not None else "",
input_suffix="\n" if input_noun is not None else "",
output_prefix=f"{output_noun}: ",
output_suffix="\n",
input_prefix=input_prefix,
input_suffix=input_suffix,
output_prefix=output_prefix,
output_suffix=output_suffix,
max_train_instances=max_train_instances,
num_outputs=num_outputs,
max_tokens=max_tokens,
temperature=0.0,
stop_sequences=["\n"],
sample_train=sample_train,
**kwargs,
)
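Routing these four affixes through kwargs.pop keeps the old noun-derived defaults while letting callers override them; previously, passing input_suffix or output_prefix alongside **kwargs would have collided with the hard-coded arguments. A hedged sketch of such a call (the override values are made up for illustration):

spec = get_multiple_choice_joint_adapter_spec(
    instructions="Answer the following questions.",
    input_noun="Question",
    output_noun="Answer",
    input_suffix="\nChoices:\n",  # overrides the default "\n" derived from input_noun
    output_prefix="The answer is ",  # overrides the default "Answer: "
)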


def get_multiple_choice_joint_chain_of_thought_adapter_spec(
instructions: str,
input_noun: Optional[str],
output_noun: str,
num_outputs: int = 5,
max_train_instances: int = 5,
max_tokens: int = 5,
sample_train: bool = True,
**kwargs,
) -> AdapterSpec:
"""
[instructions]
[input_noun]: [input]
[reference_1]
...
[reference_k]
[output_noun]: [output]
[input_noun]: [input]
[reference_1]
...
[reference_k]
[output_noun]:
"""

input_prefix = kwargs.pop("input_prefix", f"{input_noun}: " if input_noun is not None else "")
input_suffix = kwargs.pop("input_suffix", "\n" if input_noun is not None else "")
output_prefix = kwargs.pop("output_prefix", f"{output_noun}: ")
output_suffix = kwargs.pop("output_suffix", "\n")

return AdapterSpec(
method=ADAPT_MULTIPLE_CHOICE_JOINT_CHAIN_OF_THOUGHT,
instructions=format_instructions(instructions),
input_prefix=input_prefix,
input_suffix=input_suffix,
output_prefix=output_prefix,
output_suffix=output_suffix,
max_train_instances=max_train_instances,
num_outputs=num_outputs,
max_tokens=max_tokens,
@@ -109,6 +163,17 @@ def get_multiple_choice_adapter_spec(
sample_train=sample_train,
**kwargs,
)
elif method == ADAPT_MULTIPLE_CHOICE_JOINT_CHAIN_OF_THOUGHT:
return get_multiple_choice_joint_chain_of_thought_adapter_spec(
instructions,
input_noun,
output_noun,
max_train_instances=max_train_instances,
num_outputs=num_outputs,
max_tokens=max_tokens,
sample_train=sample_train,
**kwargs,
)
elif method in {ADAPT_MULTIPLE_CHOICE_SEPARATE_ORIGINAL, ADAPT_MULTIPLE_CHOICE_SEPARATE_CALIBRATED}:
return get_multiple_choice_separate_adapter_spec(method, empty_input)
else:
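With the new branch above, the shared get_multiple_choice_adapter_spec entry point can also produce the chain-of-thought joint spec. A rough sketch of such a call (instructions and affix values are illustrative assumptions):

from helm.benchmark.adaptation.adapter_spec import ADAPT_MULTIPLE_CHOICE_JOINT_CHAIN_OF_THOUGHT
from helm.benchmark.adaptation.common_adapter_specs import get_multiple_choice_adapter_spec

adapter_spec = get_multiple_choice_adapter_spec(
    method=ADAPT_MULTIPLE_CHOICE_JOINT_CHAIN_OF_THOUGHT,
    instructions="Answer the questions, reasoning step by step before the final answer.",
    input_noun="Question",
    output_noun="Answer",
    chain_of_thought_prefix="Let's think step by step: ",  # forwarded via **kwargs into AdapterSpec
    chain_of_thought_suffix="The correct answer is ",
)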
53 changes: 44 additions & 9 deletions src/helm/benchmark/run_specs/lite_run_specs.py
@@ -5,6 +5,7 @@
from helm.benchmark.adaptation.adapter_spec import (
ADAPT_GENERATION,
ADAPT_MULTIPLE_CHOICE_JOINT,
ADAPT_MULTIPLE_CHOICE_JOINT_CHAIN_OF_THOUGHT,
AdapterSpec,
)
from helm.benchmark.adaptation.common_adapter_specs import (
@@ -308,23 +309,57 @@ def get_wmt_14_spec(language_pair: str, max_train_instances: int = 1) -> RunSpec


@run_spec_function("gpqa")
def get_gpqa_spec(subset: str, method: str = ADAPT_MULTIPLE_CHOICE_JOINT) -> RunSpec:
def get_gpqa_spec(
subset: str,
use_chain_of_thought: str = "False",
) -> RunSpec:
# Convert to bools and remove the str versions
use_chain_of_thought_bool: bool = use_chain_of_thought == "True"
del use_chain_of_thought

scenario_spec = ScenarioSpec(
class_name="helm.benchmark.scenarios.gpqa_scenario.GPQAScenario", args={"subset": subset}
)

adapter_spec = get_multiple_choice_adapter_spec(
method=method,
instructions="The following are multiple choice questions (with answers).",
input_noun="Question",
output_noun="Answer",
)
if use_chain_of_thought_bool:
adapter_spec = get_multiple_choice_adapter_spec(
method=ADAPT_MULTIPLE_CHOICE_JOINT_CHAIN_OF_THOUGHT,
instructions=(
"Here are some example questions from experts. "
"An explanation is given before the final answer. "
"Answer the final question yourself, giving your reasoning beforehand.",
)
input_noun="Question",
input_suffix="\nChoices: \n",
reference_prefix="(A) ",
chain_of_thought_prefix="Let's think step by step: ",
chain_of_thought_suffix="The correct answer is ",
output_noun="", # will be overwritten with output_prefix
output_prefix="",
global_suffix=(
"Give step by step reasoning before you answer, and when you’re ready to answer, "
'please use the format "The correct answer is (insert answer here)":'
),
)
else:
adapter_spec = get_multiple_choice_adapter_spec(
method=ADAPT_MULTIPLE_CHOICE_JOINT,
instructions=(
"Here are some example questions from experts. "
"An explanation is given before the final answer. "
"Answer the final question yourself, giving your reasoning beforehand.",
)
input_noun="Question",
input_suffix="\nChoices: \n",
reference_prefix="(A) ",
output_noun="", # will be overwritten with output_prefix
output_prefix="The correct answer is ",
)

return RunSpec(
name=f"gpqa:subset={subset},method={method}",
name=f"gpqa:subset={subset},use_chain_of_thought={use_chain_of_thought_bool}",
scenario_spec=scenario_spec,
adapter_spec=adapter_spec,
metric_specs=get_exact_match_metric_specs(),
metric_specs=get_exact_match_metric_specs(), # TODO: update this after cot metric is ready
groups=["gpqa"],
)
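For reference, a hedged example of building the spec directly; "gpqa_main" is assumed to be a valid GPQAScenario subset name, and the flag is passed as a string because run-entry arguments arrive as text (which is why the function converts it to a bool):

run_spec = get_gpqa_spec(subset="gpqa_main", use_chain_of_thought="True")
print(run_spec.name)  # gpqa:subset=gpqa_main,use_chain_of_thought=True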
