Skip to content

Commit

Permalink
Adding arc_easy, arc_challenge, mutual, mutual_plus (#1206)
Browse files Browse the repository at this point in the history
  • Loading branch information
yzpang authored Oct 21, 2020
1 parent 0b3dff5 commit da7550d
Show file tree
Hide file tree
Showing 8 changed files with 302 additions and 0 deletions.
12 changes: 12 additions & 0 deletions jiant/scripts/download_data/datasets/hf_datasets_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,18 @@
"phase_list": ["train", "val", "test"],
"jiant_task_name": "adversarial_nli",
},
"arc_easy": {
"path": "ai2_arc",
"name": "ARC-Easy",
"phase_list": ["train", "val", "test"],
"jiant_task_name": "arc_easy",
},
"arc_challenge": {
"path": "ai2_arc",
"name": "ARC-Challenge",
"phase_list": ["train", "val", "test"],
"jiant_task_name": "arc_challenge",
},
}

# HF-Datasets uses "validation", we use "val"
Expand Down
2 changes: 2 additions & 0 deletions jiant/tasks/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@
"adversarial_nli_r1",
"adversarial_nli_r2",
"adversarial_nli_r3",
"arc_easy",
"arc_challenge",
}

XTREME_TASKS = {
Expand Down
4 changes: 4 additions & 0 deletions jiant/tasks/evaluate/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -942,10 +942,14 @@ def get_evaluation_scheme_for_task(task) -> BaseEvaluationScheme:
elif isinstance(
task,
(
tasks.ArcEasyTask,
tasks.ArcChallengeTask,
tasks.CommonsenseQATask,
tasks.CosmosQATask,
tasks.SWAGTask,
tasks.HellaSwagTask,
tasks.MutualTask,
tasks.MutualPlusTask,
tasks.SocialIQATask,
),
):
Expand Down
77 changes: 77 additions & 0 deletions jiant/tasks/lib/arc_challenge.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
from dataclasses import dataclass

from jiant.tasks.lib.templates.shared import labels_to_bimap
from jiant.tasks.lib.templates import multiple_choice as mc_template
from jiant.utils.python.io import read_json_lines


@dataclass
class Example(mc_template.Example):
    """Raw ARC-Challenge example; declares no fields beyond the multiple-choice template's."""

    @property
    def task(self):
        """Return the task class (the class itself, not an instance) for this example."""
        return ArcChallengeTask


@dataclass
class TokenizedExample(mc_template.TokenizedExample):
    """ARC-Challenge tokenized example; adds nothing to the multiple-choice template class."""


@dataclass
class DataRow(mc_template.DataRow):
    """ARC-Challenge data row; adds nothing to the multiple-choice template class."""


@dataclass
class Batch(mc_template.Batch):
    """ARC-Challenge batch; adds nothing to the multiple-choice template class."""


class ArcChallengeTask(mc_template.AbstractMultipleChoiceTask):
    """Multiple-choice task built from ARC-Challenge records stored as JSON lines.

    Every example is padded to ``NUM_CHOICES`` answer options so that all examples
    expose the same number of choices.
    """

    Example = Example
    # Fixed: this attribute previously aliased ``Example``, so it pointed at the raw
    # example class instead of the tokenized one declared in this module.
    TokenizedExample = TokenizedExample
    DataRow = DataRow
    Batch = Batch

    CHOICE_KEYS = ["A", "B", "C", "D", "E"]
    # NOTE(review): presumably label->index and index->label maps; confirm in labels_to_bimap.
    CHOICE_TO_ID, ID_TO_CHOICE = labels_to_bimap(CHOICE_KEYS)
    NUM_CHOICES = len(CHOICE_KEYS)

    def get_train_examples(self):
        """Build the training examples from the records read at ``self.train_path``."""
        return self._create_examples(lines=read_json_lines(self.train_path), set_type="train")

    def get_val_examples(self):
        """Build the validation examples from the records read at ``self.val_path``."""
        return self._create_examples(lines=read_json_lines(self.val_path), set_type="val")

    def get_test_examples(self):
        """Build the test examples from the records read at ``self.test_path``."""
        return self._create_examples(lines=read_json_lines(self.test_path), set_type="test")

    @classmethod
    def _create_examples(cls, lines, set_type):
        """Convert raw records into ``Example`` objects.

        Args:
            lines: iterable of records. Each record must provide ``answerKey`` and a
                ``question`` mapping holding ``stem`` and ``choices`` (a list of
                mappings that each carry a ``text`` entry).
            set_type: split name; used only as the prefix of each example's guid.

        Returns:
            A list with one ``Example`` per record, in input order.

        Raises:
            AssertionError: if a record lists more than ``NUM_CHOICES`` choices.
        """
        # Answer keys given as digits are normalised to letters; any other key is kept as-is.
        digit_to_letter = {"1": "A", "2": "B", "3": "C", "4": "D", "5": "E"}
        examples = []
        for i, line in enumerate(lines):
            answer_key = line["answerKey"]
            label = digit_to_letter.get(answer_key, answer_key)
            choice_list = [choice["text"] for choice in line["question"]["choices"]]
            if len(choice_list) > cls.NUM_CHOICES:
                # Raised explicitly instead of via ``assert`` so the check survives
                # ``python -O``; the exception type is kept for backward compatibility.
                raise AssertionError(
                    f"{set_type}-{i}: expected at most {cls.NUM_CHOICES} choices, "
                    f"got {len(choice_list)}"
                )
            # Pad questions with fewer options using "." placeholders.
            choice_list.extend(["."] * (cls.NUM_CHOICES - len(choice_list)))

            examples.append(
                Example(
                    guid=f"{set_type}-{i}",
                    prompt=line["question"]["stem"],
                    choice_list=choice_list,
                    label=label,
                )
            )
        return examples
77 changes: 77 additions & 0 deletions jiant/tasks/lib/arc_easy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
from dataclasses import dataclass

from jiant.tasks.lib.templates.shared import labels_to_bimap
from jiant.tasks.lib.templates import multiple_choice as mc_template
from jiant.utils.python.io import read_json_lines


@dataclass
class Example(mc_template.Example):
    """Raw ARC-Easy example; declares no fields beyond the multiple-choice template's."""

    @property
    def task(self):
        """Return the task class (the class itself, not an instance) for this example."""
        return ArcEasyTask


@dataclass
class TokenizedExample(mc_template.TokenizedExample):
    """ARC-Easy tokenized example; adds nothing to the multiple-choice template class."""


@dataclass
class DataRow(mc_template.DataRow):
    """ARC-Easy data row; adds nothing to the multiple-choice template class."""


@dataclass
class Batch(mc_template.Batch):
    """ARC-Easy batch; adds nothing to the multiple-choice template class."""


class ArcEasyTask(mc_template.AbstractMultipleChoiceTask):
    """Multiple-choice task built from ARC-Easy records stored as JSON lines.

    Every example is padded to ``NUM_CHOICES`` answer options so that all examples
    expose the same number of choices.
    """

    Example = Example
    # Fixed: this attribute previously aliased ``Example``, so it pointed at the raw
    # example class instead of the tokenized one declared in this module.
    TokenizedExample = TokenizedExample
    DataRow = DataRow
    Batch = Batch

    CHOICE_KEYS = ["A", "B", "C", "D", "E"]
    # NOTE(review): presumably label->index and index->label maps; confirm in labels_to_bimap.
    CHOICE_TO_ID, ID_TO_CHOICE = labels_to_bimap(CHOICE_KEYS)
    NUM_CHOICES = len(CHOICE_KEYS)

    def get_train_examples(self):
        """Build the training examples from the records read at ``self.train_path``."""
        return self._create_examples(lines=read_json_lines(self.train_path), set_type="train")

    def get_val_examples(self):
        """Build the validation examples from the records read at ``self.val_path``."""
        return self._create_examples(lines=read_json_lines(self.val_path), set_type="val")

    def get_test_examples(self):
        """Build the test examples from the records read at ``self.test_path``."""
        return self._create_examples(lines=read_json_lines(self.test_path), set_type="test")

    @classmethod
    def _create_examples(cls, lines, set_type):
        """Convert raw records into ``Example`` objects.

        Args:
            lines: iterable of records. Each record must provide ``answerKey`` and a
                ``question`` mapping holding ``stem`` and ``choices`` (a list of
                mappings that each carry a ``text`` entry).
            set_type: split name; used only as the prefix of each example's guid.

        Returns:
            A list with one ``Example`` per record, in input order.

        Raises:
            AssertionError: if a record lists more than ``NUM_CHOICES`` choices.
        """
        # Answer keys given as digits are normalised to letters; any other key is kept as-is.
        digit_to_letter = {"1": "A", "2": "B", "3": "C", "4": "D", "5": "E"}
        examples = []
        for i, line in enumerate(lines):
            answer_key = line["answerKey"]
            label = digit_to_letter.get(answer_key, answer_key)
            choice_list = [choice["text"] for choice in line["question"]["choices"]]
            if len(choice_list) > cls.NUM_CHOICES:
                # Raised explicitly instead of via ``assert`` so the check survives
                # ``python -O``; the exception type is kept for backward compatibility.
                raise AssertionError(
                    f"{set_type}-{i}: expected at most {cls.NUM_CHOICES} choices, "
                    f"got {len(choice_list)}"
                )
            # Pad questions with fewer options using "." placeholders.
            choice_list.extend(["."] * (cls.NUM_CHOICES - len(choice_list)))

            examples.append(
                Example(
                    guid=f"{set_type}-{i}",
                    prompt=line["question"]["stem"],
                    choice_list=choice_list,
                    label=label,
                )
            )
        return examples
61 changes: 61 additions & 0 deletions jiant/tasks/lib/mutual.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
from dataclasses import dataclass

from jiant.tasks.lib.templates.shared import labels_to_bimap
from jiant.tasks.lib.templates import multiple_choice as mc_template
from jiant.utils.python.io import read_json_lines


@dataclass
class Example(mc_template.Example):
    """Raw MuTual example; declares no fields beyond the multiple-choice template's."""

    @property
    def task(self):
        """Return the task class (the class itself, not an instance) for this example."""
        return MutualTask


@dataclass
class TokenizedExample(mc_template.TokenizedExample):
    """MuTual tokenized example; adds nothing to the multiple-choice template class."""


@dataclass
class DataRow(mc_template.DataRow):
    """MuTual data row; adds nothing to the multiple-choice template class."""


@dataclass
class Batch(mc_template.Batch):
    """MuTual batch; adds nothing to the multiple-choice template class."""


class MutualTask(mc_template.AbstractMultipleChoiceTask):
    """Multiple-choice task built from MuTual records stored as JSON lines."""

    Example = Example
    # Fixed: this attribute previously aliased ``Example``, so it pointed at the raw
    # example class instead of the tokenized one declared in this module.
    TokenizedExample = TokenizedExample
    DataRow = DataRow
    Batch = Batch

    CHOICE_KEYS = ["A", "B", "C", "D"]
    # NOTE(review): presumably label->index and index->label maps; confirm in labels_to_bimap.
    CHOICE_TO_ID, ID_TO_CHOICE = labels_to_bimap(CHOICE_KEYS)
    NUM_CHOICES = len(CHOICE_KEYS)

    def get_train_examples(self):
        """Build the training examples from the records read at ``self.train_path``."""
        return self._create_examples(lines=read_json_lines(self.train_path), set_type="train")

    def get_val_examples(self):
        """Build the validation examples from the records read at ``self.val_path``."""
        return self._create_examples(lines=read_json_lines(self.val_path), set_type="val")

    def get_test_examples(self):
        """Build the test examples from the records read at ``self.test_path``."""
        return self._create_examples(lines=read_json_lines(self.test_path), set_type="test")

    @classmethod
    def _create_examples(cls, lines, set_type):
        """Convert raw records into ``Example`` objects.

        Args:
            lines: iterable of records. Each record must provide ``article`` (used as
                the prompt), ``options`` (an iterable of choices) and ``answers``
                (used as the label).
            set_type: split name; used only as the prefix of each example's guid.

        Returns:
            A list with one ``Example`` per record, in input order.
        """
        # NOTE(review): ``answers`` is passed through unchanged; presumably it is one of
        # CHOICE_KEYS -- verify against the data files.
        return [
            Example(
                guid=f"{set_type}-{i}",
                prompt=line["article"],
                # Copy so the example does not share the record's own container.
                choice_list=list(line["options"]),
                label=line["answers"],
            )
            for i, line in enumerate(lines)
        ]
61 changes: 61 additions & 0 deletions jiant/tasks/lib/mutual_plus.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
from dataclasses import dataclass

from jiant.tasks.lib.templates.shared import labels_to_bimap
from jiant.tasks.lib.templates import multiple_choice as mc_template
from jiant.utils.python.io import read_json_lines


@dataclass
class Example(mc_template.Example):
    """Raw MuTual-plus example; declares no fields beyond the multiple-choice template's."""

    @property
    def task(self):
        """Return the task class (the class itself, not an instance) for this example."""
        return MutualPlusTask


@dataclass
class TokenizedExample(mc_template.TokenizedExample):
    """MuTual-plus tokenized example; adds nothing to the multiple-choice template class."""


@dataclass
class DataRow(mc_template.DataRow):
    """MuTual-plus data row; adds nothing to the multiple-choice template class."""


@dataclass
class Batch(mc_template.Batch):
    """MuTual-plus batch; adds nothing to the multiple-choice template class."""


class MutualPlusTask(mc_template.AbstractMultipleChoiceTask):
    """Multiple-choice task built from MuTual-plus records stored as JSON lines."""

    Example = Example
    # Fixed: this attribute previously aliased ``Example``, so it pointed at the raw
    # example class instead of the tokenized one declared in this module.
    TokenizedExample = TokenizedExample
    DataRow = DataRow
    Batch = Batch

    CHOICE_KEYS = ["A", "B", "C", "D"]
    # NOTE(review): presumably label->index and index->label maps; confirm in labels_to_bimap.
    CHOICE_TO_ID, ID_TO_CHOICE = labels_to_bimap(CHOICE_KEYS)
    NUM_CHOICES = len(CHOICE_KEYS)

    def get_train_examples(self):
        """Build the training examples from the records read at ``self.train_path``."""
        return self._create_examples(lines=read_json_lines(self.train_path), set_type="train")

    def get_val_examples(self):
        """Build the validation examples from the records read at ``self.val_path``."""
        return self._create_examples(lines=read_json_lines(self.val_path), set_type="val")

    def get_test_examples(self):
        """Build the test examples from the records read at ``self.test_path``."""
        return self._create_examples(lines=read_json_lines(self.test_path), set_type="test")

    @classmethod
    def _create_examples(cls, lines, set_type):
        """Convert raw records into ``Example`` objects.

        Args:
            lines: iterable of records. Each record must provide ``article`` (used as
                the prompt), ``options`` (an iterable of choices) and ``answers``
                (used as the label).
            set_type: split name; used only as the prefix of each example's guid.

        Returns:
            A list with one ``Example`` per record, in input order.
        """
        # NOTE(review): ``answers`` is passed through unchanged; presumably it is one of
        # CHOICE_KEYS -- verify against the data files.
        return [
            Example(
                guid=f"{set_type}-{i}",
                prompt=line["article"],
                # Copy so the example does not share the record's own container.
                choice_list=list(line["options"]),
                label=line["answers"],
            )
            for i, line in enumerate(lines)
        ]
8 changes: 8 additions & 0 deletions jiant/tasks/retrieval.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
from jiant.tasks.lib.abductive_nli import AbductiveNliTask
from jiant.tasks.lib.acceptability_judgement.definiteness import AcceptabilityDefinitenessTask
from jiant.tasks.lib.adversarial_nli import AdversarialNliTask
from jiant.tasks.lib.arc_easy import ArcEasyTask
from jiant.tasks.lib.arc_challenge import ArcChallengeTask
from jiant.tasks.lib.boolq import BoolQTask
from jiant.tasks.lib.bucc2018 import Bucc2018Task
from jiant.tasks.lib.ccg import CCGTask
Expand All @@ -26,6 +28,8 @@
from jiant.tasks.lib.mnli_mismatched import MnliMismatchedTask
from jiant.tasks.lib.mrpc import MrpcTask
from jiant.tasks.lib.multirc import MultiRCTask
from jiant.tasks.lib.mutual import MutualTask
from jiant.tasks.lib.mutual_plus import MutualPlusTask
from jiant.tasks.lib.edge_probing.ner import NerTask
from jiant.tasks.lib.panx import PanxTask
from jiant.tasks.lib.pawsx import PawsXTask
Expand Down Expand Up @@ -66,6 +70,8 @@

TASK_DICT = {
"abductive_nli": AbductiveNliTask,
"arc_easy": ArcEasyTask,
"arc_challenge": ArcChallengeTask,
"superglue_axg": SuperglueWinogenderDiagnosticsTask,
"acceptability_definiteness": AcceptabilityDefinitenessTask,
"adversarial_nli": AdversarialNliTask,
Expand All @@ -90,6 +96,8 @@
"mnli": MnliTask,
"mnli_mismatched": MnliMismatchedTask,
"multirc": MultiRCTask,
"mutual": MutualTask,
"mutual_plus": MutualPlusTask,
"mrpc": MrpcTask,
"ner": NerTask,
"pawsx": PawsXTask,
Expand Down

0 comments on commit da7550d

Please sign in to comment.