Skip to content

Commit

Permalink
mcscript (#1152)
Browse files Browse the repository at this point in the history
* mcscript

* black

* add mcscript to documentation

* removed task property method

Co-authored-by: jeswan <57466294+jeswan@users.noreply.github.com>
  • Loading branch information
wh629 and jeswan authored Oct 22, 2020
1 parent 370e9c1 commit d1b14c1
Show file tree
Hide file tree
Showing 4 changed files with 74 additions and 0 deletions.
1 change: 1 addition & 0 deletions guides/tasks/supported_tasks.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
| EP-DPR | dpr | ✅ | | dpr | Edge-Probing |
| GLUE Diagnostic | glue_diagnostics | ✅ | ✅ | glue_diagnostics | GLUE |
| HellaSwag | hellaswag | ✅ | ✅ | hellaswag | |
| [MCScript2.0](https://arxiv.org/pdf/1905.09531.pdf) | mcscript | ✅ | | mcscript | [Data](https://my.hidrive.com/share/wdnind8pp5#$/) |
| MLM | * | ✅ | * | mlm_simple | See task-specific notes. |
| MLQA | `mlqa_{lang1}_{lang2}` | ✅ | ✅ | mlqa | XTREME, multi-lang |
| MNLI | mnli | ✅ | ✅ | mnli | GLUE, MNLI-matched |
Expand Down
1 change: 1 addition & 0 deletions jiant/tasks/evaluate/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -929,6 +929,7 @@ def get_evaluation_scheme_for_task(task) -> BaseEvaluationScheme:
tasks.WnliTask,
tasks.WSCTask,
tasks.XnliTask,
tasks.MCScriptTask,
tasks.ArctTask,
),
):
Expand Down
70 changes: 70 additions & 0 deletions jiant/tasks/lib/mcscript.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
from dataclasses import dataclass

from jiant.tasks.lib.templates.shared import labels_to_bimap
from jiant.tasks.lib.templates import multiple_choice as mc_template
from jiant.utils.python.io import read_json_lines


@dataclass
class Example(mc_template.Example):
    """MCScript example; adds nothing to the multiple-choice template ``Example``."""


@dataclass
class TokenizedExample(mc_template.TokenizedExample):
    """MCScript tokenized example; adds nothing to the template ``TokenizedExample``."""


@dataclass
class DataRow(mc_template.DataRow):
    """MCScript data row; adds nothing to the multiple-choice template ``DataRow``."""


@dataclass
class Batch(mc_template.Batch):
    """MCScript batch; adds nothing to the multiple-choice template ``Batch``."""


class MCScriptTask(mc_template.AbstractMultipleChoiceTask):
    """Two-way multiple-choice task built from MCScript JSON-lines files.

    Every question attached to a passage becomes one example: the passage is
    the prompt and each choice is the question text followed by one candidate
    answer.
    """

    Example = Example
    # Bind the tokenized class (previously this pointed at ``Example``).
    TokenizedExample = TokenizedExample
    DataRow = DataRow
    Batch = Batch

    CHOICE_KEYS = [0, 1]
    CHOICE_TO_ID, ID_TO_CHOICE = labels_to_bimap(CHOICE_KEYS)
    NUM_CHOICES = len(CHOICE_KEYS)

    def get_train_examples(self):
        """Return the examples read from ``self.train_path``."""
        return self._create_examples(lines=read_json_lines(self.train_path), set_type="train")

    def get_val_examples(self):
        """Return the examples read from ``self.val_path``."""
        return self._create_examples(lines=read_json_lines(self.val_path), set_type="val")

    def get_test_examples(self):
        """Return the examples read from ``self.test_path`` (labels are placeholders)."""
        return self._create_examples(lines=read_json_lines(self.test_path), set_type="test")

    @classmethod
    def _create_examples(cls, lines, set_type):
        """Build one ``Example`` per (passage, question) pair.

        Args:
            lines: iterable of dicts. Each must provide ``"idx"`` and a
                ``"passage"`` dict holding ``"text"`` and ``"questions"``;
                every question provides ``"question"``, ``"idx"`` and
                ``"answers"``, and every answer provides ``"text"`` and
                ``"label"``.
            set_type: split name used as the guid prefix. The value ``"test"``
                switches labels to a fixed placeholder.

        Returns:
            A list of ``Example`` objects in input order.
        """
        is_test = set_type == "test"

        def build(passage_text, passage_id, question_dict):
            question = question_dict["question"]
            answer_dicts = question_dict["answers"]
            if is_test:
                # Gold labels are not read for the test split; use a fixed
                # placeholder taken from CHOICE_KEYS.
                label = cls.CHOICE_KEYS[-1]
            else:
                # 1 when the second answer is marked "True", otherwise 0.
                # NOTE(review): assumes exactly two answers with exactly one
                # correct -- confirm against the data files.
                # int() keeps the label type identical to the CHOICE_KEYS
                # entries instead of a bare bool.
                label = int(answer_dicts[1]["label"] == "True")
            return Example(
                guid=f"{set_type}-{passage_id}-{question_dict['idx']}",
                prompt=passage_text,
                choice_list=[question + " " + answer["text"] for answer in answer_dicts],
                label=label,
            )

        examples = []
        for line in lines:
            # Read the passage fields once per line, before touching questions.
            passage_text = line["passage"]["text"]
            passage_id = line["idx"]
            examples.extend(
                build(passage_text, passage_id, question_dict)
                for question_dict in line["passage"]["questions"]
            )
        return examples
2 changes: 2 additions & 0 deletions jiant/tasks/retrieval.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@
from jiant.tasks.lib.wsc import WSCTask
from jiant.tasks.lib.xnli import XnliTask
from jiant.tasks.lib.xquad import XquadTask
from jiant.tasks.lib.mcscript import MCScriptTask
from jiant.tasks.lib.arct import ArctTask

from jiant.tasks.core import Task
Expand Down Expand Up @@ -130,6 +131,7 @@
"wsc": WSCTask,
"xnli": XnliTask,
"xquad": XquadTask,
"mcscript": MCScriptTask,
"arct": ArctTask,
}

Expand Down

0 comments on commit d1b14c1

Please sign in to comment.