Skip to content
This repository has been archived by the owner on Nov 3, 2023. It is now read-only.

Add glue hf datasets #3624

Merged
merged 7 commits into from
May 1, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
128 changes: 127 additions & 1 deletion parlai/tasks/glue/agents.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,22 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

from parlai.core.teachers import MultiTaskTeacher
from parlai.tasks.huggingface.agents import AbstractHuggingFaceTeacher
from copy import deepcopy


class AxTeacher(AbstractHuggingFaceTeacher):
    """
    Teacher for the 'ax' configuration of the Hugging Face 'glue' dataset.

    This is an evaluation-only dataset: only the test split is mapped, while
    train and valid are set to None. Use a model trained on MultiNLI to
    produce predictions for it.
    """

    hf_path = 'glue'
    hf_name = 'ax'
    hf_text_fields = ['premise', 'hypothesis']
    hf_label_field = 'label'
    # ParlAI datatype -> split name on the Hugging Face side (None = no split).
    hf_splits_mapping = {
        'train': None,
        'valid': None,
        'test': 'test',
    }
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For evaluation-only datasets, I changed the mapping of train and valid to None (e.g. hf_splits_mapping = {'train': None, 'valid': None, 'test': 'test'}) and removed those tasks from the tests and from the MultiTaskTeacher, since they would otherwise produce errors. For example, display_data -t glue:ax now works only if you set the datatype to test.



class ColaTeacher(AbstractHuggingFaceTeacher):
Expand All @@ -15,5 +30,116 @@ class ColaTeacher(AbstractHuggingFaceTeacher):
hf_splits_mapping = {'train': 'train', 'valid': 'validation', 'test': 'test'}


class DefaultTeacher(ColaTeacher):
class MnliTeacher(AbstractHuggingFaceTeacher):
    """
    Teacher for the 'mnli' configuration of the Hugging Face 'glue' dataset.

    Declares 'premise' and 'hypothesis' as the text fields and 'label' as the
    label field. Valid and test are mapped to the 'validation_matched' and
    'test_matched' splits respectively.
    """

    hf_path = 'glue'
    hf_name = 'mnli'
    hf_text_fields = ['premise', 'hypothesis']
    hf_label_field = 'label'
    # ParlAI datatype -> split name on the Hugging Face side.
    hf_splits_mapping = {
        'train': 'train',
        'valid': 'validation_matched',
        'test': 'test_matched',
    }
stephenroller marked this conversation as resolved.
Show resolved Hide resolved


class MnliMatchedTeacher(AbstractHuggingFaceTeacher):
    """
    Teacher for the 'mnli_matched' configuration of the 'glue' dataset.

    This is an evaluation dataset: no train split is mapped, only valid and
    test.
    """

    hf_path = 'glue'
    hf_name = 'mnli_matched'
    hf_text_fields = ['premise', 'hypothesis']
    hf_label_field = 'label'
    # ParlAI datatype -> split name on the Hugging Face side (None = no split).
    hf_splits_mapping = {
        'train': None,
        'valid': 'validation',
        'test': 'test',
    }


class MnliMismatchedTeacher(AbstractHuggingFaceTeacher):
    """
    Teacher for the 'mnli_mismatched' configuration of the 'glue' dataset.

    This is an evaluation dataset: no train split is mapped, only valid and
    test.
    """

    hf_path = 'glue'
    hf_name = 'mnli_mismatched'
    hf_text_fields = ['premise', 'hypothesis']
    hf_label_field = 'label'
    # ParlAI datatype -> split name on the Hugging Face side (None = no split).
    hf_splits_mapping = {
        'train': None,
        'valid': 'validation',
        'test': 'test',
    }
stephenroller marked this conversation as resolved.
Show resolved Hide resolved


class MrpcTeacher(AbstractHuggingFaceTeacher):
    """
    Teacher for the 'mrpc' configuration of the Hugging Face 'glue' dataset.

    Declares 'sentence1' and 'sentence2' as the text fields and 'label' as the
    label field.
    """

    hf_path = 'glue'
    hf_name = 'mrpc'
    hf_text_fields = ['sentence1', 'sentence2']
    hf_label_field = 'label'
    # ParlAI datatype -> split name on the Hugging Face side.
    hf_splits_mapping = {
        'train': 'train',
        'valid': 'validation',
        'test': 'test',
    }


class QnliTeacher(AbstractHuggingFaceTeacher):
    """
    Teacher for the 'qnli' configuration of the Hugging Face 'glue' dataset.

    Declares 'sentence' and 'question' (in that order) as the text fields and
    'label' as the label field.
    """

    hf_path = 'glue'
    hf_name = 'qnli'
    hf_text_fields = ['sentence', 'question']
    hf_label_field = 'label'
    # ParlAI datatype -> split name on the Hugging Face side.
    hf_splits_mapping = {
        'train': 'train',
        'valid': 'validation',
        'test': 'test',
    }


class QqpTeacher(AbstractHuggingFaceTeacher):
    """
    Teacher for the 'qqp' configuration of the Hugging Face 'glue' dataset.

    Declares 'question1' and 'question2' as the text fields and 'label' as the
    label field.
    """

    hf_path = 'glue'
    hf_name = 'qqp'
    hf_text_fields = ['question1', 'question2']
    hf_label_field = 'label'
    # ParlAI datatype -> split name on the Hugging Face side.
    hf_splits_mapping = {
        'train': 'train',
        'valid': 'validation',
        'test': 'test',
    }


class RteTeacher(AbstractHuggingFaceTeacher):
    """
    Teacher for the 'rte' configuration of the Hugging Face 'glue' dataset.

    Declares 'sentence1' and 'sentence2' as the text fields and 'label' as the
    label field.
    """

    hf_path = 'glue'
    hf_name = 'rte'
    hf_text_fields = ['sentence1', 'sentence2']
    hf_label_field = 'label'
    # ParlAI datatype -> split name on the Hugging Face side.
    hf_splits_mapping = {
        'train': 'train',
        'valid': 'validation',
        'test': 'test',
    }


class Sst2Teacher(AbstractHuggingFaceTeacher):
    """
    Teacher for the 'sst2' configuration of the Hugging Face 'glue' dataset.

    Declares a single text field, 'sentence', and 'label' as the label field.
    """

    hf_path = 'glue'
    hf_name = 'sst2'
    hf_text_fields = ['sentence']
    hf_label_field = 'label'
    # ParlAI datatype -> split name on the Hugging Face side.
    hf_splits_mapping = {
        'train': 'train',
        'valid': 'validation',
        'test': 'test',
    }


class StsbTeacher(AbstractHuggingFaceTeacher):
    """
    Teacher for the 'stsb' configuration of the Hugging Face 'glue' dataset.

    Declares 'sentence1' and 'sentence2' as the text fields and 'label' as the
    label field.
    """

    hf_path = 'glue'
    hf_name = 'stsb'
    hf_text_fields = ['sentence1', 'sentence2']
    hf_label_field = 'label'
    # ParlAI datatype -> split name on the Hugging Face side.
    hf_splits_mapping = {
        'train': 'train',
        'valid': 'validation',
        'test': 'test',
    }


class WnliTeacher(AbstractHuggingFaceTeacher):
    """
    Teacher for the 'wnli' configuration of the Hugging Face 'glue' dataset.

    Declares 'sentence1' and 'sentence2' as the text fields and 'label' as the
    label field.
    """

    hf_path = 'glue'
    hf_name = 'wnli'
    hf_text_fields = ['sentence1', 'sentence2']
    hf_label_field = 'label'
    # ParlAI datatype -> split name on the Hugging Face side.
    hf_splits_mapping = {
        'train': 'train',
        'valid': 'validation',
        'test': 'test',
    }


class GlueTeacher(MultiTaskTeacher):
    """
    Multi-task teacher combining the GLUE subsets listed in ``__init__``.

    The subsets whose train split is mapped to None (ax, mnli_matched,
    mnli_mismatched) are not part of the list.
    """

    def __init__(self, opt, shared=None):
        """
        Point a copy of ``opt`` at the combined GLUE task string and delegate.

        :param opt: options dict; it is deep-copied, so the caller's object is
            not modified.
        :param shared: forwarded unchanged to ``MultiTaskTeacher.__init__``.
        """
        subset_names = (
            'cola',
            'mnli',
            'mrpc',
            'qnli',
            'qqp',
            'rte',
            'sst2',
            'stsb',
            'wnli',
        )
        task_opt = deepcopy(opt)
        # Each subset becomes 'glue:<name>'; entries are joined with ', '.
        task_opt['task'] = ', '.join('glue:' + name for name in subset_names)
        super().__init__(task_opt, shared)


class DefaultTeacher(GlueTeacher):
    """
    Default teacher for this task module; identical to ``GlueTeacher``.
    """
47 changes: 47 additions & 0 deletions parlai/tasks/glue/test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#!/usr/bin/env python3

# Copyright (c) Facebook, Inc. and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

from parlai.utils.testing import AutoTeacherTest


class TestDefaultTeacher(AutoTeacherTest):
    """
    Runs the ``AutoTeacherTest`` checks against the ``glue`` task.
    """

    task = "glue"


class TestColaTeacher(AutoTeacherTest):
    """
    Runs the ``AutoTeacherTest`` checks against the ``glue:cola`` task.
    """

    task = "glue:cola"


class TestMnliTeacher(AutoTeacherTest):
    """
    Runs the ``AutoTeacherTest`` checks against the ``glue:mnli`` task.
    """

    task = "glue:mnli"


class TestMrpcTeacher(AutoTeacherTest):
    """
    Runs the ``AutoTeacherTest`` checks against the ``glue:mrpc`` task.
    """

    task = "glue:mrpc"


class TestQnliTeacher(AutoTeacherTest):
    """
    Runs the ``AutoTeacherTest`` checks against the ``glue:qnli`` task.
    """

    task = "glue:qnli"


class TestQqpTeacher(AutoTeacherTest):
    """
    Runs the ``AutoTeacherTest`` checks against the ``glue:qqp`` task.
    """

    task = "glue:qqp"


class TestRteTeacher(AutoTeacherTest):
    """
    Runs the ``AutoTeacherTest`` checks against the ``glue:rte`` task.
    """

    task = "glue:rte"


class TestSst2Teacher(AutoTeacherTest):
    """
    Runs the ``AutoTeacherTest`` checks against the ``glue:sst2`` task.
    """

    task = "glue:sst2"


class TestStsbTeacher(AutoTeacherTest):
    """
    Runs the ``AutoTeacherTest`` checks against the ``glue:stsb`` task.
    """

    task = "glue:stsb"


class TestWnliTeacher(AutoTeacherTest):
    """
    Runs the ``AutoTeacherTest`` checks against the ``glue:wnli`` task.
    """

    task = "glue:wnli"
88 changes: 88 additions & 0 deletions parlai/tasks/glue/test/glue_ax_test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
acts:
- - episode_done: true
eval_labels:
- contradiction
hypothesis: The cat did not sit on the mat.
id: huggingface
label_candidates:
- entailment
- neutral
- contradiction
premise: The cat sat on the mat.
text: 'The cat sat on the mat.

The cat did not sit on the mat.'
- - episode_done: true
eval_labels:
- contradiction
hypothesis: The cat sat on the mat.
id: huggingface
label_candidates:
- entailment
- neutral
- contradiction
premise: The cat did not sit on the mat.
text: 'The cat did not sit on the mat.

The cat sat on the mat.'
- - episode_done: true
eval_labels:
- contradiction
hypothesis: When you've got snow, it's really hard to learn a snow sport so we
looked at all the different ways I could mimic being on snow without actually
being on snow.
id: huggingface
label_candidates:
- entailment
- neutral
- contradiction
premise: When you've got no snow, it's really hard to learn a snow sport so we
looked at all the different ways I could mimic being on snow without actually
being on snow.
text: 'When you''ve got no snow, it''s really hard to learn a snow sport so we
looked at all the different ways I could mimic being on snow without actually
being on snow.

When you''ve got snow, it''s really hard to learn a snow sport so we looked
at all the different ways I could mimic being on snow without actually being
on snow.'
- - episode_done: true
eval_labels:
- contradiction
hypothesis: When you've got no snow, it's really hard to learn a snow sport so
we looked at all the different ways I could mimic being on snow without actually
being on snow.
id: huggingface
label_candidates:
- entailment
- neutral
- contradiction
premise: When you've got snow, it's really hard to learn a snow sport so we looked
at all the different ways I could mimic being on snow without actually being
on snow.
text: 'When you''ve got snow, it''s really hard to learn a snow sport so we looked
at all the different ways I could mimic being on snow without actually being
on snow.

When you''ve got no snow, it''s really hard to learn a snow sport so we looked
at all the different ways I could mimic being on snow without actually being
on snow.'
- - episode_done: true
eval_labels:
- contradiction
hypothesis: Out of the box, Ouya doesn't support media apps such as Twitch.tv
and XBMC media player.
id: huggingface
label_candidates:
- entailment
- neutral
- contradiction
premise: Out of the box, Ouya supports media apps such as Twitch.tv and XBMC media
player.
text: 'Out of the box, Ouya supports media apps such as Twitch.tv and XBMC media
player.

Out of the box, Ouya doesn''t support media apps such as Twitch.tv and XBMC
media player.'
num_episodes: 1104
num_examples: 1104
88 changes: 88 additions & 0 deletions parlai/tasks/glue/test/glue_ax_train.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
acts:
- - episode_done: true
hypothesis: The cat did not sit on the mat.
id: huggingface
label_candidates:
- entailment
- neutral
- contradiction
labels:
- contradiction
premise: The cat sat on the mat.
text: 'The cat sat on the mat.

The cat did not sit on the mat.'
- - episode_done: true
hypothesis: The cat sat on the mat.
id: huggingface
label_candidates:
- entailment
- neutral
- contradiction
labels:
- contradiction
premise: The cat did not sit on the mat.
text: 'The cat did not sit on the mat.

The cat sat on the mat.'
- - episode_done: true
hypothesis: When you've got snow, it's really hard to learn a snow sport so we
looked at all the different ways I could mimic being on snow without actually
being on snow.
id: huggingface
label_candidates:
- entailment
- neutral
- contradiction
labels:
- contradiction
premise: When you've got no snow, it's really hard to learn a snow sport so we
looked at all the different ways I could mimic being on snow without actually
being on snow.
text: 'When you''ve got no snow, it''s really hard to learn a snow sport so we
looked at all the different ways I could mimic being on snow without actually
being on snow.

When you''ve got snow, it''s really hard to learn a snow sport so we looked
at all the different ways I could mimic being on snow without actually being
on snow.'
- - episode_done: true
hypothesis: When you've got no snow, it's really hard to learn a snow sport so
we looked at all the different ways I could mimic being on snow without actually
being on snow.
id: huggingface
label_candidates:
- entailment
- neutral
- contradiction
labels:
- contradiction
premise: When you've got snow, it's really hard to learn a snow sport so we looked
at all the different ways I could mimic being on snow without actually being
on snow.
text: 'When you''ve got snow, it''s really hard to learn a snow sport so we looked
at all the different ways I could mimic being on snow without actually being
on snow.

When you''ve got no snow, it''s really hard to learn a snow sport so we looked
at all the different ways I could mimic being on snow without actually being
on snow.'
- - episode_done: true
hypothesis: Out of the box, Ouya doesn't support media apps such as Twitch.tv
and XBMC media player.
id: huggingface
label_candidates:
- entailment
- neutral
- contradiction
labels:
- contradiction
premise: Out of the box, Ouya supports media apps such as Twitch.tv and XBMC media
player.
text: 'Out of the box, Ouya supports media apps such as Twitch.tv and XBMC media
player.

Out of the box, Ouya doesn''t support media apps such as Twitch.tv and XBMC
media player.'
num_episodes: 1104
num_examples: 1104
Loading