This repository has been archived by the owner on Nov 3, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 2.1k
Wizard of Wikipedia, knowledge source page title. #3845
Merged
Merged
Changes from all commits
Commits
Show all changes
6 commits
Select commit
Hold shift + click to select a range
4781c08
titles teacher
mojtaba-komeili ea4d32e
teacher test
mojtaba-komeili 5308e84
teacher doc string
mojtaba-komeili 3210967
flag for skipping no titles
mojtaba-komeili 27d7e42
addressing PR comments
mojtaba-komeili 2d823cf
regenerated the teacher test yml files
mojtaba-komeili File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -24,6 +24,7 @@ | |
import copy | ||
from parlai.core.teachers import FixedDialogTeacher, MultiTaskTeacher | ||
from parlai.utils.io import PathManager | ||
from parlai.utils import logging | ||
from parlai.utils.misc import warn_once | ||
from .build import build | ||
|
||
|
@@ -757,6 +758,113 @@ def get(self, episode_idx, entry_idx=0): | |
return a | ||
|
||
|
||
class WikiPageTitleTeacher(WizardDialogKnowledgeTeacher):
    """
    Generates the title of Wikipedia page used as source of knowledge.

    The context provided by this teacher (`text`) is the conversation history, with chosen topic removed.
    The label is the title of the Wikipedia page of the passage that wizard selected for crafting
    the next utterance; in other words, the source of knowledge for this utterance.

    Every example is emitted as its own single-turn episode (`episode_done` is
    always True), so the number of episodes equals the number of examples.
    """

    def __init__(self, opt, shared=None):
        """
        Set up the teacher and build (or reuse) the title examples.

        :param opt:
            options dict; it is deep-copied and its `label_type` is forced to
            `'response'` before being handed to the parent teacher.
        :param shared:
            shared state from another instance; when given, the already
            preprocessed `titles_data` is reused instead of being rebuilt.
        """
        self.opt = copy.deepcopy(opt)
        self.opt['label_type'] = 'response'
        super().__init__(self.opt, shared=shared)
        self.id = 'WikiPageTitleTeacher'
        self._conv_history_len = self.opt['conversation_history_length']
        # Only strictly positive lengths or the -1 sentinel are meaningful;
        # anything else (0 or other negatives) falls back to "keep everything".
        if not (self._conv_history_len > 0 or self._conv_history_len == -1):
            logging.warning(
                f'"{self._conv_history_len}" is an invalid value for --conversation-history-length flag.'
                ' Changing it to default of -1 (include the entire message history).'
            )
            self._conv_history_len = -1
        self._skip_no_title = self.opt['skip_no_title']
        if not shared:
            self._preprocess_data()
        else:
            self.titles_data = shared['titles_data']

    @classmethod
    def add_cmdline_args(cls, parser, partial_opt=None):
        """
        Register the parent arguments plus the flags specific to this teacher.
        """
        super().add_cmdline_args(parser, partial_opt=partial_opt)
        agent = parser.add_argument_group('Wikipedia Page Title Arguments')
        agent.add_argument(
            '--conversation-history-length',
            type=int,
            default=-1,
            # -1 is the "include all" sentinel; 0 is rejected in __init__.
            help='Number of previous utterances to keep in context, -1 (default) includes all',
        )
        # NOTE(review): reviewers discussed which default is most useful for
        # this flag; it is currently True (examples without a title are skipped).
        agent.add_argument(
            '--skip-no-title',
            type='bool',
            default=True,
            help=(
                'Whether to skip the example if no passage was selected. If `false` '
                f'uses `{TOKEN_NOCHOSEN}` instead of title if no knowledge source was selected.'
            ),
        )
        return parser

    def share(self):
        """
        Share the preprocessed title examples along with the parent's state.
        """
        shared = super().share()
        shared['titles_data'] = self.titles_data
        return shared

    def _generate_messages(self, hist, action):
        """
        Build one title-prediction example.

        :param hist:
            list of utterances so far (oldest first).
        :param action:
            message from the parent teacher; its `title` field becomes the label.

        :return:
            a single-turn Message whose text is the (possibly truncated) history
            joined by newlines.
        """
        # Keep only the most recent utterances when a positive limit is set;
        # the -1 sentinel keeps the whole history.
        include_hist = (
            hist[-self._conv_history_len :] if self._conv_history_len > 0 else hist
        )
        context = '\n'.join(include_hist)
        return Message(
            {
                'id': "Wikipedia Title Teacher",
                'text': context,
                'labels': [action["title"]],
                'episode_done': True,
            }
        )

    def _should_include(self, act):
        """
        Return False only when skipping is enabled and no knowledge was chosen.
        """
        return not (self._skip_no_title and act['labels'][0] == TOKEN_NOCHOSEN)

    def _preprocess_data(self):
        """
        Walk every parent episode and flatten it into title examples.

        The result is stored in `self.titles_data`.
        """
        data = []
        # super() is used explicitly because this class overrides
        # num_episodes/num_examples/get to serve the flattened data.
        for episode_idx in range(super().num_episodes()):
            dialog_history = []
            ex_idx = 0
            while True:
                a = super().get(episode_idx, ex_idx)
                text_parts = a['text'].split('\n')
                if ex_idx == 0:
                    # throwing away chosen_topic
                    text_parts = text_parts[1:]
                if text_parts:
                    dialog_history.append(text_parts[0])
                title_act = self._generate_messages(dialog_history, a)
                if self._should_include(title_act):
                    data.append(title_act)
                if a['episode_done']:
                    break
                ex_idx += 1
                # The parent's label for this turn becomes part of the context
                # for the next example.
                dialog_history.append(a['labels'][0])

        logging.info(
            f'{len(data)} title generation examples generated '
            f'from {super().num_examples()} original examples'
        )
        self.titles_data = data

    def num_episodes(self):
        """
        Return the number of generated title examples.
        """
        return len(self.titles_data)

    def num_examples(self):
        """
        Return the same count as `num_episodes` (one example per episode).
        """
        return self.num_episodes()

    def get(self, episode_idx, entry_idx=0):
        """
        Return the example at `episode_idx`; `entry_idx` is not used.
        """
        return self.titles_data[episode_idx]
|
||
|
||
#################################################### | ||
# # | ||
# Doc Reader Teachers # | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
46 changes: 46 additions & 0 deletions
46
parlai/tasks/wizard_of_wikipedia/test/wizard_of_wikipedia_wiki_page_title_test.yml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
acts: | ||
- - episode_done: true | ||
eval_labels: | ||
- Royal Blue (train) | ||
id: WikiPageTitleTeacher | ||
text: 'Blue is my favorite primary color. | ||
|
||
Blue is always nice. I like royal blue.' | ||
- - episode_done: true | ||
eval_labels: | ||
- Blue Skies (1946 film) | ||
id: WikiPageTitleTeacher | ||
text: 'Blue is my favorite primary color. | ||
|
||
Blue is always nice. I like royal blue. | ||
|
||
I once road on The Royal Blue train from New York to D.C | ||
|
||
Oh that sounds really nice. I bet there was a lot of scenery and blue skies.' | ||
- - episode_done: true | ||
eval_labels: | ||
- Cinematography | ||
id: WikiPageTitleTeacher | ||
text: Hi buddy, What you think about cinematography | ||
- - episode_done: true | ||
eval_labels: | ||
- Cinematography | ||
id: WikiPageTitleTeacher | ||
text: "Hi buddy, What you think about cinematography\nCinematography,is a type\ | ||
\ of motion picture , captured electronically by means of an image \nYes buddy,\ | ||
\ Images captured with an electronic image-sensor, produces an electrical charge.The\ | ||
\ word \"cinematography\" is based on the Greek words meaning movement, motion." | ||
- - episode_done: true | ||
eval_labels: | ||
- Photography | ||
id: WikiPageTitleTeacher | ||
text: "Hi buddy, What you think about cinematography\nCinematography,is a type\ | ||
\ of motion picture , captured electronically by means of an image \nYes buddy,\ | ||
\ Images captured with an electronic image-sensor, produces an electrical charge.The\ | ||
\ word \"cinematography\" is based on the Greek words meaning movement, motion.\n\ | ||
It works by lens used to repeatedly focus the light reflected from objects into\ | ||
\ real images on the light-sensitive surface .\n Muybridge sequence of a horse\ | ||
\ galloping In the 1830s, moving images were produced on revolving drums and\ | ||
\ disks" | ||
num_episodes: 3181 | ||
num_examples: 3181 |
48 changes: 48 additions & 0 deletions
48
parlai/tasks/wizard_of_wikipedia/test/wizard_of_wikipedia_wiki_page_title_train.yml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
acts: | ||
- - episode_done: true | ||
id: WikiPageTitleTeacher | ||
labels: | ||
- Science fiction film | ||
text: 'I think science fiction is an amazing genre for anything. Future science, | ||
technology, time travel, FTL travel, they''re all such interesting concepts. | ||
|
||
I''m a huge fan of science fiction myself! ' | ||
- - episode_done: true | ||
id: WikiPageTitleTeacher | ||
labels: | ||
- Time travel in fiction | ||
text: "I think science fiction is an amazing genre for anything. Future science,\ | ||
\ technology, time travel, FTL travel, they're all such interesting concepts.\n\ | ||
I'm a huge fan of science fiction myself! \nAwesome! I really love how sci-fi\ | ||
\ storytellers focus on political/social/philosophical issues that would still\ | ||
\ be around even in the future. Makes them relatable.\nI agree. One of my favorite\ | ||
\ forms of science fiction is anything related to time travel! I find it fascinating." | ||
- - episode_done: true | ||
id: WikiPageTitleTeacher | ||
labels: | ||
- Science fiction | ||
text: "I think science fiction is an amazing genre for anything. Future science,\ | ||
\ technology, time travel, FTL travel, they're all such interesting concepts.\n\ | ||
I'm a huge fan of science fiction myself! \nAwesome! I really love how sci-fi\ | ||
\ storytellers focus on political/social/philosophical issues that would still\ | ||
\ be around even in the future. Makes them relatable.\nI agree. One of my favorite\ | ||
\ forms of science fiction is anything related to time travel! I find it fascinating.\n\ | ||
It's not quite sci-fi, but my favorite version of time travel is in Harry Potter\ | ||
\ and the Prisoner of Azkaban. Breaks zero logical rules.\nAnd that's difficult\ | ||
\ to do when dealing with time travel. I actually haven't seen the latest Harry\ | ||
\ Potter movies. Guess it's time to check them out!" | ||
- - episode_done: true | ||
id: WikiPageTitleTeacher | ||
labels: | ||
- Internet access | ||
text: 'Can you imagine the world without internet access? ' | ||
- - episode_done: true | ||
id: WikiPageTitleTeacher | ||
labels: | ||
- Internet access | ||
text: "Can you imagine the world without internet access? \nNo I could not! I\ | ||
\ couldn't imagine living when internet access was rare and very few people\ | ||
\ had it!\nOh me either! It seems like such a long time ago. I wonder when Internet\ | ||
\ was first created?" | ||
num_episodes: 60797 | ||
num_examples: 60797 |
61 changes: 61 additions & 0 deletions
61
parlai/tasks/wizard_of_wikipedia/test/wizard_of_wikipedia_wiki_page_title_valid.yml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
acts: | ||
- - episode_done: true | ||
eval_labels: | ||
- Gardening | ||
id: WikiPageTitleTeacher | ||
text: I like Gardening, even when I've only been doing it for a short time. | ||
- - episode_done: true | ||
eval_labels: | ||
- Gardening | ||
id: WikiPageTitleTeacher | ||
text: 'I like Gardening, even when I''ve only been doing it for a short time. | ||
|
||
I live on a farm, we garden all year long, it is very relaxing. | ||
|
||
That sounds great. I''ve always thought that I would love living in a farm, | ||
but I;ve always lived in the city. What do you mostly plant?' | ||
- - episode_done: true | ||
eval_labels: | ||
- Gardening | ||
id: WikiPageTitleTeacher | ||
text: 'I like Gardening, even when I''ve only been doing it for a short time. | ||
|
||
I live on a farm, we garden all year long, it is very relaxing. | ||
|
||
That sounds great. I''ve always thought that I would love living in a farm, | ||
but I;ve always lived in the city. What do you mostly plant? | ||
|
||
I have planted several fruits trees, tomatoes, jalepenos, bell peppers, onions, | ||
Garlic, and potatoes mostly. | ||
|
||
Great, I love the idea of growing my own vegetables and fruits! Do you have | ||
animals in the farm?' | ||
- - episode_done: true | ||
eval_labels: | ||
- Gardening | ||
id: WikiPageTitleTeacher | ||
text: 'I like Gardening, even when I''ve only been doing it for a short time. | ||
|
||
I live on a farm, we garden all year long, it is very relaxing. | ||
|
||
That sounds great. I''ve always thought that I would love living in a farm, | ||
but I;ve always lived in the city. What do you mostly plant? | ||
|
||
I have planted several fruits trees, tomatoes, jalepenos, bell peppers, onions, | ||
Garlic, and potatoes mostly. | ||
|
||
Great, I love the idea of growing my own vegetables and fruits! Do you have | ||
animals in the farm? | ||
|
||
yes i do. Cows, chickens, Micro pigs, Guinneas, We also do forest growing also. | ||
we plants large pine trees. | ||
|
||
Wow, it sounds amazing, the Micro-pigs are so cute! are they trainable to be | ||
well behaved?' | ||
- - episode_done: true | ||
eval_labels: | ||
- Bob Ross | ||
id: WikiPageTitleTeacher | ||
text: I would like to know more about bob ross | ||
num_episodes: 3236 | ||
num_examples: 3236 |
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
can we make the default -1? and have that map to including all? making 0 mean includes all doesn't seem right (as one could argue this should include... none)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I changed default to `-1`, and added something to change any non-positive value to that.